diff --git a/src/neptune_scale/api/attribute.py b/src/neptune_scale/api/attribute.py index a4d914a..8b228df 100644 --- a/src/neptune_scale/api/attribute.py +++ b/src/neptune_scale/api/attribute.py @@ -23,6 +23,7 @@ from neptune_scale.sync.files.queue import FileUploadQueue from neptune_scale.sync.metadata_splitter import MetadataSplitter from neptune_scale.sync.operations_queue import OperationsQueue +from neptune_scale.sync.parameters import MAX_FILE_UPLOAD_BUFFER_SIZE from neptune_scale.sync.util import arg_to_datetime __all__ = ("Attribute", "AttributeStore") @@ -223,7 +224,7 @@ def extend( @warn_unsupported_kwargs def upload( self, - path: Optional[Path] = None, + path: Optional[str] = None, *, data: Optional[Union[str, bytes]] = None, mime_type: Optional[str] = None, @@ -236,7 +237,7 @@ def upload( if data is not None: verify_type("data", data, (str, bytes, type(None))) - verify_max_length("data", data, 10 * 1024**2) + verify_max_length("data", data, MAX_FILE_UPLOAD_BUFFER_SIZE) verify_type("mime_type", mime_type, (str, type(None))) verify_type("target_basename", target_basename, (str, type(None))) @@ -254,7 +255,7 @@ def upload( local_path = Path(path) if not local_path.exists(): - raise ValueError(f"Path `{path}` does not exist") + raise FileNotFoundError(f"Path `{path}` does not exist") if not local_path.is_file(): raise ValueError(f"Path `{path}` is not a file") diff --git a/src/neptune_scale/sync/parameters.py b/src/neptune_scale/sync/parameters.py index f3d4b7f..dbdf91e 100644 --- a/src/neptune_scale/sync/parameters.py +++ b/src/neptune_scale/sync/parameters.py @@ -31,3 +31,8 @@ # Status tracking MAX_REQUESTS_STATUS_BATCH_SIZE = 1000 + +# Files + +# Maximum size of file data provided via a buffer (as opposed to a file on a filesystem) +MAX_FILE_UPLOAD_BUFFER_SIZE = 10 * 1024**2 # 10MB diff --git a/tests/unit/test_file_upload.py b/tests/unit/test_file_upload.py index 592ddb7..ccc1d39 100644 --- a/tests/unit/test_file_upload.py +++ b/tests/unit/test_file_upload.py @@ -1,3 +1,4 @@ +import tempfile from datetime import datetime from pathlib import Path from queue import Empty @@ -112,7 +113,7 @@ def test_queue_wait_for_completion(queue): assert queue.wait_for_completion(timeout=1) -def test_successful_upload(worker, queue, errors_queue): +def test_successful_upload_from_buffer(worker, queue, errors_queue): data = b"test" def expect_bytes(source, _url, _mime_type): @@ -141,6 +142,56 @@ def expect_bytes(source, _url, _mime_type): errors_queue.get(timeout=1) +def test_successful_upload_from_file(worker, queue, errors_queue): + data = b"test" + + def expect_bytes(source, _url, _mime_type): + assert source.read() == data + + with ( + patch("neptune_scale.sync.files.worker.upload_file", Mock(side_effect=expect_bytes)) as upload_file, + tempfile.NamedTemporaryFile() as temp_file, + ): + temp_file.write(data) + temp_file.flush() + + queue.submit( + attribute_path="attr.txt", + local_path=Path(temp_file.name), + data=None, + target_path=None, + target_basename=None, + timestamp=datetime.now(), + ) + assert queue.wait_for_completion(timeout=10) + assert queue.active_uploads == 0 + + worker.close() + + worker._request_upload_url.assert_called_once() + worker._submit_attribute.assert_called_once() + worker._wait_for_completion.assert_called_once() + + upload_file.assert_called_once() + with pytest.raises(Empty): + errors_queue.get(timeout=1) + + +def test_file_does_not_exist(worker, queue, errors_queue): + queue.submit( + attribute_path="attr.txt", + local_path=Path("/does/not/exist"), + data=None, + target_path=None, + target_basename=None, + timestamp=datetime.now(), + ) + assert queue.wait_for_completion(timeout=10) + assert queue.active_uploads == 0 + + assert isinstance(errors_queue.get(timeout=1), FileNotFoundError) + + def test_upload_error(worker, queue, errors_queue): """Trigger an error in upload_file and check if the error is propagated to the errors_queue.""" error = NeptuneScaleError(message="This error is expected to happen") diff --git a/tests/unit/test_log_file.py b/tests/unit/test_log_file.py new file mode 100644 index 0000000..4ebb90f --- /dev/null +++ b/tests/unit/test_log_file.py @@ -0,0 +1,80 @@ +import tempfile +from pathlib import Path +from unittest.mock import Mock + +import pytest +from pytest import fixture + +from neptune_scale import Run +from neptune_scale.sync.parameters import MAX_FILE_UPLOAD_BUFFER_SIZE + + +@fixture +def run(api_token): + run = Run(project="workspace/project", api_token=api_token, run_id="run_id", mode="disabled") + run._attr_store.upload_file = Mock() + + return run + + +def test_data_and_path_arguments(run): + with pytest.raises(ValueError) as exc: + run.log_file("file.txt") + + exc.match("Either `path` or `data` must be provided") + + with pytest.raises(ValueError) as exc: + run.log_file("file.txt", data=b"", path="/some/file.txt") + + exc.match("Only one of `path` or `data` can be provided") + + +def test_too_large_data(run): + with pytest.raises(ValueError) as exc: + run.log_file("file.txt", data=b"a" * MAX_FILE_UPLOAD_BUFFER_SIZE + b"foo") + + exc.match("must not exceed") + + +def test_file_upload_not_a_file(run): + with pytest.raises(ValueError) as exc, tempfile.TemporaryDirectory() as temp_dir: + run.log_file("file.txt", path=temp_dir) + + exc.match("is not a file") + + +def test_file_upload_file_does_not_exist(run): + with pytest.raises(FileNotFoundError) as exc: + run.log_file("file.txt", path="/does/not/exist") + + exc.match("does not exist") + + +def test_file_upload_with_data(run): + run.log_file("file.txt", data=b"foo") + + run._attr_store.upload_file.assert_called_once_with( + attribute_path="file.txt", + data=b"foo", + local_path=None, + target_basename=None, + target_path=None, + timestamp=None, + ) + + +def test_file_upload_with_local_file(run): + with tempfile.NamedTemporaryFile() as temp_file: + temp_file.write(b"foo") + temp_file.flush() + + run.log_file("file.txt", path=temp_file.name) + + run._attr_store.upload_file.assert_called_once_with( + attribute_path="file.txt", + data=None, + local_path=Path(temp_file.name), + target_basename=None, + target_path=None, + timestamp=None, + )