diff --git a/CHANGELOG.md b/CHANGELOG.md index fa182614..9357de28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## Unreleased + +### New features + +### Breaking changes +* Separate completely Rust logs and Python logs. Rust logs are configured through RUST_LOG, + S3_CONNECTOR_ENABLE_CRT_LOGS and S3_CONNECTOR_LOGS_DIR_PATH environment variables. + + ## v1.2.0 (March 13, 2024) ### New features @@ -11,6 +20,7 @@ ### Breaking changes * No breaking changes. + ## v1.1.4 (February 26, 2024) ### New features @@ -37,7 +47,6 @@ * No breaking changes. - ## v1.1.2 (January 19, 2024) ### New features @@ -48,7 +57,6 @@ * No breaking changes. - ## v1.1.1 (December 11, 2023) ### New features @@ -69,13 +77,12 @@ * No breaking changes. - ## v1.0.0 (November 22, 2023) * The Amazon S3 Connector for PyTorch delivers high throughput for PyTorch training jobs that access and store data in Amazon S3. ### New features -* S3IterableDataset and S3MapDataset, which allow building either an iterable-style or map-style dataset, using your S3 -stored data, by specifying an S3 URI (a bucket and optional prefix) and the region the bucket is in. +* S3IterableDataset and S3MapDataset, which allow building either an iterable-style or map-style dataset, using your S3 + stored data, by specifying an S3 URI (a bucket and optional prefix) and the region the bucket is in. * Support for multiprocess data loading for the above datasets. * S3Checkpoint, an interface for saving and loading model checkpoints directly to and from an S3 bucket. diff --git a/doc/DEVELOPMENT.md b/doc/DEVELOPMENT.md index ecfd25a9..ab201b31 100644 --- a/doc/DEVELOPMENT.md +++ b/doc/DEVELOPMENT.md @@ -92,3 +92,34 @@ Fill in the path of the Python executable in your virtual environment (`venv/bin as the program argument. Then put a breakpoint in the Rust/C code and try running it. +#### Enabling Logging +The Python logger handles the logging messages from the Python implementation. +The logs of our Rust components are handled through a +[tracing_subscriber](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/). + +You have the following configuration options to filter Rust log messages: +- Default - If you do not have `RUST_LOG` environment variable set up at all, Rust logging will cover `ERROR` level + of Mountpoint S3 Client Rust component. + +- Mountpoint S3 Client logs - Configure [RUST_LOG](https://docs.rs/env_logger/latest/env_logger/#enabling-logging) variable. For example, setting + `RUST_LOG=debug` will enable logging of DEBUG messages from Mountpoint S3 Client. + +- CRT logs - Configure `S3_CONNECTOR_ENABLE_CRT_LOGS` variable, similarly to + [RUST_LOG](https://docs.rs/env_logger/latest/env_logger/#enabling-logging). This will enable the logs from CRT + component and override the `RUST_LOG` setup. + + **Please note that these logs are very noisy, do NOT enable them unless otherwise instructed.** + +Additionally, you can set up `S3_CONNECTOR_LOGS_DIR_PATH` with the path to a local folder where you have WRITE +permissions. When this is set up, the Rust logs will be written at this location, +with `s3torchconnectorclient.log` prefix, rolling on an hourly basis. + +Example: +- Enable Mountpoint S3 Client logs with DEBUG level to be written at `/tmp/s3torchconnector-logs` +```sh + export RUST_LOG=debug + export S3_CONNECTOR_LOGS_DIR_PATH="/tmp/s3torchconnector-logs" + python ./my_test.py +``` +After running this, you will find the logs under `/tmp/s3torchconnector-logs`. The logging messages will be appended +to the most recent file of format `s3torchconnectorclient.log.YYYY-MM-DD-HH`. \ No newline at end of file diff --git a/s3torchconnectorclient/Cargo.lock b/s3torchconnectorclient/Cargo.lock index bf039de9..2f65cb48 100644 --- a/s3torchconnectorclient/Cargo.lock +++ b/s3torchconnectorclient/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "ahash" -version = "0.8.8" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", "once_cell", @@ -31,13 +31,15 @@ checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] name = "async-channel" -version = "1.9.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +checksum = "f28243a43d821d11341ab73c80bed182dc015c514b951616cf79bd4af39af0c3" dependencies = [ "concurrent-queue", - "event-listener 2.5.3", + "event-listener 5.2.0", + "event-listener-strategy 0.5.0", "futures-core", + "pin-project-lite", ] [[package]] @@ -66,7 +68,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d034b430882f8381900d3fe6f0aaa3ad94f2cb4ac519b429692a1bc2dda4ae7b" dependencies = [ "event-listener 4.0.3", - "event-listener-strategy", + "event-listener-strategy 0.4.0", "pin-project-lite", ] @@ -290,15 +292,20 @@ dependencies = [ [[package]] name = "event-listener" -version = "2.5.3" +version = "4.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" +checksum = "67b215c49b2b248c855fb73579eb1f4f26c38ffdc12973e20e07b91d78d5646e" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] [[package]] name = "event-listener" -version = "4.0.3" +version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b215c49b2b248c855fb73579eb1f4f26c38ffdc12973e20e07b91d78d5646e" +checksum = "2b5fb89194fa3cad959b833185b3063ba881dbfc7030680b314250779fb4cc91" dependencies = [ "concurrent-queue", "parking", @@ -315,6 +322,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "event-listener-strategy" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feedafcaa9b749175d5ac357452a9d41ea2911da598fde46ce1fe02c37751291" +dependencies = [ + "event-listener 5.2.0", + "pin-project-lite", +] + [[package]] name = "fastrand" version = "2.0.1" @@ -625,26 +642,14 @@ dependencies = [ [[package]] name = "metrics" -version = "0.21.1" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde3af1a009ed76a778cb84fdef9e7dbbdf5775ae3e4cc1f434a6a307f6f76c5" +checksum = "cd71d9db2e4287c3407fa04378b8c2ee570aebe0854431562cdd89ca091854f4" dependencies = [ "ahash", - "metrics-macros", "portable-atomic", ] -[[package]] -name = "metrics-macros" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b4faf00617defe497754acde3024865bc143d44a86799b24e191ecff91354f" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.48", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -653,9 +658,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "mountpoint-s3-client" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f4a6aa3c34c8327d4723f0fc80301e89a226b97f36fb80b65110b6a1df8ea6" +checksum = "f590cf9c81d96c727d61edac60f03f781a62d3c766208af39eb0ee038c66be1a" dependencies = [ "async-io", "async-lock", @@ -670,6 +675,7 @@ dependencies = [ "md-5", "metrics", "mountpoint-s3-crt", + "mountpoint-s3-crt-sys", "once_cell", "percent-encoding", "pin-project", @@ -687,9 +693,9 @@ dependencies = [ [[package]] name = "mountpoint-s3-crt" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9bf9ef7a549496401d3009181504896c513b842ef285d51e0a1583dab1fcd35" +checksum = "b66a617b42bd5216f78746ccd3a769aa6291ec82790c875b6faba8ef1d4a8acc" dependencies = [ "async-channel", "futures", @@ -703,9 +709,9 @@ dependencies = [ [[package]] name = "mountpoint-s3-crt-sys" -version = "0.5.3" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282f6f10de9d91ff31b45bc45aa37b101721eeddb1c7a80f011755a93d3055f3" +checksum = "837d2772731bfe53bf43999214ef5897b22a62f6dc1a67c0ab679ff2a47a765c" dependencies = [ "bindgen", "cc", @@ -1471,14 +1477,15 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "which" -version = "4.4.2" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +checksum = "7fa5e0c10bf77f44aac573e498d1a82d5fbd5e91f6fc0a99e7be4b38e85e101c" dependencies = [ "either", "home", "once_cell", "rustix", + "windows-sys 0.52.0", ] [[package]] diff --git a/s3torchconnectorclient/Cargo.toml b/s3torchconnectorclient/Cargo.toml index 034649c8..4be68b46 100644 --- a/s3torchconnectorclient/Cargo.toml +++ b/s3torchconnectorclient/Cargo.toml @@ -19,8 +19,8 @@ built = "0.7" pyo3 = { version = "0.19.2" } pyo3-log = "0.8.3" futures = "0.3.28" -mountpoint-s3-client = { version = "0.7.0", features = ["mock"] } -mountpoint-s3-crt = "0.6.1" +mountpoint-s3-client = { version = "0.8.0", features = ["mock"] } +mountpoint-s3-crt = "0.6.2" log = "0.4.20" tracing = { version = "0.1.40", default-features = false, features = ["std", "log"] } tracing-subscriber = { version = "0.3.18", features = ["fmt", "env-filter"]} diff --git a/s3torchconnectorclient/python/tst/integration/test_logging.py b/s3torchconnectorclient/python/tst/integration/test_logging.py index 31bf9214..a2ed161a 100644 --- a/s3torchconnectorclient/python/tst/integration/test_logging.py +++ b/s3torchconnectorclient/python/tst/integration/test_logging.py @@ -3,9 +3,12 @@ import os.path import sys import tempfile +from typing import List import pytest +from conftest import BucketPrefixFixture + PYTHON_TEST_CODE = """ import logging import os @@ -13,10 +16,16 @@ s3_uri = sys.argv[1] region = sys.argv[2] -os.environ["ENABLE_CRT_LOGS"] = sys.argv[3] -logs_dir_path = sys.argv[4] +enable_crt_logs = sys.argv[3] +default_rust_log = sys.argv[4] +logs_dir_path = sys.argv[5] + +if enable_crt_logs != "": + os.environ["S3_CONNECTOR_ENABLE_CRT_LOGS"] = enable_crt_logs +if default_rust_log != "": + os.environ["RUST_LOG"] = default_rust_log if logs_dir_path != "": - os.environ["CRT_LOGS_DIR_PATH"] = logs_dir_path + os.environ["S3_CONNECTOR_LOGS_DIR_PATH"] = logs_dir_path from s3torchconnector import S3MapDataset @@ -35,7 +44,7 @@ @pytest.mark.parametrize( - "log_level, should_contain, should_not_contain", + "enable_crt_logs, should_contain, should_not_contain", [ ( "info", @@ -68,17 +77,22 @@ ("OFF", ["INFO s3torchconnector.s3map_dataset"], ["awscrt"]), ], ) -def test_logging_valid(log_level, should_contain, should_not_contain, image_directory): - out, err = _start_subprocess(log_level, image_directory) +def test_crt_logging( + enable_crt_logs: str, + should_contain: List[str], + should_not_contain: List[str], + image_directory: BucketPrefixFixture, +): + out, err = _start_subprocess(image_directory, enable_crt_logs=enable_crt_logs) assert err == "" assert all(s in out for s in should_contain) assert all(s not in out for s in should_not_contain) -def test_logging_to_file(image_directory): +def test_logging_to_file_env_filters_unset(image_directory: BucketPrefixFixture): with tempfile.TemporaryDirectory() as log_dir: print("Created temporary directory", log_dir) - out, err = _start_subprocess("INFO", image_directory, log_dir) + out, err = _start_subprocess(image_directory, logs_directory=log_dir) # Standard output contains Python output assert err == "" assert "INFO s3torchconnector.s3map_dataset" in out @@ -89,14 +103,84 @@ def test_logging_to_file(image_directory): assert os.path.isfile(log_file) with open(log_file) as f: file_content = f.read() - assert all(s in file_content for s in ["awscrt", "INFO"]) - assert all( - s not in file_content - for s in ["INFO s3torchconnector.s3map_dataset", "DEBUG", "TRACE"] - ) + assert file_content == "" + + +def test_default_logging_env_filters_unset(image_directory: BucketPrefixFixture): + out, err = _start_subprocess(image_directory) + # Standard output contains Python output + assert err == "" + assert "INFO s3torchconnector.s3map_dataset" in out + assert "awscrt" not in out + + +@pytest.mark.parametrize( + "enable_crt_logs, default_rust_log, out_should_contain, out_should_not_contain, file_should_contain, file_should_not_contain", + [ + ( + "INFO", + "", + ["INFO s3torchconnector.s3map_dataset"], + ["awscrt"], + ["awscrt", "INFO"], + ["INFO s3torchconnector.s3map_dataset", "DEBUG", "TRACE"], + ), + ( + "", + "DEBUG", + ["INFO s3torchconnector.s3map_dataset"], + ["awscrt"], + ["DEBUG", "mountpoint_s3_client"], + ["awscrt", "INFO s3torchconnector.s3map_dataset", "TRACE"], + ), + ( + "DEBUG", + "TRACE", + ["INFO s3torchconnector.s3map_dataset"], + ["awscrt"], + ["DEBUG", "mountpoint_s3_client", "awscrt"], + ["INFO s3torchconnector.s3map_dataset", "TRACE awscrt"], + ), + ], +) +def test_logging_to_file( + enable_crt_logs: str, + default_rust_log: str, + out_should_contain: List[str], + out_should_not_contain: List[str], + file_should_contain: List[str], + file_should_not_contain: List[str], + image_directory: BucketPrefixFixture, +): + with tempfile.TemporaryDirectory() as log_dir: + print("Created temporary directory", log_dir) + out, err = _start_subprocess( + image_directory, + enable_crt_logs=enable_crt_logs, + default_rust_log=default_rust_log, + logs_directory=log_dir, + ) + # Standard output contains Python output + assert err == "" + assert all(s in out for s in out_should_contain) + assert all(s not in out for s in out_should_not_contain) + files = os.listdir(log_dir) + assert len(files) == 1 + log_file = os.path.join(log_dir, files[0]) + assert os.path.isfile(log_file) + with open(log_file) as f: + file_content = f.read() + assert all(s in file_content for s in file_should_contain) + assert all(s not in file_content for s in file_should_not_contain) -def _start_subprocess(log_level, image_directory, logs_directory=""): +def _start_subprocess( + image_directory: BucketPrefixFixture, + *, + enable_crt_logs: str = "", + default_rust_log: str = "", + logs_directory: str = "" +): process = subprocess.Popen( [ sys.executable, @@ -104,7 +188,8 @@ def _start_subprocess(log_level, image_directory, logs_directory=""): PYTHON_TEST_CODE, image_directory.s3_uri, image_directory.region, - log_level, + enable_crt_logs, + default_rust_log, logs_directory, ], stdout=subprocess.PIPE, diff --git a/s3torchconnectorclient/python/tst/integration/test_mountpoint_s3_integration.py b/s3torchconnectorclient/python/tst/integration/test_mountpoint_s3_integration.py index 55065514..85094001 100644 --- a/s3torchconnectorclient/python/tst/integration/test_mountpoint_s3_integration.py +++ b/s3torchconnectorclient/python/tst/integration/test_mountpoint_s3_integration.py @@ -3,22 +3,26 @@ import hashlib import logging import math +import os import pickle import sys import uuid import random import pytest +os.environ["RUST_LOG"] = "DEBUG" + from s3torchconnectorclient._mountpoint_s3_client import ( MountpointS3Client, S3Exception, ListObjectStream, ) +from conftest import BucketPrefixFixture + logging.basicConfig( format="%(levelname)s %(name)s %(asctime)-15s %(filename)s:%(lineno)d %(message)s" ) -logging.getLogger().setLevel(1) log = logging.getLogger(__name__) @@ -26,7 +30,7 @@ TEST_USER_AGENT_PREFIX = "integration-tests" -def test_get_object(sample_directory): +def test_get_object(sample_directory: BucketPrefixFixture): client = MountpointS3Client(sample_directory.region, TEST_USER_AGENT_PREFIX) stream = client.get_object( sample_directory.bucket, f"{sample_directory.prefix}hello_world.txt" @@ -36,7 +40,7 @@ def test_get_object(sample_directory): assert full_data == HELLO_WORLD_DATA -def test_get_object_with_endpoint(sample_directory): +def test_get_object_with_endpoint(sample_directory: BucketPrefixFixture): client = MountpointS3Client( sample_directory.region, TEST_USER_AGENT_PREFIX, @@ -50,7 +54,7 @@ def test_get_object_with_endpoint(sample_directory): assert full_data == HELLO_WORLD_DATA -def test_get_object_with_unpickled_client(sample_directory): +def test_get_object_with_unpickled_client(sample_directory: BucketPrefixFixture): original_client = MountpointS3Client( sample_directory.region, TEST_USER_AGENT_PREFIX ) @@ -66,7 +70,7 @@ def test_get_object_with_unpickled_client(sample_directory): assert full_data == HELLO_WORLD_DATA -def test_get_object_invalid_bucket(sample_directory): +def test_get_object_invalid_bucket(sample_directory: BucketPrefixFixture): client = MountpointS3Client(sample_directory.region, TEST_USER_AGENT_PREFIX) with pytest.raises(S3Exception, match="Service error: The bucket does not exist"): next( @@ -76,7 +80,7 @@ def test_get_object_invalid_bucket(sample_directory): ) -def test_get_object_invalid_prefix(sample_directory): +def test_get_object_invalid_prefix(sample_directory: BucketPrefixFixture): client = MountpointS3Client(sample_directory.region, TEST_USER_AGENT_PREFIX) with pytest.raises(S3Exception, match="Service error: The key does not exist"): next( @@ -86,7 +90,7 @@ def test_get_object_invalid_prefix(sample_directory): ) -def test_list_objects(image_directory): +def test_list_objects(image_directory: BucketPrefixFixture): client = MountpointS3Client(image_directory.region, TEST_USER_AGENT_PREFIX) stream = client.list_objects(image_directory.bucket) @@ -100,7 +104,7 @@ def test_list_objects(image_directory): assert keys > expected_img_10_keys -def test_list_objects_with_prefix(image_directory): +def test_list_objects_with_prefix(image_directory: BucketPrefixFixture): client = MountpointS3Client(image_directory.region, TEST_USER_AGENT_PREFIX) stream = client.list_objects(image_directory.bucket, image_directory.prefix) @@ -113,7 +117,7 @@ def test_list_objects_with_prefix(image_directory): assert keys == expected_img_10_keys -def test_multi_list_requests_return_same_list(image_directory): +def test_multi_list_requests_return_same_list(image_directory: BucketPrefixFixture): client = MountpointS3Client(image_directory.region, TEST_USER_AGENT_PREFIX) stream = client.list_objects(image_directory.bucket, image_directory.prefix) @@ -141,7 +145,9 @@ def test_multi_list_requests_return_same_list(image_directory): ), ], ) -def test_put_object(filename: str, content: bytes, put_object_tests_directory): +def test_put_object( + filename: str, content: bytes, put_object_tests_directory: BucketPrefixFixture +): client = MountpointS3Client( put_object_tests_directory.region, TEST_USER_AGENT_PREFIX ) @@ -160,7 +166,7 @@ def test_put_object(filename: str, content: bytes, put_object_tests_directory): assert b"".join(get_stream) == content -def test_put_object_overwrite(put_object_tests_directory): +def test_put_object_overwrite(put_object_tests_directory: BucketPrefixFixture): client = MountpointS3Client( put_object_tests_directory.region, TEST_USER_AGENT_PREFIX ) @@ -189,7 +195,9 @@ def test_put_object_overwrite(put_object_tests_directory): @pytest.mark.parametrize("max_keys", {1, 4, 1000}) -def test_s3_list_object_with_continuation(max_keys: int, image_directory): +def test_s3_list_object_with_continuation( + max_keys: int, image_directory: BucketPrefixFixture +): client = MountpointS3Client(image_directory.region, TEST_USER_AGENT_PREFIX) stream = client.list_objects( image_directory.bucket, prefix=image_directory.prefix, max_keys=max_keys @@ -229,7 +237,9 @@ def test_s3_list_object_with_continuation(max_keys: int, image_directory): ), ], ) -def test_put_object_mpu(part_count: int, part_size: int, put_object_tests_directory): +def test_put_object_mpu( + part_count: int, part_size: int, put_object_tests_directory: BucketPrefixFixture +): data_to_write = randbytes(part_count * part_size) client = MountpointS3Client( @@ -250,7 +260,7 @@ def test_put_object_mpu(part_count: int, part_size: int, put_object_tests_direct assert b"".join(get_stream) == data_to_write -def test_head_object(sample_directory): +def test_head_object(sample_directory: BucketPrefixFixture): client = MountpointS3Client(sample_directory.region, TEST_USER_AGENT_PREFIX) object_info = client.head_object( sample_directory.bucket, @@ -270,7 +280,7 @@ def test_head_object(sample_directory): assert object_info.etag == expected_etag -def test_delete_object(sample_directory): +def test_delete_object(sample_directory: BucketPrefixFixture): client = MountpointS3Client(sample_directory.region, TEST_USER_AGENT_PREFIX) client.delete_object( sample_directory.bucket, @@ -285,7 +295,7 @@ def test_delete_object(sample_directory): ) -def test_delete_object_does_not_exist(empty_directory): +def test_delete_object_does_not_exist(empty_directory: BucketPrefixFixture): client = MountpointS3Client(empty_directory.region, TEST_USER_AGENT_PREFIX) client.delete_object( empty_directory.bucket, f"{empty_directory.prefix}hello_world.txt" @@ -293,7 +303,7 @@ def test_delete_object_does_not_exist(empty_directory): # Assert no exception is thrown in case the object already doesn't exist - implicit -def test_delete_object_invalid_bucket(empty_directory): +def test_delete_object_invalid_bucket(empty_directory: BucketPrefixFixture): client = MountpointS3Client(empty_directory.region, TEST_USER_AGENT_PREFIX) with pytest.raises(S3Exception, match="Service error: The bucket does not exist"): client.delete_object( @@ -311,5 +321,5 @@ def _parse_list_result(stream: ListObjectStream, max_keys: int): return object_infos -def randbytes(n): +def randbytes(n: int): return random.getrandbits(n * 8).to_bytes(n, sys.byteorder) diff --git a/s3torchconnectorclient/python/tst/unit/test_mountpoint_s3_client.py b/s3torchconnectorclient/python/tst/unit/test_mountpoint_s3_client.py index 2f199a56..450b69dc 100644 --- a/s3torchconnectorclient/python/tst/unit/test_mountpoint_s3_client.py +++ b/s3torchconnectorclient/python/tst/unit/test_mountpoint_s3_client.py @@ -4,9 +4,9 @@ import logging import pickle import pytest -from typing import Set +from typing import Set, Optional -from s3torchconnectorclient import LOG_TRACE, __version__ +from s3torchconnectorclient import __version__ from s3torchconnectorclient._mountpoint_s3_client import ( S3Exception, GetObjectStream, @@ -19,7 +19,6 @@ logging.basicConfig( format="%(levelname)s %(name)s %(asctime)-15s %(filename)s:%(lineno)d %(message)s" ) -logging.getLogger().setLevel(LOG_TRACE) REGION = "us-east-1" @@ -126,7 +125,7 @@ def test_get_object_throws_stop_iteration(): (set()), ], ) -def test_list_objects(expected_keys): +def test_list_objects(expected_keys: Set[str]): mock_client = MockMountpointS3Client(REGION, MOCK_BUCKET) for key in expected_keys: mock_client.add_object(key, b"") @@ -284,7 +283,9 @@ def test_mountpoint_client_pickles(): ("https://us-east-1.amazonaws.com", "https://us-east-1.amazonaws.com"), ], ) -def test_mountpoint_client_creation_with_region_and_endpoint(endpoint, expected): +def test_mountpoint_client_creation_with_region_and_endpoint( + endpoint: Optional[str], expected: Optional[str] +): client = MountpointS3Client(region=REGION, endpoint=endpoint) assert isinstance(client, MountpointS3Client) assert client.endpoint == expected diff --git a/s3torchconnectorclient/rust/src/logger_setup.rs b/s3torchconnectorclient/rust/src/logger_setup.rs index c62cde8b..33d59750 100644 --- a/s3torchconnectorclient/rust/src/logger_setup.rs +++ b/s3torchconnectorclient/rust/src/logger_setup.rs @@ -3,62 +3,54 @@ * // SPDX-License-Identifier: BSD */ use std::{env}; -use log::{LevelFilter}; use mountpoint_s3_crt::common::rust_log_adapter::RustLogAdapter; use pyo3::{PyResult}; -use pyo3_log::Logger; use tracing_subscriber::{filter::EnvFilter}; use tracing_subscriber::util::{SubscriberInitExt}; use crate::exception::python_exception; -pub const ENABLE_CRT_LOGS_ENV_VAR: &str = "ENABLE_CRT_LOGS"; -pub const CRT_LOGS_DIR_PATH_ENV_VAR: &str = "CRT_LOGS_DIR_PATH"; +pub const ENABLE_CRT_LOGS_ENV_VAR: &str = "S3_CONNECTOR_ENABLE_CRT_LOGS"; +pub const LOGS_DIR_PATH_ENV_VAR: &str = "S3_CONNECTOR_LOGS_DIR_PATH"; pub fn setup_logging() -> PyResult<()> { let enable_crt_logs = env::var(ENABLE_CRT_LOGS_ENV_VAR); + let mut filter= EnvFilter::from_default_env(); if enable_crt_logs.is_ok() { - enable_crt_logging() - } else { - enable_default_logging() + RustLogAdapter::try_init().map_err(python_exception)?; + filter = EnvFilter::from_env(ENABLE_CRT_LOGS_ENV_VAR); } -} - -fn enable_crt_logging() -> PyResult<()> { - RustLogAdapter::try_init().map_err(python_exception)?; - let filter = EnvFilter::try_from_env(ENABLE_CRT_LOGS_ENV_VAR).map_err(python_exception)?; - let crt_logs_path = env::var(CRT_LOGS_DIR_PATH_ENV_VAR).ok(); + let crt_logs_path = env::var(LOGS_DIR_PATH_ENV_VAR).ok(); match crt_logs_path { Some(logs_path) => { - let logfile = tracing_appender::rolling::hourly(logs_path, "s3torchconnectorclient.log"); - let subscriber_builder = tracing_subscriber::fmt() - .with_writer(logfile) - .with_env_filter(filter) - .with_ansi(false); - subscriber_builder.finish().try_init().map_err(python_exception)?; - }, + enable_file_logging(filter, logs_path)?; + } None => { - let subscriber_builder = tracing_subscriber::fmt() - .with_env_filter(filter) - .with_ansi(false); - subscriber_builder.finish().try_init().map_err(python_exception)?; + enable_default_logging(filter)?; } } Ok(()) } -fn enable_default_logging() -> PyResult<()> { - let logger = Logger::default() - .filter_target( - "mountpoint_s3_client::s3_crt_client::request".to_owned(), - LevelFilter::Off, - ) - .filter(LevelFilter::Trace); +fn enable_file_logging(filter: EnvFilter, logs_path: String) -> PyResult<()> { + let logfile = tracing_appender::rolling::hourly(logs_path, "s3torchconnectorclient.log"); + let subscriber_builder = tracing_subscriber::fmt() + .with_writer(logfile) + .with_env_filter(filter) + .with_ansi(false); + subscriber_builder.finish().try_init().map_err(python_exception)?; + + Ok(()) +} - logger.install().map_err(python_exception)?; +fn enable_default_logging(filter: EnvFilter) -> PyResult<()> { + let subscriber_builder = tracing_subscriber::fmt() + .with_env_filter(filter) + .with_ansi(false); + subscriber_builder.finish().try_init().map_err(python_exception)?; Ok(()) } @@ -70,7 +62,7 @@ mod tests { use pyo3::PyResult; use crate::logger_setup::{ENABLE_CRT_LOGS_ENV_VAR, setup_logging}; - fn check_valid_log_level(log_level: &str) { + fn check_valid_crt_log_level(log_level: &str) { pyo3::prepare_freethreaded_python(); env::set_var(ENABLE_CRT_LOGS_ENV_VAR, log_level); let result: PyResult<()> = setup_logging(); @@ -79,41 +71,66 @@ mod tests { rusty_fork_test! { #[test] - fn test_environment_variable_unset() { + fn test_crt_environment_variable_unset() { pyo3::prepare_freethreaded_python(); env::remove_var(ENABLE_CRT_LOGS_ENV_VAR); let result: PyResult<()> = setup_logging(); assert!(result.is_ok()); } + #[test] + fn test_rust_log_environment_variable_unset() { + pyo3::prepare_freethreaded_python(); + env::remove_var("RUST_LOG"); + let result: PyResult<()> = setup_logging(); + assert!(result.is_ok()); + } + #[test] - fn test_logging_off() { - check_valid_log_level("OFF"); + fn test_crt_logging_off() { + check_valid_crt_log_level("OFF"); } #[test] - fn test_logging_level_error() { - check_valid_log_level("ERROR"); + fn test_crt_logging_level_error() { + check_valid_crt_log_level("ERROR"); } #[test] - fn test_logging_level_warn() { - check_valid_log_level("WARN"); + fn test_crt_logging_level_warn() { + check_valid_crt_log_level("WARN"); } #[test] - fn test_logging_level_info() { - check_valid_log_level("INFO"); + fn test_crt_logging_level_info() { + check_valid_crt_log_level("INFO"); } #[test] - fn test_logging_level_debug() { - check_valid_log_level("debug"); + fn test_crt_logging_level_debug() { + check_valid_crt_log_level("debug"); } #[test] - fn test_logging_level_trace() { - check_valid_log_level("trace"); + fn test_crt_logging_level_trace() { + check_valid_crt_log_level("trace"); + } + + #[test] + fn test_default_logging_level_debug() { + pyo3::prepare_freethreaded_python(); + env::set_var("RUST_LOG", "debug"); + let result: PyResult<()> = setup_logging(); + assert!(result.is_ok()); + } + + #[test] + fn test_set_both_logging_levels() { + pyo3::prepare_freethreaded_python(); + env::set_var("RUST_LOG", "debug"); + env::set_var(ENABLE_CRT_LOGS_ENV_VAR, "info"); + let result: PyResult<()> = setup_logging(); + assert!(result.is_ok()); } } }