From df70dd4be13ea20c86ba3b0fa138756cb4b01ea4 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 16 Jan 2025 11:43:18 +0100 Subject: [PATCH] Update to `crate-2.0.0`, which uses `orjson` for JSON marshalling --- CHANGES.md | 1 + cratedb_toolkit/cfr/systable.py | 4 --- cratedb_toolkit/util/sqlalchemy.py | 51 +----------------------------- pyproject.toml | 3 +- tests/sqlalchemy/test_patch.py | 17 ++++++---- 5 files changed, 15 insertions(+), 61 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 5866a569..5e899d7e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## Unreleased - Fixed connectivity for `jobstats collect` - Refactored code and improved CLI interface of `ctk info` vs. `ctk cfr` +- Dependencies: Updated to `crate-2.0.0`, which uses `orjson` for JSON marshalling ## 2025/01/13 v0.0.30 - Dependencies: Minimize dependencies of core installation, diff --git a/cratedb_toolkit/cfr/systable.py b/cratedb_toolkit/cfr/systable.py index 18159cb1..679aeb71 100644 --- a/cratedb_toolkit/cfr/systable.py +++ b/cratedb_toolkit/cfr/systable.py @@ -32,7 +32,6 @@ from cratedb_toolkit.info.core import InfoContainer from cratedb_toolkit.util import DatabaseAdapter from cratedb_toolkit.util.cli import error_logger -from cratedb_toolkit.util.sqlalchemy import patch_encoder logger = logging.getLogger(__name__) @@ -256,6 +255,3 @@ def load_table(self, path: Path) -> "pl.DataFrame": return pl.read_parquet(path) else: raise NotImplementedError(f"Input format not implemented: {path.suffix}") - - -patch_encoder() diff --git a/cratedb_toolkit/util/sqlalchemy.py b/cratedb_toolkit/util/sqlalchemy.py index 6d591ca4..7838106d 100644 --- a/cratedb_toolkit/util/sqlalchemy.py +++ b/cratedb_toolkit/util/sqlalchemy.py @@ -4,57 +4,8 @@ TODO: Refactor to `crate` or `sqlalchemy-cratedb` packages. """ -import calendar -import datetime as dt -import json -from decimal import Decimal -from uuid import UUID - -from sqlalchemy_cratedb.dialect import TYPES_MAP - -try: - import numpy as np - - has_numpy = True -except ImportError: - has_numpy = False - from sqlalchemy import types as sqltypes - - -def patch_encoder(): - import crate.client.http - - crate.client.http.CrateJsonEncoder = CrateJsonEncoderWithNumPy - - -class CrateJsonEncoderWithNumPy(json.JSONEncoder): - epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc) - epoch_naive = dt.datetime(1970, 1, 1) - - def default(self, o): - # Vanilla CrateDB Python. - if isinstance(o, (Decimal, UUID)): - return str(o) - if isinstance(o, dt.datetime): - if o.tzinfo is not None: - delta = o - self.epoch_aware - else: - delta = o - self.epoch_naive - return int(delta.microseconds / 1000.0 + (delta.seconds + delta.days * 24 * 3600) * 1000.0) - if isinstance(o, dt.date): - return calendar.timegm(o.timetuple()) * 1000 - - # NumPy ndarray and friends. - # https://stackoverflow.com/a/49677241 - if has_numpy: - if isinstance(o, np.integer): - return int(o) - elif isinstance(o, np.floating): - return float(o) - elif isinstance(o, np.ndarray): - return o.tolist() - return json.JSONEncoder.default(self, o) +from sqlalchemy_cratedb.dialect import TYPES_MAP def patch_types_map(): diff --git a/pyproject.toml b/pyproject.toml index 87a5d7d1..045f1b9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ dependencies = [ "colorama<1", "colorlog", "crash", + "crate>=2.0.0.dev3", "cratedb-sqlparse==0.0.10", 'importlib-metadata; python_version < "3.8"', 'importlib-resources; python_version < "3.9"', @@ -100,7 +101,7 @@ dependencies = [ "python-dotenv<2", "python-slugify<9", "pyyaml<7", - "sqlalchemy-cratedb>=0.40,<1", + "sqlalchemy-cratedb @ git+https://github.com/crate/sqlalchemy-cratedb.git@crate-orjson", "sqlparse<0.6", "tqdm<5", 'typing-extensions<5; python_version <= "3.7"', diff --git a/tests/sqlalchemy/test_patch.py b/tests/sqlalchemy/test_patch.py index 0d44eb29..15d81dc7 100644 --- a/tests/sqlalchemy/test_patch.py +++ b/tests/sqlalchemy/test_patch.py @@ -1,10 +1,9 @@ import datetime -import json import pytest import sqlalchemy as sa +from crate.client.http import json_dumps -from cratedb_toolkit.util.sqlalchemy import CrateJsonEncoderWithNumPy from tests.conftest import TESTDRIVE_DATA_SCHEMA @@ -48,18 +47,24 @@ def test_inspector_patched(database, needs_sqlalchemy2): def test_json_encoder_date(): """ Verify the extended JSON encoder also accepts Python's `date` types. + + TODO: Move to different test file, as this no longer requires + monkeypatching after using orjson for marshalling. """ data = {"date": datetime.date(2024, 6, 4)} - encoded = json.dumps(data, cls=CrateJsonEncoderWithNumPy) - assert encoded == '{"date": 1717459200000}' + encoded = json_dumps(data) + assert encoded == b'{"date":"2024-06-04"}' def test_json_encoder_numpy(): """ Verify the extended JSON encoder also accepts NumPy types. + + TODO: Move to different test file, as this no longer requires + monkeypatching after using orjson for marshalling. """ np = pytest.importorskip("numpy") data = {"scalar-int": np.float32(42.42).astype(int), "scalar-float": np.float32(42.42), "ndarray": np.ndarray([1])} - encoded = json.dumps(data, cls=CrateJsonEncoderWithNumPy) - assert encoded == """{"scalar-int": 42, "scalar-float": 42.41999816894531, "ndarray": [2.08e-322]}""" + encoded = json_dumps(data) + assert encoded == b"""{"scalar-int":42,"scalar-float":42.42,"ndarray":[2.08e-322]}"""