Skip to content

Commit

Permalink
Update to crate-2.0.0, which uses orjson for JSON marshalling
Browse files: browse the repository at this point in the history
  • Loading branch information
amotl committed Jan 16, 2025
1 parent a69a9ca commit 053cea5
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 61 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased
- Fixed connectivity for `jobstats collect`
- Refactored code and improved CLI interface of `ctk info` vs. `ctk cfr`
- Dependencies: Updated to `crate-2.0.0`, which uses `orjson` for JSON marshalling

## 2025/01/13 v0.0.30
- Dependencies: Minimize dependencies of core installation,
Expand Down
4 changes: 0 additions & 4 deletions cratedb_toolkit/cfr/systable.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from cratedb_toolkit.info.core import InfoContainer
from cratedb_toolkit.util import DatabaseAdapter
from cratedb_toolkit.util.cli import error_logger
from cratedb_toolkit.util.sqlalchemy import patch_encoder

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -256,6 +255,3 @@ def load_table(self, path: Path) -> "pl.DataFrame":
return pl.read_parquet(path)
else:
raise NotImplementedError(f"Input format not implemented: {path.suffix}")


patch_encoder()
51 changes: 1 addition & 50 deletions cratedb_toolkit/util/sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,57 +4,8 @@
TODO: Refactor to `crate` or `sqlalchemy-cratedb` packages.
"""

import calendar
import datetime as dt
import json
from decimal import Decimal
from uuid import UUID

from sqlalchemy_cratedb.dialect import TYPES_MAP

try:
import numpy as np

has_numpy = True
except ImportError:
has_numpy = False

from sqlalchemy import types as sqltypes


def patch_encoder():
import crate.client.http

crate.client.http.CrateJsonEncoder = CrateJsonEncoderWithNumPy


class CrateJsonEncoderWithNumPy(json.JSONEncoder):
epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
epoch_naive = dt.datetime(1970, 1, 1)

def default(self, o):
# Vanilla CrateDB Python.
if isinstance(o, (Decimal, UUID)):
return str(o)
if isinstance(o, dt.datetime):
if o.tzinfo is not None:
delta = o - self.epoch_aware
else:
delta = o - self.epoch_naive
return int(delta.microseconds / 1000.0 + (delta.seconds + delta.days * 24 * 3600) * 1000.0)
if isinstance(o, dt.date):
return calendar.timegm(o.timetuple()) * 1000

# NumPy ndarray and friends.
# https://stackoverflow.com/a/49677241
if has_numpy:
if isinstance(o, np.integer):
return int(o)
elif isinstance(o, np.floating):
return float(o)
elif isinstance(o, np.ndarray):
return o.tolist()
return json.JSONEncoder.default(self, o)
from sqlalchemy_cratedb.dialect import TYPES_MAP


def patch_types_map():
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ dependencies = [
"python-dotenv<2",
"python-slugify<9",
"pyyaml<7",
"sqlalchemy-cratedb>=0.40,<1",
"sqlalchemy-cratedb>=0.41.0.dev1",
"sqlparse<0.6",
"tqdm<5",
'typing-extensions<5; python_version <= "3.7"',
Expand Down
17 changes: 11 additions & 6 deletions tests/sqlalchemy/test_patch.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import datetime
import json

import pytest
import sqlalchemy as sa
from crate.client.http import json_dumps

from cratedb_toolkit.util.sqlalchemy import CrateJsonEncoderWithNumPy
from tests.conftest import TESTDRIVE_DATA_SCHEMA


Expand Down Expand Up @@ -48,18 +47,24 @@ def test_inspector_patched(database, needs_sqlalchemy2):
def test_json_encoder_date():
"""
Verify the extended JSON encoder also accepts Python's `date` types.
TODO: Move to a different test file, as this no longer requires
monkeypatching now that orjson is used for marshalling.
"""
data = {"date": datetime.date(2024, 6, 4)}
encoded = json.dumps(data, cls=CrateJsonEncoderWithNumPy)
assert encoded == '{"date": 1717459200000}'
encoded = json_dumps(data)
assert encoded == b'{"date":"2024-06-04"}'


def test_json_encoder_numpy():
"""
Verify the extended JSON encoder also accepts NumPy types.
TODO: Move to a different test file, as this no longer requires
monkeypatching now that orjson is used for marshalling.
"""
np = pytest.importorskip("numpy")

data = {"scalar-int": np.float32(42.42).astype(int), "scalar-float": np.float32(42.42), "ndarray": np.ndarray([1])}
encoded = json.dumps(data, cls=CrateJsonEncoderWithNumPy)
assert encoded == """{"scalar-int": 42, "scalar-float": 42.41999816894531, "ndarray": [2.08e-322]}"""
encoded = json_dumps(data)
assert encoded == b"""{"scalar-int":42,"scalar-float":42.42,"ndarray":[2.08e-322]}"""

0 comments on commit 053cea5

Please sign in to comment.