Skip to content

Commit

Permalink
perf(python): get object __dict__ for faster field read/write (#2003)
Browse files Browse the repository at this point in the history
## What does this PR do?

This PR extracts the object's `__dict__` when serializing a dataclass without
`__slots__`, making attribute get/set faster by 16%.

## Related issues

<!--
Is there any related issue? Please attach here.

- #xxxx0
- #xxxx1
- #xxxx2
-->

## Does this PR introduce any user-facing change?

<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->

- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?

## Benchmark

For the following object:
```python
COMPLEX_OBJECT = ComplexObject1(
    f1=ComplexObject2(f1=True, f2={-1: 2}),
    f2="abc",
    f3=["abc", "abc"],
    f4={1: 2},
    f5=2**7 - 1,
    f6=2**15 - 1,
    f7=2**31 - 1,
    f8=2**63 - 1,
    f9=1.0 / 2,
    f10=1 / 3.0,
    f12=[-1, 4],
)
```
This PR gives a 16% speedup.
  • Loading branch information
chaokunyang authored Jan 12, 2025
1 parent b1ce978 commit d49816c
Showing 1 changed file with 21 additions and 4 deletions.
25 changes: 21 additions & 4 deletions python/pyfury/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def __init__(self, fury, clz: type):
# This will get superclass type hints too.
self._type_hints = typing.get_type_hints(clz)
self._field_names = sorted(self._type_hints.keys())
self._has_slots = hasattr(clz, "__slots__")
# TODO compute hash
self._hash = len(self._field_names)
self._generated_write_method = self._gen_write_method()
Expand All @@ -300,16 +301,21 @@ def __init__(self, fury, clz: type):
def _gen_write_method(self):
context = {}
counter = itertools.count(0)
buffer, fury, value = "buffer", "fury", "value"
buffer, fury, value, value_dict = "buffer", "fury", "value", "value_dict"
context[fury] = self.fury
stmts = [
f'"""write method for {self.type_}"""',
f"{buffer}.write_int32({self._hash})",
]
if not self._has_slots:
stmts.append(f"{value_dict} = {value}.__dict__")
for field_name in self._field_names:
field_type = self._type_hints[field_name]
field_value = f"field_value{next(counter)}"
stmts.append(f"{field_value} = {value}.{field_name}")
if not self._has_slots:
stmts.append(f"{field_value} = {value_dict}['{field_name}']")
else:
stmts.append(f"{field_value} = {value}.{field_name}")
if field_type is bool:
stmts.extend(gen_write_nullable_basic_stmts(buffer, field_value, bool))
elif field_type == int:
Expand All @@ -332,7 +338,13 @@ def _gen_write_method(self):

def _gen_read_method(self):
context = dict(_jit_context)
buffer, fury, obj_class, obj = "buffer", "fury", "obj_class", "obj"
buffer, fury, obj_class, obj, obj_dict = (
"buffer",
"fury",
"obj_class",
"obj",
"obj_dict",
)
ref_resolver = "ref_resolver"
context[fury] = self.fury
context[obj_class] = self.type_
Expand All @@ -346,9 +358,14 @@ def _gen_read_method(self):
f""" raise ClassNotCompatibleError(
"Hash read_hash is not consistent with {self._hash} for {self.type_}")""",
]
if not self._has_slots:
stmts.append(f"{obj_dict} = {obj}.__dict__")

def set_action(value: str):
return f"{obj}.{field_name} = {value}"
if not self._has_slots:
return f"{obj_dict}['{field_name}'] = {value}"
else:
return f"{obj}.{field_name} = {value}"

for field_name in self._field_names:
field_type = self._type_hints[field_name]
Expand Down

0 comments on commit d49816c

Please sign in to comment.