Skip to content

Commit

Permalink
Safer conversion of unicode arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Jan 12, 2024
1 parent 26897a9 commit 8dcfe58
Showing 1 changed file with 20 additions and 5 deletions.
25 changes: 20 additions & 5 deletions python-src/fastpdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,11 @@ def set_structure(self, atoms):

# Write 'ATOM' and 'MODEL' records
# Convert Unicode arrays into uint32 arrays for usage in Rust
chain_id = np.frombuffer(atoms.chain_id, dtype=np.uint32).reshape(-1, 4)
ins_code = np.frombuffer(atoms.ins_code, dtype=np.uint32).reshape(-1, 1)
res_name = np.frombuffer(atoms.res_name, dtype=np.uint32).reshape(-1, 5)
atom_name = np.frombuffer(atoms.atom_name, dtype=np.uint32).reshape(-1, 6)
element = np.frombuffer(atoms.element, dtype=np.uint32).reshape(-1, 2)
chain_id = _convert_unicode_to_uint32(atoms.chain_id)
ins_code = _convert_unicode_to_uint32(atoms.ins_code)
res_name = _convert_unicode_to_uint32(atoms.res_name)
atom_name = _convert_unicode_to_uint32(atoms.atom_name)
element = _convert_unicode_to_uint32(atoms.element)

categories = atoms.get_annotation_categories()
atom_id = atoms.atom_id if "atom_id" in categories else None
Expand Down Expand Up @@ -320,3 +320,18 @@ def _index_models_and_atoms(self):
self._pdb_file.index_models_and_atoms()
self._model_start_i = self._pdb_file.model_start_i
self._atom_line_i = self._pdb_file.atom_line_i


def _convert_unicode_to_uint32(array):
"""
Convert a unicode string array into a 2D uint32 array.
The second dimension corresponds to the character position within a
string.
"""
dtype = array.dtype
if not np.issubdtype(dtype, np.str_):
raise TypeError("Expected unicode string array")
length = array.shape[0]
n_char = dtype.itemsize // 4
return np.frombuffer(array, dtype=np.uint32).reshape(length, n_char)

0 comments on commit 8dcfe58

Please sign in to comment.