Skip to content

Commit

Permalink
Refactor PDB and PSE Conversion code (#13)
Browse files Browse the repository at this point in the history
* stub iterator based views on the underlying data

* refactor the file conversions to their own directory
  • Loading branch information
zachcp authored Oct 28, 2024
1 parent fd5c537 commit ed0a0d9
Show file tree
Hide file tree
Showing 7 changed files with 475 additions and 190 deletions.
2 changes: 2 additions & 0 deletions ferritin-core/src/conversions/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Conversions from external structure representations into `AtomCollection`,
// one private submodule per source format.
mod pdb; // `From<&pdbtbx::PDB>` implementation
mod pse; // `From<&ferritin_pymol::PSEData>` implementation
119 changes: 119 additions & 0 deletions ferritin-core/src/conversions/pdb.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
use crate::core::AtomCollection;
use itertools::Itertools;
use pdbtbx::{Element, PDB};

impl From<&PDB> for AtomCollection {
    /// Flattens a parsed [`PDB`] into a column-oriented [`AtomCollection`].
    ///
    /// The PDB API exposes atoms through the hierarchy
    /// PDB --> Chain --> Residue --> Atom, so that hierarchy is walked once and
    /// every per-atom field is appended to its own column.
    ///
    /// Behaviour worth knowing about:
    /// * atoms for which `element()` yields `None` are left out entirely, so the
    ///   resulting size can be smaller than the atom count of the input;
    /// * the residue insertion code is discarded and the residue number is
    ///   narrowed with `as i32`;
    /// * a residue without a name contributes an empty string;
    /// * no bonds are passed to the constructor; `connect_via_residue_names` is
    ///   invoked on the finished collection instead.
    fn from(pdb_data: &PDB) -> Self {
        // One column per attribute; all columns grow in lock-step.
        let mut coords: Vec<[f32; 3]> = Vec::new();
        let mut is_hetero: Vec<bool> = Vec::new();
        let mut atom_names: Vec<String> = Vec::new();
        let mut res_ids: Vec<i32> = Vec::new();
        let mut res_names: Vec<String> = Vec::new();
        let mut elements: Vec<Element> = Vec::new();
        let mut chain_ids: Vec<String> = Vec::new();

        for chain in pdb_data.chains() {
            let chain_label = chain.id().to_string();

            for residue in chain.residues() {
                let (serial_number, _insertion_code) = residue.id();
                let residue_number = serial_number as i32;
                let residue_label = residue.name().unwrap_or_default().to_string();

                for atom in residue.atoms() {
                    // Atoms without a known element are skipped.
                    if let Some(element) = atom.element() {
                        let (x, y, z) = atom.pos();
                        coords.push([x as f32, y as f32, z as f32]);
                        is_hetero.push(atom.hetero());
                        atom_names.push(atom.name().to_string());
                        res_ids.push(residue_number);
                        res_names.push(residue_label.clone());
                        // `extend` stores the element the same way the unzip-based
                        // collection would, whether it arrives by value or by reference.
                        elements.extend([element]);
                        chain_ids.push(chain_label.clone());
                    }
                }
            }
        }

        let mut collection = AtomCollection::new(
            coords.len(),
            coords,
            res_ids,
            res_names,
            is_hetero,
            elements,
            atom_names,
            chain_ids,
            None,
        );

        collection.connect_via_residue_names();
        collection
    }
}

#[cfg(test)]
mod tests {
    use crate::core::AtomCollection;
    use itertools::Itertools;
    use pdbtbx::{self, Element};
    use std::path::PathBuf;

    /// Loads the `tests/data/101m.cif` fixture, converts it to an
    /// `AtomCollection` and spot-checks counts, residue ids, residue names and
    /// elements.
    #[test]
    fn test_pdb_from() {
        let cif_path: PathBuf = [env!("CARGO_MANIFEST_DIR"), "tests", "data", "101m.cif"]
            .iter()
            .collect();

        let (structure, _errors) = pdbtbx::open(cif_path.to_str().unwrap()).unwrap();
        assert_eq!(structure.atom_count(), 1413);

        // Atom and bond counts after conversion.
        let ac = AtomCollection::from(&structure);
        assert_eq!(ac.coords().len(), 1413);
        assert_eq!(ac.bonds().unwrap().len(), 1095);

        // The largest residue id in the collection is 338.
        let highest_res_id = ac.resids().into_iter().cloned().max().unwrap();
        assert_eq!(highest_res_id, 338);

        // Distinct residue names, alphabetically ordered.
        let distinct_res_names: Vec<String> = ac
            .resnames()
            .into_iter()
            .cloned()
            .unique()
            .sorted()
            .collect();
        let expected_res_names = [
            "ALA", "ARG", "ASN", "ASP", "GLN", "GLU", "GLY", "HEM", "HIS", "HOH", "ILE", "LEU",
            "LYS", "MET", "NBN", "PHE", "PRO", "SER", "SO4", "THR", "TRP", "TYR", "VAL",
        ];
        assert_eq!(distinct_res_names, expected_res_names);

        // Distinct elements, in `Element`'s own ordering.
        let distinct_elements: Vec<Element> = ac
            .elements()
            .into_iter()
            .cloned()
            .unique()
            .sorted()
            .collect();
        let expected_elements = [Element::C, Element::N, Element::O, Element::S, Element::Fe];
        assert_eq!(distinct_elements, expected_elements);
    }
}
91 changes: 91 additions & 0 deletions ferritin-core/src/conversions/pse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
use crate::core::{AtomCollection, Bond, BondOrder};
use ferritin_pymol::PSEData;
use itertools::Itertools;
use pdbtbx::Element;

impl From<&PSEData> for AtomCollection {
fn from(pse_data: &PSEData) -> Self {
let mols = pse_data.get_molecule_data();

// Pymol: most of the descriptive data is there
let atoms: Vec<&ferritin_pymol::pymolparsing::parsing::AtomInfo> =
mols.iter().flat_map(|mol| mol.atom.iter()).collect();

// Pymol: coord sets are maintained seperately.
let coord_sets: Vec<&ferritin_pymol::pymolparsing::parsing::CoordSet> =
mols.iter().flat_map(|mol| mol.coord_set.iter()).collect();

let coords: Vec<[f32; 3]> = coord_sets
.iter()
.flat_map(|c| c.get_coords_as_vec())
.collect();

// Pymol: most of the descriptive data is there
let pymol_bonds: Vec<&ferritin_pymol::pymolparsing::parsing::Bond> =
mols.iter().flat_map(|mol| mol.bond.iter()).collect();

let bonds = pymol_bonds
.iter()
.map(|b| Bond::new(b.index_1, b.index_2, BondOrder::match_bond(b.order)))
.collect();

// pull out specific fields
let (res_names, res_ids, chain_ids, is_hetero, elements, atom_names): (
Vec<String>,
Vec<i32>,
Vec<String>,
Vec<bool>,
Vec<Element>,
Vec<String>,
) = atoms
.iter()
.map(|a| {
(
a.resn.to_string(),
a.resv,
a.chain.to_string(),
a.is_hetatm,
a.elem,
a.name.to_string(),
)
})
.multiunzip();

AtomCollection::new(
atoms.len() as usize, // size
coords,
res_ids,
res_names,
is_hetero,
elements,
chain_ids,
atom_names,
Some(bonds), //bonds
)
}
}

#[cfg(test)]
mod tests {
    use crate::core::AtomCollection;
    use ferritin_pymol::PSEData;
    use std::path::PathBuf;

    /// Loads the `tests/data/example.pse` fixture, converts it to an
    /// `AtomCollection` and checks the atom, coordinate and bond counts.
    #[test]
    fn test_pse_from() {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let file_path = PathBuf::from(manifest_dir)
            .join("tests")
            .join("data")
            .join("example.pse");

        let psedata = PSEData::load(file_path.to_str().unwrap()).expect("local pse path");

        // Check the AtomCollection counts.
        let ac = AtomCollection::from(&psedata);
        assert_eq!(ac.size(), 1519);
        assert_eq!(ac.coords().len(), 1519);
        assert_eq!(ac.bonds().unwrap().len(), 1537); // 1537 bonds
    }
}
Loading

0 comments on commit ed0a0d9

Please sign in to comment.