-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor PDB and PSE Conversion code (#13)
* stub iterator based views on the underlying data * refactor the file conversions to their own directory
- Loading branch information
Showing
7 changed files
with
475 additions
and
190 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
mod pdb; | ||
mod pse; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
use crate::core::AtomCollection; | ||
use itertools::Itertools; | ||
use pdbtbx::{Element, PDB}; | ||
|
||
impl From<&PDB> for AtomCollection { | ||
// the PDB API requires us to iterate: | ||
// PDB --> Chain --> Residue --> Atom if we want data from all. | ||
// Here we collect all the data in one go and return an AtomCollection | ||
fn from(pdb_data: &PDB) -> Self { | ||
let (coords, is_hetero, atom_names, res_ids, res_names, elements, chain_ids): ( | ||
Vec<[f32; 3]>, | ||
Vec<bool>, | ||
Vec<String>, | ||
Vec<i32>, | ||
Vec<String>, | ||
Vec<Element>, | ||
Vec<String>, | ||
) = pdb_data | ||
.chains() | ||
.flat_map(|chain| { | ||
let chain_id = chain.id().to_string(); | ||
chain.residues().flat_map(move |residue| { | ||
let (res_number, _insertion_code) = residue.id(); | ||
let res_id = res_number as i32; | ||
let res_name = residue.name().unwrap_or_default().to_string(); | ||
let chain_id = chain_id.clone(); | ||
residue.atoms().filter_map(move |atom| { | ||
atom.element().map(|element| { | ||
let (x, y, z) = atom.pos(); | ||
( | ||
[x as f32, y as f32, z as f32], | ||
atom.hetero(), | ||
atom.name().to_string(), | ||
res_id, | ||
res_name.clone(), | ||
element, | ||
chain_id.clone(), | ||
) | ||
}) | ||
}) | ||
}) | ||
}) | ||
.multiunzip(); | ||
|
||
let mut ac = AtomCollection::new( | ||
coords.len(), | ||
coords, | ||
res_ids, | ||
res_names, | ||
is_hetero, | ||
elements, | ||
atom_names, | ||
chain_ids, | ||
None, | ||
); | ||
|
||
ac.connect_via_residue_names(); | ||
ac | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::core::AtomCollection;
    use itertools::Itertools;
    use pdbtbx::{self, Element};
    use std::path::PathBuf;

    /// Converts the bundled `101m.cif` fixture and checks atom/bond counts,
    /// the highest residue id, and the sets of residue names and elements.
    #[test]
    fn test_pdb_from() {
        let fixture: PathBuf = [env!("CARGO_MANIFEST_DIR"), "tests", "data", "101m.cif"]
            .iter()
            .collect();

        let (structure, _errors) = pdbtbx::open(fixture.to_str().unwrap()).unwrap();
        assert_eq!(structure.atom_count(), 1413);

        // Every parsed atom must survive the conversion.
        let collection = AtomCollection::from(&structure);
        assert_eq!(collection.coords().len(), 1413);
        assert_eq!(collection.bonds().unwrap().len(), 1095);

        // The highest residue id in the file is 338.
        let distinct_res_ids: Vec<i32> =
            collection.resids().into_iter().cloned().unique().collect();
        let highest_res_id = distinct_res_ids.iter().max().unwrap();
        assert_eq!(*highest_res_id, 338);

        // Distinct residue names, sorted alphabetically.
        let expected_res_names = [
            "ALA", "ARG", "ASN", "ASP", "GLN", "GLU", "GLY", "HEM", "HIS", "HOH", "ILE", "LEU",
            "LYS", "MET", "NBN", "PHE", "PRO", "SER", "SO4", "THR", "TRP", "TYR", "VAL",
        ];
        let distinct_res_names: Vec<String> = collection
            .resnames()
            .into_iter()
            .cloned()
            .unique()
            .sorted()
            .collect();
        assert_eq!(distinct_res_names, expected_res_names);

        // Distinct elements, in the order `sorted()` yields them.
        let expected_elements = [Element::C, Element::N, Element::O, Element::S, Element::Fe];
        let distinct_elements: Vec<Element> = collection
            .elements()
            .into_iter()
            .cloned()
            .unique()
            .sorted()
            .collect();
        assert_eq!(distinct_elements, expected_elements);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
use crate::core::{AtomCollection, Bond, BondOrder}; | ||
use ferritin_pymol::PSEData; | ||
use itertools::Itertools; | ||
use pdbtbx::Element; | ||
|
||
impl From<&PSEData> for AtomCollection { | ||
fn from(pse_data: &PSEData) -> Self { | ||
let mols = pse_data.get_molecule_data(); | ||
|
||
// Pymol: most of the descriptive data is there | ||
let atoms: Vec<&ferritin_pymol::pymolparsing::parsing::AtomInfo> = | ||
mols.iter().flat_map(|mol| mol.atom.iter()).collect(); | ||
|
||
// Pymol: coord sets are maintained seperately. | ||
let coord_sets: Vec<&ferritin_pymol::pymolparsing::parsing::CoordSet> = | ||
mols.iter().flat_map(|mol| mol.coord_set.iter()).collect(); | ||
|
||
let coords: Vec<[f32; 3]> = coord_sets | ||
.iter() | ||
.flat_map(|c| c.get_coords_as_vec()) | ||
.collect(); | ||
|
||
// Pymol: most of the descriptive data is there | ||
let pymol_bonds: Vec<&ferritin_pymol::pymolparsing::parsing::Bond> = | ||
mols.iter().flat_map(|mol| mol.bond.iter()).collect(); | ||
|
||
let bonds = pymol_bonds | ||
.iter() | ||
.map(|b| Bond::new(b.index_1, b.index_2, BondOrder::match_bond(b.order))) | ||
.collect(); | ||
|
||
// pull out specific fields | ||
let (res_names, res_ids, chain_ids, is_hetero, elements, atom_names): ( | ||
Vec<String>, | ||
Vec<i32>, | ||
Vec<String>, | ||
Vec<bool>, | ||
Vec<Element>, | ||
Vec<String>, | ||
) = atoms | ||
.iter() | ||
.map(|a| { | ||
( | ||
a.resn.to_string(), | ||
a.resv, | ||
a.chain.to_string(), | ||
a.is_hetatm, | ||
a.elem, | ||
a.name.to_string(), | ||
) | ||
}) | ||
.multiunzip(); | ||
|
||
AtomCollection::new( | ||
atoms.len() as usize, // size | ||
coords, | ||
res_ids, | ||
res_names, | ||
is_hetero, | ||
elements, | ||
chain_ids, | ||
atom_names, | ||
Some(bonds), //bonds | ||
) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::core::AtomCollection;
    use ferritin_pymol::PSEData;
    use std::path::PathBuf;

    /// Loads the bundled `example.pse` session and checks that the converted
    /// collection reports the expected atom, coordinate and bond counts.
    #[test]
    fn test_pse_from() {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let file_path = PathBuf::from(manifest_dir)
            .join("tests")
            .join("data")
            .join("example.pse");

        let psedata = PSEData::load(file_path.to_str().unwrap()).expect("local pse path");

        // Check AtomCollection numbers: size and coordinate count must agree.
        let ac = AtomCollection::from(&psedata);
        assert_eq!(ac.size(), 1519);
        assert_eq!(ac.coords().len(), 1519);
        assert_eq!(ac.bonds().unwrap().len(), 1537); // 1537 bonds
    }
}
Oops, something went wrong.