From 60ff7a36a987648a51ddcaabebdd194702702e5d Mon Sep 17 00:00:00 2001 From: zachcp Date: Mon, 28 Oct 2024 18:01:09 -0400 Subject: [PATCH] Create Selection API (#14) * update conversion file locations * update selection code * fix lifetime and borrowing issues * cargo-machete && bump --- Cargo.lock | 59 ++--- ferritin-core/src/core/atomcollection.rs | 231 ++++-------------- .../src/{ => core}/conversions/mod.rs | 0 .../src/{ => core}/conversions/pdb.rs | 0 .../src/{ => core}/conversions/pse.rs | 2 +- ferritin-core/src/core/mod.rs | 2 + ferritin-core/src/core/selection/mod.rs | 7 + ferritin-core/src/core/selection/selection.rs | 30 +++ ferritin-core/src/core/selection/selector.rs | 81 ++++++ ferritin-core/src/core/selection/view.rs | 72 ++++++ ferritin-core/src/lib.rs | 1 - ferritin-molviewspec/Cargo.toml | 8 - ferritin-pymol/Cargo.toml | 3 - 13 files changed, 265 insertions(+), 231 deletions(-) rename ferritin-core/src/{ => core}/conversions/mod.rs (100%) rename ferritin-core/src/{ => core}/conversions/pdb.rs (100%) rename ferritin-core/src/{ => core}/conversions/pse.rs (99%) create mode 100644 ferritin-core/src/core/selection/mod.rs create mode 100644 ferritin-core/src/core/selection/selection.rs create mode 100644 ferritin-core/src/core/selection/selector.rs create mode 100644 ferritin-core/src/core/selection/view.rs diff --git a/Cargo.lock b/Cargo.lock index df0be11f..87a6a359 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -180,9 +180,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338" dependencies = [ "anstyle", "anstyle-parse", @@ -195,36 +195,36 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1460,9 +1460,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "com" @@ -1901,9 +1901,9 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "fdeflate" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8090f921a24b04994d9929e204f50b498a33ea6ba559ffaa05e04f7ee7fb5ab" +checksum = "07c6f4c64c1d33a3111c4466f7365ebdcc37c5bd1ea0d62aae2e3d722aacbedb" dependencies = [ "simd-adler32", ] @@ -1933,17 +1933,9 @@ dependencies = [ name = "ferritin-molviewspec" version = "0.1.0" dependencies = [ - "bitflags 2.6.0", "chrono", - "clap", - "itertools", - "once_cell", - "pdbtbx", "serde", - "serde-pickle", - "serde_bytes", "serde_json", - "serde_repr", "urlencoding", "validator", ] @@ -1953,7 +1945,6 @@ name = "ferritin-pymol" version = "0.1.0" dependencies = [ "bitflags 2.6.0", - "chrono", "clap", "ferritin-molviewspec", "itertools", @@ -1964,8 +1955,6 @@ dependencies = [ "serde_bytes", "serde_json", "serde_repr", - "urlencoding", - "validator", ] [[package]] @@ -2601,9 +2590,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "a00419de735aac21d53b0de5ce2c03bd3627277cf471300f27ebc89f7d828047" [[package]] name = "libredox" @@ -3647,9 +3636,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno", @@ -3702,9 +3691,9 @@ checksum = "cd0b0ec5f1c1ca621c432a25813d8d60c88abe6d3e08a3eb9cf37d97a0fe3d73" [[package]] name = "serde" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] @@ -3733,9 +3722,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2 1.0.89", "quote 1.0.37", diff --git a/ferritin-core/src/core/atomcollection.rs b/ferritin-core/src/core/atomcollection.rs index aaca6db5..51c80964 100644 --- a/ferritin-core/src/core/atomcollection.rs +++ b/ferritin-core/src/core/atomcollection.rs @@ -1,113 +1,7 @@ use super::constants::get_bonds_canonical20; -use itertools::izip; -use itertools::Itertools; +use crate::core::selection::{AtomSelector, AtomView, Selection}; +use itertools::{izip, Itertools}; use pdbtbx::Element; -use std::ops::BitAnd; // import - -pub struct AtomSelector<'a> { - collection: &'a AtomCollection, - current_selection: Selection, -} - -impl<'a> AtomSelector<'a> { - // Chainable methods - pub fn chain(mut self, chain_id: &str) -> Self { - let chain_selection = self.collection.select_by_chain(chain_id); - self.current_selection = &self.current_selection & &chain_selection; - self - } - - pub fn residue(mut self, res_name: &str) -> Self { - let res_selection = self.collection.select_by_residue(res_name); - self.current_selection = &self.current_selection & &res_selection; - self - } - - pub fn element(mut self, element: Element) -> Self { - let element_selection = self - .collection - .elements - .iter() - .enumerate() - .filter(|(_, &e)| e == element) - .map(|(i, _)| i) - .collect(); - self.current_selection = &self.current_selection & &Selection::new(element_selection); - self - } - - pub fn sphere(mut self, center: [f32; 3], radius: f32) -> Self { - let sphere_selection = self - .collection - .coords - .iter() - .enumerate() - .filter(|(_, &pos)| { - let dx = pos[0] - center[0]; - let dy = pos[1] - center[1]; - let dz = pos[2] - center[2]; - (dx * dx + dy * dy + dz * dz).sqrt() <= radius - }) - .map(|(i, _)| i) - .collect(); - self.current_selection = &self.current_selection & &Selection::new(sphere_selection); - self - } - - // Custom predicate selection - pub fn filter(mut self, predicate: F) -> Self - where - F: Fn(usize) -> bool, - { - let filtered = self - .current_selection - .indices - .iter() - .filter(|&&idx| predicate(idx)) - .copied() - .collect(); - self.current_selection = Selection::new(filtered); - self - } - - // Finalize the selection and create a view - pub fn collect(&self) -> AtomView { - AtomView { - collection: self.collection, - selection: &self.current_selection, - } - } -} - -#[derive(Clone, Debug)] -pub struct Selection { - indices: Vec, -} - -impl Selection { - fn new(indices: Vec) -> Self { - Selection { indices } - } - - // Combine selections using & operator - fn and(&self, other: &Selection) -> Selection { - let indices: Vec = self - .indices - .iter() - .filter(|&&idx| other.indices.contains(&idx)) - .cloned() - .collect(); - Selection::new(indices) - } -} - -impl BitAnd for &Selection { - type Output = Selection; - - fn bitand(self, other: Self) -> Selection { - self.and(other) - } -} pub struct AtomCollection { size: usize, @@ -156,12 +50,8 @@ impl AtomCollection { } } pub fn select(&self) -> AtomSelector { - AtomSelector { - collection: self, - current_selection: Selection::new((0..self.size).collect()), - } + AtomSelector::new(self) } - pub fn size(&self) -> usize { self.size } @@ -189,7 +79,7 @@ impl AtomCollection { unimplemented!() } - pub fn calculate_distance(&self, atoms: AtomCollection) { + pub fn calculate_distance(&self, _atoms: AtomCollection) { // def distance(atoms1, atoms2, box=None): // """ // Measure the euclidian distance between atoms. @@ -361,75 +251,23 @@ impl AtomCollection { Selection::new(indices) } - pub fn view<'a, 'b>(&'a self, selection: &'b Selection) -> AtomView<'a, 'b> { - AtomView { - collection: self, - selection, - } + pub fn view(&self, selection: Selection) -> AtomView { + AtomView::new(self, selection) } -} - -pub struct AtomView<'a, 'b> { - collection: &'a AtomCollection, - selection: &'b Selection, -} - -impl<'a, 'b> IntoIterator for &'a AtomView<'a, 'b> { - type Item = AtomRef<'a>; - type IntoIter = AtomIterator<'a, 'b>; - - fn into_iter(self) -> Self::IntoIter { - AtomIterator { - view: self, - current: 0, - } + pub fn get_coord(&self, idx: usize) -> &[f32; 3] { + &self.coords[idx] } -} - -pub struct AtomRef<'a> { - pub coords: &'a [f32; 3], - pub res_id: &'a i32, - pub res_name: &'a String, - pub element: &'a Element, - // ... other fields -} - -pub struct AtomIterator<'a, 'b> { - view: &'a AtomView<'a, 'b>, - current: usize, -} - -impl<'a, 'b> Iterator for AtomIterator<'a, 'b> { - type Item = AtomRef<'a>; - - fn next(&mut self) -> Option { - if self.current >= self.view.selection.indices.len() { - return None; - } - - let idx = self.view.selection.indices[self.current]; - self.current += 1; - Some(AtomRef { - coords: &self.view.collection.coords[idx], - res_id: &self.view.collection.res_ids[idx], - res_name: &self.view.collection.res_names[idx], - element: &self.view.collection.elements[idx], - }) + pub fn get_res_id(&self, idx: usize) -> &i32 { + &self.res_ids[idx] } -} -impl<'a, 'b> AtomView<'a, 'b> { - pub fn coords(&self) -> Vec<[f32; 3]> { - self.selection - .indices - .iter() - .map(|&i| self.collection.coords[i]) - .collect() + pub fn get_res_name(&self, idx: usize) -> &String { + &self.res_names[idx] } - pub fn size(&self) -> usize { - self.selection.indices.len() + pub fn get_element(&self, idx: usize) -> &Element { + &self.elements[idx] } } @@ -494,20 +332,47 @@ impl BondOrder { #[cfg(test)] mod tests { - use crate::core::atomcollection::AtomCollection; - use itertools::Itertools; - use pdbtbx; + use crate::AtomCollection; + use pdbtbx::{self, Element}; use std::path::PathBuf; - #[test] - fn test_addbonds() { + fn get_file() -> PathBuf { let manifest_dir = env!("CARGO_MANIFEST_DIR"); - let file_path = PathBuf::from(manifest_dir) + PathBuf::from(manifest_dir) .join("tests") .join("data") - .join("101m.cif"); + .join("101m.cif") + } + #[test] + fn test_simple_conversion() { + let file_path = get_file(); let (pdb, _errors) = pdbtbx::open(file_path.to_str().unwrap()).unwrap(); assert_eq!(pdb.atom_count(), 1413); + let ac = AtomCollection::from(&pdb); + assert_eq!(ac.size(), 1413); + } + + #[test] + fn test_selection_api() { + let file_path = get_file(); + let (pdb, _errors) = pdbtbx::open(file_path.to_str().unwrap()).unwrap(); + let ac = AtomCollection::from(&pdb); + assert_eq!(ac.size(), 1413); + + let selected_atoms = ac + .select() + .chain("A") + .residue("GLY") + .element(Element::C) + .collect(); + + assert_eq!(selected_atoms.size(), 22); + + // let carbon_coords: Vec<[f32; 3]> = selected_atoms + // .into_iter() + // .filter(|atom| *atom.element == Element::C) + // .map(|atom| *atom.coords) + // .collect(); } } diff --git a/ferritin-core/src/conversions/mod.rs b/ferritin-core/src/core/conversions/mod.rs similarity index 100% rename from ferritin-core/src/conversions/mod.rs rename to ferritin-core/src/core/conversions/mod.rs diff --git a/ferritin-core/src/conversions/pdb.rs b/ferritin-core/src/core/conversions/pdb.rs similarity index 100% rename from ferritin-core/src/conversions/pdb.rs rename to ferritin-core/src/core/conversions/pdb.rs diff --git a/ferritin-core/src/conversions/pse.rs b/ferritin-core/src/core/conversions/pse.rs similarity index 99% rename from ferritin-core/src/conversions/pse.rs rename to ferritin-core/src/core/conversions/pse.rs index db5578d6..3e8b87fc 100644 --- a/ferritin-core/src/conversions/pse.rs +++ b/ferritin-core/src/core/conversions/pse.rs @@ -69,7 +69,7 @@ impl From<&PSEData> for AtomCollection { mod tests { use crate::core::AtomCollection; use ferritin_pymol::PSEData; - use pdbtbx; + use std::path::PathBuf; #[test] diff --git a/ferritin-core/src/core/mod.rs b/ferritin-core/src/core/mod.rs index 10b56ebc..add087ee 100644 --- a/ferritin-core/src/core/mod.rs +++ b/ferritin-core/src/core/mod.rs @@ -1,5 +1,7 @@ mod atomcollection; mod constants; +mod conversions; mod featureizer; +mod selection; pub use atomcollection::{AtomCollection, Bond, BondOrder}; diff --git a/ferritin-core/src/core/selection/mod.rs b/ferritin-core/src/core/selection/mod.rs new file mode 100644 index 00000000..bf88648e --- /dev/null +++ b/ferritin-core/src/core/selection/mod.rs @@ -0,0 +1,7 @@ +mod selection; +mod selector; +mod view; + +pub use selection::Selection; +pub use selector::AtomSelector; +pub use view::{AtomIterator, AtomRef, AtomView}; diff --git a/ferritin-core/src/core/selection/selection.rs b/ferritin-core/src/core/selection/selection.rs new file mode 100644 index 00000000..e3568587 --- /dev/null +++ b/ferritin-core/src/core/selection/selection.rs @@ -0,0 +1,30 @@ +use std::ops::BitAnd; + +#[derive(Clone, Debug)] +pub struct Selection { + pub(crate) indices: Vec, +} + +impl Selection { + pub fn new(indices: Vec) -> Self { + Selection { indices } + } + + pub fn and(&self, other: &Selection) -> Selection { + let indices: Vec = self + .indices + .iter() + .filter(|&&idx| other.indices.contains(&idx)) + .cloned() + .collect(); + Selection::new(indices) + } +} + +impl BitAnd for &Selection { + type Output = Selection; + + fn bitand(self, other: Self) -> Selection { + self.and(other) + } +} diff --git a/ferritin-core/src/core/selection/selector.rs b/ferritin-core/src/core/selection/selector.rs new file mode 100644 index 00000000..9662ff4d --- /dev/null +++ b/ferritin-core/src/core/selection/selector.rs @@ -0,0 +1,81 @@ +use super::selection::Selection; +use super::view::AtomView; +use crate::core::AtomCollection; +use pdbtbx::Element; + +pub struct AtomSelector<'a> { + collection: &'a AtomCollection, + current_selection: Selection, +} + +impl<'a> AtomSelector<'a> { + pub(crate) fn new(collection: &AtomCollection) -> AtomSelector<'_> { + let size = collection.size() as usize; + AtomSelector { + collection, + current_selection: Selection::new((0..size).collect()), + } + } + + pub fn chain(mut self, chain_id: &str) -> Self { + let chain_selection = self.collection.select_by_chain(chain_id); + self.current_selection = &self.current_selection & &chain_selection; + self + } + + pub fn residue(mut self, res_name: &str) -> Self { + let res_selection = self.collection.select_by_residue(res_name); + self.current_selection = &self.current_selection & &res_selection; + self + } + + pub fn element(mut self, element: Element) -> Self { + let element_selection = self + .collection + .elements() + .iter() + .enumerate() + .filter(|(_, &e)| e == element) + .map(|(i, _)| i) + .collect(); + self.current_selection = &self.current_selection & &Selection::new(element_selection); + self + } + + pub fn sphere(mut self, center: [f32; 3], radius: f32) -> Self { + let sphere_selection = self + .collection + .coords() + .iter() + .enumerate() + .filter(|(_, &pos)| { + let dx = pos[0] - center[0]; + let dy = pos[1] - center[1]; + let dz = pos[2] - center[2]; + (dx * dx + dy * dy + dz * dz).sqrt() <= radius + }) + .map(|(i, _)| i) + .collect(); + self.current_selection = &self.current_selection & &Selection::new(sphere_selection); + self + } + + pub fn filter(mut self, predicate: F) -> Self + where + F: Fn(usize) -> bool, + { + let filtered = self + .current_selection + .indices + .iter() + .filter(|&&idx| predicate(idx)) + .copied() + .collect(); + self.current_selection = Selection::new(filtered); + self + } + + pub fn collect(self) -> AtomView<'a> { + AtomView::new(self.collection, self.current_selection) + } +} diff --git a/ferritin-core/src/core/selection/view.rs b/ferritin-core/src/core/selection/view.rs new file mode 100644 index 00000000..f1f4ae66 --- /dev/null +++ b/ferritin-core/src/core/selection/view.rs @@ -0,0 +1,72 @@ +use super::selection::Selection; +use crate::core::AtomCollection; +use pdbtbx::Element; + +pub struct AtomView<'a> { + collection: &'a AtomCollection, + selection: Selection, +} + +impl<'a> AtomView<'a> { + pub(crate) fn new(collection: &'a AtomCollection, selection: Selection) -> Self { + AtomView { + collection, + selection, + } + } + pub fn coords(&self) -> Vec<[f32; 3]> { + self.selection + .indices + .iter() + .map(|&i| *self.collection.get_coord(i)) + .collect() + } + + pub fn size(&self) -> usize { + self.selection.indices.len() + } +} +pub struct AtomRef<'a> { + pub coords: &'a [f32; 3], + pub res_id: &'a i32, + pub res_name: &'a String, + pub element: &'a Element, + // ... other fields +} + +pub struct AtomIterator<'a> { + view: &'a AtomView<'a>, + current: usize, +} + +impl<'a> IntoIterator for &'a AtomView<'a> { + type Item = AtomRef<'a>; + type IntoIter = AtomIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + AtomIterator { + view: self, + current: 0, + } + } +} + +impl<'a> Iterator for AtomIterator<'a> { + type Item = AtomRef<'a>; + + fn next(&mut self) -> Option { + if self.current >= self.view.selection.indices.len() { + return None; + } + + let idx = self.view.selection.indices[self.current]; + self.current += 1; + + Some(AtomRef { + coords: &self.view.collection.get_coord(idx), + res_id: &self.view.collection.get_res_id(idx), + res_name: &self.view.collection.get_res_name(idx), + element: &self.view.collection.get_element(idx), + }) + } +} diff --git a/ferritin-core/src/lib.rs b/ferritin-core/src/lib.rs index 5f1c16c0..b4a73ffd 100644 --- a/ferritin-core/src/lib.rs +++ b/ferritin-core/src/lib.rs @@ -1,4 +1,3 @@ -mod conversions; pub mod core; pub use core::{AtomCollection, Bond, BondOrder}; pub use pdbtbx::Element; diff --git a/ferritin-molviewspec/Cargo.toml b/ferritin-molviewspec/Cargo.toml index e4827e41..d977ddbd 100644 --- a/ferritin-molviewspec/Cargo.toml +++ b/ferritin-molviewspec/Cargo.toml @@ -7,19 +7,11 @@ license.workspace = true description.workspace = true [dependencies] -clap = { version = "4.5.20", features = ["derive"] } -serde-pickle = "1.1" chrono = "0.4.38" validator = { version = "0.18", features = ["derive"] } -serde_bytes = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } urlencoding = { workspace = true } -serde_repr = { workspace = true } -once_cell = { workspace = true } -bitflags = { workspace = true } -pdbtbx.workspace = true -itertools.workspace = true [dev.dependencies] ferritin-pymol = { path = "../ferritin-pymol" } diff --git a/ferritin-pymol/Cargo.toml b/ferritin-pymol/Cargo.toml index 8a4ec4b6..7e33ec10 100644 --- a/ferritin-pymol/Cargo.toml +++ b/ferritin-pymol/Cargo.toml @@ -10,12 +10,9 @@ description.workspace = true ferritin-molviewspec = { path = "../ferritin-molviewspec" } clap = { version = "4.5.20", features = ["derive"] } serde-pickle = "1.1" -chrono = "0.4.38" -validator = { version = "0.18", features = ["derive"] } serde_bytes = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -urlencoding = { workspace = true } serde_repr = { workspace = true } once_cell = { workspace = true } bitflags = { workspace = true }