1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
use crate::AtomCollection;
use itertools::Itertools;
use pdbtbx::{Element, PDB};

impl From<&PDB> for AtomCollection {
    /// Flattens a parsed [`PDB`] into a column-oriented [`AtomCollection`].
    ///
    /// The structure is walked chain -> residue -> atom, and every retained
    /// atom contributes one entry to each per-atom column (coordinates,
    /// hetero flag, atom name, residue id, residue name, element, chain id).
    /// Atoms for which `element()` yields `None` are left out entirely.
    ///
    /// Once the columns are assembled, `connect_via_residue_names` is called
    /// on the new collection before it is returned.
    fn from(pdb_data: &PDB) -> Self {
        // One tuple per retained atom, in traversal order.
        let mut rows = Vec::new();

        for chain in pdb_data.chains() {
            let chain_label = chain.id().to_string();

            for residue in chain.residues() {
                let (serial, _insertion_code) = residue.id();
                // NOTE(review): plain `as` cast; if the serial number type is
                // wider than i32 this wraps silently -- confirm that is acceptable.
                let residue_number = serial as i32;
                // A residue without a name is recorded as the empty string.
                let residue_label = residue.name().unwrap_or_default().to_string();

                for atom in residue.atoms() {
                    // Atoms with no element assigned are skipped.
                    if let Some(element) = atom.element() {
                        let (x, y, z) = atom.pos();
                        rows.push((
                            [x as f32, y as f32, z as f32],
                            atom.hetero(),
                            atom.name().to_string(),
                            residue_number,
                            residue_label.clone(),
                            element,
                            chain_label.clone(),
                        ));
                    }
                }
            }
        }

        // Split the row tuples into one vector per column.
        let columns: (
            Vec<[f32; 3]>,
            Vec<bool>,
            Vec<String>,
            Vec<i32>,
            Vec<String>,
            Vec<Element>,
            Vec<String>,
        ) = rows.into_iter().multiunzip();
        let (coords, is_hetero, atom_names, res_ids, res_names, elements, chain_ids) = columns;

        let atom_count = coords.len();
        let mut collection = AtomCollection::new(
            atom_count,
            coords,
            res_ids,
            res_names,
            is_hetero,
            elements,
            atom_names,
            chain_ids,
            None,
        );

        collection.connect_via_residue_names();
        collection
    }
}

#[cfg(test)]
mod tests {
    use crate::AtomCollection;
    use ferritin_test_data::TestFile;
    use itertools::Itertools;
    use pdbtbx::{self, Element};

    /// Converts the `protein_01` fixture into an `AtomCollection` and
    /// spot-checks atom/bond counts, residue ids, residue names and elements.
    #[test]
    fn test_pdb_from() {
        let (prot_file, _temp) = TestFile::protein_01().create_temp().unwrap();
        let (structure, _) = pdbtbx::open(prot_file).unwrap();
        assert_eq!(structure.atom_count(), 1413);

        // Atom and bond counts of the converted collection.
        let collection = AtomCollection::from(&structure);
        assert_eq!(collection.get_coords().len(), 1413);
        assert_eq!(collection.get_bonds().unwrap().len(), 1095);

        // The highest residue id present is 338.
        let highest_res_id: Option<i32> = collection.get_resids().into_iter().cloned().max();
        assert_eq!(highest_res_id, Some(338));

        // Distinct residue names, in sorted order.
        let expected_res_names = [
            "ALA", "ARG", "ASN", "ASP", "GLN", "GLU", "GLY", "HEM", "HIS", "HOH", "ILE", "LEU",
            "LYS", "MET", "NBN", "PHE", "PRO", "SER", "SO4", "THR", "TRP", "TYR", "VAL",
        ];
        let observed_res_names: Vec<String> = collection
            .get_resnames()
            .into_iter()
            .cloned()
            .sorted()
            .dedup()
            .collect();
        assert_eq!(observed_res_names, expected_res_names);

        // Distinct elements, in sorted order.
        let expected_elements = [Element::C, Element::N, Element::O, Element::S, Element::Fe];
        let observed_elements: Vec<Element> = collection
            .get_elements()
            .into_iter()
            .cloned()
            .sorted()
            .dedup()
            .collect();
        assert_eq!(observed_elements, expected_elements);
    }
}