diff --git a/deeprank2/utils/buildgraph.py b/deeprank2/utils/buildgraph.py
index 5a22ed1cd..38da5632f 100644
--- a/deeprank2/utils/buildgraph.py
+++ b/deeprank2/utils/buildgraph.py
@@ -14,11 +14,8 @@
 
 _log = logging.getLogger(__name__)
 
+PDB_GET = "x,y,z,name,altLoc,occ,element,chainID,resSeq,resName,iCode"
 
-def _get_atom_data(pdb: pdb2sql_object | pdb2sql_interface, **kwargs):
-    return list(pdb.get(
-        "x,y,z,name,altLoc,occ,element,chainID,resSeq,resName,iCode",
-        **kwargs))
 
 
 def _add_atom_to_residue(atom: Atom, residue: Residue):
@@ -36,57 +33,56 @@ def _add_atom_to_residue(atom: Atom, residue: Residue):
     residue.add_atom(atom)
 
 
-def _add_atom_data_to_structure(structure: PDBStructure,  # pylint: disable=too-many-arguments, too-many-locals
-                                x: float, y: float, z: float,
-                                atom_name: str,
-                                altloc: str, occupancy: float,
-                                element_name: str,
-                                chain_id: str,
-                                residue_number: int,
-                                residue_name: str,
-                                insertion_code: str):
-    """
-    This is a subroutine, to be used in other methods for converting pdb2sql atomic data into a
-    deeprank structure object. It should be called for one atom.
+def _add_atom_data_to_structure(
+    structure: PDBStructure,
+    pdb_obj: pdb2sql_object | pdb2sql_interface,
+    **kwargs
+):
+    """This is a subroutine, to be used in other methods for converting pdb2sql atomic data into a
+    deeprank structure object. It adds every atom returned by the pdb2sql query.
 
     Args:
-        structure (:class:`PDBStructure`): Where this atom should be added to.
-        x (float): x-coordinate of atom.
-        y (float): y-coordinate of atom.
-        z (float): z-coordinate of atom.
-        atom_name (str): Name of atom: 'CA', 'C', 'N', 'O', 'CB', etc.
-        altloc (str): Pdb alternative location id for this atom (can be empty): 'A', 'B', 'C', etc.
-        occupancy (float): Pdb occupancy of this atom, ranging from 0.0 to 1.0. Should be used with altloc.
-        element_name (str): Pdb element symbol of this atom: 'C', 'O', 'H', 'N', 'S'.
-        chain_id (str): Pdb chain identifier: 'A', 'B', 'C', etc.
-        residue_number (int): Pdb residue number, a positive integer.
-        residue_name (str): Pdb residue name: "ALA", "CYS", "ASP", etc.
-        insertion_code (str): Pdb residue insertion code (can be empty) : '', 'A', 'B', 'C', etc.
+        structure (:class:`PDBStructure`): The structure to which the atoms are added.
+        pdb_obj (pdb2sql_object | pdb2sql_interface): The pdb2sql object to retrieve the data from.
+        kwargs: as required by the get function for the pdb object.
     """
 
-    # Make sure not to take the same atom twice.
-    if altloc is not None and altloc != "" and altloc != "A":
-        return
+    retrieved_data = PDB_GET.split(sep=',')
+    for data_values in pdb_obj.get(PDB_GET, **kwargs):
+        atom_data = dict(zip(retrieved_data, data_values))
 
-    insertion_code = None if insertion_code == "" else insertion_code
-    amino_acid = amino_acids_by_code[residue_name] if residue_name in amino_acids_by_code else None
-    atom_position = np.array([x, y, z])
+        # Skip alternate locations other than "A" so the same atom is not taken twice; keep processing the remaining rows.
+        if atom_data["altLoc"] is not None and atom_data["altLoc"] != "" and atom_data["altLoc"] != "A":
+            continue
 
-    if not structure.has_chain(chain_id):
-        structure.add_chain(Chain(structure, chain_id))
-    chain = structure.get_chain(chain_id)
+        atom_data["iCode"] = None if atom_data["iCode"] == "" else atom_data["iCode"]
 
-    if not chain.has_residue(residue_number, insertion_code):
-        chain.add_residue(Residue(chain, residue_number, amino_acid, insertion_code))
-    residue = chain.get_residue(residue_number, insertion_code)
+        try:
+            atom_data["aa"] = amino_acids_by_code[atom_data["resName"]]
+        except KeyError:
+            atom_data["aa"] = None
+        atom_data["coordinates"] = np.array(data_values[:3])
 
-    atom = Atom(
-        residue, atom_name, AtomicElement[element_name], atom_position, occupancy
-    )
-    _add_atom_to_residue(atom, residue)
+        if not structure.has_chain(atom_data["chainID"]):
+            structure.add_chain(Chain(structure, atom_data["chainID"]))
+        chain = structure.get_chain(atom_data["chainID"])
 
 
-def get_structure(pdb: pdb2sql_object, id_: str) -> PDBStructure:
+        if not chain.has_residue(atom_data["resSeq"], atom_data["iCode"]):
+            chain.add_residue(Residue(chain, atom_data["resSeq"], atom_data["aa"], atom_data["iCode"]))
+        residue = chain.get_residue(atom_data["resSeq"], atom_data["iCode"])
+
+        atom = Atom(
+            residue,
+            atom_data["name"],
+            AtomicElement[atom_data["element"]],
+            atom_data["coordinates"],
+            atom_data["occ"],
+        )
+        _add_atom_to_residue(atom, residue)
+
+
+def get_structure(pdb_obj: pdb2sql_object, id_: str) -> PDBStructure:
     """Builds a structure from rows in a pdb file.
 
     Args:
@@ -97,10 +93,7 @@ def get_structure(pdb: pdb2sql_object, id_: str) -> PDBStructure:
         PDBStructure: The structure object, giving access to chains, residues, atoms.
     """
     structure = PDBStructure(id_)
-
-    for atom_data in _get_atom_data(pdb, model=0):
-        _add_atom_data_to_structure(structure, *atom_data)
-
+    _add_atom_data_to_structure(structure, pdb_obj, model=0)
     return structure
 
 
@@ -121,10 +114,8 @@ def get_contact_atoms(
             chain1=chain_ids[0],
             chain2=chain_ids[1],
         )
-        for atom_data in _get_atom_data(interface,
-                                        rowID=atom_indexes[chain_ids[0]] + atom_indexes[chain_ids[1]]
-                                        ):
-            _add_atom_data_to_structure(structure, *atom_data)
+        pdb_rowID = atom_indexes[chain_ids[0]] + atom_indexes[chain_ids[1]]
+        _add_atom_data_to_structure(structure, interface, rowID=pdb_rowID)
     finally:
         interface._close()  # pylint: disable=protected-access
 