Coverage for biobb_structure_utils / utils / closest_residues.py: 89%
74 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-16 14:59 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-16 14:59 +0000
1#!/usr/bin/env python3
3"""Module containing the ClosestResidues class and the command line interface."""
4from typing import Optional
5import Bio.PDB
6from biobb_common.generic.biobb_object import BiobbObject
7from biobb_common.tools import file_utils as fu
8from biobb_common.tools.file_utils import launchlogger
10from biobb_structure_utils.utils.common import (
11 _from_string_to_list,
12 check_input_path,
13 check_output_path,
14 create_biopython_residue,
15 create_output_file,
16 create_residues_list,
17)
class ClosestResidues(BiobbObject):
    """
    | biobb_structure_utils ClosestResidues
    | Class to search closest residues from a 3D structure using Biopython.
    | Return all residues that have at least one atom within radius of center from a list of given residues.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_residues_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_closest_residues.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **residues** (*list*) - (None) List of comma separated res_id or list of dictionaries with the name | res_id | chain | model of the residues to find the closest neighbours. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **radius** (*float*) - (5) Distance in Ångströms to neighbours of the given list of residues.
            * **preserve_target** (*bool*) - (True) Whether or not to preserve the target residues in the output structure.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.closest_residues import closest_residues
            prop = {
                'residues': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ],
                'radius': 5,
                'preserve_target': False
            }
            closest_residues(input_structure_path='/path/to/myStructure.pdb',
                             output_residues_path='/path/to/newResidues.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_residues_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot taken right here, before any further local name is bound.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_residues_path": output_residues_path},
        }

        # Properties specific for BB
        self.residues = _from_string_to_list(properties.get("residues", []))
        self.radius = properties.get("radius", 5)
        self.preserve_target = properties.get("preserve_target", True)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _residue_matches(selector, candidate) -> bool:
        """Return True when ``candidate`` agrees with ``selector`` on every
        field named in ``selector["code"]`` (values compared after stripping
        surrounding whitespace)."""
        return all(
            selector[code].strip() == candidate[code].strip()
            for code in selector["code"]
        )

    @staticmethod
    def _locate_residue(structure, sr):
        """Return the Biopython residue described by ``sr`` or None.

        The residue is first looked up by its integer ``res_id``; if that key
        is missing, a second lookup is made with the hetero-residue id
        ``("H_" + name, res_id, " ")``.
        """
        try:
            chain = structure[int(sr["model"]) - 1][sr["chain"]]
            res_id = int(sr["res_id"])
            try:
                return chain[res_id]
            except KeyError:
                # Not stored under the plain integer id: try as HETATM.
                return chain["H_" + sr["name"], res_id, " "]
        except Exception:
            # Best effort per residue: a failure of either lookup (including
            # one raised by the HETATM fallback) is reported by the caller
            # instead of aborting the whole run.
            return None

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` utils.closest_residues.ClosestResidues object."""

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_residues_path"] = check_output_path(
            self.io_dict["out"]["output_residues_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        list_residues = create_residues_list(self.residues, self.out_log)

        # load input into BioPython structure
        structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # format selected residues; with no selection every residue is a target.
        # Each residue is recorded once even if several selectors match it.
        str_residues = []
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            if not list_residues or any(
                self._residue_matches(res, r) for res in list_residues
            ):
                str_residues.append(r)

        # get target residues in BioPython format
        target_residues = []
        for sr in str_residues:
            found = self._locate_residue(structure, sr)
            if found is None:
                fu.log(
                    "%s: Unable to find residue %s"
                    % (self.__class__.__name__, sr["res_id"]),
                    self.out_log,
                )
                continue
            target_residues.append(found)

        # get all atoms from target_residues
        target_atoms = Bio.PDB.Selection.unfold_entities(target_residues, "A")
        # get all atoms of input structure
        all_atoms = Bio.PDB.Selection.unfold_entities(structure, "A")
        # generate NeighborSearch object
        ns = Bio.PDB.NeighborSearch(all_atoms)
        # a set, so a residue close to several target atoms is kept only once
        nearby_residues = {
            res
            for center_atom in target_atoms
            for res in ns.search(center_atom.coord, self.radius, "R")
        }

        # format nearby residues to pure python objects
        neighbor_residues = [
            create_biopython_residue(residue) for residue in nearby_residues
        ]

        # if preserve_target == False, don't add the residues of self.residues to the final structure
        if not self.preserve_target:
            neighbor_residues = [x for x in neighbor_residues if x not in str_residues]

        fu.log("Found %d nearby residues" % len(neighbor_residues), self.out_log)

        if not neighbor_residues:
            fu.log(
                self.__class__.__name__ + ": No neighbour residues found, exiting",
                self.out_log,
            )
            raise SystemExit(
                self.__class__.__name__ + ": No neighbour residues found, exiting"
            )

        create_output_file(
            0,
            self.stage_io_dict["in"]["input_structure_path"],
            neighbor_residues,
            self.stage_io_dict["out"]["output_residues_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def closest_residues(
    input_structure_path: str,
    output_residues_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` object
    from the received arguments and return the result of its
    :meth:`launch() <utils.closest_residues.ClosestResidues.launch>` method."""
    # Snapshot the call arguments before any other local name is bound, so the
    # mapping holds exactly the four parameters (extra keywords stay nested
    # under the "kwargs" key).
    call_arguments = dict(locals())
    building_block = ClosestResidues(**call_arguments)
    return building_block.launch()
# Expose the class documentation on the functional wrapper as well.
closest_residues.__doc__ = ClosestResidues.__doc__

# Command line entry point, obtained from the get_main helper available on the class.
main = ClosestResidues.get_main(
    closest_residues,
    "Search closest residues to a list of given residues.",
)

if __name__ == "__main__":
    main()