Coverage for biobb_structure_utils/utils/closest_residues.py: 79%
85 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
1#!/usr/bin/env python3
3"""Module containing the ClosestResidues class and the command line interface."""
5import argparse
6from typing import Optional
8import Bio.PDB
9from biobb_common.configuration import settings
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.tools import file_utils as fu
12from biobb_common.tools.file_utils import launchlogger
14from biobb_structure_utils.utils.common import (
15 _from_string_to_list,
16 check_input_path,
17 check_output_path,
18 create_biopython_residue,
19 create_output_file,
20 create_residues_list,
21)
class ClosestResidues(BiobbObject):
    """
    | biobb_structure_utils ClosestResidues
    | Class to search closest residues from a 3D structure using Biopython.
    | Return all residues that have at least one atom within radius of center from a list of given residues.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_residues_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_closest_residues.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **residues** (*list*) - (None) List of comma separated res_id or list of dictionaries with the name | res_id | chain | model of the residues to find the closest neighbours. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **radius** (*float*) - (5) Distance in Ångströms to neighbours of the given list of residues.
            * **preserve_target** (*bool*) - (True) Whether or not to preserve the target residues in the output structure.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.closest_residues import closest_residues
            prop = {
                'residues': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ],
                'radius': 5,
                'preserve_target': False
            }
            closest_residues(input_structure_path='/path/to/myStructure.pdb',
                             output_residues_path='/path/to/newResidues.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_residues_path, properties=None, **kwargs
    ) -> None:
        """Store the I/O paths and read the tool-specific properties.

        ``residues``, ``radius`` and ``preserve_target`` are taken from
        ``properties`` with the defaults ``[]``, ``5`` and ``True``.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # other local name is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_residues_path": output_residues_path},
        }

        # Properties specific for BB
        self.residues = _from_string_to_list(properties.get("residues", []))
        self.radius = properties.get("radius", 5)
        self.preserve_target = properties.get("preserve_target", True)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` utils.closest_residues.ClosestResidues object."""

        # Run both paths through the shared checkers and keep what they return
        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_residues_path"] = check_output_path(
            self.io_dict["out"]["output_residues_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        list_residues = create_residues_list(self.residues, self.out_log)

        # load input into BioPython structure
        structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        str_residues = []
        # format selected residues
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            if not list_residues:
                # no selection given: every residue of the structure is a target
                str_residues.append(r)
                continue
            for res in list_residues:
                # a residue matches when every field named in res["code"] agrees
                if all(res[code].strip() == r[code].strip() for code in res["code"]):
                    str_residues.append(r)

        # get target residues in BioPython format
        target_residues = []
        for sr in str_residues:
            try:
                chain = structure[int(sr["model"]) - 1][sr["chain"]]
                # try for residues, if exception, try as HETATM
                try:
                    found = chain[int(sr["res_id"])]
                except KeyError:
                    found = chain["H_" + sr["name"], int(sr["res_id"]), " "]
            except Exception:
                # Best effort: a residue reachable through neither lookup is
                # reported and skipped. The retry is nested inside this handler
                # so that its own KeyError is caught here too, and the message
                # is interpolated before being handed to the logger.
                fu.log(
                    "%s: Unable to find residue %s"
                    % (self.__class__.__name__, sr["res_id"]),
                    self.out_log,
                )
                continue
            target_residues.append(found)

        # get all atoms from target_residues
        target_atoms = Bio.PDB.Selection.unfold_entities(target_residues, "A")
        # get all atoms of input structure
        all_atoms = Bio.PDB.Selection.unfold_entities(structure, "A")
        # generate NeighborSearch object
        ns = Bio.PDB.NeighborSearch(all_atoms)
        # set comprehension: each residue is kept once, however many target
        # atoms it is close to
        nearby_residues = {
            res
            for center_atom in target_atoms
            for res in ns.search(center_atom.coord, self.radius, "R")
        }

        # format nearby residues to pure python objects
        neighbor_residues = [create_biopython_residue(res) for res in nearby_residues]

        # if preserve_target == False, don't add the residues of self.residues to the final structure
        if not self.preserve_target:
            neighbor_residues = [x for x in neighbor_residues if x not in str_residues]

        fu.log("Found %d nearby residues" % len(neighbor_residues), self.out_log)

        if not neighbor_residues:
            fu.log(
                self.__class__.__name__ + ": No neighbour residues found, exiting",
                self.out_log,
            )
            raise SystemExit(
                self.__class__.__name__ + ": No neighbour residues found, exiting"
            )

        create_output_file(
            0,
            self.stage_io_dict["in"]["input_structure_path"],
            neighbor_residues,
            self.stage_io_dict["out"]["output_residues_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        # self.tmp_files.append(self.stage_io_dict.get("unique_dir", ""))
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def closest_residues(
    input_structure_path: str,
    output_residues_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` class and
    execute the :meth:`launch() <utils.closest_residues.ClosestResidues.launch>` method."""

    return ClosestResidues(
        input_structure_path=input_structure_path,
        output_residues_path=output_residues_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the class documentation on the function wrapper. This assignment has
# to live at module level: placed after the ``return`` inside the function it
# is unreachable and the docstring is never copied.
closest_residues.__doc__ = ClosestResidues.__doc__
def main():
    """Parse the command line, load the configuration and run the building block."""
    cli = argparse.ArgumentParser(
        description="Search closest residues to a list of given residues.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Input and output paths are mandatory and listed under their own help group
    io_group = cli.add_argument_group("required arguments")
    io_group.add_argument(
        "-i",
        "--input_structure_path",
        required=True,
        help="Input structure file path. Accepted formats: pdb.",
    )
    io_group.add_argument(
        "-o",
        "--output_residues_path",
        required=True,
        help="Output residues file path. Accepted formats: pdb.",
    )

    parsed = cli.parse_args()
    # A missing or empty --config value is handed to the reader as None
    config_source = parsed.config or None
    tool_properties = settings.ConfReader(config=config_source).get_prop_dic()

    # Run the building block with the parsed paths and loaded properties
    closest_residues(
        input_structure_path=parsed.input_structure_path,
        output_residues_path=parsed.output_residues_path,
        properties=tool_properties,
    )


if __name__ == "__main__":
    main()