Coverage for biobb_vs/utils/box_residues.py: 84%
100 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 12:00 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 12:00 +0000
1#!/usr/bin/env python3
3"""Module containing the BoxResidues class and the command line interface."""
5import argparse
6import warnings
7from pathlib import PurePath
8from typing import Optional
10import numpy as np
11from Bio import BiopythonDeprecationWarning
12from biobb_common.configuration import settings
13from biobb_common.generic.biobb_object import BiobbObject
14from biobb_common.tools import file_utils as fu
15from biobb_common.tools.file_utils import launchlogger
17from biobb_vs.utils.common import (
18 _from_string_to_list,
19 check_input_path,
20 check_output_path,
21 get_box_coordinates,
22)
24with warnings.catch_warnings():
25 warnings.simplefilter("ignore", BiopythonDeprecationWarning)
26 # try:
27 # import Bio.SubsMat.MatrixInfo
28 # except ImportError:
29 import Bio.Align.substitution_matrices
30 import Bio.pairwise2
31 import Bio.PDB
class BoxResidues(BiobbObject):
    """
    | biobb_vs BoxResidues
    | This class sets the center and the size of a rectangular parallelepiped box around a set of residues.
    | Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB. The residue identifiers that compose the selection (i.e. binding site) are provided by a property list.

    Args:
        input_pdb_path (str): PDB protein structure for which the box will be build. Its size and center will be set around the 'resid_list' property once mapped against this PDB. File type: input. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/utils/input_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str): PDB including the annotation of the box center and size as REMARKs. File type: output. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/utils/ref_output_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **resid_list** (*list*) - (None) List with all the residue numbers to form a cavity or binding site. Mandatory property.
            * **offset** (*float*) - (2.0) [0.1~1000|0.1] Extra distance (Angstroms) between the last residue atom and the box boundary.
            * **box_coordinates** (*bool*) - (False) Add box coordinates as 8 ATOM records.
            * **residue_offset** (*int*) - (0) [0~1000|1] Residue id offset.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_vs.utils.box_residues import box_residues
            prop = {
                'resid_list': [718, 743, 745, 762, 766, 796, 790, 791, 793, 794, 788],
                'offset': 2,
                'box_coordinates': True
            }
            box_residues(input_pdb_path='/path/to/myStructure.pdb',
                         output_pdb_path='/path/to/newBox.pdb',
                         properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_pdb_path, output_pdb_path, properties=None, **kwargs
    ) -> None:
        """Store the I/O paths and the box properties, then validate them."""
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # additional local variable is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_pdb_path": output_pdb_path},
        }

        # Properties specific for BB
        self.resid_list = _from_string_to_list(properties.get("resid_list", []))
        self.offset = float(properties.get("offset", 2.0))
        # Documented as a boolean flag: coerce with bool(), not float().
        self.box_coordinates = bool(properties.get("box_coordinates", False))
        self.residue_offset = properties.get("residue_offset", 0)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters.

        The checked paths are written back into ``self.io_dict``.

        Args:
            out_log: Accepted for interface compatibility; this method logs
                through ``self.out_log``.
            err_log: Accepted for interface compatibility; not used here.
        """
        self.io_dict["in"]["input_pdb_path"] = check_input_path(
            self.io_dict["in"]["input_pdb_path"],
            "input_pdb_path",
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_pdb_path"] = check_output_path(
            self.io_dict["out"]["output_pdb_path"],
            "output_pdb_path",
            False,
            self.out_log,
            self.__class__.__name__,
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`BoxResidues <utils.box_residues.BoxResidues>` utils.box_residues.BoxResidues object.

        Returns:
            int: ``0`` on success, or when the restart check short-circuits the run.

        Raises:
            SystemExit: If none of the residues in ``resid_list`` is found in
                the input structure.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        input_pdb_path = self.io_dict["in"]["input_pdb_path"]
        output_pdb_path = self.io_dict["out"]["output_pdb_path"]

        # Parse structure
        fu.log(
            "Loading input PDB structure %s" % (input_pdb_path),
            self.out_log,
            self.global_log,
        )
        structure_name = PurePath(input_pdb_path).name
        parser = Bio.PDB.PDBParser(QUIET=True)
        structPDB = parser.get_structure(structure_name, input_pdb_path)

        # Work on the first model only when the structure holds any model
        if len(structPDB):
            structPDB = structPDB[0]

        # Mapping residue structure into input structure
        fu.log(
            "Mapping residue structure into input structure",
            self.out_log,
            self.global_log,
        )

        # Listing residues to be selected from the residue structure, as
        # Biopython residue ids: (hetero flag, sequence number, insertion code)
        if self.residue_offset:
            residPDB_res_list = [
                (" ", resid + self.residue_offset, " ") for resid in self.resid_list
            ]
        else:
            residPDB_res_list = [(" ", resid, " ") for resid in self.resid_list]
        # Set for O(1) membership tests; the list is kept for the count check
        wanted_res_ids = set(residPDB_res_list)

        selection_res_list = []
        selection_atoms_num = 0
        for struct_chain in structPDB:
            for struct_res in struct_chain:
                if struct_res.get_id() in wanted_res_ids:
                    selection_res_list.append(struct_res)
                    selection_atoms_num += len(struct_res.get_list())

        resid_txt = ", ".join(str(v) for v in self.resid_list)
        if len(selection_res_list) == 0:
            # Build the message once; it is both logged and raised
            no_match_msg = (
                self.__class__.__name__
                + ": Cannot match any of the residues listed in [%s] into %s"
                % (resid_txt, input_pdb_path)
            )
            fu.log(no_match_msg, self.out_log)
            raise SystemExit(no_match_msg)
        elif len(selection_res_list) != len(residPDB_res_list):
            fu.log(
                "Cannot match all the residues listed in %s into %s. Found %s out of %s"
                % (
                    resid_txt,
                    input_pdb_path,
                    len(selection_res_list),
                    len(residPDB_res_list),
                ),
                self.out_log,
            )
        else:
            fu.log(
                "Selection of residues successfully matched",
                self.out_log,
                self.global_log,
            )

        # Compute binding site box size

        # Coordinates of every atom in the selection, collected once and
        # reused for both the center and the size.
        selection_coords = [
            atom.coord for res in selection_res_list for atom in res.get_atoms()
        ]

        # compute box center (mean of the selected atom coordinates)
        selection_box_center = sum(selection_coords) / selection_atoms_num
        fu.log(
            "Binding site center (Angstroms): %10.3f%10.3f%10.3f"
            % (
                selection_box_center[0],
                selection_box_center[1],
                selection_box_center[2],
            ),
            self.out_log,
            self.global_log,
        )

        # compute box size
        # NOTE(review): the half-size is measured from the center to the
        # maximum coordinate only; atoms lying further from the center on the
        # minimum side may fall outside the box - confirm this is intended.
        selection_coords_max = np.amax(selection_coords, axis=0)
        selection_box_size = selection_coords_max - selection_box_center
        if self.offset:
            selection_box_size = [c + self.offset for c in selection_box_size]
        fu.log(
            "Binding site size (Angstroms):   %10.3f%10.3f%10.3f"
            % (selection_box_size[0], selection_box_size[1], selection_box_size[2]),
            self.out_log,
            self.global_log,
        )

        # compute volume (box size holds half-lengths, hence the 2**3 factor)
        vol = np.prod(selection_box_size) * 2**3
        fu.log("Volume (cubic Angstroms): %.0f" % (vol), self.out_log, self.global_log)

        # add box details as PDB remarks
        remarks = "REMARK BOX CENTER:%10.3f%10.3f%10.3f" % (
            selection_box_center[0],
            selection_box_center[1],
            selection_box_center[2],
        )
        remarks += " SIZE:%10.3f%10.3f%10.3f" % (
            selection_box_size[0],
            selection_box_size[1],
            selection_box_size[2],
        )

        selection_box_coords_txt = ""
        # add (optional) box coordinates as 8 ATOM records
        if self.box_coordinates:
            fu.log("Adding box coordinates", self.out_log, self.global_log)
            selection_box_coords_txt = get_box_coordinates(
                selection_box_center, selection_box_size
            )

        with open(output_pdb_path, "w") as f:
            f.write(remarks.rstrip("\r\n") + "\n" + selection_box_coords_txt)

        fu.log(
            "Saving output PDB file (with box setting annotations): %s"
            % (output_pdb_path),
            self.out_log,
            self.global_log,
        )

        # Copy files to host
        self.copy_to_host()

        # self.tmp_files.extend([self.stage_io_dict.get("unique_dir", "")])
        self.remove_tmp_files()

        return 0
def box_residues(
    input_pdb_path: str,
    output_pdb_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`BoxResidues <utils.box_residues.BoxResidues>` class and
    execute the :meth:`launch() <utils.box_residues.BoxResidues.launch>` method.

    Args:
        input_pdb_path (str): Path to the input PDB structure.
        output_pdb_path (str): Path of the PDB file to write.
        properties (dict, optional): Tool properties forwarded to the class.
        **kwargs: Extra keyword arguments forwarded to the class constructor.

    Returns:
        int: The value returned by ``BoxResidues.launch()``.
    """

    return BoxResidues(
        input_pdb_path=input_pdb_path,
        output_pdb_path=output_pdb_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the full class documentation on the function wrapper. This has to be
# a module-level statement: placed after the ``return`` inside the function it
# was unreachable and never took effect.
box_residues.__doc__ = BoxResidues.__doc__
def main():
    """Parse the command line and run the building block.

    Reads the optional ``--config`` file and the two mandatory PDB paths,
    loads the properties from the configuration and calls ``box_residues``.
    """

    def _wide_help_formatter(prog):
        # Very large width so help texts are not wrapped
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description="Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB.",
        formatter_class=_wide_help_formatter,
    )
    cli.add_argument("--config", required=False, help="Configuration file")

    # Specific args of each building block
    mandatory = cli.add_argument_group("required arguments")
    mandatory_options = (
        (
            "--input_pdb_path",
            "PDB protein structure for which the box will be build. Its size and center will be set around the 'resid_list' property once mapped against this PDB. Accepted formats: pdb.",
        ),
        (
            "--output_pdb_path",
            "PDB including the annotation of the box center and size as REMARKs. Accepted formats: pdb.",
        ),
    )
    for option_flag, option_help in mandatory_options:
        mandatory.add_argument(option_flag, required=True, help=option_help)

    args = cli.parse_args()
    # Fall back to an empty JSON object when no configuration is given
    if not args.config:
        args.config = "{}"
    conf_reader = settings.ConfReader(config=args.config)
    properties = conf_reader.get_prop_dic()

    # Specific call of each building block
    box_residues(
        input_pdb_path=args.input_pdb_path,
        output_pdb_path=args.output_pdb_path,
        properties=properties,
    )
# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()