Coverage for biobb_vs / utils / box_residues.py: 93%
89 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-16 15:25 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-16 15:25 +0000
1#!/usr/bin/env python3
3"""Module containing the BoxResidues class and the command line interface."""
4import warnings
5from pathlib import PurePath
6from typing import Optional
7import numpy as np
8from Bio import BiopythonDeprecationWarning
9from biobb_common.generic.biobb_object import BiobbObject
10from biobb_common.tools import file_utils as fu
11from biobb_common.tools.file_utils import launchlogger
13from biobb_vs.utils.common import (
14 _from_string_to_list,
15 check_input_path,
16 check_output_path,
17 get_box_coordinates,
18)
20with warnings.catch_warnings():
21 warnings.simplefilter("ignore", BiopythonDeprecationWarning)
22 # try:
23 # import Bio.SubsMat.MatrixInfo
24 # except ImportError:
25 import Bio.Align.substitution_matrices
26 import Bio.pairwise2
27 import Bio.PDB
class BoxResidues(BiobbObject):
    """
    | biobb_vs BoxResidues
    | This class sets the center and the size of a rectangular parallelepiped box around a set of residues.
    | Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB. The residue identifiers that compose the selection (i.e. binding site) are provided by a property list.

    Args:
        input_pdb_path (str): PDB protein structure for which the box will be build. Its size and center will be set around the 'resid_list' property once mapped against this PDB. File type: input. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/utils/input_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str): PDB including the annotation of the box center and size as REMARKs. File type: output. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/utils/ref_output_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **resid_list** (*list*) - (None) List with all the residue numbers to form a cavity or binding site. Mandatory property.
            * **offset** (*float*) - (2.0) [0.1~1000|0.1] Extra distance (Angstroms) between the last residue atom and the box boundary.
            * **box_coordinates** (*bool*) - (False) Add box coordinates as 8 ATOM records.
            * **residue_offset** (*int*) - (0) [0~1000|1] Residue id offset.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_vs.utils.box_residues import box_residues
            prop = {
                'resid_list': [718, 743, 745, 762, 766, 796, 790, 791, 793, 794, 788],
                'offset': 2,
                'box_coordinates': True
            }
            box_residues(input_pdb_path='/path/to/myStructure.pdb',
                         output_pdb_path='/path/to/newBox.pdb',
                         properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_pdb_path, output_pdb_path, properties=None, **kwargs
    ) -> None:
        """Store the input/output paths and the box properties.

        Args:
            input_pdb_path: Path of the PDB structure to read.
            output_pdb_path: Path of the PDB file to write.
            properties: Optional dictionary with the tool properties
                (``resid_list``, ``offset``, ``box_coordinates``,
                ``residue_offset``); ``None`` is treated as ``{}``.
            **kwargs: Accepted for interface compatibility; only recorded
                through the ``locals()`` snapshot below.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot taken before any other local name is bound in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_pdb_path": output_pdb_path},
        }

        # Properties specific for BB
        self.resid_list = _from_string_to_list(properties.get("resid_list", []))
        self.offset = float(properties.get("offset", 2.0))
        # Documented as a boolean flag and only ever used for its truth value.
        self.box_coordinates = bool(properties.get("box_coordinates", False))
        # Coerced so that a textual or missing (None) value cannot break the
        # integer addition performed on the residue numbers.
        self.residue_offset = int(properties.get("residue_offset", 0) or 0)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters.

        The validated paths returned by ``check_input_path`` and
        ``check_output_path`` are written back into ``self.io_dict``.

        Args:
            out_log: Currently unused; ``self.out_log`` is used instead.
            err_log: Currently unused.
        """
        self.io_dict["in"]["input_pdb_path"] = check_input_path(
            self.io_dict["in"]["input_pdb_path"],
            "input_pdb_path",
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_pdb_path"] = check_output_path(
            self.io_dict["out"]["output_pdb_path"],
            "output_pdb_path",
            False,
            self.out_log,
            self.__class__.__name__,
        )

    def _requested_residue_ids(self):
        """Return the Biopython-style ids of the requested residues.

        Each entry of ``self.resid_list`` is converted to ``int`` and shifted
        by ``self.residue_offset``. The ids use a blank hetero flag and a
        blank insertion code, so only residues with such ids can match.

        Raises:
            SystemExit: If an entry cannot be converted to an integer.
        """
        try:
            residue_numbers = [int(resid) for resid in self.resid_list]
        except (TypeError, ValueError):
            message = (
                "%s: resid_list must only contain integer residue numbers, got [%s]"
                % (
                    self.__class__.__name__,
                    ", ".join(str(v) for v in self.resid_list),
                )
            )
            fu.log(message, self.out_log, self.global_log)
            raise SystemExit(message)
        return [
            (" ", number + self.residue_offset, " ") for number in residue_numbers
        ]

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`BoxResidues <utils.box_residues.BoxResidues>` utils.box_residues.BoxResidues object.

        Parses the input PDB, selects the residues listed in ``resid_list``,
        computes the box center and half-size around them and writes both as
        a REMARK line (optionally followed by the box corner records) into
        the output file.

        Returns:
            int: 0, both on completion and when ``check_restart()`` reports
            that the execution can be skipped.

        Raises:
            SystemExit: If none of the requested residues is found in the
                structure, or if ``resid_list`` holds non-integer entries.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        input_pdb_path = self.io_dict["in"]["input_pdb_path"]

        # Parse structure
        fu.log(
            "Loading input PDB structure %s" % (input_pdb_path),
            self.out_log,
            self.global_log,
        )
        structure_name = PurePath(input_pdb_path).name
        parser = Bio.PDB.PDBParser(QUIET=True)
        structure = parser.get_structure(structure_name, input_pdb_path)

        # Work on the first model only when the structure holds any model.
        if len(structure):
            structure = structure[0]

        # Mapping residue structure into input structure
        fu.log(
            "Mapping residue structure into input structure",
            self.out_log,
            self.global_log,
        )

        # Listing residues to be selected from the residue structure
        requested_ids = self._requested_residue_ids()
        # Set membership keeps the scan linear in the number of residues.
        wanted_ids = set(requested_ids)

        selection_res_list = []
        selection_atoms_num = 0
        for chain in structure:
            for residue in chain:
                if residue.get_id() in wanted_ids:
                    selection_res_list.append(residue)
                    selection_atoms_num += len(residue.get_list())

        listed_residues = ", ".join(str(v) for v in self.resid_list)
        if not selection_res_list:
            message = "%s: Cannot match any of the residues listed in [%s] into %s" % (
                self.__class__.__name__,
                listed_residues,
                input_pdb_path,
            )
            fu.log(message, self.out_log, self.global_log)
            raise SystemExit(message)
        elif len(selection_res_list) != len(requested_ids):
            # NOTE(review): the comparison counts every matching residue of
            # every chain against the raw request length, so duplicated
            # entries or multi-chain structures can trigger (or hide) this
            # warning - confirm whether that is intended.
            fu.log(
                "Cannot match all the residues listed in %s into %s. Found %s out of %s"
                % (
                    listed_residues,
                    input_pdb_path,
                    len(selection_res_list),
                    len(requested_ids),
                ),
                self.out_log,
                self.global_log,
            )
        else:
            fu.log(
                "Selection of residues successfully matched",
                self.out_log,
                self.global_log,
            )

        # Compute binding site box size

        # Coordinates of every atom of the selection, gathered once.
        atom_coords = [
            atom.coord for res in selection_res_list for atom in res.get_atoms()
        ]

        # compute box center (mean of the selected atom coordinates)
        selection_box_center = sum(atom_coords) / selection_atoms_num
        fu.log(
            "Binding site center (Angstroms): %10.3f%10.3f%10.3f"
            % (
                selection_box_center[0],
                selection_box_center[1],
                selection_box_center[2],
            ),
            self.out_log,
            self.global_log,
        )

        # compute box size
        # NOTE(review): the half-size is measured from the center to the
        # per-axis maximum only; atoms lying farther from the center on the
        # minimum side are not considered. Kept as is because the reported
        # values would otherwise change.
        selection_coords_max = np.amax(atom_coords, axis=0)
        selection_box_size = selection_coords_max - selection_box_center
        if self.offset:
            selection_box_size = [c + self.offset for c in selection_box_size]
        fu.log(
            "Binding site size (Angstroms): %10.3f%10.3f%10.3f"
            % (selection_box_size[0], selection_box_size[1], selection_box_size[2]),
            self.out_log,
            self.global_log,
        )

        # compute volume (the stored size is a half-size, hence the 2**3)
        vol = np.prod(selection_box_size) * 2**3
        fu.log("Volume (cubic Angstroms): %.0f" % (vol), self.out_log, self.global_log)

        # add box details as PDB remarks
        remarks = "REMARK BOX CENTER:%10.3f%10.3f%10.3f" % (
            selection_box_center[0],
            selection_box_center[1],
            selection_box_center[2],
        )
        remarks += " SIZE:%10.3f%10.3f%10.3f" % (
            selection_box_size[0],
            selection_box_size[1],
            selection_box_size[2],
        )

        selection_box_coords_txt = ""
        # add (optional) box coordinates as 8 ATOM records
        if self.box_coordinates:
            fu.log("Adding box coordinates", self.out_log, self.global_log)
            selection_box_coords_txt = get_box_coordinates(
                selection_box_center, selection_box_size
            )

        # Mode "w" already starts from an empty file, so no seek is needed.
        with open(self.io_dict["out"]["output_pdb_path"], "w") as f:
            f.write(remarks.rstrip("\r\n") + "\n" + selection_box_coords_txt)

        fu.log(
            "Saving output PDB file (with box setting annotations): %s"
            % (self.io_dict["out"]["output_pdb_path"]),
            self.out_log,
            self.global_log,
        )

        # Copy files to host
        self.copy_to_host()
        self.remove_tmp_files()

        return 0
def box_residues(
    input_pdb_path: str,
    output_pdb_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`BoxResidues <utils.box_residues.BoxResidues>` object and
    run its :meth:`launch() <utils.box_residues.BoxResidues.launch>` method.

    Every argument of this function is forwarded by name to the constructor
    (the extra keyword arguments travel as one dictionary under the keyword
    ``kwargs``). The value returned by ``launch()`` is returned unchanged.
    """
    # Snapshot the arguments before binding any other local name, so that
    # only the call arguments are forwarded.
    call_arguments = dict(locals())
    building_block = BoxResidues(**call_arguments)
    return building_block.launch()
# Reuse the class documentation for the functional wrapper so both entry
# points expose the same text.
box_residues.__doc__ = BoxResidues.__doc__
# Callable returned by BoxResidues.get_main; it is invoked below when the
# module is run as a script.
main = BoxResidues.get_main(
    box_residues,
    "Sets the center and the size of a rectangular parallelepiped box "
    "around a selection of residues found in a given PDB.",
)

if __name__ == "__main__":
    main()