Coverage for biobb_vs / utils / box_residues.py: 93%

89 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-16 15:25 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the BoxResidues class and the command line interface.""" 

4import warnings 

5from pathlib import PurePath 

6from typing import Optional 

7import numpy as np 

8from Bio import BiopythonDeprecationWarning 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.tools.file_utils import launchlogger 

12 

13from biobb_vs.utils.common import ( 

14 _from_string_to_list, 

15 check_input_path, 

16 check_output_path, 

17 get_box_coordinates, 

18) 

19 

20with warnings.catch_warnings(): 

21 warnings.simplefilter("ignore", BiopythonDeprecationWarning) 

22 # try: 

23 # import Bio.SubsMat.MatrixInfo 

24 # except ImportError: 

25 import Bio.Align.substitution_matrices 

26 import Bio.pairwise2 

27 import Bio.PDB 

28 

29 

class BoxResidues(BiobbObject):
    """
    | biobb_vs BoxResidues
    | This class sets the center and the size of a rectangular parallelepiped box around a set of residues.
    | Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB. The residue identifiers that compose the selection (i.e. binding site) are provided by a property list.

    Args:
        input_pdb_path (str): PDB protein structure for which the box will be built. Its size and center will be set around the 'resid_list' property once mapped against this PDB. File type: input. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/utils/input_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str): PDB including the annotation of the box center and size as REMARKs. File type: output. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/utils/ref_output_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **resid_list** (*list*) - (None) List with all the residue numbers to form a cavity or binding site. Mandatory property.
            * **offset** (*float*) - (2.0) [0.1~1000|0.1] Extra distance (Angstroms) between the last residue atom and the box boundary.
            * **box_coordinates** (*bool*) - (False) Add box coordinates as 8 ATOM records.
            * **residue_offset** (*int*) - (0) [0~1000|1] Residue id offset.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_vs.utils.box_residues import box_residues
            prop = {
                'resid_list': [718, 743, 745, 762, 766, 796, 790, 791, 793, 794, 788],
                'offset': 2,
                'box_coordinates': True
            }
            box_residues(input_pdb_path='/path/to/myStructure.pdb',
                         output_pdb_path='/path/to/newBox.pdb',
                         properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_pdb_path, output_pdb_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. It must be taken before any
        # other local name is bound so that only the arguments are captured.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_pdb_path": output_pdb_path},
        }

        # Properties specific for BB
        self.resid_list = _from_string_to_list(properties.get("resid_list", []))
        self.offset = float(properties.get("offset", 2.0))
        # Documented as a boolean flag: coerce it as one (textual "true"/"false"
        # used to raise ValueError when pushed through float()).
        self.box_coordinates = self._as_bool(properties.get("box_coordinates", False))
        # Documented as an integer: coerce it so that a textual value does not
        # break the residue id arithmetic in launch(). None/"" mean "no offset".
        self.residue_offset = int(properties.get("residue_offset", 0) or 0)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _as_bool(value) -> bool:
        """Interpret a property value as a boolean flag.

        Strings are compared case-insensitively against "true"/"yes" and
        "false"/"no"/""; any other string is parsed as a number and is true
        when non-zero (a non-numeric string raises ValueError). Non-string
        values use their regular truthiness.
        """
        if isinstance(value, str):
            text = value.strip().lower()
            if text in ("true", "yes"):
                return True
            if text in ("false", "no", ""):
                return False
            return bool(float(text))
        return bool(value)

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters.

        The validated paths returned by check_input_path / check_output_path
        are stored back into self.io_dict. The out_log and err_log arguments
        are accepted but not read; the instance logger self.out_log is used.
        """
        self.io_dict["in"]["input_pdb_path"] = check_input_path(
            self.io_dict["in"]["input_pdb_path"],
            "input_pdb_path",
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_pdb_path"] = check_output_path(
            self.io_dict["out"]["output_pdb_path"],
            "output_pdb_path",
            False,
            self.out_log,
            self.__class__.__name__,
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`BoxResidues <utils.box_residues.BoxResidues>` utils.box_residues.BoxResidues object.

        Returns:
            int: 0 when the box has been written, or when check_restart()
            reports that nothing has to be executed.

        Raises:
            SystemExit: if none of the residues in 'resid_list' is found in
            the input structure.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        input_pdb_path = self.io_dict["in"]["input_pdb_path"]

        # Parse structure
        fu.log(
            "Loading input PDB structure %s" % (input_pdb_path),
            self.out_log,
            self.global_log,
        )
        structure_name = PurePath(input_pdb_path).name
        parser = Bio.PDB.PDBParser(QUIET=True)
        model = parser.get_structure(structure_name, input_pdb_path)

        # Work on the model stored under id 0 whenever the structure is not empty
        if len(model):
            model = model[0]

        # Mapping residue structure into input structure
        fu.log(
            "Mapping residue structure into input structure",
            self.out_log,
            self.global_log,
        )

        # Residue ids to be selected, written as the (hetero flag, number,
        # insertion code) tuples that are compared against get_id() below.
        wanted_ids = [
            (" ", resid + self.residue_offset if self.residue_offset else resid, " ")
            for resid in self.resid_list
        ]
        # Set used only for the membership test; the list keeps the count.
        wanted_lookup = set(wanted_ids)

        selection_res_list = [
            residue
            for chain in model
            for residue in chain
            if residue.get_id() in wanted_lookup
        ]

        resid_txt = ", ".join(str(v) for v in self.resid_list)
        if not selection_res_list:
            error_msg = (
                self.__class__.__name__ + ": Cannot match any of the residues listed in [%s] into %s"
                % (resid_txt, input_pdb_path)
            )
            fu.log(error_msg, self.out_log, self.global_log)
            raise SystemExit(error_msg)
        elif len(selection_res_list) != len(wanted_ids):
            # Partial match: keep going with whatever was found.
            fu.log(
                "Cannot match all the residues listed in %s into %s. Found %s out of %s"
                % (
                    resid_txt,
                    input_pdb_path,
                    len(selection_res_list),
                    len(wanted_ids),
                ),
                self.out_log,
                self.global_log,
            )
        else:
            fu.log(
                "Selection of residues successfully matched",
                self.out_log,
                self.global_log,
            )

        # Compute binding site box size

        # Coordinates of every atom of the selection, gathered once
        atom_coords = [
            atom.coord for residue in selection_res_list for atom in residue.get_atoms()
        ]

        # compute box center (mean of the atom coordinates)
        selection_box_center = sum(atom_coords) / len(atom_coords)
        fu.log(
            "Binding site center (Angstroms): %10.3f%10.3f%10.3f"
            % (
                selection_box_center[0],
                selection_box_center[1],
                selection_box_center[2],
            ),
            self.out_log,
            self.global_log,
        )

        # compute box size
        # NOTE(review): the size is measured from the center to the per-axis
        # maximum only; atoms on the minimum side could lie further away from
        # the center - confirm this is the intended definition.
        selection_coords_max = np.amax(atom_coords, axis=0)
        selection_box_size = selection_coords_max - selection_box_center
        if self.offset:
            selection_box_size = [c + self.offset for c in selection_box_size]
        fu.log(
            "Binding site size (Angstroms):   %10.3f%10.3f%10.3f"
            % (selection_box_size[0], selection_box_size[1], selection_box_size[2]),
            self.out_log,
            self.global_log,
        )

        # compute volume: the size is applied on both sides of the center,
        # hence the factor 2 per axis
        vol = np.prod(selection_box_size) * 2**3
        fu.log("Volume (cubic Angstroms): %.0f" % (vol), self.out_log, self.global_log)

        # add box details as PDB remarks
        remarks = "REMARK BOX CENTER:%10.3f%10.3f%10.3f" % (
            selection_box_center[0],
            selection_box_center[1],
            selection_box_center[2],
        )
        remarks += " SIZE:%10.3f%10.3f%10.3f" % (
            selection_box_size[0],
            selection_box_size[1],
            selection_box_size[2],
        )

        selection_box_coords_txt = ""
        # add (optional) box coordinates as 8 ATOM records
        if self.box_coordinates:
            fu.log("Adding box coordinates", self.out_log, self.global_log)
            selection_box_coords_txt = get_box_coordinates(
                selection_box_center, selection_box_size
            )

        with open(self.io_dict["out"]["output_pdb_path"], "w") as f:
            f.write(remarks.rstrip("\r\n") + "\n" + selection_box_coords_txt)

        fu.log(
            "Saving output PDB file (with box setting annotations): %s"
            % (self.io_dict["out"]["output_pdb_path"]),
            self.out_log,
            self.global_log,
        )

        # Copy files to host
        self.copy_to_host()
        self.remove_tmp_files()

        return 0

271 

272 

def box_residues(
    input_pdb_path: str,
    output_pdb_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`BoxResidues <utils.box_residues.BoxResidues>` object and
    run its :meth:`launch() <utils.box_residues.BoxResidues.launch>` method,
    returning the value that launch() returns."""
    # The extra keyword arguments are handed over as a single 'kwargs' entry,
    # exactly as the argument names of this function map to their values.
    constructor_args = {
        "input_pdb_path": input_pdb_path,
        "output_pdb_path": output_pdb_path,
        "properties": properties,
        "kwargs": kwargs,
    }
    building_block = BoxResidues(**constructor_args)
    return building_block.launch()

282 

283 

# Expose the class documentation on the function wrapper as well.
box_residues.__doc__ = BoxResidues.__doc__
# Entry point obtained from BoxResidues.get_main for the wrapper function and
# the tool description (presumably the command line interface mentioned in the
# module docstring - confirm against BiobbObject.get_main).
main = BoxResidues.get_main(box_residues, "Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB.")


# Run the entry point only when the module is executed as a script.
if __name__ == "__main__":
    main()