Coverage for biobb_vs/utils/box_residues.py: 84%

100 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 12:00 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the BoxResidues class and the command line interface.""" 

4 

5import argparse 

6import warnings 

7from pathlib import PurePath 

8from typing import Optional 

9 

10import numpy as np 

11from Bio import BiopythonDeprecationWarning 

12from biobb_common.configuration import settings 

13from biobb_common.generic.biobb_object import BiobbObject 

14from biobb_common.tools import file_utils as fu 

15from biobb_common.tools.file_utils import launchlogger 

16 

17from biobb_vs.utils.common import ( 

18 _from_string_to_list, 

19 check_input_path, 

20 check_output_path, 

21 get_box_coordinates, 

22) 

23 

24with warnings.catch_warnings(): 

25 warnings.simplefilter("ignore", BiopythonDeprecationWarning) 

26 # try: 

27 # import Bio.SubsMat.MatrixInfo 

28 # except ImportError: 

29 import Bio.Align.substitution_matrices 

30 import Bio.pairwise2 

31 import Bio.PDB 

32 

33 

class BoxResidues(BiobbObject):
    """
    | biobb_vs BoxResidues
    | This class sets the center and the size of a rectangular parallelepiped box around a set of residues.
    | Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB. The residue identifiers that compose the selection (i.e. binding site) are provided by a property list.

    Args:
        input_pdb_path (str): PDB protein structure for which the box will be build. Its size and center will be set around the 'resid_list' property once mapped against this PDB. File type: input. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/utils/input_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str): PDB including the annotation of the box center and size as REMARKs. File type: output. `Sample file <https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/utils/ref_output_box_residues.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **resid_list** (*list*) - (None) List with all the residue numbers to form a cavity or binding site. Mandatory property.
            * **offset** (*float*) - (2.0) [0.1~1000|0.1] Extra distance (Angstroms) between the last residue atom and the box boundary.
            * **box_coordinates** (*bool*) - (False) Add box coordinates as 8 ATOM records.
            * **residue_offset** (*int*) - (0) [0~1000|1] Residue id offset.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_vs.utils.box_residues import box_residues
            prop = {
                'resid_list': [718, 743, 745, 762, 766, 796, 790, 791, 793, 794, 788],
                'offset': 2,
                'box_coordinates': True
            }
            box_residues(input_pdb_path='/path/to/myStructure.pdb',
                         output_pdb_path='/path/to/newBox.pdb',
                         properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_pdb_path, output_pdb_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the local names of this constructor. It is taken before any
        # other local variable is created, so keep this statement right here.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_pdb_path": output_pdb_path},
        }

        # Properties specific for BB
        self.resid_list = _from_string_to_list(properties.get("resid_list", []))
        self.offset = float(properties.get("offset", 2.0))
        # Documented as a boolean flag (it used to be cast with float()).
        self.box_coordinates = bool(properties.get("box_coordinates", False))
        # Documented as an integer: coerce it so numeric strings can be added to
        # the residue numbers; a missing/None value means "no offset".
        self.residue_offset = int(properties.get("residue_offset", 0) or 0)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters.

        The validated paths returned by ``check_input_path`` and
        ``check_output_path`` replace the ones stored in ``self.io_dict``.

        Args:
            out_log: Accepted for interface compatibility; the body logs through
                ``self.out_log`` and does not read this argument.
            err_log: Accepted for interface compatibility; not read by the body.
        """
        self.io_dict["in"]["input_pdb_path"] = check_input_path(
            self.io_dict["in"]["input_pdb_path"],
            "input_pdb_path",
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_pdb_path"] = check_output_path(
            self.io_dict["out"]["output_pdb_path"],
            "output_pdb_path",
            False,  # NOTE(review): presumably the "optional" flag of the helper - confirm
            self.out_log,
            self.__class__.__name__,
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`BoxResidues <utils.box_residues.BoxResidues>` utils.box_residues.BoxResidues object.

        Loads the input PDB, selects the residues listed in ``resid_list``
        (shifted by ``residue_offset``), computes the center and size of the
        box around their atoms and writes both as a REMARK line (plus the box
        coordinates when ``box_coordinates`` is set) to the output PDB.

        Returns:
            int: 0, both when the box has been written and when the restart
            check skips the execution.

        Raises:
            SystemExit: If none of the requested residues is found in the
                input structure.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        input_pdb_path = self.io_dict["in"]["input_pdb_path"]
        output_pdb_path = self.io_dict["out"]["output_pdb_path"]

        # Parse structure
        fu.log(
            "Loading input PDB structure %s" % input_pdb_path,
            self.out_log,
            self.global_log,
        )
        structure_name = PurePath(input_pdb_path).name
        pdb_parser = Bio.PDB.PDBParser(QUIET=True)
        model = pdb_parser.get_structure(structure_name, input_pdb_path)

        # Work on the first model only when the structure holds at least one
        if len(model):
            model = model[0]

        # Mapping residue structure into input structure
        fu.log(
            "Mapping residue structure into input structure",
            self.out_log,
            self.global_log,
        )

        # Residue ids to select, in the (hetero flag, number, insertion code)
        # tuple form compared against ``get_id()`` below
        if self.residue_offset:
            target_ids = [
                (" ", resid + self.residue_offset, " ") for resid in self.resid_list
            ]
        else:
            target_ids = [(" ", resid, " ") for resid in self.resid_list]
        # Set for O(1) membership tests; the list is kept for the requested count
        target_id_set = set(target_ids)

        selection = [
            residue
            for chain in model
            for residue in chain
            if residue.get_id() in target_id_set
        ]

        resid_txt = ", ".join(str(v) for v in self.resid_list)
        if not selection:
            message = (
                self.__class__.__name__
                + ": Cannot match any of the residues listed in [%s] into %s"
                % (resid_txt, input_pdb_path)
            )
            fu.log(message, self.out_log, self.global_log)
            raise SystemExit(message)
        elif len(selection) != len(target_ids):
            fu.log(
                "Cannot match all the residues listed in %s into %s. Found %s out of %s"
                % (resid_txt, input_pdb_path, len(selection), len(target_ids)),
                self.out_log,
                self.global_log,
            )
        else:
            fu.log(
                "Selection of residues successfully matched",
                self.out_log,
                self.global_log,
            )

        # Compute binding site box size

        # Coordinates of every atom of the selection, collected once and reused
        # for both the center and the size
        atom_coords = [
            atom.coord for residue in selection for atom in residue.get_atoms()
        ]

        # compute box center (mean of the atom coordinates)
        selection_box_center = sum(atom_coords) / len(atom_coords)
        fu.log(
            "Binding site center (Angstroms): %10.3f%10.3f%10.3f"
            % (
                selection_box_center[0],
                selection_box_center[1],
                selection_box_center[2],
            ),
            self.out_log,
            self.global_log,
        )

        # compute box size: distance from the center to the largest coordinate
        # on each axis, enlarged by the offset
        selection_coords_max = np.amax(atom_coords, axis=0)
        selection_box_size = selection_coords_max - selection_box_center
        if self.offset:
            selection_box_size = [c + self.offset for c in selection_box_size]
        fu.log(
            "Binding site size (Angstroms):   %10.3f%10.3f%10.3f"
            % (selection_box_size[0], selection_box_size[1], selection_box_size[2]),
            self.out_log,
            self.global_log,
        )

        # compute volume: the size is measured from the center, so every edge
        # is twice the size (hence the 2**3 factor)
        vol = np.prod(selection_box_size) * 2**3
        fu.log("Volume (cubic Angstroms): %.0f" % (vol), self.out_log, self.global_log)

        # add box details as PDB remarks
        remarks = "REMARK BOX CENTER:%10.3f%10.3f%10.3f" % (
            selection_box_center[0],
            selection_box_center[1],
            selection_box_center[2],
        )
        remarks += " SIZE:%10.3f%10.3f%10.3f" % (
            selection_box_size[0],
            selection_box_size[1],
            selection_box_size[2],
        )

        selection_box_coords_txt = ""
        # add (optional) box coordinates as 8 ATOM records
        if self.box_coordinates:
            fu.log("Adding box coordinates", self.out_log, self.global_log)
            selection_box_coords_txt = get_box_coordinates(
                selection_box_center, selection_box_size
            )

        with open(output_pdb_path, "w") as f:
            f.write(remarks.rstrip("\r\n") + "\n" + selection_box_coords_txt)

        fu.log(
            "Saving output PDB file (with box setting annotations): %s"
            % output_pdb_path,
            self.out_log,
            self.global_log,
        )

        # Copy files to host
        self.copy_to_host()

        self.remove_tmp_files()

        return 0

277 

278 

def box_residues(
    input_pdb_path: str,
    output_pdb_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`BoxResidues <utils.box_residues.BoxResidues>` class and
    execute the :meth:`launch() <utils.box_residues.BoxResidues.launch>` method.

    Args:
        input_pdb_path (str): Path of the input PDB structure.
        output_pdb_path (str): Path of the output PDB with the box annotation.
        properties (dict, optional): Tool properties, forwarded untouched.
        **kwargs: Extra keyword arguments, forwarded to the class constructor.

    Returns:
        int: The value returned by ``BoxResidues.launch()``.
    """

    return BoxResidues(
        input_pdb_path=input_pdb_path,
        output_pdb_path=output_pdb_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the class documentation through the function wrapper. This must run at
# module level: placed after the ``return`` inside the function body it was
# unreachable, so the docstring was never replaced.
box_residues.__doc__ = BoxResidues.__doc__

296 

297 

def main():
    """Entry point of the command line interface of this building block.

    Parses ``--config``, ``--input_pdb_path`` and ``--output_pdb_path`` from the
    command line, reads the properties from the configuration and runs
    ``box_residues`` with them.
    """
    cli = argparse.ArgumentParser(
        description="Sets the center and the size of a rectangular parallelepiped box around a selection of residues found in a given PDB.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    cli.add_argument("--config", required=False, help="Configuration file")

    # Arguments specific to this building block, all of them mandatory
    mandatory_group = cli.add_argument_group("required arguments")
    mandatory_options = (
        (
            "--input_pdb_path",
            "PDB protein structure for which the box will be build. Its size and center will be set around the 'resid_list' property once mapped against this PDB. Accepted formats: pdb.",
        ),
        (
            "--output_pdb_path",
            "PDB including the annotation of the box center and size as REMARKs. Accepted formats: pdb.",
        ),
    )
    for flag, help_text in mandatory_options:
        mandatory_group.add_argument(flag, required=True, help=help_text)

    cli_args = cli.parse_args()
    # An absent configuration is replaced by an empty JSON object literal
    config = cli_args.config or "{}"
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Run the building block with the parsed paths and properties
    box_residues(
        input_pdb_path=cli_args.input_pdb_path,
        output_pdb_path=cli_args.output_pdb_path,
        properties=properties,
    )


if __name__ == "__main__":
    main()