Coverage for biobb_cmip/cmip/cmip_run.py: 45%

157 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 09:52 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Cmip class and the command line interface.""" 

4import os 

5import json 

6import argparse 

7from typing import Optional 

8from typing import Any 

9import shutil 

10from pathlib import Path 

11from biobb_common.generic.biobb_object import BiobbObject 

12from biobb_common.configuration import settings 

13from biobb_common.tools import file_utils as fu 

14from biobb_common.tools.file_utils import launchlogger 

15from biobb_cmip.cmip.common import create_params_file 

16from biobb_cmip.cmip.common import params_preset 

17from biobb_cmip.cmip.common import get_grid 

18 

19 

class CmipRun(BiobbObject):
    """
    | biobb_cmip CmipRun
    | Wrapper class for the CMIP cmip module.
    | The CMIP cmip module. CMIP cmip module compute classical molecular interaction potentials.

    Args:
        input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/data/cmip/1kim_h.pdb>`_. Accepted formats: pdb (edam:format_1476).
        input_probe_pdb_path (str) (Optional): Path to the input probe file in PDB format. File type: input. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str) (Optional): Path to the output PDB file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/reference/cmip/1kim_neutral.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_grd_path (str) (Optional): Path to the output grid file in GRD format. File type: output. Accepted formats: grd (edam:format_2330).
        output_cube_path (str) (Optional): Path to the output grid file in cube format. File type: output. Accepted formats: cube (edam:format_2330).
        output_rst_path (str) (Optional): Path to the output restart file. File type: output. Accepted formats: txt (edam:format_2330).
        input_rst_path (str) (Optional): Path to the input restart file. File type: input. Accepted formats: txt (edam:format_2330).
        output_byat_path (str) (Optional): Path to the output atom by atom energy file. File type: output. Accepted formats: txt (edam:format_2330), out (edam:format_2330).
        output_log_path (str) (Optional): Path to the output CMIP log file LOG. File type: output. `Sample file <https://github.com/bioexcel/biobb_cmip/raw/master/biobb_cmip/test/reference/cmip/ref_cmip.log>`_. Accepted formats: log (edam:format_2330).
        input_vdw_params_path (str) (Optional): Path to the CMIP input Van der Waals force parameters, if not provided the CMIP conda installation one is used ("$CONDA_PREFIX/share/cmip/dat/vdwprm"). File type: input. Accepted formats: txt (edam:format_2330).
        input_params_path (str) (Optional): Path to the CMIP input parameters file. File type: input. Accepted formats: txt (edam:format_2330).
        output_json_box_path (str) (Optional): Path to the output CMIP box in JSON format. File type: output. `Sample file <https://github.com/bioexcel/biobb_cmip/raw/master/biobb_cmip/test/reference/cmip/ref_box.json>`_. Accepted formats: json (edam:format_3464).
        output_json_external_box_path (str) (Optional): Path to the output external CMIP box in JSON format. File type: output. `Sample file <https://github.com/bioexcel/biobb_cmip/raw/master/biobb_cmip/test/reference/cmip/ref_box.json>`_. Accepted formats: json (edam:format_3464).
        input_json_box_path (str) (Optional): Path to the input CMIP box in JSON format. File type: input. `Sample file <https://github.com/bioexcel/biobb_cmip/raw/master/biobb_cmip/test/reference/cmip/ref_box.json>`_. Accepted formats: json (edam:format_3464).
        input_json_external_box_path (str) (Optional): Path to the input CMIP box in JSON format. File type: input. `Sample file <https://github.com/bioexcel/biobb_cmip/raw/master/biobb_cmip/test/reference/cmip/ref_box.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **execution_type** (*str*) - ("mip_pos") Default options for the params file, each one creates a different params file. Values: check_only (Dry Run of CMIP), mip_pos (MIP O+ Mehler Solmajer dielectric), mip_neg (MIP O- Mehler Solmajer dielectric), mip_neu (MIP Oxygen Mehler Solmajer dielectric), solvation (Solvation & MEP), pb_interaction_energy (Docking Interaction energy calculation. PB electrostatics), docking (Docking Mehler Solmajer dielectric), docking_rst (Docking from restart file).
            * **params** (*dict*) - ({}) CMIP options specification.
            * **binary_path** (*str*) - ("cmip") Path to the CMIP cmip executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("cmip/cmip:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.


    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_cmip.cmip.cmip_run import cmip_run
            prop = { 'binary_path': 'cmip' }
            cmip_run(input_pdb_path='/path/to/myStructure.pdb',
                     output_pdb_path='/path/to/newStructure.pdb',
                     output_log_path='/path/to/newStructureLog.log',
                     properties=prop)

    Info:
        * wrapped_software:
            * name: CMIP cmip
            * version: 2.7.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_pdb_path: str, input_probe_pdb_path: Optional[str] = None, output_pdb_path: Optional[str] = None,
                 output_grd_path: Optional[str] = None, output_cube_path: Optional[str] = None, output_rst_path: Optional[str] = None,
                 input_rst_path: Optional[str] = None, output_byat_path: Optional[str] = None, output_log_path: Optional[str] = None,
                 input_vdw_params_path: Optional[str] = None, input_params_path: Optional[str] = None, output_json_box_path: Optional[str] = None,
                 output_json_external_box_path: Optional[str] = None, input_json_box_path: Optional[str] = None,
                 input_json_external_box_path: Optional[str] = None, properties: Optional[dict] = None, **kwargs) -> None:
        """Store the input/output paths and the block-specific properties."""

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # other local variable is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path, "input_probe_pdb_path": input_probe_pdb_path,
                   "input_vdw_params_path": input_vdw_params_path, "input_params_path": input_params_path,
                   "input_json_box_path": input_json_box_path,
                   "input_json_external_box_path": input_json_external_box_path,
                   "input_rst_path": input_rst_path},
            "out": {"output_pdb_path": output_pdb_path, "output_grd_path": output_grd_path,
                    "output_cube_path": output_cube_path, "output_rst_path": output_rst_path,
                    "output_byat_path": output_byat_path, "output_log_path": output_log_path,
                    "output_json_box_path": output_json_box_path,
                    "output_json_external_box_path": output_json_external_box_path}
        }

        # Properties specific for BB
        self.binary_path = properties.get('binary_path', 'cmip')
        self.execution_type = properties.get('execution_type', 'mip_pos')
        # Every user supplied CMIP option is stored as a string.
        self.params = {k: str(v) for k, v in properties.get('params', dict()).items()}

        # Fall back to the Van der Waals parameters of the conda installation.
        # NOTE(review): if CONDA_PREFIX is not set this yields "None/share/cmip/dat/vdwprm".
        if not self.io_dict['in'].get('input_vdw_params_path'):
            self.io_dict['in']['input_vdw_params_path'] = f"{os.environ.get('CONDA_PREFIX')}/share/cmip/dat/vdwprm"
        # At this point only the file name; launch() replaces it with the path
        # returned by create_params_file.
        self.io_dict['in']['combined_params_path'] = properties.get('combined_params_path', 'params')

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _grid_preset_entries(grid_params: dict, suffix: str = '') -> dict[str, Any]:
        """Build the params-preset entries describing a grid read from a JSON box.

        Args:
            grid_params: Mapping with the 'INT', 'CEN' and 'DIM' triplets as returned by get_grid.
            suffix: Text appended to every key and CMIP keyword ('0' for the external grid).

        Returns:
            Dictionary with the ``readgrid``, ``grid_int``, ``grid_cen`` and ``grid_dim`` entries.
        """
        entries: dict[str, Any] = {f"readgrid{suffix}": 0}
        for keyword in ('INT', 'CEN', 'DIM'):
            entries[f"grid_{keyword.lower()}{suffix}"] = ",".join(
                f"{keyword}{axis}{suffix}={grid_params[keyword][index]}" for index, axis in enumerate("XYZ"))
        return entries

    @staticmethod
    def _box_from_grid_params(grid_params: dict) -> dict[str, Any]:
        """Convert CMIP grid parameters into the origin/size/params box dictionary.

        The size of each axis is DIM * INT and the origin is CEN - size / 2,
        both rounded to 3 decimals. The DIM values are stored as integers.

        Args:
            grid_params: Mapping with the 'INT', 'CEN' and 'DIM' triplets as returned by get_grid.

        Returns:
            Dictionary with the 'origin', 'size' and 'params' keys.
        """
        params = dict(grid_params)
        params['DIM'] = (int(grid_params['DIM'][0]),
                         int(grid_params['DIM'][1]),
                         int(grid_params['DIM'][2]))
        size_dict = {}
        origin_dict = {}
        for index, axis in enumerate(('x', 'y', 'z')):
            size_dict[axis] = round(params['DIM'][index] * params['INT'][index], 3)
            # Each axis uses its own centre coordinate (z previously reused the x centre).
            origin_dict[axis] = round(params['CEN'][index] - size_dict[axis] / 2, 3)
        return {'origin': origin_dict,
                'size': size_dict,
                'params': params}

    def _write_json_box(self, json_box_path: str, external: bool = False) -> None:
        """Write the box read from the staged CMIP log file as a JSON file.

        Args:
            json_box_path: Path of the JSON file to create.
            external: When True get_grid is called with its second argument set to True.
        """
        log_path = self.stage_io_dict["out"]["output_log_path"]
        if external:
            _, _, grid_params = get_grid(log_path, True)
        else:
            _, _, grid_params = get_grid(log_path)
        with open(json_box_path, 'w') as json_file:
            json_file.write(json.dumps(self._box_from_grid_params(grid_params), indent=4))

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CmipRun <cmip.cmip_run.CmipRun>` object.

        Returns:
            The return code stored in ``self.return_code`` (0 when the restart check skips the run).

        Raises:
            ValueError: If output_pdb_path does not end in ".pdb" or its file name contains underscores.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Check if output_pdb_path ends with ".pdb" and does not contain underscores
        requested_pdb_path = self.io_dict['out']['output_pdb_path']
        if requested_pdb_path:
            if (not requested_pdb_path.endswith('.pdb')) or ("_" in str(Path(requested_pdb_path).name)):
                error_message = (f"ERROR: output_pdb_path ({requested_pdb_path}) "
                                 f"name must end in .pdb and not contain underscores")
                fu.log(error_message, self.out_log, self.global_log)
                raise ValueError(error_message)

        params_preset_dict: dict[str, Any] = params_preset(execution_type=self.execution_type)

        # Grids read from JSON boxes: the external box uses the "0" suffixed keywords
        if self.io_dict['in']["input_json_external_box_path"]:
            _, _, grid_params = get_grid(self.io_dict['in']["input_json_external_box_path"])
            params_preset_dict.update(self._grid_preset_entries(grid_params, suffix='0'))

        if self.io_dict['in']["input_json_box_path"]:
            _, _, grid_params = get_grid(self.io_dict['in']["input_json_box_path"])
            params_preset_dict.update(self._grid_preset_entries(grid_params))

        if self.io_dict['out']['output_json_box_path'] or self.io_dict['out']['output_json_external_box_path']:
            params_preset_dict['WRITELOG'] = 1
            key_value_log_dir = fu.create_unique_dir()
            self.io_dict['out']['key_value_log_path'] = str(Path(key_value_log_dir).joinpath("key_value_cmip_log.log"))
            self.tmp_files.append(key_value_log_dir)

        # Restart OUT
        if self.io_dict["out"].get("output_rst_path"):
            params_preset_dict['FULLRST'] = 1  # type: ignore
            params_preset_dict['OREST'] = 1

        # Restart IN
        if self.io_dict['in']["input_rst_path"]:
            params_preset_dict['IREST'] = 2
            if not self.io_dict["out"].get("output_rst_path"):
                self.io_dict["out"]["output_rst_path"] = fu.create_unique_file_path()
            # The input restart file is copied onto the path later passed to "-rst"
            shutil.copy2(self.io_dict['in']["input_rst_path"], self.io_dict["out"]["output_rst_path"])
        else:
            params_preset_dict['IREST'] = 0

        combined_params_dir = fu.create_unique_dir()
        self.io_dict['in']['combined_params_path'] = create_params_file(
            output_params_path=str(Path(combined_params_dir).joinpath(self.io_dict['in']['combined_params_path'])),
            input_params_path=self.io_dict['in'].get('input_params_path'),
            params_preset_dict=params_preset_dict,
            params_properties_dict=self.params)

        self.stage_files()

        staged_in = self.stage_io_dict['in']
        staged_out = self.stage_io_dict['out']

        self.cmd = [self.binary_path,
                    '-i', staged_in['combined_params_path'],
                    '-vdw', staged_in['input_vdw_params_path'],
                    '-hs', staged_in['input_pdb_path']]

        # The probe is only passed when the file given by the user exists
        staged_probe = staged_in.get("input_probe_pdb_path")
        if staged_probe and Path(self.io_dict["in"].get("input_probe_pdb_path", "")).exists():
            self.cmd.extend(['-pr', staged_probe])

        # Optional outputs, in the order their flags are appended to the command line
        for flag, io_key in (('-outpdb', 'output_pdb_path'),
                             ('-grdout', 'output_grd_path'),
                             ('-cube', 'output_cube_path'),
                             ('-rst', 'output_rst_path'),
                             ('-byat', 'output_byat_path'),
                             ('-o', 'output_log_path')):
            staged_path = staged_out.get(io_key)
            if staged_path:
                self.cmd.extend([flag, staged_path])

        if staged_out.get('output_json_box_path') or staged_out.get('output_json_external_box_path'):
            self.cmd.extend(['-l', staged_out["key_value_log_path"]])

        # Run Biobb block
        self.run_biobb()

        # CMIP removes or adds a .pdb extension from pdb output name
        # manual copy_to_host or unstage
        host_pdb_path = self.io_dict['out'].get('output_pdb_path', '')
        if host_pdb_path:
            staged_pdb_path = str(Path(self.stage_io_dict["unique_dir"]).joinpath(Path(host_pdb_path).name))
            if Path(staged_pdb_path[:-4]).exists():
                shutil.move(staged_pdb_path[:-4], host_pdb_path)
            elif Path(staged_pdb_path + ".pdb").exists():
                shutil.move(staged_pdb_path + ".pdb", host_pdb_path)
            elif not Path(staged_pdb_path).exists():
                fu.log(f"WARNING: File not found output_pdb_path: {staged_pdb_path}", self.out_log, self.global_log)

        # Replace "ATOMTM" tag for "ATOM  ": the PDB record name field is 6 columns
        # wide, so the replacement is padded to 6 characters to keep the columns aligned.
        if host_pdb_path:
            if Path(host_pdb_path).exists():
                with open(host_pdb_path) as pdb_file:
                    list_pdb_lines = pdb_file.readlines()
                with open(host_pdb_path, 'w') as pdb_file:
                    pdb_file.writelines(line.replace('ATOMTM', 'ATOM  ') for line in list_pdb_lines)
            else:
                fu.log(f"WARNING: File not found output_pdb_path: {host_pdb_path} Abs Path: {Path(host_pdb_path).resolve()}", self.out_log, self.global_log)

        # Create json_box_path file from CMIP log file
        if self.io_dict['out'].get('output_json_box_path'):
            self._write_json_box(self.io_dict['out'].get('output_json_box_path', ''))

        # Create external_json_box_path file from CMIP log file
        if self.io_dict['out'].get('output_json_external_box_path'):
            self._write_json_box(self.io_dict['out'].get('output_json_external_box_path', ''), external=True)

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s)
        self.tmp_files.append(combined_params_dir)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

299 

300 

def cmip_run(input_pdb_path: str, input_probe_pdb_path: Optional[str] = None, output_pdb_path: Optional[str] = None,
             output_grd_path: Optional[str] = None, output_cube_path: Optional[str] = None, output_rst_path: Optional[str] = None,
             output_byat_path: Optional[str] = None, output_log_path: Optional[str] = None,
             input_vdw_params_path: Optional[str] = None, input_params_path: Optional[str] = None, output_json_box_path: Optional[str] = None,
             output_json_external_box_path: Optional[str] = None, input_json_box_path: Optional[str] = None,
             input_json_external_box_path: Optional[str] = None, properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`CmipRun <cmip.cmip_run.CmipRun>` class and
    execute the :meth:`launch() <cmip.cmip_run.CmipRun.launch>` method.

    Any extra keyword argument (for example ``input_rst_path``) is forwarded
    unchanged to the :class:`CmipRun` constructor.

    Returns:
        The integer returned by :meth:`CmipRun.launch`.
    """

    return CmipRun(input_pdb_path=input_pdb_path, input_probe_pdb_path=input_probe_pdb_path,
                   output_pdb_path=output_pdb_path, output_grd_path=output_grd_path, output_cube_path=output_cube_path,
                   output_rst_path=output_rst_path, output_byat_path=output_byat_path, output_log_path=output_log_path,
                   input_vdw_params_path=input_vdw_params_path, input_params_path=input_params_path,
                   output_json_box_path=output_json_box_path, output_json_external_box_path=output_json_external_box_path,
                   input_json_box_path=input_json_box_path, input_json_external_box_path=input_json_external_box_path,
                   properties=properties, **kwargs).launch()


# Expose the class documentation on the function. This assignment must live at
# module level: placed after the ``return`` inside the function it never ran.
cmip_run.__doc__ = CmipRun.__doc__

319 

320 

def main():
    """Command line entry point: parse the arguments and run :func:`cmip_run`.

    The optional ``-c/--config`` value is read through ``settings.ConfReader``
    to obtain the properties dictionary; every path argument is forwarded to
    :func:`cmip_run` by keyword.
    """
    parser = argparse.ArgumentParser(description="Wrapper of the CMIP cmip module.",
                                     formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('-c', '--config', required=False, help="This file can be a YAML file, JSON file or JSON string")

    # Specific args of each building block
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('--input_pdb_path', required=True)

    # Optional path arguments. input_rst_path is accepted by CmipRun but was
    # missing from the command line, so a restart file could not be supplied.
    optional_paths = ('input_probe_pdb_path', 'output_pdb_path', 'output_grd_path', 'output_cube_path',
                      'output_rst_path', 'output_byat_path', 'output_log_path', 'input_vdw_params_path',
                      'input_params_path', 'output_json_box_path', 'output_json_external_box_path',
                      'input_json_box_path', 'input_json_external_box_path', 'input_rst_path')
    for path_name in optional_paths:
        parser.add_argument(f'--{path_name}', required=False)

    args = parser.parse_args()
    config = args.config if args.config else None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Specific call of each building block
    cmip_run(input_pdb_path=args.input_pdb_path,
             properties=properties,
             **{path_name: getattr(args, path_name) for path_name in optional_paths})

356 

357 

if __name__ == "__main__":
    # Run the command line interface only when the file is executed as a script.
    main()