Coverage for biobb_cmip / cmip / cmip_run.py: 52%

137 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-22 14:29 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Cmip class and the command line interface.""" 

4import os 

5import json 

6from typing import Optional 

7from typing import Any 

8import shutil 

9from pathlib import Path, PurePath 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13from biobb_cmip.cmip.common import create_params_file 

14from biobb_cmip.cmip.common import params_preset 

15from biobb_cmip.cmip.common import get_grid 

16 

17 

class CmipRun(BiobbObject):
    """
    | biobb_cmip Titration
    | Wrapper class for the CMIP cmip module.
    | The CMIP cmip module. CMIP cmip module compute classical molecular interaction potentials.

    Args:
        input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/data/cmip/RBD-hACE2.hACE2.cmip.pdb>`_. Accepted formats: pdb (edam:format_1476).
        input_probe_pdb_path (str) (Optional): Path to the input probe file in PDB format. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/data/cmip/RBD-hACE2.RBD.cmip.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str) (Optional): Path to the output PDB file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/reference/cmip/RBD.energies.byat.out>`_. Accepted formats: pdb (edam:format_1476).
        output_grd_path (str) (Optional): Path to the output grid file in GRD format. File type: output. Accepted formats: grd (edam:format_2330).
        output_cube_path (str) (Optional): Path to the output grid file in cube format. File type: output. Accepted formats: cube (edam:format_2330).
        output_rst_path (str) (Optional): Path to the output restart file. File type: output. Accepted formats: txt (edam:format_2330).
        input_rst_path (str) (Optional): Path to the input restart file. File type: input. Accepted formats: txt (edam:format_2330).
        output_byat_path (str) (Optional): Path to the output atom by atom energy file. File type: output. Accepted formats: txt (edam:format_2330), out (edam:format_2330).
        output_log_path (str) (Optional): Path to the output CMIP log file LOG. File type: output. Accepted formats: log (edam:format_2330).
        input_vdw_params_path (str) (Optional): Path to the CMIP input Van der Waals force parameters, if not provided the CMIP conda installation one is used ("$CONDA_PREFIX/share/cmip/dat/vdwprm"). File type: input. Accepted formats: txt (edam:format_2330).
        input_params_path (str) (Optional): Path to the CMIP input parameters file. File type: input. Accepted formats: txt (edam:format_2330).
        output_json_box_path (str) (Optional): Path to the output CMIP box in JSON format. File type: output. Accepted formats: json (edam:format_3464).
        output_json_external_box_path (str) (Optional): Path to the output external CMIP box in JSON format. File type: output. Accepted formats: json (edam:format_3464).
        input_json_box_path (str) (Optional): Path to the input CMIP box in JSON format. File type: input. Accepted formats: json (edam:format_3464).
        input_json_external_box_path (str) (Optional): Path to the input CMIP box in JSON format. File type: input. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **execution_type** (*str*) - ("mip_pos") Default options for the params file, each one creates a different params file. Values: check_only (Dry Run of CMIP), mip_pos (MIP O+ Mehler Solmajer dielectric), mip_neg (MIP O- Mehler Solmajer dielectric), mip_neu (MIP Oxygen Mehler Solmajer dielectric), solvation (Solvation & MEP), pb_interaction_energy (Docking Interaction energy calculation. PB electrostatics), docking (Docking Mehler Solmajer dielectric), docking_rst (Docking from restart file).
            * **params** (*dict*) - ({}) CMIP options specification.
            * **binary_path** (*str*) - ("cmip") Path to the CMIP cmip executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("cmip/cmip:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.


    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_cmip.cmip.cmip_run import cmip_run
            prop = { 'binary_path': 'cmip' }
            cmip_run(input_pdb_path='/path/to/myStructure.pdb',
                     output_pdb_path='/path/to/newStructure.pdb',
                     output_log_path='/path/to/newStructureLog.log',
                     properties=prop)

    Info:
        * wrapped_software:
            * name: CMIP cmip
            * version: 2.7.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_pdb_path: str, input_probe_pdb_path: Optional[str] = None, output_pdb_path: Optional[str] = None,
                 output_grd_path: Optional[str] = None, output_cube_path: Optional[str] = None, output_rst_path: Optional[str] = None,
                 input_rst_path: Optional[str] = None, output_byat_path: Optional[str] = None, output_log_path: Optional[str] = None,
                 input_vdw_params_path: Optional[str] = None, input_params_path: Optional[str] = None, output_json_box_path: Optional[str] = None,
                 output_json_external_box_path: Optional[str] = None, input_json_box_path: Optional[str] = None,
                 input_json_external_box_path: Optional[str] = None, properties: Optional[dict] = None, **kwargs) -> None:
        """Store the input/output paths and the tool properties, then validate them.

        See the class docstring for the meaning of every path and property.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; it must be taken before any
        # other local name is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path, "input_probe_pdb_path": input_probe_pdb_path,
                   "input_vdw_params_path": input_vdw_params_path, "input_params_path": input_params_path,
                   "input_json_box_path": input_json_box_path,
                   "input_json_external_box_path": input_json_external_box_path,
                   "input_rst_path": input_rst_path},
            "out": {"output_pdb_path": output_pdb_path, "output_grd_path": output_grd_path,
                    "output_cube_path": output_cube_path, "output_rst_path": output_rst_path,
                    "output_byat_path": output_byat_path, "output_log_path": output_log_path,
                    "output_json_box_path": output_json_box_path,
                    "output_json_external_box_path": output_json_external_box_path}
        }

        # Properties specific for BB
        self.binary_path = properties.get('binary_path', 'cmip')
        self.execution_type = properties.get('execution_type', 'mip_pos')
        # Every user supplied CMIP option is stored as a string.
        self.params = {k: str(v) for k, v in properties.get('params', {}).items()}

        # Fall back to the Van der Waals parameters of the conda installation.
        if not self.io_dict['in'].get('input_vdw_params_path'):
            self.io_dict['in']['input_vdw_params_path'] = f"{os.environ.get('CONDA_PREFIX')}/share/cmip/dat/vdwprm"
        # File name of the combined params file; launch() turns it into a full path.
        self.io_dict['in']['combined_params_path'] = properties.get('combined_params_path', 'params')

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _write_box_json(self, json_path: str, external: bool = False) -> None:
        """Write the CMIP box read from the staged CMIP log file as JSON.

        Args:
            json_path (str): Destination JSON file.
            external (bool): When True, ``get_grid`` is called with ``True`` as
                its second argument (used for the external box output).
        """
        log_path = self.stage_io_dict["out"]["output_log_path"]
        # NOTE(review): this reads output_log_path, so a box output presumably
        # requires output_log_path to be provided as well -- confirm.
        if external:
            _, _, grid_params = get_grid(log_path, True)
        else:
            _, _, grid_params = get_grid(log_path)

        grid_params['DIM'] = tuple(int(grid_params['DIM'][i]) for i in range(3))
        size_dict = {axis: round(grid_params['DIM'][i] * grid_params['INT'][i], 3)
                     for i, axis in enumerate('xyz')}
        # Each origin coordinate is the centre of ITS OWN axis minus half the
        # size (the z origin used to be derived from the x centre).
        origin_dict = {axis: round(grid_params['CEN'][i] - size_dict[axis] / 2, 3)
                       for i, axis in enumerate('xyz')}
        grid_dict = {'origin': origin_dict,
                     'size': size_dict,
                     'params': grid_params}
        with open(json_path, 'w') as json_file:
            json_file.write(json.dumps(grid_dict, indent=4))

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CmipRun` object.

        Builds the combined CMIP params file, stages the files, runs the
        ``cmip`` command and post-processes its outputs (PDB name and record
        tags, JSON boxes).

        Returns:
            int: ``0`` when ``check_restart()`` is true, otherwise
            ``self.return_code`` after the run.

        Raises:
            ValueError: If ``output_pdb_path`` does not end in ``.pdb`` or its
                file name contains an underscore.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Check if output_pdb_path ends with ".pdb" and does not contain underscores
        requested_pdb = self.io_dict['out']['output_pdb_path']
        if requested_pdb:
            if (not requested_pdb.endswith('.pdb')) or ("_" in str(Path(requested_pdb).name)):
                error_msg = (f"ERROR: output_pdb_path ({requested_pdb}) "
                             f"name must end in .pdb and not contain underscores")
                fu.log(error_msg, self.out_log, self.global_log)
                raise ValueError(error_msg)

        params_preset_dict: dict[str, Any] = params_preset(execution_type=self.execution_type)

        # External box read from JSON (CMIP keys carry a "0" suffix)
        if self.io_dict['in']["input_json_external_box_path"]:
            params_preset_dict["readgrid0"] = 0
            _, _, grid_params = get_grid(self.io_dict['in']["input_json_external_box_path"])
            params_preset_dict['grid_int0'] = \
                f"INTX0={grid_params['INT'][0]},INTY0={grid_params['INT'][1]},INTZ0={grid_params['INT'][2]}"
            params_preset_dict['grid_cen0'] = \
                f"CENX0={grid_params['CEN'][0]},CENY0={grid_params['CEN'][1]},CENZ0={grid_params['CEN'][2]}"
            params_preset_dict['grid_dim0'] = \
                f"DIMX0={grid_params['DIM'][0]},DIMY0={grid_params['DIM'][1]},DIMZ0={grid_params['DIM'][2]}"

        # Box read from JSON
        if self.io_dict['in']["input_json_box_path"]:
            params_preset_dict["readgrid"] = 0
            _, _, grid_params = get_grid(self.io_dict['in']["input_json_box_path"])
            params_preset_dict['grid_int'] = \
                f"INTX={grid_params['INT'][0]},INTY={grid_params['INT'][1]},INTZ={grid_params['INT'][2]}"
            params_preset_dict['grid_cen'] = \
                f"CENX={grid_params['CEN'][0]},CENY={grid_params['CEN'][1]},CENZ={grid_params['CEN'][2]}"
            params_preset_dict['grid_dim'] = \
                f"DIMX={grid_params['DIM'][0]},DIMY={grid_params['DIM'][1]},DIMZ={grid_params['DIM'][2]}"

        # A JSON box output needs the key/value log, written to a temporary directory
        if self.io_dict['out']['output_json_box_path'] or self.io_dict['out']['output_json_external_box_path']:
            params_preset_dict['WRITELOG'] = 1
            key_value_log_dir = fu.create_unique_dir()
            self.io_dict['out']['key_value_log_path'] = str(Path(key_value_log_dir).joinpath("key_value_cmip_log.log"))
            self.tmp_files.append(key_value_log_dir)

        # Restart OUT
        if self.io_dict["out"].get("output_rst_path"):
            params_preset_dict['FULLRST'] = 1  # type: ignore
            params_preset_dict['OREST'] = 1

        # Restart IN
        if self.io_dict['in']["input_rst_path"]:
            params_preset_dict['IREST'] = 2
            if not self.io_dict["out"].get("output_rst_path"):
                self.io_dict["out"]["output_rst_path"] = fu.create_unique_file_path()
            # Only one restart file is passed to cmip (-rst), so the input
            # restart is copied onto the output restart path.
            shutil.copy2(self.io_dict['in']["input_rst_path"], self.io_dict["out"]["output_rst_path"])
        else:
            params_preset_dict['IREST'] = 0

        combined_params_dir = fu.create_unique_dir()
        self.io_dict['in']['combined_params_path'] = create_params_file(
            output_params_path=str(Path(combined_params_dir).joinpath(self.io_dict['in']['combined_params_path'])),
            input_params_path=self.io_dict['in'].get('input_params_path'),
            params_preset_dict=params_preset_dict,
            params_properties_dict=self.params)

        self.stage_files()

        if self.container_path:
            working_dir = self.container_volume_path if self.container_volume_path else "/data"
        else:
            working_dir = self.stage_io_dict["unique_dir"]

        staged_in = self.stage_io_dict['in']
        staged_out = self.stage_io_dict['out']

        # cmip is run from inside the working directory, so every file is
        # referenced by its bare name.
        self.cmd = ["cd",
                    working_dir,
                    ";",
                    self.binary_path,
                    '-i', PurePath(staged_in['combined_params_path']).name,
                    '-vdw', PurePath(staged_in['input_vdw_params_path']).name,
                    '-hs', PurePath(staged_in['input_pdb_path']).name]

        if staged_in.get("input_probe_pdb_path") and Path(
                self.io_dict["in"].get("input_probe_pdb_path", "")).exists():
            self.cmd.append('-pr')
            self.cmd.append(PurePath(staged_in.get("input_probe_pdb_path")).name)

        # Optional outputs, in the order their flags are appended to the command
        optional_output_flags = (
            ("output_pdb_path", '-outpdb'),
            ("output_grd_path", '-grdout'),
            ("output_cube_path", '-cube'),
            ("output_rst_path", '-rst'),
            ("output_byat_path", '-byat'),
            ("output_log_path", '-o'),
        )
        for io_key, flag in optional_output_flags:
            if staged_out.get(io_key):
                self.cmd.append(flag)
                self.cmd.append(PurePath(staged_out[io_key]).name)

        if staged_out.get('output_json_box_path') or staged_out.get('output_json_external_box_path'):
            self.cmd.append('-l')
            self.cmd.append(PurePath(staged_out["key_value_log_path"]).name)

        # Run Biobb block
        self.run_biobb()

        # CMIP removes or adds a .pdb extension from pdb output name
        # manual copy_to_host or unstage
        if self.io_dict['out'].get('output_pdb_path'):
            staged_pdb = str(Path(self.stage_io_dict["unique_dir"]).joinpath(Path(self.io_dict['out'].get('output_pdb_path', '')).name))
            if Path(staged_pdb[:-4]).exists():
                shutil.move(staged_pdb[:-4], self.io_dict['out'].get('output_pdb_path', ''))
            elif Path(staged_pdb + ".pdb").exists():
                shutil.move(staged_pdb + ".pdb", self.io_dict['out'].get('output_pdb_path', ''))
            elif not Path(staged_pdb).exists():
                fu.log(f"WARNING: File not found output_pdb_path: {staged_pdb}", self.out_log, self.global_log)

        # Replace "ATOMTM" tag for "ATOM  "
        output_pdb_path = self.io_dict['out'].get('output_pdb_path', '')
        if output_pdb_path:
            if Path(output_pdb_path).exists():
                with open(output_pdb_path) as pdb_file:
                    list_pdb_lines = pdb_file.readlines()
                with open(output_pdb_path, 'w') as pdb_file:
                    for line in list_pdb_lines:
                        # The PDB record name occupies 6 columns: the
                        # replacement keeps that width so no column shifts.
                        pdb_file.write(line.replace('ATOMTM', 'ATOM  '))
            else:
                fu.log(f"WARNING: File not found output_pdb_path: {output_pdb_path} Abs Path: {Path(output_pdb_path).resolve()}", self.out_log, self.global_log)

        # Create json_box_path file from CMIP log file
        if self.io_dict['out'].get('output_json_box_path'):
            self._write_box_json(self.io_dict['out'].get('output_json_box_path', ''))

        # Create external_json_box_path file from CMIP log file
        if self.io_dict['out'].get('output_json_external_box_path'):
            self._write_box_json(self.io_dict['out'].get('output_json_external_box_path', ''), external=True)

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s)
        self.tmp_files.append(combined_params_dir)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

302 

303 

def cmip_run(input_pdb_path: str, input_probe_pdb_path: Optional[str] = None, output_pdb_path: Optional[str] = None,
             output_grd_path: Optional[str] = None, output_cube_path: Optional[str] = None, output_rst_path: Optional[str] = None,
             output_byat_path: Optional[str] = None, output_log_path: Optional[str] = None,
             input_vdw_params_path: Optional[str] = None, input_params_path: Optional[str] = None, output_json_box_path: Optional[str] = None,
             output_json_external_box_path: Optional[str] = None, input_json_box_path: Optional[str] = None,
             input_json_external_box_path: Optional[str] = None, properties: Optional[dict] = None,
             input_rst_path: Optional[str] = None, **kwargs) -> int:
    """Create a :class:`CmipRun` object and execute its
    :meth:`launch() <CmipRun.launch>` method.

    ``input_rst_path`` is accepted after ``properties`` so that existing
    positional calls keep their meaning; without it the input restart file
    could not be handed to :class:`CmipRun` through this function.

    Returns:
        int: The value returned by ``CmipRun.launch()``.
    """
    # locals() must be read before any other local name is created: it holds
    # exactly the arguments, which are forwarded by keyword to the class.
    return CmipRun(**dict(locals())).launch()

313 

314 

# Give the function the class docstring, replacing the one written on the def.
cmip_run.__doc__ = CmipRun.__doc__
# Entry point obtained from CmipRun.get_main for cmip_run; the string is the
# description passed to it.
main = CmipRun.get_main(cmip_run, "Wrapper of the CMIP cmip module.")

# Run the entry point only when the module is executed as a script.
if __name__ == '__main__':
    main()