Coverage for biobb_cmip / cmip / cmip_run.py: 51%

134 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-05 12:09 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Cmip class and the command line interface.""" 

4import os 

5import json 

6from typing import Optional 

7from typing import Any 

8import shutil 

9from pathlib import Path, PurePath 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13from biobb_cmip.cmip.common import create_params_file 

14from biobb_cmip.cmip.common import params_preset 

15from biobb_cmip.cmip.common import get_grid 

16 

17 

class CmipRun(BiobbObject):
    """
    | biobb_cmip CmipRun
    | Wrapper class for the CMIP cmip module.
    | The CMIP cmip module computes classical molecular interaction potentials.

    Args:
        input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/data/cmip/1kim_h.pdb>`_. Accepted formats: pdb (edam:format_1476).
        input_probe_pdb_path (str) (Optional): Path to the input probe file in PDB format. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/data/cmip/RBD-hACE2.RBD.cmip.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_pdb_path (str) (Optional): Path to the output PDB file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_cmip/master/biobb_cmip/test/reference/cmip/1kim_neutral.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_grd_path (str) (Optional): Path to the output grid file in GRD format. File type: output. Accepted formats: grd (edam:format_2330).
        output_cube_path (str) (Optional): Path to the output grid file in cube format. File type: output. Accepted formats: cube (edam:format_2330).
        output_rst_path (str) (Optional): Path to the output restart file. File type: output. Accepted formats: txt (edam:format_2330).
        input_rst_path (str) (Optional): Path to the input restart file. File type: input. Accepted formats: txt (edam:format_2330).
        output_byat_path (str) (Optional): Path to the output atom by atom energy file. File type: output. Accepted formats: txt (edam:format_2330), out (edam:format_2330).
        output_log_path (str) (Optional): Path to the output CMIP log file LOG. File type: output. Accepted formats: log (edam:format_2330).
        input_vdw_params_path (str) (Optional): Path to the CMIP input Van der Waals force parameters, if not provided the CMIP conda installation one is used ("$CONDA_PREFIX/share/cmip/dat/vdwprm"). File type: input. Accepted formats: txt (edam:format_2330).
        input_params_path (str) (Optional): Path to the CMIP input parameters file. File type: input. Accepted formats: txt (edam:format_2330).
        output_json_box_path (str) (Optional): Path to the output CMIP box in JSON format. File type: output. Accepted formats: json (edam:format_3464).
        output_json_external_box_path (str) (Optional): Path to the output external CMIP box in JSON format. File type: output. Accepted formats: json (edam:format_3464).
        input_json_box_path (str) (Optional): Path to the input CMIP box in JSON format. File type: input. Accepted formats: json (edam:format_3464).
        input_json_external_box_path (str) (Optional): Path to the input external CMIP box in JSON format. File type: input. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **execution_type** (*str*) - ("mip_pos") Default options for the params file, each one creates a different params file. Values: check_only (Dry Run of CMIP), mip_pos (MIP O+  Mehler Solmajer dielectric), mip_neg (MIP O-  Mehler Solmajer dielectric), mip_neu (MIP Oxygen Mehler Solmajer dielectric), solvation (Solvation & MEP), pb_interaction_energy (Docking Interaction energy calculation. PB electrostatics), docking (Docking Mehler Solmajer dielectric), docking_rst (Docking from restart file).
            * **params** (*dict*) - ({}) CMIP options specification.
            * **binary_path** (*str*) - ("cmip") Path to the CMIP cmip executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None)  Path to the binary executable of your container.
            * **container_image** (*str*) - ("cmip/cmip:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.


    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_cmip.cmip.cmip_run import cmip_run
            prop = { 'binary_path': 'cmip' }
            cmip_run(input_pdb_path='/path/to/myStructure.pdb',
                     output_pdb_path='/path/to/newStructure.pdb',
                     output_log_path='/path/to/newStructureLog.log',
                     properties=prop)

    Info:
        * wrapped_software:
            * name: CMIP cmip
            * version: 2.7.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_pdb_path: str, input_probe_pdb_path: Optional[str] = None, output_pdb_path: Optional[str] = None,
                 output_grd_path: Optional[str] = None, output_cube_path: Optional[str] = None, output_rst_path: Optional[str] = None,
                 input_rst_path: Optional[str] = None, output_byat_path: Optional[str] = None, output_log_path: Optional[str] = None,
                 input_vdw_params_path: Optional[str] = None, input_params_path: Optional[str] = None, output_json_box_path: Optional[str] = None,
                 output_json_external_box_path: Optional[str] = None, input_json_box_path: Optional[str] = None,
                 input_json_external_box_path: Optional[str] = None, properties: Optional[dict] = None, **kwargs) -> None:
        """Store the input/output paths and the tool properties, then validate them."""

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any other local is defined.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path, "input_probe_pdb_path": input_probe_pdb_path,
                   "input_vdw_params_path": input_vdw_params_path, "input_params_path": input_params_path,
                   "input_json_box_path": input_json_box_path,
                   "input_json_external_box_path": input_json_external_box_path,
                   "input_rst_path": input_rst_path},
            "out": {"output_pdb_path": output_pdb_path, "output_grd_path": output_grd_path,
                    "output_cube_path": output_cube_path, "output_rst_path": output_rst_path,
                    "output_byat_path": output_byat_path, "output_log_path": output_log_path,
                    "output_json_box_path": output_json_box_path,
                    "output_json_external_box_path": output_json_external_box_path}
        }

        # Properties specific for BB
        self.binary_path = properties.get('binary_path', 'cmip')
        self.execution_type = properties.get('execution_type', 'mip_pos')
        # CMIP option values are always handled as strings.
        self.params = {k: str(v) for k, v in properties.get('params', {}).items()}

        # Fall back to the Van der Waals parameters shipped with the conda installation.
        # NOTE(review): if CONDA_PREFIX is unset this path starts with "None/" -- confirm that is acceptable.
        if not self.io_dict['in'].get('input_vdw_params_path'):
            self.io_dict['in']['input_vdw_params_path'] = f"{os.environ.get('CONDA_PREFIX')}/share/cmip/dat/vdwprm"
        # File name of the generated params file; launch() replaces it with the full path.
        self.io_dict['in']['combined_params_path'] = properties.get('combined_params_path', 'params')

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _check_output_pdb_name(self) -> None:
        """Validate the output PDB file name.

        Raises:
            ValueError: If output_pdb_path is set and either does not end in
                ".pdb" or its file name contains an underscore.
        """
        pdb_out = self.io_dict['out']['output_pdb_path']
        if not pdb_out:
            return
        if pdb_out.endswith('.pdb') and "_" not in str(Path(pdb_out).name):
            return
        message = (f"ERROR: output_pdb_path ({pdb_out}) "
                   f"name must end in .pdb and not contain underscores")
        fu.log(message, self.out_log, self.global_log)
        raise ValueError(message)

    @staticmethod
    def _build_grid_box(grid_params: dict) -> dict:
        """Build the JSON box description (origin, size and params) from CMIP grid parameters.

        Args:
            grid_params (dict): Grid parameters holding the 'DIM', 'INT' and 'CEN'
                triplets. Its 'DIM' entry is replaced in place by a tuple of ints.

        Returns:
            dict: Dictionary with the 'origin', 'size' and 'params' keys.
        """
        dim = tuple(int(grid_params['DIM'][i]) for i in range(3))
        grid_params['DIM'] = dim
        spacing = grid_params['INT']
        center = grid_params['CEN']
        axes = ('x', 'y', 'z')
        size_dict = {axis: round(dim[i] * spacing[i], 3) for i, axis in enumerate(axes)}
        # Each origin component is derived from the centre component of the same axis.
        origin_dict = {axis: round(center[i] - size_dict[axis] / 2, 3) for i, axis in enumerate(axes)}
        return {'origin': origin_dict,
                'size': size_dict,
                'params': grid_params}

    def _write_json_box(self, json_box_path: str, external: bool) -> None:
        """Write a JSON box file from the grid found in the staged CMIP log file.

        Args:
            json_box_path (str): Destination path of the JSON file.
            external (bool): When True, get_grid is called with True as second
                argument (presumably selecting the external grid -- confirm in get_grid).
        """
        # NOTE(review): the grid is read from output_log_path, so this fails when a JSON box
        # is requested without output_log_path -- confirm whether that combination is supported.
        log_path = self.stage_io_dict["out"]["output_log_path"]
        if external:
            _, _, grid_params = get_grid(log_path, True)
        else:
            _, _, grid_params = get_grid(log_path)
        grid_dict = self._build_grid_box(grid_params)
        with open(json_box_path, 'w') as json_file:
            json_file.write(json.dumps(grid_dict, indent=4))

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CmipRun <cmip.cmip_run.CmipRun>` object."""

        # Setup Biobb
        if self.check_restart():
            return 0

        # Check if output_pdb_path ends with ".pdb" and does not contain underscores
        self._check_output_pdb_name()

        params_preset_dict: dict[str, Any] = params_preset(execution_type=self.execution_type)

        # Grid taken from the input JSON boxes: external box first ("0" suffix), then the inner box.
        for box_key, suffix in (("input_json_external_box_path", "0"), ("input_json_box_path", "")):
            box_path = self.io_dict['in'][box_key]
            if not box_path:
                continue
            params_preset_dict[f"readgrid{suffix}"] = 0
            _, _, grid_params = get_grid(box_path)
            for name in ("INT", "CEN", "DIM"):
                values = grid_params[name]
                params_preset_dict[f"grid_{name.lower()}{suffix}"] = ",".join(
                    f"{name}{axis}{suffix}={values[i]}" for i, axis in enumerate("XYZ"))

        # The JSON boxes need the additional CMIP log passed with the "-l" flag below.
        if self.io_dict['out']['output_json_box_path'] or self.io_dict['out']['output_json_external_box_path']:
            params_preset_dict['WRITELOG'] = 1
            key_value_log_dir = fu.create_unique_dir()
            self.io_dict['out']['key_value_log_path'] = str(Path(key_value_log_dir).joinpath("key_value_cmip_log.log"))
            self.tmp_files.append(key_value_log_dir)

        # Restart OUT
        if self.io_dict["out"].get("output_rst_path"):
            params_preset_dict['FULLRST'] = 1  # type: ignore
            params_preset_dict['OREST'] = 1

        # Restart IN
        if self.io_dict['in']["input_rst_path"]:
            params_preset_dict['IREST'] = 2
            if not self.io_dict["out"].get("output_rst_path"):
                self.io_dict["out"]["output_rst_path"] = fu.create_unique_file_path()
            # The input restart is copied onto the restart path handed to CMIP with "-rst".
            shutil.copy2(self.io_dict['in']["input_rst_path"], self.io_dict["out"]["output_rst_path"])
        else:
            params_preset_dict['IREST'] = 0

        combined_params_dir = fu.create_unique_dir()
        self.io_dict['in']['combined_params_path'] = create_params_file(
            output_params_path=str(Path(combined_params_dir).joinpath(self.io_dict['in']['combined_params_path'])),
            input_params_path=self.io_dict['in'].get('input_params_path'),
            params_preset_dict=params_preset_dict,
            params_properties_dict=self.params)

        self.stage_files()

        # CMIP runs inside the staging directory, so only file names are passed.
        self.cmd = ["cd",
                    self.stage_io_dict["unique_dir"],
                    ";",
                    self.binary_path,
                    '-i', PurePath(self.stage_io_dict['in']['combined_params_path']).name,
                    '-vdw', PurePath(self.stage_io_dict['in']['input_vdw_params_path']).name,
                    '-hs', PurePath(self.stage_io_dict['in']['input_pdb_path']).name]

        if self.stage_io_dict["in"].get("input_probe_pdb_path") and Path(
                self.io_dict["in"].get("input_probe_pdb_path", "")).exists():
            self.cmd.append('-pr')
            self.cmd.append(PurePath(self.stage_io_dict["in"].get("input_probe_pdb_path")).name)

        # Optional outputs: the flag is added only when the corresponding path was provided.
        optional_outputs = (('-outpdb', 'output_pdb_path'),
                            ('-grdout', 'output_grd_path'),
                            ('-cube', 'output_cube_path'),
                            ('-rst', 'output_rst_path'),
                            ('-byat', 'output_byat_path'),
                            ('-o', 'output_log_path'))
        for flag, out_key in optional_outputs:
            staged_path = self.stage_io_dict["out"].get(out_key)
            if staged_path:
                self.cmd.append(flag)
                self.cmd.append(PurePath(staged_path).name)

        if self.stage_io_dict['out'].get('output_json_box_path') or self.stage_io_dict['out'].get('output_json_external_box_path'):
            self.cmd.append('-l')
            self.cmd.append(PurePath(self.stage_io_dict["out"]["key_value_log_path"]).name)

        # Run Biobb block
        self.run_biobb()

        # CMIP removes or adds a .pdb extension from pdb output name
        # manual copy_to_host or unstage
        host_pdb_path = self.io_dict['out'].get('output_pdb_path', '')
        if host_pdb_path:
            staged_pdb_path = str(Path(self.stage_io_dict["unique_dir"]).joinpath(Path(host_pdb_path).name))
            if Path(staged_pdb_path[:-4]).exists():
                shutil.move(staged_pdb_path[:-4], host_pdb_path)
            elif Path(staged_pdb_path + ".pdb").exists():
                shutil.move(staged_pdb_path + ".pdb", host_pdb_path)
            elif not Path(staged_pdb_path).exists():
                fu.log(f"WARNING: File not found output_pdb_path: {staged_pdb_path}", self.out_log, self.global_log)

            # Replace "ATOMTM" tag for "ATOM  "
            if Path(host_pdb_path).exists():
                with open(host_pdb_path) as pdb_file:
                    list_pdb_lines = pdb_file.readlines()
                with open(host_pdb_path, 'w') as pdb_file:
                    for line in list_pdb_lines:
                        pdb_file.write(line.replace('ATOMTM', 'ATOM  '))
            else:
                fu.log(f"WARNING: File not found output_pdb_path: {host_pdb_path} Abs Path: {Path(host_pdb_path).resolve()}", self.out_log, self.global_log)

        # Create the json_box_path and external_json_box_path files from the CMIP log file
        for box_key, external in (('output_json_box_path', False), ('output_json_external_box_path', True)):
            json_box_path = self.io_dict['out'].get(box_key)
            if json_box_path:
                self._write_json_box(json_box_path, external)

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s)
        self.tmp_files.append(combined_params_dir)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

297 

298 

def cmip_run(input_pdb_path: str, input_probe_pdb_path: Optional[str] = None, output_pdb_path: Optional[str] = None,
             output_grd_path: Optional[str] = None, output_cube_path: Optional[str] = None, output_rst_path: Optional[str] = None,
             output_byat_path: Optional[str] = None, output_log_path: Optional[str] = None,
             input_vdw_params_path: Optional[str] = None, input_params_path: Optional[str] = None, output_json_box_path: Optional[str] = None,
             output_json_external_box_path: Optional[str] = None, input_json_box_path: Optional[str] = None,
             input_json_external_box_path: Optional[str] = None, properties: Optional[dict] = None,
             input_rst_path: Optional[str] = None, **kwargs) -> int:
    """Create :class:`CmipRun <cmip.cmip_run.CmipRun>` class and
    execute the :meth:`launch() <cmip.cmip_run.CmipRun.launch>` method."""
    # input_rst_path is declared after properties so that existing positional calls keep
    # their meaning; without it the value ended up nested inside kwargs and was never
    # forwarded to CmipRun as input_rst_path.
    # locals() must be read before any other local is defined: every argument is
    # forwarded to the constructor under its own name.
    return CmipRun(**dict(locals())).launch()

308 

309 

# The functional wrapper shares the documentation of the class it instantiates.
cmip_run.__doc__ = CmipRun.__doc__
# get_main is not defined in CmipRun, so it comes from its BiobbObject base class;
# presumably it builds the command line entry point around cmip_run -- confirm in biobb_common.
main = CmipRun.get_main(cmip_run, "Wrapper of the CMIP cmip module.")

if __name__ == '__main__':
    main()