Coverage for biobb_common/biobb_common/generic/biobb_object.py: 53%

202 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-03-13 17:26 +0000

1"""Module containing the BiobbObject generic parent class.""" 

2import os 

3import importlib 

4import difflib 

5import typing 

6from typing import Optional, Mapping, Set, Union, Dict, List 

7import warnings 

8from pathlib import Path 

9from sys import platform 

10import shutil 

11from pydoc import locate 

12from biobb_common.tools import file_utils as fu 

13from biobb_common.command_wrapper import cmd_wrapper 

14 

15 

# NOTE: the class docstring below is read at runtime (``fu.get_doc_dicts(self.__doc__)``
# in ``__init__``), so the ``Args``/property lines must keep their exact layout.
class BiobbObject:
    """
    | biobb_common BiobbObject
    | Generic parent class for the rest of the Biobb classes.
    | The BiobbObject class contains all the properties and methods that are common to all the biobb blocks.

    Args:
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **disable_sandbox** (*bool*) - (False) Disable the use of temporal unique directories aka sandbox. Only for local execution.
            * **chdir_sandbox** (*bool*) - (False) Change directory to the sandbox using just file names in the command line. Only for local execution.
            * **dev** (*str*) - (None) Adding additional options to command line.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **env_vars_dict** (*dict*) - ({}) Environment Variables Dictionary.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - (None) Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash -c") Path to the binary executable of the container shell.
            * **container_generic_command** (*str*) - ("run") Which command typically run or exec will be used to execute your image.
    """

    def __init__(self, properties: Optional[dict] = None, **kwargs) -> None:
        """Read the common properties and set up the default state of the block.

        Args:
            properties: Tool parameters (not input/output files). Missing keys
                fall back to the defaults documented on the class.
            **kwargs: Accepted for signature compatibility; not used here.
        """
        properties = properties or {}

        # Input/Output files
        self.io_dict: Dict[str, Union[str, Dict[str, Union[str, Path]]]] = {"in": {}, "out": {}}

        # container Specific
        self.container_path = properties.get("container_path")
        self.container_image: str = properties.get("container_image", '')
        self.container_volume_path = properties.get(
            "container_volume_path", "/data")
        self.container_working_dir = properties.get("container_working_dir")
        self.container_user_id = properties.get("container_user_id")
        self.container_shell_path = properties.get(
            "container_shell_path", "/bin/bash -c"
        )
        self.container_generic_command = properties.get(
            "container_generic_command", "run"
        )

        # stage
        self.stage_io_dict: Dict[str, Union[str, Dict[str, Union[str, Path]]]] = {"in": {}, "out": {}}

        # Properties common in all BB
        self.disable_sandbox: bool = properties.get("disable_sandbox", False)
        self.chdir_sandbox: bool = properties.get("chdir_sandbox", False)
        self.binary_path = properties.get("binary_path")
        self.can_write_console_log = properties.get(
            "can_write_console_log", True)
        self.global_log = properties.get("global_log", None)
        self.out_log = None
        self.err_log = None
        self.prefix = properties.get("prefix", None)
        self.step = properties.get("step", None)
        self.path = properties.get("path", "")
        self.remove_tmp = properties.get("remove_tmp", True)
        self.restart = properties.get("restart", False)
        self.cmd: List[str] = []
        self.return_code = None
        self.tmp_files: List[Union[str, Path]] = []
        self.env_vars_dict: typing.Mapping = properties.get(
            "env_vars_dict", {})
        self.shell_path: typing.Union[str, Path] = properties.get(
            "shell_path", os.getenv("SHELL", "/bin/bash")
        )

        self.dev = properties.get("dev", None)
        self.check_extensions = properties.get("check_extensions", True)
        self.check_var_typing = properties.get("check_var_typing", True)
        self.locals_var_dict: Mapping[str, str] = dict()
        # The documented arguments/properties are extracted from the docstring
        # of the concrete class.
        self.doc_arguments_dict, self.doc_properties_dict = fu.get_doc_dicts(
            self.__doc__
        )

        # Best effort: the version is taken from the top-level package of the
        # concrete class; any failure simply leaves it unset.
        try:
            self.version = importlib.import_module(
                self.__module__.split(".")[0]
            ).__version__
        except Exception:
            self.version = None

    def check_arguments(
        self, output_files_created: bool = False, raise_exception: bool = True
    ):
        """Validate every documented argument through ``fu.check_argument``.

        Args:
            output_files_created: Forwarded to ``fu.check_argument``.
            raise_exception: Forwarded to ``fu.check_argument``.
        """
        for argument, argument_dict in self.doc_arguments_dict.items():
            raw_path = self.locals_var_dict.get(argument)
            fu.check_argument(
                path=Path(raw_path) if raw_path else None,
                argument=argument,
                optional=argument_dict.get("optional", False),
                module_name=self.__module__,
                input_output=argument_dict.get(
                    "input_output", "").lower().strip(),
                output_files_created=output_files_created,
                extension_list=list(argument_dict.get("formats")),
                check_extensions=self.check_extensions,
                raise_exception=raise_exception,
                out_log=self.out_log,
            )

    def check_properties(
        self,
        properties: dict,
        reserved_properties: Optional[Set[str]] = None,
        check_var_typing: bool = False,
    ):
        """Warn about mistyped or unrecognized entries in ``properties``.

        Args:
            properties: The properties dictionary supplied by the user.
            reserved_properties: Extra property names that must never be
                reported as unknown ("system" and "working_dir_path" are
                always reserved).
            check_var_typing: When true, compare each value against the type
                documented for that property and warn on mismatch.
        """
        reserved = {"system", "working_dir_path"}.union(reserved_properties or set())

        # Check types
        if check_var_typing and self.doc_properties_dict:
            for prop, value in properties.items():
                documented = self.doc_properties_dict.get(prop)
                if not documented:
                    continue
                type_name = documented.get("type")
                expected_type = locate(type_name) if type_name else None
                # locate() yields None (or a non-class object) when the
                # documented type name cannot be resolved; isinstance() would
                # raise TypeError on that, so the check is skipped instead.
                if not isinstance(expected_type, type):
                    continue
                if not isinstance(value, expected_type):
                    warnings.warn(
                        f"Warning: {prop} property type not recognized. Got {type(value)} Expected {expected_type}"
                    )

        # A property is recognized when it matches an instance attribute.
        # Iterating the dict (instead of a set) keeps the warnings in a
        # deterministic order.
        known_properties = self.__dict__.keys()
        error_properties = [
            prop
            for prop in properties
            if prop not in known_properties and prop not in reserved
        ]
        for error_property in error_properties:
            close_property = difflib.get_close_matches(
                error_property, known_properties, n=1, cutoff=0.01
            )
            close_property = close_property[0] if close_property else ""
            warnings.warn(
                "Warning: %s is not a recognized property. The most similar property is: %s"
                % (error_property, close_property)
            )

    def check_restart(self) -> bool:
        """Log the running version and tell whether this step can be skipped.

        Returns:
            True when ``restart`` is enabled and ``fu.check_complete_files``
            accepts the declared output files; False otherwise.
        """
        if self.version:
            fu.log(
                f"Executing {self.__module__} Version: {self.version}",
                self.out_log,
                self.global_log,
            )

        if self.restart and fu.check_complete_files(self.io_dict["out"].values()):
            fu.log(
                "Restart is enabled, this step: %s will be skipped" % self.step,
                self.out_log,
                self.global_log,
            )
            return True
        return False

    def _stage_path_for(self, file_path, unique_dir: str) -> str:
        """Return the path under which ``file_path`` is seen by the executed tool."""
        file_name = Path(file_path).name
        # Container: files are reached through the mounted volume.
        if self.container_path:
            return str(Path(self.container_volume_path).joinpath(file_name))
        # Local with chdir_sandbox: the command runs inside the sandbox, so
        # the bare file name is enough.
        if self.chdir_sandbox:
            return str(file_name)
        # Local: absolute path inside the sandbox directory.
        return str(Path(unique_dir).joinpath(file_name))

    def stage_files(self):
        """Fill ``stage_io_dict`` with the paths the command line must use.

        With ``disable_sandbox`` the original paths are reused and the current
        working directory is recorded as ``unique_dir``. Otherwise a unique
        directory is created, existing input files are copied into it and
        every input/output path is mapped to its sandbox (or container) path.
        """
        if self.disable_sandbox:
            self.stage_io_dict = self.io_dict.copy()
            self.stage_io_dict["unique_dir"] = os.getcwd()
            return

        unique_dir = str(Path(fu.create_unique_dir()).resolve())
        self.stage_io_dict = {"in": {}, "out": {}, "unique_dir": unique_dir}

        # IN files COPY and assign INTERNAL PATH
        for file_ref, file_path in self.io_dict.get("in", {}).items():
            if not file_path:
                continue
            if not Path(file_path).exists():
                # Default files in GMXLIB path like gmx_solvate -> input_solvent_gro_path (spc216.gro)
                self.stage_io_dict["in"][file_ref] = file_path
                continue
            shutil.copy2(file_path, unique_dir)
            fu.log(f"Copy: {file_path} to {unique_dir}", self.out_log)
            self.stage_io_dict["in"][file_ref] = self._stage_path_for(
                file_path, unique_dir
            )

        # OUT files assign INTERNAL PATH
        for file_ref, file_path in self.io_dict.get("out", {}).items():
            if file_path:
                self.stage_io_dict["out"][file_ref] = self._stage_path_for(
                    file_path, unique_dir
                )

    def _pull_singularity_image(self) -> None:
        """Pull ``container_image`` as a local ``.sif`` file and switch to it.

        Raises:
            FileNotFoundError: If the pull command cannot be run or the image
                file is not present afterwards.
        """
        fu.log(
            f"{self.container_image} does not exist trying to pull it",
            self.out_log,
            self.global_log,
        )
        container_image_name = str(
            Path(self.container_image).with_suffix(".sif").name
        )
        singularity_pull_cmd = [
            self.container_path,
            "pull",
            "--name",
            container_image_name,
            self.container_image,
        ]
        try:
            cmd_wrapper.CmdWrapper(
                singularity_pull_cmd, self.shell_path, self.out_log
            ).launch()
            if not Path(container_image_name).exists():
                raise FileNotFoundError(container_image_name)
        except FileNotFoundError as error:
            message = f"{' '.join(singularity_pull_cmd)} not found"
            fu.log(message, self.out_log, self.global_log)
            # Same exception type as before, now carrying the reason.
            raise FileNotFoundError(message) from error
        self.container_image = container_image_name

    def _wrap_cmd_in_container(self, container_cmd: List[str], quote: str = '"') -> None:
        """Append the quoted tool command to ``container_cmd`` and store it in ``self.cmd``."""
        if not self.cmd and not self.container_shell_path:
            fu.log(
                "WARNING: The command-line is empty your container should know what to do automatically.",
                self.out_log,
                self.global_log,
            )
        else:
            # An unset shell path must not end up as a None/empty element of
            # the command list.
            if self.container_shell_path:
                container_cmd.append(self.container_shell_path)
            container_cmd.append(quote + " ".join(self.cmd) + quote)
        self.cmd = container_cmd

    def create_cmd_line(self):
        """Finish ``self.cmd``: add the ``dev`` options and the container prefix.

        The container flavour is chosen from the suffix of ``container_path``
        (singularity, docker or pcocc); any other value leaves the command
        untouched for local execution.

        Raises:
            FileNotFoundError: If a missing Singularity image cannot be pulled.
        """
        # Not documented and not listed option, only for devs
        if self.dev:
            fu.log(
                f"Adding development options: {self.dev}", self.out_log, self.global_log
            )
            self.cmd += self.dev.split()

        # Containers
        host_volume: str = str(self.stage_io_dict.get("unique_dir", ''))
        self.container_path = self.container_path or ""

        # Singularity
        if self.container_path.endswith("singularity"):
            fu.log(
                "Using Singularity image %s" % self.container_image,
                self.out_log,
                self.global_log,
            )
            if not Path(self.container_image).exists():
                self._pull_singularity_image()

            singularity_cmd = [self.container_path, self.container_generic_command]
            # The -e option is left out on mac because it is still not available there
            if platform != "darwin":
                singularity_cmd.append("-e")

            if self.env_vars_dict:
                singularity_cmd.append("--env")
                singularity_cmd.append(
                    ",".join(
                        f"{env_var_name}='{env_var_value}'"
                        for env_var_name, env_var_value in self.env_vars_dict.items()
                    )
                )

            singularity_cmd.extend(
                [
                    "--bind",
                    host_volume + ":" + self.container_volume_path,
                    self.container_image,
                ]
            )
            self._wrap_cmd_in_container(singularity_cmd)

        # Docker
        elif self.container_path.endswith("docker"):
            fu.log(
                "Using Docker image %s" % self.container_image,
                self.out_log,
                self.global_log,
            )
            docker_cmd = [self.container_path, self.container_generic_command]
            if self.env_vars_dict:
                for env_var_name, env_var_value in self.env_vars_dict.items():
                    docker_cmd.extend(["-e", f"{env_var_name}='{env_var_value}'"])
            if self.container_working_dir:
                docker_cmd.extend(["-w", self.container_working_dir])
            if self.container_volume_path:
                docker_cmd.extend(
                    ["-v", host_volume + ":" + self.container_volume_path]
                )
            if self.container_user_id:
                docker_cmd.extend(["--user", self.container_user_id])

            docker_cmd.append(self.container_image)
            self._wrap_cmd_in_container(docker_cmd)

        # Pcocc
        elif self.container_path.endswith("pcocc"):
            # pcocc run -I racov56:pmx cli.py mutate -h
            fu.log(
                "Using pcocc image %s" % self.container_image,
                self.out_log,
                self.global_log,
            )
            pcocc_cmd = [
                self.container_path,
                self.container_generic_command,
                "-I",
                self.container_image,
            ]
            if self.container_working_dir:
                pcocc_cmd.extend(["--cwd", self.container_working_dir])
            if self.container_volume_path:
                pcocc_cmd.extend(
                    ["--mount", host_volume + ":" + self.container_volume_path]
                )
            if self.container_user_id:
                pcocc_cmd.extend(["--user", self.container_user_id])

            # pcocc receives the command wrapped in escaped quotes
            self._wrap_cmd_in_container(pcocc_cmd, quote='\\"')

        # Local execution: nothing to add

    def execute_command(self):
        """Launch ``self.cmd`` and store its result in ``self.return_code``.

        When ``chdir_sandbox`` is set the command runs from the sandbox
        directory; the previous working directory is restored afterwards even
        if the launch raises.
        """
        cwd = os.getcwd()
        if self.chdir_sandbox:
            os.chdir(self.stage_io_dict["unique_dir"])

        try:
            self.return_code = cmd_wrapper.CmdWrapper(
                self.cmd,
                self.shell_path,
                self.out_log,
                self.err_log,
                self.global_log,
                self.env_vars_dict,
            ).launch()
        finally:
            if self.chdir_sandbox:
                os.chdir(cwd)

    def copy_to_host(self):
        """Copy the output files found in the sandbox to their final paths.

        A file is skipped when it is missing from the sandbox or when the
        destination already exists and is the very same file.
        """
        # NOTE(review): with disable_sandbox, unique_dir is the working
        # directory, so a same-named file there would be copied over an output
        # located elsewhere - confirm this is intended.
        for file_ref, file_path in self.stage_io_dict["out"].items():
            if not file_path:
                continue
            sandbox_file = Path(self.stage_io_dict["unique_dir"]).joinpath(
                Path(file_path).name
            )
            if not sandbox_file.exists():
                continue
            destination = self.io_dict["out"][file_ref]
            if Path(destination).exists() and sandbox_file.samefile(Path(destination)):
                continue
            shutil.copy2(str(sandbox_file), destination)

    def run_biobb(self):
        """Build the final command line and execute it."""
        self.create_cmd_line()
        self.execute_command()

    def remove_tmp_files(self):
        """Delete the registered temporary files when ``remove_tmp`` is enabled."""
        if self.remove_tmp:
            fu.rm_file_list(self.tmp_files, self.out_log)