Coverage for biobb_common/biobb_common/generic/biobb_object.py: 53%
202 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-13 17:26 +0000
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-13 17:26 +0000
1"""Module containing the BiobbObject generic parent class."""
2import os
3import importlib
4import difflib
5import typing
6from typing import Optional, Mapping, Set, Union, Dict, List
7import warnings
8from pathlib import Path
9from sys import platform
10import shutil
11from pydoc import locate
12from biobb_common.tools import file_utils as fu
13from biobb_common.command_wrapper import cmd_wrapper
class BiobbObject:
    """
    | biobb_common BiobbObject
    | Generic parent class for the rest of the Biobb clases.
    | The BiobbOject class contains all the properties and methods that are common to all the biobb blocks.

    Args:
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **disable_sandbox** (*bool*) - (False) Disable the use of temporal unique directories aka sandbox. Only for local execution.
            * **chdir_sandbox** (*bool*) - (False) Change directory to the sandbox using just file names in the command line. Only for local execution.
            * **dev** (*str*) - (None) Adding additional options to command line.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **env_vars_dict** (*dict*) - ({}) Environment Variables Dictionary.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - (None) Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash -c") Path to the binary executable of the container shell.
            * **container_generic_command** (*str*) - ("run") Which command typically run or exec will be used to execute your image.
    """

    # NOTE: the class docstring above is runtime data, not just documentation:
    # __init__ hands ``self.__doc__`` to ``fu.get_doc_dicts``. Its text is
    # therefore kept verbatim; edit it only together with that parser.

    def __init__(self, properties: Optional[dict] = None, **kwargs) -> None:
        """Populate the common attributes from ``properties``.

        Args:
            properties: Tool parameters; ``None`` is treated as an empty dict.
                Keys that are absent fall back to the defaults used below.
            **kwargs: Accepted for signature compatibility; not read here.
        """
        properties = properties or {}

        # Input/Output files
        self.io_dict: Dict[str, Union[str, Dict[str, Union[str, Path]]]] = {"in": {}, "out": {}}

        # container Specific
        self.container_path = properties.get("container_path")
        self.container_image: str = properties.get("container_image", '')
        self.container_volume_path = properties.get(
            "container_volume_path", "/data")
        self.container_working_dir = properties.get("container_working_dir")
        self.container_user_id = properties.get("container_user_id")
        self.container_shell_path = properties.get(
            "container_shell_path", "/bin/bash -c"
        )
        self.container_generic_command = properties.get(
            "container_generic_command", "run"
        )

        # stage
        self.stage_io_dict: Dict[str, Union[str, Dict[str, Union[str, Path]]]] = {"in": {}, "out": {}}

        # Properties common in all BB
        self.disable_sandbox: bool = properties.get("disable_sandbox", False)
        self.chdir_sandbox: bool = properties.get("chdir_sandbox", False)
        self.binary_path = properties.get("binary_path")
        self.can_write_console_log = properties.get(
            "can_write_console_log", True)
        self.global_log = properties.get("global_log", None)
        self.out_log = None
        self.err_log = None
        self.prefix = properties.get("prefix", None)
        self.step = properties.get("step", None)
        self.path = properties.get("path", "")
        self.remove_tmp = properties.get("remove_tmp", True)
        self.restart = properties.get("restart", False)
        self.cmd: List[str] = []
        self.return_code = None
        self.tmp_files: List[Union[str, Path]] = []
        self.env_vars_dict: typing.Mapping = properties.get(
            "env_vars_dict", {})
        self.shell_path: typing.Union[str, Path] = properties.get(
            "shell_path", os.getenv("SHELL", "/bin/bash")
        )

        self.dev = properties.get("dev", None)
        self.check_extensions = properties.get("check_extensions", True)
        self.check_var_typing = properties.get("check_var_typing", True)
        self.locals_var_dict: Mapping[str, str] = {}
        self.doc_arguments_dict, self.doc_properties_dict = fu.get_doc_dicts(
            self.__doc__
        )

        # Version of the top-level package that defines the concrete class.
        # Deliberately best-effort: any failure just leaves the version unset.
        try:
            self.version = importlib.import_module(
                self.__module__.split(".")[0]
            ).__version__
        except Exception:
            self.version = None

    def check_arguments(
        self, output_files_created: bool = False, raise_exception: bool = True
    ):
        """Run ``fu.check_argument`` for every argument in ``doc_arguments_dict``.

        The path of each argument is looked up in ``self.locals_var_dict``;
        a missing or empty entry is passed on as ``None``.

        Args:
            output_files_created: Forwarded unchanged to ``fu.check_argument``.
            raise_exception: Forwarded unchanged to ``fu.check_argument``.
        """
        for argument, argument_dict in self.doc_arguments_dict.items():
            raw_path = self.locals_var_dict.get(argument)
            fu.check_argument(
                path=Path(raw_path) if raw_path else None,
                argument=argument,
                optional=argument_dict.get("optional", False),
                module_name=self.__module__,
                input_output=argument_dict.get(
                    "input_output", "").lower().strip(),
                output_files_created=output_files_created,
                extension_list=list(argument_dict.get("formats")),
                check_extensions=self.check_extensions,
                raise_exception=raise_exception,
                out_log=self.out_log,
            )

    def check_properties(
        self,
        properties: dict,
        reserved_properties: Optional[Set[str]] = None,
        check_var_typing: bool = False,
    ):
        """Warn about mistyped or unknown entries in ``properties``.

        Nothing is raised or returned; every finding is reported through
        ``warnings.warn``.

        Args:
            properties: Property dictionary supplied by the user.
            reserved_properties: Extra names that must never be reported as
                unknown ("system" and "working_dir_path" are always reserved).
            check_var_typing: When true, compare each value against the type
                named in ``self.doc_properties_dict``.
        """
        reserved = {"system", "working_dir_path"}.union(reserved_properties or ())

        # Check types
        if check_var_typing and self.doc_properties_dict:
            for prop, value in properties.items():
                prop_doc = self.doc_properties_dict.get(prop)
                if not prop_doc:
                    continue
                type_name = prop_doc.get("type")
                expected_type = locate(type_name) if isinstance(type_name, str) else None
                # locate() yields None (or a non-class object) when the
                # documented type string cannot be resolved; isinstance()
                # would raise TypeError on that, so the check is skipped.
                if not isinstance(expected_type, type):
                    continue
                if not isinstance(value, expected_type):
                    warnings.warn(
                        f"Warning: {prop} property type not recognized. Got {type(value)} Expected {expected_type}"
                    )

        # A property is "known" when it matches an instance attribute name.
        known_names = self.__dict__.keys()
        error_properties = {prop for prop in properties if prop not in known_names}
        error_properties -= reserved
        for error_property in error_properties:
            close_property = difflib.get_close_matches(
                error_property, known_names, n=1, cutoff=0.01
            )
            close_property = close_property[0] if close_property else ""
            warnings.warn(
                "Warning: %s is not a recognized property. The most similar property is: %s"
                % (error_property, close_property)
            )

    def check_restart(self) -> bool:
        """Log the module version and report whether this step can be skipped.

        Returns:
            True when ``self.restart`` is set and ``fu.check_complete_files``
            is truthy for the output paths in ``self.io_dict``; False otherwise.
        """
        if self.version:
            fu.log(
                f"Executing {self.__module__} Version: {self.version}",
                self.out_log,
                self.global_log,
            )

        if self.restart and fu.check_complete_files(self.io_dict["out"].values()):
            fu.log(
                "Restart is enabled, this step: %s will be skipped" % self.step,
                self.out_log,
                self.global_log,
            )
            return True
        return False

    def _staged_path(self, file_path, unique_dir: str) -> str:
        """Return the path under which ``file_path`` is seen by the command.

        Containers see the file inside ``container_volume_path``; local runs
        see it inside ``unique_dir``, or as a bare file name when
        ``chdir_sandbox`` is set.
        """
        file_name = Path(file_path).name
        if self.container_path:
            return str(Path(self.container_volume_path).joinpath(file_name))
        if self.chdir_sandbox:
            return str(file_name)
        return str(Path(unique_dir).joinpath(file_name))

    def stage_files(self):
        """Fill ``self.stage_io_dict`` with the paths the command will use.

        With ``disable_sandbox`` the paths of ``io_dict`` are reused and the
        current working directory is recorded as ``unique_dir``. Otherwise a
        unique directory is created, existing input files are copied into it
        and every input/output path is mapped with ``_staged_path``.
        """
        if self.disable_sandbox:
            # Copy the nested "in"/"out" dicts as well, so later changes to
            # the staged paths cannot leak back into io_dict.
            self.stage_io_dict = {
                key: dict(value) if isinstance(value, dict) else value
                for key, value in self.io_dict.items()
            }
            self.stage_io_dict["unique_dir"] = os.getcwd()
            return

        unique_dir = str(Path(fu.create_unique_dir()).resolve())
        self.stage_io_dict = {"in": {}, "out": {}, "unique_dir": unique_dir}

        # IN files COPY and assign INTERNAL PATH
        for file_ref, file_path in self.io_dict.get("in", {}).items():
            if not file_path:
                continue
            if Path(file_path).exists():
                shutil.copy2(file_path, unique_dir)
                fu.log(f"Copy: {file_path} to {unique_dir}", self.out_log)
                self.stage_io_dict["in"][file_ref] = self._staged_path(
                    file_path, unique_dir
                )
            else:
                # Default files in GMXLIB path like gmx_solvate -> input_solvent_gro_path (spc216.gro)
                self.stage_io_dict["in"][file_ref] = file_path

        # OUT files assign INTERNAL PATH
        for file_ref, file_path in self.io_dict.get("out", {}).items():
            if file_path:
                self.stage_io_dict["out"][file_ref] = self._staged_path(
                    file_path, unique_dir
                )

    def _append_container_shell(self, container_cmd: List[str], quote: str = '"') -> List[str]:
        """Append the container shell and the quoted tool command.

        When both ``self.cmd`` and ``self.container_shell_path`` are empty
        nothing is appended and a warning is logged instead.

        Args:
            container_cmd: Container command built so far; extended in place.
            quote: Text placed before and after the joined tool command.

        Returns:
            The same ``container_cmd`` list.
        """
        if not self.cmd and not self.container_shell_path:
            fu.log(
                "WARNING: The command-line is empty your container should know what to do automatically.",
                self.out_log,
                self.global_log,
            )
        else:
            container_cmd.append(self.container_shell_path)
            container_cmd.append(quote + " ".join(self.cmd) + quote)
        return container_cmd

    def _pull_singularity_image(self) -> None:
        """Pull ``self.container_image`` into a local ``.sif`` file.

        On success ``self.container_image`` is replaced by the name of the
        pulled file.

        Raises:
            FileNotFoundError: If the pull command raises it or the expected
                ``.sif`` file does not exist afterwards.
        """
        fu.log(
            f"{self.container_image} does not exist trying to pull it",
            self.out_log,
            self.global_log,
        )
        container_image_name = str(
            Path(self.container_image).with_suffix(".sif").name
        )
        singularity_pull_cmd = [
            self.container_path,
            "pull",
            "--name",
            container_image_name,
            self.container_image,
        ]
        try:
            cmd_wrapper.CmdWrapper(
                singularity_pull_cmd, self.shell_path, self.out_log
            ).launch()
            if not Path(container_image_name).exists():
                raise FileNotFoundError(container_image_name)
        except FileNotFoundError as error:
            fu.log(
                f"{' '.join(singularity_pull_cmd)} not found",
                self.out_log,
                self.global_log,
            )
            # Same exception type as before, now with a message and cause.
            raise FileNotFoundError(
                f"Unable to pull Singularity image {self.container_image}"
            ) from error
        self.container_image = container_image_name

    def _singularity_cmd(self, host_volume: str) -> List[str]:
        """Build the Singularity command line wrapping ``self.cmd``."""
        fu.log(
            "Using Singularity image %s" % self.container_image,
            self.out_log,
            self.global_log,
        )
        if not Path(self.container_image).exists():
            self._pull_singularity_image()

        singularity_cmd = [self.container_path, self.container_generic_command]
        # If we are working on a mac skip the -e option because is still no available
        if platform != "darwin":
            singularity_cmd.append("-e")

        if self.env_vars_dict:
            singularity_cmd.append("--env")
            singularity_cmd.append(
                ",".join(
                    f"{env_var_name}='{env_var_value}'"
                    for env_var_name, env_var_value in self.env_vars_dict.items()
                )
            )

        # Guarded like the Docker and pcocc branches: no bind without a path.
        if self.container_volume_path:
            singularity_cmd.extend(
                ["--bind", host_volume + ":" + self.container_volume_path]
            )
        singularity_cmd.append(self.container_image)
        return self._append_container_shell(singularity_cmd)

    def _docker_cmd(self, host_volume: str) -> List[str]:
        """Build the Docker command line wrapping ``self.cmd``."""
        fu.log(
            "Using Docker image %s" % self.container_image,
            self.out_log,
            self.global_log,
        )
        docker_cmd = [self.container_path, self.container_generic_command]
        if self.env_vars_dict:
            for env_var_name, env_var_value in self.env_vars_dict.items():
                docker_cmd.extend(["-e", f"{env_var_name}='{env_var_value}'"])
        if self.container_working_dir:
            docker_cmd.extend(["-w", self.container_working_dir])
        if self.container_volume_path:
            docker_cmd.extend(
                ["-v", host_volume + ":" + self.container_volume_path]
            )
        if self.container_user_id:
            docker_cmd.extend(["--user", self.container_user_id])

        docker_cmd.append(self.container_image)
        return self._append_container_shell(docker_cmd)

    def _pcocc_cmd(self, host_volume: str) -> List[str]:
        """Build the pcocc command line wrapping ``self.cmd``."""
        # pcocc run -I racov56:pmx cli.py mutate -h
        fu.log(
            "Using pcocc image %s" % self.container_image,
            self.out_log,
            self.global_log,
        )
        pcocc_cmd = [
            self.container_path,
            self.container_generic_command,
            "-I",
            self.container_image,
        ]
        if self.container_working_dir:
            pcocc_cmd.extend(["--cwd", self.container_working_dir])
        if self.container_volume_path:
            pcocc_cmd.extend(
                ["--mount", host_volume + ":" + self.container_volume_path]
            )
        if self.container_user_id:
            pcocc_cmd.extend(["--user", self.container_user_id])

        # pcocc gets backslash-escaped quotes around the tool command.
        return self._append_container_shell(pcocc_cmd, quote='\\"')

    def create_cmd_line(self):
        """Finalize ``self.cmd``, wrapping it for a container if one is set.

        The ``dev`` options are appended first. The container runtime is
        chosen from the suffix of ``self.container_path`` ("singularity",
        "docker" or "pcocc"); any other value leaves ``self.cmd`` as a local
        command. ``self.container_path`` is normalized to ``""`` when unset.

        Raises:
            FileNotFoundError: If a missing Singularity image cannot be pulled.
        """
        # Not documented and not listed option, only for devs
        if self.dev:
            fu.log(
                f"Adding development options: {self.dev}", self.out_log, self.global_log
            )
            self.cmd += self.dev.split()

        # Containers
        host_volume: str = str(self.stage_io_dict.get("unique_dir", ''))
        self.container_path = self.container_path or ""

        if self.container_path.endswith("singularity"):
            self.cmd = self._singularity_cmd(host_volume)
        elif self.container_path.endswith("docker"):
            self.cmd = self._docker_cmd(host_volume)
        elif self.container_path.endswith("pcocc"):
            self.cmd = self._pcocc_cmd(host_volume)
        # Otherwise: local execution, self.cmd is used as it is.

    def execute_command(self):
        """Launch ``self.cmd`` and store the result in ``self.return_code``.

        With ``chdir_sandbox`` the command runs from the staged
        ``unique_dir``; the previous working directory is restored
        afterwards, also when the launch raises.
        """
        previous_cwd = os.getcwd()
        if self.chdir_sandbox:
            os.chdir(self.stage_io_dict["unique_dir"])

        try:
            self.return_code = cmd_wrapper.CmdWrapper(
                self.cmd,
                self.shell_path,
                self.out_log,
                self.err_log,
                self.global_log,
                self.env_vars_dict,
            ).launch()
        finally:
            if self.chdir_sandbox:
                os.chdir(previous_cwd)

    def copy_to_host(self):
        """Copy produced output files from ``unique_dir`` to their final paths.

        Each staged output is looked up by file name inside
        ``stage_io_dict["unique_dir"]`` and copied to the matching
        ``io_dict["out"]`` path. Files that were not produced are skipped, as
        are files whose destination already is the very same file.
        """
        sandbox_dir = Path(self.stage_io_dict["unique_dir"])
        for file_ref, file_path in self.stage_io_dict["out"].items():
            if not file_path:
                continue
            sandbox_file = sandbox_dir.joinpath(Path(file_path).name)
            if not sandbox_file.exists():
                continue
            host_path = self.io_dict["out"][file_ref]
            # Copying a file onto itself makes shutil raise, so skip that case.
            if Path(host_path).exists() and sandbox_file.samefile(Path(host_path)):
                continue
            shutil.copy2(str(sandbox_file), host_path)

    def run_biobb(self):
        """Build the final command line and execute it."""
        self.create_cmd_line()
        self.execute_command()

    def remove_tmp_files(self):
        """Pass ``self.tmp_files`` to ``fu.rm_file_list`` when ``remove_tmp`` is set."""
        if self.remove_tmp:
            fu.rm_file_list(self.tmp_files, self.out_log)