Coverage for biobb_common/biobb_common/configuration/settings.py: 77%
93 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-20 07:21 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-20 07:21 +0000
1#!/usr/bin/env python3
3"""Settings loader module.
5This module contains the classes to read the different formats of the configuration files.
7The configuration files are composed of paths to the files and properties. There are several common properties for all
8the building blocks.
10Some yaml files contain a tool key with the name of the tool to be executed inside the step key. The tool key is used
11by the REST API to identify the tool to be executed.
14Syntax:
15 - **property** (*dataType*) - (Default value) Short description.
17Available common step properties (each Biobb step also has its own specific properties):
18 - **tool** (*str*) - (None) Name of the tool to be executed, mostly used by the biobbAPI.
19 - **global_log** (*Logger object*) - (None) Log from the main workflow.
20 - **prefix** (*str*) - (None) Prefix if provided.
21 - **step** (*str*) - (None) Name of the step.
22 - **path** (*str*) - ('') Absolute path to the step working dir.
23 - **working_dir_path** (*str*) - (Current working dir) Workflow output directory.
24 - **global_properties_list** (*list*) - ([]) List of global properties.
25"""
27import yaml
28import json
29import logging
30from pathlib import Path
31from copy import deepcopy
32from biobb_common.tools import file_utils as fu
33from typing import Any, Optional
# Escape tokens that Galaxy substitutes for special characters, mapped back to
# the literal character each token stands for.
GALAXY_CHARACTER_MAP = {
    "__gt__": ">", "__lt__": "<", "__sq__": "'", "__dq__": '"', "__ob__": "[", "__cb__": "]",
    "__oc__": "{", "__cc__": "}", "__cn__": "\n", "__cr__": "\r", "__tc__": "\t", "__pd__": "#"}


def trans_galaxy_charmap(input_str: str) -> str:
    """Fixes escape characters introduced by Galaxy on Json inputs.

    Every token listed in ``GALAXY_CHARACTER_MAP`` is replaced by its literal
    character. The replacements are applied one token at a time, in the
    insertion order of the map.

    Args:
        input_str (str): Text that may contain Galaxy escape tokens.

    Returns:
        str: The text with every known token replaced.
    """
    for escaped_token, literal_char in GALAXY_CHARACTER_MAP.items():
        input_str = input_str.replace(escaped_token, literal_char)
    return input_str
class ConfReader:
    """Configuration loader for YAML/JSON configuration files and JSON strings.

    Args:
        config (str): Path to the configuration [YAML|JSON] file or JSON string.
            A ``#<step>`` suffix (i.e. ``workflow_configuration.yaml#Editconf``)
            restricts the loaded configuration to that single step.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Attributes:
        properties (dict): Parsed configuration.
        global_properties (dict): Deep copy of the ``global_properties`` section.
        working_dir_path (str): Value returned by ``fu.get_working_dir_path()``.
    """

    # Top-level keys that describe the configuration itself and are therefore
    # never treated as step names.
    _RESERVED_KEYS = ("global_properties", "paths", "properties", "tool")

    def __init__(self, config: Optional[str] = None, *args, **kwargs):
        self.properties = self._read_config(config)
        self.global_properties = self._get_global_properties()
        self.working_dir_path = fu.get_working_dir_path(
            working_dir_path=self.global_properties.get("working_dir_path", None),
            restart=self.global_properties.get("restart", False),
        )

    def get_working_dir_path(self) -> str:
        """get_working_dir_path() returns the working directory path.

        Returns:
            str: Working directory path.
        """
        return self.working_dir_path

    def _read_config(self, config: Optional[str] = None) -> dict[str, Any]:
        """_read_config() reads the configuration and returns a dictionary.

        Args:
            config (str): Path to a [YAML|JSON] file or an inline JSON string,
                optionally followed by ``#<step>``.

        Returns:
            dict: The whole configuration, or only the ``<step>`` entry when a
            ``#<step>`` suffix is present. Empty dict when ``config`` is falsy.

        Raises:
            FileNotFoundError: The configuration file does not exist.
            Exception: The file is neither valid YAML nor valid JSON.
            KeyError: The requested ``<step>`` is not in the configuration.
        """
        if not config:
            return {}

        # NOTE(review): the split happens before the source is identified as
        # JSON, so an inline JSON string containing a literal '#' is cut there.
        config_tokens = str(config).split("#")
        if (json_string := config_tokens[0].strip()).startswith("{"):
            config_dict = json.loads(trans_galaxy_charmap(json_string))
        else:
            config_file_path = Path(config_tokens[0]).resolve()
            if not config_file_path.exists():
                raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
            # Read the text once so that both parsers see the full content; a
            # stream already consumed by the YAML parser cannot be re-parsed.
            with open(config_file_path) as stream:
                config_text = stream.read()
            try:
                config_dict = yaml.safe_load(config_text) or {}
            except yaml.YAMLError as yaml_error:
                try:
                    config_dict = json.loads(config_text) or {}
                except json.JSONDecodeError as json_error:
                    raise Exception(f"Error reading configuration file {config_file_path} is not a valid YAML: {yaml_error} or a valid JSON: {json_error}") from json_error

        # Read just one step specified in the configuration file path
        # i.e: Read just Editconf step from workflow_configuration.yaml file
        # "/home/user/workflow_configuration.yaml#Editconf"
        if len(config_tokens) > 1:
            return config_dict[config_tokens[1]]

        return config_dict

    def _get_global_properties(self) -> dict[str, Any]:
        """_get_global_properties() returns the global properties of the configuration file.

        Returns:
            dict: Deep copy of the ``global_properties`` section, or an empty
            dict when the section is missing or empty.
        """
        return deepcopy(self.properties.get("global_properties") or {})

    def _get_step_keys(self) -> list[str]:
        """Return the top-level configuration keys that are step names."""
        return [key for key in self.properties if key not in self._RESERVED_KEYS]

    def _get_step_properties(self, key: str = "", prefix: str = "", global_log: Optional[logging.Logger] = None) -> dict[str, Any]:
        """_get_step_properties() returns the properties of one step.

        Args:
            key (str): Step name. When empty, the top-level ``tool`` and
                ``properties`` entries of the configuration are used.
            prefix (str): Prefix if provided.
            global_log (Logger): Log from the main workflow.

        Returns:
            dict: dictionary of properties.
        """
        # A step declared with no content is parsed as None; treat it as empty.
        step_conf = (self.properties[key] if key else self.properties) or {}

        # Later entries override earlier ones: global properties first, then
        # the built-in keys, and the step's own properties last.
        prop_dic: dict[str, Any] = deepcopy(self.global_properties)
        prop_dic["global_properties_list"] = list(self.global_properties.keys())
        prop_dic["step"] = key
        prop_dic["prefix"] = prefix
        prop_dic["global_log"] = global_log
        prop_dic["working_dir_path"] = self.working_dir_path
        prop_dic["path"] = str(Path(self.working_dir_path).joinpath(prefix, key))
        prop_dic["tool"] = step_conf.get("tool", None)
        prop_dic.update(deepcopy(step_conf.get("properties") or {}))

        return prop_dic

    def get_prop_dic(self, prefix: str = "", global_log: Optional[logging.Logger] = None) -> dict[str, Any]:
        """get_prop_dic() returns the properties dictionary where keys are the
        step names in the configuration YAML file and every value contains another
        nested dictionary containing the keys and values of each step properties section.
        The global properties are copied into each nested dictionary.
        For each nested dictionary the following keys are added:
            | **path** (*str*): Absolute path to the step working dir.
            | **step** (*str*): Name of the step.
            | **prefix** (*str*): Prefix if provided.
            | **global_log** (*Logger object*): Log from the main workflow.
            | **tool** (*str*): Name of the tool to be executed, mostly used by the biobbAPI.
            | **working_dir_path** (*str*): Workflow output directory.
            | **global_properties_list** (*list*): List of global properties.

        When the configuration declares no steps, a single flat dictionary built
        from the top-level ``tool`` and ``properties`` entries is returned instead.

        Args:
            prefix (str): Prefix if provided.
            global_log (:obj:Logger): Log from the main workflow.

        Returns:
            dict: dictionary of properties.
        """
        prop_dic: dict[str, Any] = {
            key: self._get_step_properties(key=key, prefix=prefix, global_log=global_log)
            for key in self._get_step_keys()
        }
        if not prop_dic:
            return self._get_step_properties(prefix=prefix, global_log=global_log)

        return prop_dic

    def get_paths_dic(self, prefix: str = "") -> dict[str, Any]:
        """get_paths_dic() returns the resolved paths of every step.

        When the configuration declares no steps, a single flat dictionary built
        from the top-level ``paths`` entry is returned instead.

        Args:
            prefix (str): Prefix if provided.

        Returns:
            dict: dictionary of paths, keyed by step name.
        """
        paths_dic: dict[str, Any] = {
            key: self._get_step_paths(key=key, prefix=prefix)
            for key in self._get_step_keys()
        }
        if not paths_dic:
            return self._get_step_paths(prefix=prefix)

        return paths_dic

    def _get_step_paths(self, key: str = "", prefix: str = "") -> dict[str, Any]:
        """_get_step_paths() returns the resolved paths of one step.

        Values starting with ``file:`` are returned with that marker removed.
        Any other value is placed under the working directory and ``prefix``,
        after ``dependency/...`` references have been solved.

        Args:
            key (str): Step name. When empty, the top-level ``paths`` entry is used.
            prefix (str): Prefix if provided.

        Returns:
            dict: dictionary of paths.
        """
        # Both the step entry and its "paths" section may be parsed as None
        # when they are declared without content.
        step_conf = (self.properties[key] if key else self.properties) or {}
        paths_dic = step_conf.get("paths") or {}

        step_paths_dic: dict[str, Any] = {}
        for file_key, path_value in paths_dic.items():
            if path_value.startswith("file:"):
                # Strip only the leading marker; later "file:" substrings
                # belong to the path itself.
                step_paths_dic[file_key] = path_value.removeprefix("file:")
                continue
            step_paths_dic[file_key] = str(Path(self.working_dir_path).joinpath(prefix, self._solve_dependency(key, path_value)))

        return step_paths_dic

    def _solve_dependency(self, step, dependency_str: str) -> str:
        """_solve_dependency() solves the dependency of a path in the configuration file.

        A value of the form ``dependency/<step>/<path_key>`` is replaced by the
        ``<path_key>`` entry of the ``paths`` section of ``<step>``, placed inside
        that step's directory. Any other value is placed inside ``step``'s directory.

        Args:
            step (str): Name of the step that owns the path.
            dependency_str (str): Path value from the configuration.

        Returns:
            str: Path relative to the working directory.

        Raises:
            Exception: A dependency is requested but ``step`` is empty.
            IndexError: The dependency does not have the three expected parts.
        """
        dependency_tokens = dependency_str.strip().split("/")
        if dependency_tokens[0] != "dependency":
            return str(Path(step).joinpath(dependency_str))

        if not step:
            raise Exception("Step name is required to solve dependency")

        if len(dependency_tokens) < 3:
            raise IndexError(f"Malformed dependency '{dependency_str}': expected 'dependency/<step>/<path_key>'")

        source_step, source_key = dependency_tokens[1], dependency_tokens[2]
        source_paths = (self.properties.get(source_step) or {}).get("paths") or {}
        # An unknown step or path key resolves to the source step directory itself.
        return str(Path(source_step).joinpath(source_paths.get(source_key, "")))