Coverage for biobb_common/biobb_common/configuration/settings.py: 77%

93 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-20 07:21 +0000

1#!/usr/bin/env python3 

2 

3"""Settings loader module. 

4 

5This module contains the classes to read the different formats of the configuration files. 

6 

7The configuration files are composed of paths to the files and properties. There are several common properties for all 

8the building blocks. 

9 

10Some yaml files contain a tool key with the name of the tool to be executed inside the step key. The tool key is used 

11by the resp API to identify the tool to be executed. 

12 

13 

14Syntax: 

15 - **property** (*dataType*) - (Default value) Short description. 

16 

17Available common step properties (each Biobb step also has its own specific properties): 

18 - **tool** (*str*) - (None) Name of the tool to be executed, mostly used by the biobbAPI. 

19 - **global_log** (*Logger object*) - (None) Log from the main workflow. 

20 - **prefix** (*str*) - (None) Prefix if provided. 

21 - **step** (*str*) - (None) Name of the step. 

22 - **path** (*str*) - ('') Absolute path to the step working dir. 

23 - **working_dir_path** (*str*) - (Current working dir) Workflow output directory. 

24 - **global_properties_list** (*list*) - ([]) List of global properties. 

25""" 

26 

27import yaml 

28import json 

29import logging 

30from pathlib import Path 

31from copy import deepcopy 

32from biobb_common.tools import file_utils as fu 

33from typing import Any, Optional 

34 

# Escape tokens of the form "__xx__" and the literal character each one
# stands for.  Insertion order matters: trans_galaxy_charmap() substitutes
# the tokens one after another in exactly this order.
GALAXY_CHARACTER_MAP = {
    "__gt__": ">",
    "__lt__": "<",
    "__sq__": "'",
    "__dq__": '"',
    "__ob__": "[",
    "__cb__": "]",
    "__oc__": "{",
    "__cc__": "}",
    "__cn__": "\n",
    "__cr__": "\r",
    "__tc__": "\t",
    "__pd__": "#",
}

38 

39 

def trans_galaxy_charmap(input_str):
    """Undo the character escaping that Galaxy applies to JSON inputs.

    Every token listed in ``GALAXY_CHARACTER_MAP`` is replaced by the
    character it stands for.  The tokens are substituted sequentially, in the
    order in which they appear in the map.

    Args:
        input_str (str): Text possibly containing Galaxy escape tokens.

    Returns:
        str: The text with all escape tokens replaced.
    """
    translated = input_str
    for token, character in GALAXY_CHARACTER_MAP.items():
        translated = translated.replace(token, character)
    return translated

45 

46 

class ConfReader:
    """Configuration loader for YAML/JSON configuration files and JSON strings.

    Two layouts are handled:

    * multi-step: every top-level key other than ``global_properties``,
      ``paths``, ``properties`` and ``tool`` is a step name whose value holds
      that step's own ``tool``, ``paths`` and ``properties``;
    * single-step: ``tool``, ``paths`` and ``properties`` sit at the top level.

    The optional ``global_properties`` section is merged into the properties
    of every step.

    Args:
        config (str): Path to the configuration [YAML|JSON] file or JSON string.
            A ``#<step>`` suffix restricts the configuration to that step.
        *args, **kwargs: Accepted but not used by this loader.
    """

    # Top-level keys that are configuration sections rather than step names.
    _RESERVED_KEYS = ("global_properties", "paths", "properties", "tool")

    def __init__(self, config: Optional[str] = None, *args, **kwargs):
        self.properties = self._read_config(config)
        self.global_properties = self._get_global_properties()
        self.working_dir_path = fu.get_working_dir_path(
            working_dir_path=self.global_properties.get("working_dir_path", None),
            restart=self.global_properties.get("restart", False),
        )

    def get_working_dir_path(self) -> str:
        """get_working_dir_path() returns the working directory path.

        Returns:
            str: Working directory path.
        """
        return self.working_dir_path

    def _read_config(self, config: Optional[str] = None) -> dict[str, Any]:
        """_read_config() reads the configuration and returns it as a dictionary.

        Args:
            config (str): Path to a YAML/JSON file, or a JSON string (detected
                by a leading ``{``).  Either form may carry a ``#<step>``
                suffix to select a single step.

        Returns:
            dict: The whole configuration, or only the selected step section.
            An empty dictionary when *config* is empty.

        Raises:
            FileNotFoundError: If the configuration file does not exist.
            Exception: If the file is neither valid YAML nor valid JSON.
            KeyError: If the step named after ``#`` is not in the configuration.
        """
        if not config:
            return {}

        # NOTE: the split happens before the Galaxy tokens are translated, so a
        # literal "#" inside a JSON string has to arrive escaped as "__pd__".
        config_tokens = str(config).split("#")
        if (json_string := config_tokens[0].strip()).startswith("{"):
            config_dict = json.loads(trans_galaxy_charmap(json_string))
        else:
            config_file_path = Path(config_tokens[0]).resolve()
            if not config_file_path.exists():
                raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
            # Read the text once: handing the same open stream to both parsers
            # would give the JSON fallback an already consumed stream.
            with open(config_file_path) as stream:
                config_text = stream.read()
            try:
                config_dict = yaml.safe_load(config_text) or {}
            except yaml.YAMLError as yaml_error:
                try:
                    config_dict = json.loads(config_text) or {}
                except json.JSONDecodeError as json_error:
                    raise Exception(f"Error reading configuration file {config_file_path} is not a valid YAML: {yaml_error} or a valid JSON: {json_error}") from json_error

        # Read just one step specified in the configuration file path
        # i.e: Read just Editconf step from workflow_configuration.yaml file
        # "/home/user/workflow_configuration.yaml#Editconf"
        if len(config_tokens) > 1:
            return config_dict[config_tokens[1]]

        return config_dict

    def _get_global_properties(self) -> dict[str, Any]:
        """_get_global_properties() returns the global properties of the configuration file.

        Returns:
            dict: Deep copy of the ``global_properties`` section (empty when
            the section is missing or null).
        """
        return deepcopy(self.properties.get("global_properties") or {})

    def _get_section(self, key: str = "") -> dict[str, Any]:
        """Return the section of step *key*, or the whole configuration when *key* is empty.

        A step declared without content (null value) is treated as an empty
        section.  An unknown step name raises ``KeyError``.
        """
        if not key:
            return self.properties
        return self.properties[key] or {}

    def _step_names(self) -> list[str]:
        """Return the top-level keys that are step names (i.e. not reserved sections)."""
        return [name for name in self.properties if name not in self._RESERVED_KEYS]

    def _get_step_properties(self, key: str = "", prefix: str = "", global_log: Optional[logging.Logger] = None) -> dict[str, Any]:
        """_get_step_properties() returns the properties of one step.

        Args:
            key (str): Step name. Empty for a single-step configuration.
            prefix (str): Prefix if provided.
            global_log (Logger): Log from the main workflow.

        Returns:
            dict: dictionary of properties.
        """
        section = self._get_section(key)

        # Layering order matters: global properties first, then the computed
        # keys, and finally the step's own properties, which win on conflicts.
        prop_dic: dict[str, Any] = deepcopy(self.global_properties)
        prop_dic["global_properties_list"] = list(self.global_properties)
        prop_dic["step"] = key
        prop_dic["prefix"] = prefix
        prop_dic["global_log"] = global_log
        prop_dic["working_dir_path"] = self.working_dir_path
        prop_dic["path"] = str(Path(self.working_dir_path).joinpath(prefix, key))
        prop_dic["tool"] = section.get("tool", None)
        prop_dic.update(deepcopy(section.get("properties") or {}))

        return prop_dic

    def get_prop_dic(self, prefix: str = "", global_log: Optional[logging.Logger] = None) -> dict[str, Any]:
        """get_prop_dic() returns the properties dictionary where keys are the
        step names in the configuration YAML file and every value contains another
        nested dictionary containing the keys and values of each step properties section.
        The global properties are copied in each nested dictionary.
        For each nested dictionary the following keys are added:
            | **path** (*str*): Absolute path to the step working dir.
            | **step** (*str*): Name of the step.
            | **prefix** (*str*): Prefix if provided.
            | **global_log** (*Logger object*): Log from the main workflow.
            | **tool** (*str*): Name of the tool to be executed, mostly used by the biobbAPI.
            | **working_dir_path** (*str*): Workflow output directory.
            | **global_properties_list** (*list*): List of global properties.

        When the configuration declares no steps, a single flat properties
        dictionary built from the top-level sections is returned instead.

        Args:
            prefix (str): Prefix if provided.
            global_log (:obj:Logger): Log from the main workflow.

        Returns:
            dict: dictionary of properties.
        """
        prop_dic: dict[str, Any] = {
            step: self._get_step_properties(key=step, prefix=prefix, global_log=global_log)
            for step in self._step_names()
        }
        if not prop_dic:
            return self._get_step_properties(prefix=prefix, global_log=global_log)

        return prop_dic

    def get_paths_dic(self, prefix: str = "") -> dict[str, Any]:
        """get_paths_dic() returns the resolved paths of every step.

        Args:
            prefix (str): Prefix inserted between the working dir and the step dir.

        Returns:
            dict: ``{step name: {file key: path}}``; when the configuration
            declares no steps, the flat ``{file key: path}`` dictionary built
            from the top-level ``paths`` section.
        """
        paths_dic: dict[str, Any] = {
            step: self._get_step_paths(key=step, prefix=prefix)
            for step in self._step_names()
        }
        if not paths_dic:
            return self._get_step_paths(prefix=prefix)

        return paths_dic

    def _get_step_paths(self, key: str = "", prefix: str = "") -> dict[str, Any]:
        """_get_step_paths() resolves the ``paths`` section of one step.

        Values starting with ``file:`` are returned without that marker and
        are not relocated; every other value is placed under
        ``<working_dir_path>/<prefix>/`` after dependency resolution.

        Args:
            key (str): Step name. Empty for a single-step configuration.
            prefix (str): Prefix if provided.

        Returns:
            dict: ``{file key: path}``.
        """
        step_paths_dic: dict[str, Any] = {}
        # "paths:" may be present but null in the file; treat it as empty.
        paths_dic = self._get_section(key).get("paths") or {}
        for file_key, path_value in paths_dic.items():
            if path_value.startswith("file:"):
                # Strip only the leading marker; the rest of the path may
                # itself contain the text "file:" (e.g. "myfile:v2.pdb").
                step_paths_dic[file_key] = path_value.removeprefix("file:")
                continue
            step_paths_dic[file_key] = str(Path(self.working_dir_path).joinpath(prefix, self._solve_dependency(key, path_value)))

        return step_paths_dic

    def _solve_dependency(self, step, dependency_str: str) -> str:
        """_solve_dependency() solves the dependency of a path in the configuration file.

        A value of the form ``dependency/<step>/<file key>`` is replaced by
        the raw path that ``<step>`` declares for ``<file key>``, placed in
        that step's directory.  Any other value is placed in the directory of
        the current *step*.

        Args:
            step (str): Name of the step that owns the path.
            dependency_str (str): Path value as written in the configuration.

        Returns:
            str: Path relative to the workflow working directory.

        Raises:
            Exception: If a dependency is requested without a step name.
            IndexError: If the dependency has fewer than three ``/`` separated parts.
        """
        dependency_tokens = dependency_str.strip().split("/")
        if dependency_tokens[0] != "dependency":
            return str(Path(step).joinpath(dependency_str))

        if not step:
            raise Exception("Step name is required to solve dependency")

        source_step, source_key = dependency_tokens[1], dependency_tokens[2]
        source_paths = (self.properties.get(source_step) or {}).get("paths") or {}
        # An unknown step or file key resolves to the source step directory.
        return str(Path(source_step).joinpath(source_paths.get(source_key, "")))