Coverage for biobb_haddock/haddock/haddock3_config.py: 65%

55 statements  

« prev     ^ index     » next       coverage.py v7.10.2, created at 2025-08-07 08:48 +0000

1""" 

2Disclaimer 

3This code was obtained from the HADDOCK3 repository: 

4https://github.com/haddocking/haddock3/blob/main/src/haddock/gear/config.py 

5""" 

6import toml 

7import os 

8import re 

9from pathlib import Path 

10 

11# the re.ASCII parameter makes sure non-ascii chars are rejected in the \w key 

12 

13# Captures the main headers. 

14# https://regex101.com/r/9urqti/1 

15_main_header_re = re.compile(r"^ *\[(\w+)\]", re.ASCII) 

16 

17# regex by @sverhoeven 

18# Matches ['<name>.<digit>'] 

19_main_quoted_header_re = re.compile(r"^ *\[\'(\w+)\.\d+\'\]", re.ASCII) 

20 

21# Captures sub-headers 

22# https://regex101.com/r/6OpJJ8/1 

23# thanks https://stackoverflow.com/questions/39158902 

24_sub_header_re = re.compile(r"^ *\[(\w+)((?:\.\w+)+)\]", re.ASCII) 

25 

26# regex by @sverhoeven 

27_sub_quoted_header_re = re.compile( 

28 r"^ *\[\'(\w+)\.\d+\'((?:\.\w+)+)\]", 

29 re.ASCII, 

30) 

31 

32# Captures parameter uppercase boolean 

33_uppercase_bool_re = re.compile(r"(_?\w+((_?\w+?)+)?\s*=\s*)(True|False)", re.ASCII) 

34 

35 

def load(fpath: str) -> dict[str, dict[str, dict[str, str]]]:
    """
    Load an HADDOCK3 configuration file to a dictionary.

    Accepts HADDOCK3 ``cfg`` files or pure ``toml`` files.

    Parameters
    ----------
    fpath : str or :external:py:class:`pathlib.Path`
        Path to user configuration file.

    Returns
    -------
    dictionary
        Representing the user configuration file where first level of
        keys are the module names. Step keys will have a numeric
        suffix, for example: ``module.1``.

    Raises
    ------
    Exception
        If the file cannot be read or its content cannot be parsed. The
        original error is chained as ``__cause__``.

    .. seealso::
        * :py:func:`loads`
    """
    try:
        # TOML documents are UTF-8 by specification; do not depend on the
        # locale's default encoding when reading the file.
        return loads(Path(fpath).read_text(encoding="utf-8"))
    except Exception as err:
        raise Exception(
            "Something is wrong with the config file."
        ) from err  # noqa: E501

63 

64 

def loads(cfg_str: str) -> dict[str, dict[str, dict[str, str]]]:
    """
    Read a string representing a config file to a dictionary.

    Config strings are converted to toml-compatible format and finally
    read by the toml library.

    All headers (dictionary keys) will be suffixed by an integer
    starting at ``1``. For example: ``topoaa.1``. If the key is
    repeated, ``2`` is appended, and so forth. Even if specific
    integers are provided by the user, the suffix integers will be
    normalized. Sub-headers (``[module.sub]``) are attached to the most
    recent occurrence of their module. Parameters written at the start
    of a line with a capitalized boolean (``True``/``False``) have the
    value lowercased so that the toml library accepts it.

    Parameters
    ----------
    cfg_str : str
        The string representing the config file. Accepted formats are
        the HADDOCK3 config file or pure `toml` syntax.

    Returns
    -------
    dict
        The dictionary produced by the toml library from the normalized
        text. Module sections are stored under ``'<name>.<count>'``
        keys.

    Raises
    ------
    KeyError
        If a sub-header names a module for which no main header was
        seen earlier in the text.
    Exception
        If the normalized text cannot be parsed by the toml library.
        The normalized text is printed before raising.
    """
    new_lines: list[str] = []
    counter: dict[str, int] = {}

    # Split on "\n" rather than ``os.linesep``: text read in universal
    # newlines mode only contains "\n", so splitting on "\r\n" (Windows)
    # would leave the whole file as a single line and only the first
    # header would be normalized. A stray "\r" left at the end of a line
    # is harmless: the patterns are not anchored at the end of the line.
    for line in cfg_str.split("\n"):
        # Main headers, plain (``[name]``) or already quoted and indexed
        # (``['name.3']``): (re)number them by order of appearance.
        if group := (
            _main_header_re.match(line) or _main_quoted_header_re.match(line)
        ):
            name = group[1]
            counter[name] = counter.get(name, 0) + 1
            new_line = f"['{name}.{counter[name]}']"

        # Sub-headers, plain or quoted: reuse the index of the latest
        # main header with the same name. Raises KeyError when no such
        # main header has been seen yet.
        elif group := (
            _sub_header_re.match(line) or _sub_quoted_header_re.match(line)
        ):
            name = group[1]
            count = counter[name]
            new_line = f"['{name}.{count}'{group[2]}]"

        # ``param = True`` -> ``param = true``. Only the ``param = ``
        # prefix and the lowercased literal are kept; anything following
        # the literal on that line is dropped.
        elif group := _uppercase_bool_re.match(line):
            new_line = f"{group[1]}{group[4].lower()}"

        else:
            new_line = line

        new_lines.append(new_line)

    # Re-build workflow configuration file
    cfg = "\n".join(new_lines)

    try:
        cfg_dict = toml.loads(cfg)  # Try to load it with the toml library
    except Exception as err:
        # Show the normalized text that failed to parse.
        print(cfg)
        raise Exception(
            f"Some thing is wrong with the config file: {err}"
        ) from err

    return cfg_dict

148 

149 

def save(cfg_dict: dict, path: str) -> None:
    """
    Write a dictionary to a HADDOCK3 config file.

    The dictionary is serialized with the toml library and written to
    ``path``, replacing any existing file.

    Parameters
    ----------
    cfg_dict : dict
        The dictionary containing the parameters.

    path : str or pathlib.Path
        File name where to save the configuration file.

    """
    # TOML documents are UTF-8 by specification; do not depend on the
    # locale's default encoding when writing the file.
    with open(path, "w", encoding="utf-8") as fout:
        toml.dump(cfg_dict, fout)