Coverage for biobb_haddock/haddock/haddock3_config.py: 65%
55 statements
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-07 08:48 +0000
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-07 08:48 +0000
1"""
2Disclaimer
3This code was obtained from the HADDOCK3 repository:
4https://github.com/haddocking/haddock3/blob/main/src/haddock/gear/config.py
5"""
6import toml
7import os
8import re
9from pathlib import Path
# The re.ASCII flag makes sure non-ASCII characters are rejected by ``\w``
# in header names and parameter keys.

# Captures the main headers, e.g. ``[topoaa]``.
# https://regex101.com/r/9urqti/1
_main_header_re = re.compile(r"^ *\[(\w+)\]", re.ASCII)

# regex by @sverhoeven
# Matches ['<name>.<digit>']
_main_quoted_header_re = re.compile(r"^ *\[\'(\w+)\.\d+\'\]", re.ASCII)

# Captures sub-headers, e.g. ``[topoaa.mol1]``: group 1 is the module name,
# group 2 is the dotted remainder (``.mol1``).
# https://regex101.com/r/6OpJJ8/1
# thanks https://stackoverflow.com/questions/39158902
_sub_header_re = re.compile(r"^ *\[(\w+)((?:\.\w+)+)\]", re.ASCII)

# regex by @sverhoeven
# Same as above for the quoted form, e.g. ``['topoaa.1'.mol1]``.
_sub_quoted_header_re = re.compile(
    r"^ *\[\'(\w+)\.\d+\'((?:\.\w+)+)\]",
    re.ASCII,
)

# Captures a parameter assigned an uppercase boolean (``key = True``).
# Group 1 is the ``'key = '`` prefix and group 4 is ``True``/``False``;
# groups 2 and 3 only exist so that the boolean stays at index 4.
# The key is matched with a single ``\w+``: the previous nested-quantifier
# form (``_?\w+((_?\w+?)+)?``) accepted exactly the same keys but backtracked
# exponentially on ``key = <non-boolean>`` lines with long keys.
_uppercase_bool_re = re.compile(r"((\w+)(\s*=\s*))(True|False)", re.ASCII)
def load(fpath: str) -> dict[str, dict[str, dict[str, str]]]:
    """
    Load a HADDOCK3 configuration file to a dictionary.

    Accepts HADDOCK3 ``cfg`` files or pure ``toml`` files.

    Parameters
    ----------
    fpath : str or :external:py:class:`pathlib.Path`
        Path to user configuration file.

    Returns
    -------
    dictionary
        Representing the user configuration file where first level of
        keys are the module names. Step keys will have a numeric
        suffix, for example: ``module.1``.

    Raises
    ------
    Exception
        If the file cannot be read or decoded, or if :py:func:`loads`
        fails on its content. The original error is chained as the
        cause.

    .. see-also::
        * :py:func:`loads`
    """
    try:
        # TOML documents are UTF-8 by specification, so do not depend on
        # the platform's default encoding when reading the file.
        return loads(Path(fpath).read_text(encoding="utf-8"))
    except Exception as err:
        raise Exception(
            "Something is wrong with the config file."
        ) from err  # noqa: E501
def loads(cfg_str: str) -> dict[str, dict[str, dict[str, str]]]:
    """
    Read a string representing a config file to a dictionary.

    Config strings are converted to toml-compatible format and finally
    read by the toml library.

    All headers (dictionary keys) will be suffixed by an integer
    starting at ``1``. For example: ``topoaa.1``. If the key is
    repeated, ``2`` is appended, and so forth. Even if specific
    integers are provided by the user, the suffix integers will be
    normalized.

    Lines of the form ``key = True`` / ``key = False`` are rewritten
    with a lowercase boolean; anything following the boolean on that
    line is dropped.

    Parameters
    ----------
    cfg_str : str
        The string representing the config file. Accepted formats are
        the HADDOCK3 config file or pure `toml` syntax.

    Returns
    -------
    cfg_dict : dict
        The dictionary produced by the toml library from the normalized
        configuration text. Top-level keys are the suffixed headers,
        for example ``topoaa.1``.

    Raises
    ------
    KeyError
        If a sub-header (e.g. ``[topoaa.mol1]``) appears before any
        header of its parent module.
    Exception
        If the toml library fails to parse the normalized text. The
        original error is chained as the cause.
    """
    counter: dict[str, int] = {}
    new_lines: list[str] = []

    # Split on any newline convention rather than ``os.linesep``: text read
    # in text mode always uses "\n", so splitting on "\r\n" (Windows) would
    # leave the whole file as one line and skip header normalization.
    cfg_lines = re.split(r"\r\n|\r|\n", cfg_str)

    # this for-loop normalizes all headers regardless of their input format.
    for line in cfg_lines:
        if group := (
            _main_header_re.match(line)
            or _main_quoted_header_re.match(line)
        ):
            # [name] or ['name.N'] -> ['name.<running count>']
            name = group[1]
            counter[name] = counter.get(name, 0) + 1
            new_line = f"['{name}.{counter[name]}']"

        elif group := (
            _sub_header_re.match(line)
            or _sub_quoted_header_re.match(line)
        ):
            # [name.sub] or ['name.N'.sub] -> ['name.<current count>'.sub]
            name = group[1]
            try:
                count = counter[name]
            except KeyError:
                raise KeyError(
                    f"sub-header {line.strip()!r} appears before its "
                    f"parent header [{name}]"
                ) from None
            new_line = f"['{name}.{count}'{group[2]}]"

        elif group := _uppercase_bool_re.match(line):
            param = group[1]  # Catches 'param = '
            uppercase_bool = group[4]
            new_line = f"{param}{uppercase_bool.lower()}"  # Lowercase bool

        else:
            new_line = line

        new_lines.append(new_line)

    # Re-build workflow configuration file
    cfg = "\n".join(new_lines)

    try:
        cfg_dict = toml.loads(cfg)  # Try to load it with the toml library
    except Exception as err:
        # Show the normalized text that failed to parse.
        print(cfg)
        raise Exception(
            "Some thing is wrong with the config file: " f"{str(err)}"
        ) from err

    return cfg_dict
def save(cfg_dict: dict, path: str) -> None:
    """
    Write a dictionary to a HADDOCK3 config file.

    The parameter dictionary is serialized with the toml library and
    written to ``path``, replacing any existing content.

    Parameters
    ----------
    cfg_dict : dict
        The dictionary containing the parameters.

    path : str or pathlib.Path
        File name where to save the configuration file.

    """
    # TOML documents are UTF-8 by specification; be explicit so the output
    # does not depend on the platform's default encoding.
    with open(path, "w", encoding="utf-8") as fout:
        toml.dump(cfg_dict, fout)