Coverage for biobb_chemistry/babelm/common.py: 63%
139 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-12 09:28 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-12 09:28 +0000
1"""Common functions for package biobb_chemistry.babel"""
3import re
4from pathlib import Path, PurePath
5from typing import Optional, Union
7from biobb_common.tools import file_utils as fu
def check_input_path(path, out_log, classname):
    """Validate an input file path and return it in absolute form.

    Args:
        path: Location of the input file.
        out_log: Logger handed to ``fu.log`` for error messages.
        classname: Name of the calling class, used as message prefix.

    Returns:
        ``path`` unchanged when it is already absolute, otherwise the
        string obtained by joining it to the current working directory.

    Raises:
        SystemExit: If the file does not exist or its extension is not
            accepted by ``is_valid_input``.
    """
    if not Path(path).exists():
        fu.log(classname + ": Unexisting input file, exiting", out_log)
        raise SystemExit(classname + ": Unexisting input file")

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if not is_valid_input(extension):
        message = classname + ": Format %s in input file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    # A bare file name is never absolute, so a single absolute-path test
    # covers both "only a file name" and "relative path".
    if PurePath(path).is_absolute():
        return path
    return str(PurePath(Path.cwd()).joinpath(path))
def check_output_path(path, out_log, classname):
    """Validate an output file path.

    Args:
        path: Location where the output file will be written.
        out_log: Logger handed to ``fu.log`` for error messages.
        classname: Name of the calling class, used as message prefix.

    Returns:
        ``path`` unchanged.

    Raises:
        SystemExit: If the parent folder of ``path`` does not exist or the
            file extension is not accepted.
    """
    # The previous messages interpolated the builtin ``type`` (rendering
    # "<class 'type'>"); no such variable exists here, so the placeholder
    # has been removed. A PurePath object is always truthy, so only the
    # existence of the parent folder needs checking.
    if not Path(PurePath(path).parent).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    # NOTE(review): the extension is checked against the *input* format
    # list; is_valid_output accepts a narrower set. Kept as is so that no
    # currently accepted output path starts being rejected -- confirm.
    if not is_valid_input(extension):
        # Log and exception now both refer to the output file.
        message = classname + ": Format %s in output file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    return path
def check_input_path_minimize(path, out_log, classname):
    """Validate an input file path for minimization and make it absolute.

    Args:
        path: Location of the input file.
        out_log: Logger handed to ``fu.log`` for error messages.
        classname: Name of the calling class, used as message prefix.

    Returns:
        ``path`` unchanged when it is already absolute, otherwise the
        string obtained by joining it to the current working directory.

    Raises:
        SystemExit: If the file does not exist or its extension is not
            accepted by ``is_valid_input_minimize``.
    """
    if not Path(path).exists():
        fu.log(classname + ": Unexisting input file, exiting", out_log)
        raise SystemExit(classname + ": Unexisting input file")

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if not is_valid_input_minimize(extension):
        message = classname + ": Format %s in input file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    # A bare file name is never absolute, so a single absolute-path test
    # covers both "only a file name" and "relative path".
    if PurePath(path).is_absolute():
        return path
    return str(PurePath(Path.cwd()).joinpath(path))
def check_output_path_minimize(path, out_log, classname):
    """Validate an output file path for minimization.

    Args:
        path: Location where the output file will be written.
        out_log: Logger handed to ``fu.log`` for error messages.
        classname: Name of the calling class, used as message prefix.

    Returns:
        ``path`` unchanged.

    Raises:
        SystemExit: If the parent folder of ``path`` does not exist or the
            file extension is not accepted by ``is_valid_output_minimize``.
    """
    # The previous messages interpolated the builtin ``type`` (rendering
    # "<class 'type'>"); no such variable exists here, so the placeholder
    # has been removed. A PurePath object is always truthy, so only the
    # existence of the parent folder needs checking.
    if not Path(PurePath(path).parent).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    # Extension without the leading dot; validated with the output-side
    # checker (it accepts the same extensions as the input-side one).
    extension = PurePath(path).suffix[1:]
    if not is_valid_output_minimize(extension):
        # Log and exception now both refer to the output file.
        message = classname + ": Format %s in output file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    return path
def get_binary_path(properties, type):
    """Return the binary path stored in ``properties`` under key ``type``.

    The default from ``get_default_value`` is always computed first, so a
    key without a default raises ``KeyError`` even when ``properties``
    contains it.
    """
    fallback = get_default_value(type)
    return properties.get(type, fallback)
def get_input_format(input_format, input_path, out_log):
    """Return the input format to use.

    ``input_format`` is returned as is when ``is_valid_input`` accepts it.
    Otherwise a message is logged and the extension of ``input_path``
    (without the leading dot) is returned instead.
    """
    if is_valid_input(input_format):
        return input_format

    extension = PurePath(input_path).suffix[1:]
    fu.log(
        "Format %s is not compatible as an input format, assigned input file extension: %s"
        % (input_format, extension),
        out_log,
    )
    return extension
def check_minimize_property(type, value, out_log):
    """Check whether a minimization property holds an acceptable value.

    ``value`` is converted to ``str`` and validated according to ``type``.
    When a value is rejected a message is written through ``fu.log``.

    Args:
        type: Property name. Handled names are "criteria", "method",
            "force_field", "hydrogens", "steps", "cutoff", "rvdw", "rele"
            and "frequency". (The name shadows the builtin but is kept
            for backward compatibility with keyword callers.)
        value: Value supplied for the property.
        out_log: Logger handed to ``fu.log``.

    Returns:
        True when the value is accepted. For the flags "hydrogens" and
        "cutoff" True is returned only for "True"; "False" is a correct
        value but yields False without logging. False is also returned
        for rejected values and for unknown property names.
    """
    value = str(value)

    # Unsigned decimal number with optional fraction and exponent, e.g.
    # "6", "6.0" or "1e-06" (the latter is how str() renders small floats).
    number = r"\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?"

    # Boolean flags: only the exact strings "True"/"False" are correct.
    if type in ("hydrogens", "cutoff"):
        if value == "True":
            return True
        if value != "False":
            label = "Hydrogens" if type == "hydrogens" else "Cut-off"
            fu.log(
                "%s %s is not correct, assigned default value: %s"
                % (label, value, get_default_value(type)),
                out_log,
            )
        return False

    # The force field has no default value, hence its own message.
    if type == "force_field":
        if value in ("GAFF", "Ghemical", "MMFF94", "MMFF94s", "UFF"):
            return True
        fu.log(
            "Force field %s is not correct, no force field assigned" % (value),
            out_log,
        )
        return False

    # Remaining properties: (label used in the log message, validator).
    # re.fullmatch is used so that trailing garbage ("6.0abc", "10\n") is
    # rejected; the former unanchored re.match accepted any value that
    # merely started with a digit.
    checks = {
        "criteria": ("Criteria", lambda v: re.fullmatch(r"[+\-]?" + number, v)),
        "method": ("Method", lambda v: v in ("cg", "sd")),
        "steps": ("Steps", lambda v: re.fullmatch(r"\d+", v)),
        "rvdw": ("Rvdw", lambda v: re.fullmatch(number, v)),
        "rele": ("Rele", lambda v: re.fullmatch(number, v)),
        "frequency": ("Frequency", lambda v: re.fullmatch(r"\d+", v)),
    }
    if type not in checks:
        return False

    label, is_ok = checks[type]
    if is_ok(value):
        return True

    fu.log(
        "%s %s is not correct, assigned default value: %s"
        % (label, value, get_default_value(type)),
        out_log,
    )
    return False
def get_output_format(output_format, output_path, out_log):
    """Return the output format to use.

    ``output_format`` is returned as is when ``is_valid_output`` accepts
    it. Otherwise a message is logged and the extension of ``output_path``
    (without the leading dot) is returned instead.
    """
    if is_valid_output(output_format):
        return output_format

    extension = PurePath(output_path).suffix[1:]
    fu.log(
        "Format %s is not compatible as an output format, assigned output file extension: %s"
        % (output_format, extension),
        out_log,
    )
    return extension
def get_coordinates(coordinates, out_log):
    """Return the coordinates value as a string, or "" when it is invalid.

    Only the values whose string form is "2" or "3" are accepted; any
    other value is logged and replaced by the empty string.
    """
    as_text = str(coordinates)
    if as_text in ("2", "3"):
        return as_text

    fu.log("Value %s is not compatible as a coordinates value" % as_text, out_log)
    return ""
def get_ph(p, out_log):
    """Return the pH value as a string, or "" when no usable value is given.

    Args:
        p: pH value. ``None`` and the empty string mean "not provided".
        out_log: Logger handed to ``fu.log``.

    Returns:
        ``str(p)`` for ``int`` and ``float`` values (0 included). The empty
        string for ``None``, "" and, after logging a message, for any
        other value.
    """
    # Nothing supplied: no value and nothing worth logging. Previously
    # None was converted to the string "None".
    if p is None or p == "":
        return ""

    # bool is a subclass of int but is not a meaningful pH; every other
    # non-numeric value (falsy ones such as [] included) is rejected too.
    if isinstance(p, bool) or not isinstance(p, (int, float)):
        fu.log("Incorrect format for pH, no value assigned", out_log)
        return ""

    return str(p)
def get_default_value(key):
    """Return the default value registered for ``key``.

    Raises:
        KeyError: If ``key`` has no default value.
    """
    defaults = dict(
        coordinates=2,
        obabel_path="obabel",
        obminimize_path="obminimize",
        criteria=1e-6,
        method="cg",
        hydrogens=False,
        steps=2500,
        cutoff=False,
        rvdw=6.0,
        rele=10.0,
        frequency=10,
    )
    return defaults[key]
def is_valid_input(ext):
    """Tell whether ``ext`` is one of the accepted input file formats.

    ``ext`` is compared as given (no leading dot, case-sensitive).
    """
    accepted = (
        "dat ent fa fasta gro inp log mcif mdl mmcif mol mol2 "
        "pdb pdbqt png sdf smi smiles txt xml xtc"
    ).split()
    return ext in accepted
def is_valid_output(ext):
    """Tell whether ``ext`` is one of the accepted output file formats.

    ``ext`` is compared as given (no leading dot, case-sensitive).
    """
    accepted = (
        "ent fa fasta gro inp mcif mdl mmcif mol mol2 "
        "pdb pdbqt png sdf smi smiles txt"
    ).split()
    return ext in accepted
def is_valid_input_minimize(ext):
    """Tell whether ``ext`` is an accepted input format for minimization.

    Only "pdb" and "mol2" are accepted (no leading dot, case-sensitive).
    """
    return ext in ("pdb", "mol2")
def is_valid_output_minimize(ext):
    """Tell whether ``ext`` is an accepted output format for minimization.

    Only "pdb" and "mol2" are accepted (no leading dot, case-sensitive).
    """
    return ext in ("pdb", "mol2")
329# TODO: Move this function to biobb_common.tools.file_utils
def _from_string_to_list(input_data: Optional[Union[str, list[str]]]) -> list[str]:
    """
    Turn a comma- or space-separated string into a list of its items.

    Parameters:
        input_data (str, list, or None): Value to convert. A list is returned
            unchanged (the same object); None gives an empty list.

    Returns:
        list: The items with surrounding whitespace removed and empty items
            dropped. Commas are the separator whenever the string contains at
            least one comma; otherwise single spaces are.
    """
    if input_data is None:
        return []

    if isinstance(input_data, list):
        return input_data

    # A single comma anywhere switches the whole string to comma separation.
    separator = " "
    if "," in input_data:
        separator = ","

    trimmed = (piece.strip() for piece in input_data.split(separator))
    return [piece for piece in trimmed if piece]