Coverage for biobb_chemistry/babelm/common.py: 63%

139 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-12 09:28 +0000

1"""Common functions for package biobb_chemistry.babel""" 

2 

3import re 

4from pathlib import Path, PurePath 

5from typing import Optional, Union 

6 

7from biobb_common.tools import file_utils as fu 

8 

9 

def check_input_path(path, out_log, classname):
    """Validate an input file path and return it anchored to the working directory.

    The file must exist and its extension (without the leading dot) must be
    accepted by ``is_valid_input``. Each failure is reported through
    ``fu.log`` and then raised as ``SystemExit``. A path that is a bare file
    name, or is not absolute, is joined onto the current working directory.

    Args:
        path: Path of the input file.
        out_log: Log object handed to ``fu.log``.
        classname: Name prefixed to every log and error message.

    Returns:
        The (possibly re-anchored) path.

    Raises:
        SystemExit: If the file is missing or its extension is rejected.
    """
    if not Path(path).exists():
        fu.log(classname + ": Unexisting input file, exiting", out_log)
        raise SystemExit(classname + ": Unexisting input file")

    extension = PurePath(path).suffix[1:]
    if not is_valid_input(extension):
        message = classname + ": Format %s in input file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    pure_path = PurePath(path)
    if pure_path.name == path or not pure_path.is_absolute():
        path = str(PurePath(Path.cwd()).joinpath(path))

    return path

28 

29 

def check_output_path(path, out_log, classname):
    """Validate an output file path and return it unchanged.

    The parent folder of ``path`` must exist and the file extension (without
    the leading dot) must be an accepted format. Each failure is reported
    through ``fu.log`` and then raised as ``SystemExit``.

    Args:
        path: Path of the output file.
        out_log: Log object handed to ``fu.log``.
        classname: Name prefixed to every log and error message.

    Returns:
        ``path`` exactly as received.

    Raises:
        SystemExit: If the parent folder is missing or the extension is
            rejected.
    """
    # The messages used to be formatted with ``% type``; no such local exists
    # here, so the builtin ``type`` was interpolated ("<class 'type'>").
    if not Path(PurePath(path).parent).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    extension = PurePath(path).suffix[1:]
    # NOTE(review): the extension is checked against the *input* format list,
    # as before; ``is_valid_output`` is stricter and may be what was intended.
    # Kept as is so currently accepted output extensions keep working.
    if not is_valid_input(extension):
        message = classname + ": Format %s in output file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    return path

48 

49 

def check_input_path_minimize(path, out_log, classname):
    """Validate an input file path for minimization and return it.

    Same checks as the generic input check, but the extension is validated
    with ``is_valid_input_minimize``. Failures are logged through ``fu.log``
    and raised as ``SystemExit``. A bare file name or non-absolute path is
    joined onto the current working directory before being returned.

    Args:
        path: Path of the input file.
        out_log: Log object handed to ``fu.log``.
        classname: Name prefixed to every log and error message.

    Returns:
        The (possibly re-anchored) path.

    Raises:
        SystemExit: If the file is missing or its extension is rejected.
    """
    missing = not Path(path).exists()
    if missing:
        fu.log(classname + ": Unexisting input file, exiting", out_log)
        raise SystemExit(classname + ": Unexisting input file")

    fmt = PurePath(path).suffix[1:]
    if not is_valid_input_minimize(fmt):
        error = classname + ": Format %s in input file is not compatible" % fmt
        fu.log(error, out_log)
        raise SystemExit(error)

    candidate = PurePath(path)
    needs_anchor = candidate.name == path or not candidate.is_absolute()
    if needs_anchor:
        return str(PurePath(Path.cwd()).joinpath(path))
    return path

68 

69 

def check_output_path_minimize(path, out_log, classname):
    """Validate an output file path for minimization and return it unchanged.

    The parent folder of ``path`` must exist and the file extension (without
    the leading dot) must be accepted by ``is_valid_output_minimize``. Each
    failure is reported through ``fu.log`` and then raised as ``SystemExit``.

    Args:
        path: Path of the output file.
        out_log: Log object handed to ``fu.log``.
        classname: Name prefixed to every log and error message.

    Returns:
        ``path`` exactly as received.

    Raises:
        SystemExit: If the parent folder is missing or the extension is
            rejected.
    """
    # The messages used to be formatted with ``% type``; no such local exists
    # here, so the builtin ``type`` was interpolated ("<class 'type'>").
    if not Path(PurePath(path).parent).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    extension = PurePath(path).suffix[1:]
    # The output validator holds the same formats as the input one used
    # before, so the set of accepted extensions is unchanged.
    if not is_valid_output_minimize(extension):
        message = classname + ": Format %s in output file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    return path

87 

88 

def get_binary_path(properties, type):
    """Return the binary path configured under ``type``, or its default.

    Args:
        properties: Mapping of property names to values (anything with a
            dict-style ``get``).
        type: Property key to look up, e.g. ``"obabel_path"``.

    Returns:
        ``properties[type]`` when the key is present, otherwise the value
        ``get_default_value`` gives for ``type``.

    Raises:
        KeyError: Propagated from ``get_default_value`` when the key is absent
            from ``properties`` and has no default.
    """
    # Only consult the defaults when the key is really missing: computing the
    # default eagerly made a key that is present in ``properties`` but unknown
    # to the defaults table fail with KeyError.
    missing = object()
    configured = properties.get(type, missing)
    if configured is missing:
        return get_default_value(type)
    return configured

92 

93 

def get_input_format(input_format, input_path, out_log):
    """Return the input format to use, falling back to the file extension.

    Args:
        input_format: Requested input format.
        input_path: Path of the input file; only its suffix is read.
        out_log: Log object handed to ``fu.log``.

    Returns:
        ``input_format`` when ``is_valid_input`` accepts it; otherwise the
        extension of ``input_path`` without the leading dot (the fallback is
        logged and is not validated itself).
    """
    if is_valid_input(input_format):
        return input_format

    extension = PurePath(input_path).suffix[1:]
    fu.log(
        "Format %s is not compatible as an input format, assigned input file extension: %s"
        % (input_format, extension),
        out_log,
    )
    return extension

107 

108 

def check_minimize_property(type, value, out_log):
    """Check a single minimization property value.

    ``value`` is converted with ``str`` and validated according to ``type``:

    * ``criteria``: optionally signed decimal number, exponent allowed.
    * ``rvdw``, ``rele``: unsigned decimal number, exponent allowed.
    * ``steps``, ``frequency``: unsigned integer.
    * ``method``: one of ``cg``, ``sd``.
    * ``force_field``: one of ``GAFF``, ``Ghemical``, ``MMFF94``,
      ``MMFF94s``, ``UFF``.
    * ``hydrogens``, ``cutoff``: ``True`` or ``False``.

    Args:
        type: Name of the property being checked.
        value: Value to check; any object, compared by its ``str`` form.
        out_log: Log object handed to ``fu.log``.

    Returns:
        ``True`` when the value is valid (for the two flags: when it is
        ``True``). ``False`` otherwise, including a flag set to ``False``
        (returned silently) and an unknown ``type``. Invalid values are
        logged before ``False`` is returned.
    """
    value = str(value)

    # Whole-string matching is required: a prefix match accepted values such
    # as "6abc", and "^\d+$" tolerated a trailing newline.
    unsigned_number = r"\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?"
    numeric_patterns = {
        "criteria": r"[+\-]?" + unsigned_number,
        "rvdw": unsigned_number,
        "rele": unsigned_number,
        "steps": r"\d+",
        "frequency": r"\d+",
    }
    choices = {
        "method": ("cg", "sd"),
        "force_field": ("GAFF", "Ghemical", "MMFF94", "MMFF94s", "UFF"),
    }
    flags = ("hydrogens", "cutoff")
    # Leading word of the log message for each property.
    labels = {
        "criteria": "Criteria",
        "method": "Method",
        "hydrogens": "Hydrogens",
        "steps": "Steps",
        "cutoff": "Cut-off",
        "rvdw": "Rvdw",
        "rele": "Rele",
        "frequency": "Frequency",
    }

    if type in numeric_patterns:
        valid = re.fullmatch(numeric_patterns[type], value) is not None
    elif type in choices:
        valid = value in choices[type]
    elif type in flags:
        if value == "False":
            # A well-formed "off" flag: nothing to log.
            return False
        valid = value == "True"
    else:
        # Unknown property name.
        return False

    if valid:
        return True

    if type == "force_field":
        # Force field has no default value, hence the different message.
        fu.log(
            "Force field %s is not correct, no force field assigned" % (value),
            out_log,
        )
    else:
        fu.log(
            "%s %s is not correct, assigned default value: %s"
            % (labels[type], value, get_default_value(type)),
            out_log,
        )

    return False

209 

210 

def get_output_format(output_format, output_path, out_log):
    """Return the output format to use, falling back to the file extension.

    Args:
        output_format: Requested output format.
        output_path: Path of the output file; only its suffix is read.
        out_log: Log object handed to ``fu.log``.

    Returns:
        ``output_format`` when ``is_valid_output`` accepts it; otherwise the
        extension of ``output_path`` without the leading dot (the fallback is
        logged and is not validated itself).
    """
    if is_valid_output(output_format):
        return output_format

    extension = PurePath(output_path).suffix[1:]
    fu.log(
        "Format %s is not compatible as an output format, assigned output file extension: %s"
        % (output_format, extension),
        out_log,
    )
    return extension

224 

225 

def get_coordinates(coordinates, out_log):
    """Return the coordinates value as a string, or "" when it is not 2 or 3.

    Args:
        coordinates: Requested value; compared by its ``str`` form.
        out_log: Log object handed to ``fu.log``.

    Returns:
        ``"2"`` or ``"3"`` for an accepted value; otherwise an empty string,
        after logging the rejected value.
    """
    text = str(coordinates)
    if text in ("3", "2"):
        return text

    fu.log("Value %s is not compatible as a coordinates value" % text, out_log)
    return ""

234 

235 

def get_ph(p, out_log):
    """Return the pH as a string, or "" when no usable value was given.

    Args:
        p: Requested pH. ``None`` and the empty string mean "not set".
        out_log: Log object handed to ``fu.log``.

    Returns:
        ``str(p)`` for an ``int`` or ``float`` (``0`` included); otherwise an
        empty string. Values that are set but not numeric are logged.
    """
    if p is None or p == "":
        # Nothing requested. ``None`` used to come back as the string "None".
        return ""

    # bool is a subclass of int, but "True"/"False" is never a pH value.
    if isinstance(p, bool) or not isinstance(p, (int, float)):
        fu.log("Incorrect format for pH, no value assigned", out_log)
        return ""

    return str(p)

244 

245 

def get_default_value(key):
    """Return the built-in default for the property named ``key``.

    Raises:
        KeyError: If ``key`` has no entry in the defaults table.
    """
    defaults = dict(
        coordinates=2,
        obabel_path="obabel",
        obminimize_path="obminimize",
        criteria=1e-6,
        method="cg",
        hydrogens=False,
        steps=2500,
        cutoff=False,
        rvdw=6.0,
        rele=10.0,
        frequency=10,
    )
    return defaults[key]

263 

264 

def is_valid_input(ext):
    """Tell whether ``ext`` is one of the accepted Open Babel input formats.

    The comparison is exact (case-sensitive, no leading dot).
    """
    accepted = (
        "dat", "ent", "fa", "fasta", "gro", "inp", "log",
        "mcif", "mdl", "mmcif", "mol", "mol2", "pdb", "pdbqt",
        "png", "sdf", "smi", "smiles", "txt", "xml", "xtc",
    )
    return ext in accepted

291 

292 

def is_valid_output(ext):
    """Tell whether ``ext`` is one of the accepted Open Babel output formats.

    The comparison is exact (case-sensitive, no leading dot).
    """
    accepted = (
        "ent", "fa", "fasta", "gro", "inp", "mcif",
        "mdl", "mmcif", "mol", "mol2", "pdb", "pdbqt",
        "png", "sdf", "smi", "smiles", "txt",
    )
    return ext in accepted

315 

316 

def is_valid_input_minimize(ext):
    """Tell whether ``ext`` is an input format accepted for minimization.

    Only ``pdb`` and ``mol2`` are accepted; the comparison is exact.
    """
    return ext in ("pdb", "mol2")

321 

322 

def is_valid_output_minimize(ext):
    """Tell whether ``ext`` is an output format accepted for minimization.

    Only ``pdb`` and ``mol2`` are accepted; the comparison is exact.
    """
    return ext in ("pdb", "mol2")

327 

328 

329# TODO: Move this function to biobb_common.tools.file_utils 

330def _from_string_to_list(input_data: Optional[Union[str, list[str]]]) -> list[str]: 

331 """ 

332 Converts a string to a list, splitting by commas or spaces. If the input is already a list, returns it as is. 

333 Returns an empty list if input_data is None. 

334 

335 Parameters: 

336 input_data (str, list, or None): The string, list, or None value to convert. 

337 

338 Returns: 

339 list: A list of string elements or an empty list if input_data is None. 

340 """ 

341 if input_data is None: 

342 return [] 

343 

344 if isinstance(input_data, list): 

345 # If input is already a list, return it 

346 return input_data 

347 

348 # If input is a string, determine the delimiter based on presence of commas 

349 delimiter = "," if "," in input_data else " " 

350 items = input_data.split(delimiter) 

351 

352 # Remove whitespace from each item and ignore empty strings 

353 processed_items = [item.strip() for item in items if item.strip()] 

354 

355 return processed_items