Coverage for biobb_io / api / common.py: 61%

205 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-04 10:56 +0000

1"""Common functions for package api""" 

2 

3import json 

4import os 

5import re 

6import urllib.request 

7import urllib.parse 

8from pathlib import Path, PurePath 

9 

10import requests 

11from biobb_common.tools import file_utils as fu 

12 

13 

def check_output_path(path, argument, optional, out_log, classname) -> str:
    """Validate an output file path and return it when it is usable.

    Args:
        path: Output file path to validate.
        argument: Name of the output argument (for example
            ``output_pdb_path``); used in the messages and to look up the
            accepted extensions through ``is_valid_file``.
        optional: When true, an empty ``path`` is accepted.
        out_log: Logger handed to ``fu.log``.
        classname: Caller name, used as the prefix of every message.

    Returns:
        ``""`` when the argument is optional and no path was given,
        otherwise ``path`` unchanged.

    Raises:
        SystemExit: If the parent folder of ``path`` does not exist or the
            file extension is not accepted for ``argument``.
    """
    if optional and not path:
        return ""

    folder = PurePath(path).parent
    if folder and not Path(folder).exists():
        fu.log(classname + ": Unexisting %s folder, exiting" % argument, out_log)
        raise SystemExit(classname + ": Unexisting %s folder" % argument)

    # Extension without its leading dot ("" when the path has no suffix).
    extension = PurePath(path).suffix[1:]
    if is_valid_file(extension, argument):
        return path

    message = classname + ": Format %s in %s file is not compatible" % (
        extension,
        argument,
    )
    fu.log(message, out_log)
    raise SystemExit(message)

33 

34 

def is_valid_file(ext, argument):
    """Tell whether extension ``ext`` is accepted for output ``argument``.

    Args:
        ext: File extension without the leading dot.
        argument: Name of the output argument being checked.

    Returns:
        bool: True when ``ext`` is one of the extensions registered for
        ``argument``.

    Raises:
        KeyError: If ``argument`` is not one of the registered names.
    """
    accepted_by_argument = {
        "output_sdf_path": ["sdf"],
        "output_pdb_path": ["pdb"],
        "output_simulations": ["json"],
        "output_simulation": ["zip"],
        "output_pdb_zip_path": ["zip"],
        "output_mutations_list_txt": ["txt"],
        "output_json_path": ["json"],
        "output_fasta_path": ["fasta"],
        "output_mmcif_path": ["mmcif", "cif"],
        "output_top_path": ["pdb"],
        "output_trj_path": ["mdcrd", "trr", "xtc"],
    }
    accepted = accepted_by_argument[argument]
    return ext in accepted

51 

52 

def download_pdb(pdb_code, api_id, out_log=None, global_log=None):
    """Download a structure in PDB format from one of the supported APIs.

    Args:
        pdb_code: PDB code; it is concatenated verbatim into the URL.
        api_id: Source API, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: Body of the HTTP response decoded as UTF-8. The HTTP status is
        not inspected here.

    Raises:
        ValueError: If ``api_id`` is not one of the supported identifiers.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdb/" + pdb_code + "/coords/?"
    elif api_id == "pdb":
        url = "https://files.rcsb.org/download/" + pdb_code + ".pdb"
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/entry-files/download/pdb" + pdb_code + ".ent"
    else:
        # Without this branch `url` would be unbound and the function would
        # fail later with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r, expected 'mmb', 'pdb' or 'pdbe'" % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    return requests.get(url).content.decode("utf-8")

68 

69 

def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
    """Download the AlphaFold model (PDB format) for a UniProt code.

    Args:
        uniprot_code: UniProt code; concatenated verbatim into the URL.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
        classname: Optional caller name used as prefix of the error message.

    Returns:
        str: Body of the HTTP response decoded as UTF-8. Only a 404 status
        is treated as an error.

    Raises:
        SystemExit: If the server answers 404 for the built URL.
    """
    url = "https://alphafold.ebi.ac.uk/files/AF-" + uniprot_code + "-F1-model_v6.pdb"

    fu.log("Downloading %s from: %s" % (uniprot_code, url), out_log, global_log)

    response = requests.get(url)
    if response.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # a TypeError that hides the real problem.
        prefix = (classname + ": ") if classname is not None else ""
        message = prefix + "Incorrect Uniprot Code: %s" % (uniprot_code)
        fu.log(message, out_log)
        raise SystemExit(message)

    return response.content.decode("utf-8")

86 

87 

def download_mddb_top(project_id, node_id, selection, out_log=None, global_log=None, classname=None):
    """Download the structure (topology) of an MDDB project.

    Args:
        project_id: Project identifier; concatenated verbatim into the URL.
        node_id: Node name used as sub-domain of ``mddbr.eu``.
        selection: Selection; converted with ``str`` and URL-quoted.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
        classname: Optional caller name used as prefix of the error message.

    Returns:
        str: Body of the HTTP response decoded as UTF-8. Only a 404 status
        is treated as an error.

    Raises:
        SystemExit: If the server answers 404 for the built URL.
    """
    url = (
        "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id
        + "/structure?selection=" + urllib.parse.quote(str(selection))
    )

    fu.log("Downloading %s topology from: %s" % (project_id, url), out_log, global_log)

    response = requests.get(url)
    if response.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # a TypeError that hides the real problem.
        prefix = (classname + ": ") if classname is not None else ""
        message = prefix + "Incorrect url, check project_id, node_id and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return response.content.decode("utf-8")

104 

105 

def download_mddb_trj(project_id, node_id, trj_format, frames, selection, out_log=None, global_log=None, classname=None):
    """Download the trajectory of an MDDB project.

    Args:
        project_id: Project identifier; concatenated verbatim into the URL.
        node_id: Node name used as sub-domain of ``mddbr.eu``.
        trj_format: Value of the ``format`` query parameter (a string).
        frames: Value of the ``frames`` query parameter (a string; it is
            concatenated without conversion or quoting).
        selection: Selection; converted with ``str`` and URL-quoted.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
        classname: Optional caller name used as prefix of the error message.

    Returns:
        bytes: Raw body of the HTTP response. Only a 404 status is treated
        as an error.

    Raises:
        SystemExit: If the server answers 404 for the built URL.
    """
    url = (
        "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id
        + "/trajectory?format=" + trj_format
        + "&frames=" + frames
        + "&selection=" + urllib.parse.quote(str(selection))
    )

    fu.log("Downloading %s trajectory from: %s" % (project_id, url), out_log, global_log)

    response = requests.get(url)
    if response.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # a TypeError that hides the real problem.
        prefix = (classname + ": ") if classname is not None else ""
        message = prefix + "Incorrect url, check project_id, node_id, trj_format, frames and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return response.content

122 

123 

def download_mddb_file(project_id, node_id, file_name, out_log=None, global_log=None, classname=None):
    """Download one file of an MDDB project.

    Args:
        project_id: Project identifier; concatenated verbatim into the URL.
        node_id: Node name used as sub-domain of ``mddbr.eu``.
        file_name: Name of the project file; concatenated verbatim into the
            URL.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
        classname: Optional caller name used as prefix of the error message.

    Returns:
        bytes: Raw body of the HTTP response. Only a 404 status is treated
        as an error.

    Raises:
        SystemExit: If the server answers 404 for the built URL.
    """
    url = "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id + "/files/" + file_name

    fu.log("Downloading %s file from: %s" % (project_id, url), out_log, global_log)

    response = requests.get(url)
    if response.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # a TypeError that hides the real problem.
        prefix = (classname + ": ") if classname is not None else ""
        # The message names the parameters this function actually takes.
        message = prefix + "Incorrect url, check project_id, node_id and file_name: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return response.content

140 

141 

def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
    """Download a structure in mmCIF format from one of the supported APIs.

    Args:
        pdb_code: PDB code; it is concatenated verbatim into the URL.
        api_id: Source API, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: Body of the HTTP response decoded as UTF-8. The HTTP status is
        not inspected here.

    Raises:
        ValueError: If ``api_id`` is not one of the supported identifiers.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdb/" + pdb_code + ".cif"
    elif api_id == "pdb":
        url = "https://files.rcsb.org/download/" + pdb_code + ".cif"
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/entry-files/download/" + pdb_code + ".cif"
    else:
        # Without this branch `url` would be unbound and the function would
        # fail later with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r, expected 'mmb', 'pdb' or 'pdbe'" % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    return requests.get(url, verify=True).content.decode("utf-8")

157 

158 

def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
    """Download a ligand file and strip its empty lines.

    Args:
        ligand_code: Ligand code; lower-cased for ``"mmb"`` and upper-cased
            for ``"pdbe"`` before being placed in the URL.
        api_id: Source API, ``"mmb"`` or ``"pdbe"``.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: Downloaded text with every empty line removed and the remaining
        lines joined with ``os.linesep``.

    Raises:
        ValueError: If ``api_id`` is not one of the supported identifiers.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdbMonomer/" + ligand_code.lower()
        text = requests.get(url, verify=True).content.decode("utf-8")
    elif api_id == "pdbe":
        url = (
            # "https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/" + ligand_code.upper() + "_ideal.pdb"
            "https://files.rcsb.org/ligands/view/" + ligand_code.upper() + ".cif"
        )
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")
    else:
        # Without this branch `url` and `text` would be unbound and the
        # function would fail later with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r, expected 'mmb' or 'pdbe'" % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    # Drop every empty line (not only trailing ones) from the content.
    text = os.linesep.join([s for s in text.splitlines() if s])

    return text

181 

182 

def download_fasta(pdb_code, api_id, out_log=None, global_log=None):
    """Download the FASTA sequence of a PDB entry from a supported API.

    Args:
        pdb_code: PDB code; it is concatenated verbatim into the URL.
        api_id: Source API, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: Body of the HTTP response decoded as UTF-8. The HTTP status is
        not inspected here.

    Raises:
        ValueError: If ``api_id`` is not one of the supported identifiers.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdb/" + pdb_code + ".fasta"
    elif api_id == "pdb":
        url = "https://www.rcsb.org/fasta/entry/" + pdb_code
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/api/v2/pdb/entry/" + pdb_code + "/fasta"
    else:
        # Without this branch `url` would be unbound and the function would
        # fail later with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r, expected 'mmb', 'pdb' or 'pdbe'" % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    return requests.get(url, verify=True).content.decode("utf-8")

198 

199 

def download_binding_site(
    pdb_code,
    url="https://www.ebi.ac.uk/pdbe/api/v2/pdb/entry/binding_sites/%s/1",
    out_log=None,
    global_log=None,
):
    """Download the binding-site description of a PDB entry as JSON text.

    Args:
        pdb_code: PDB code substituted into ``url``.
        url: URL template containing one ``%s`` placeholder for the code.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: The downloaded JSON document re-serialised with an indent of 4
        and sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting binding sites from: %s" % (url), out_log, global_log)

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        text = response.read()
    json_obj = json.loads(text)

    return json.dumps(json_obj, indent=4, sort_keys=True)

220 

221 

def download_ideal_sdf(ligand_code, api_id, out_log=None, global_log=None):
    """Download the ideal SDF file of a ligand.

    Args:
        ligand_code: Ligand code; upper-cased before being placed in the URL.
        api_id: Source API, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: Downloaded content decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported identifiers.
    """
    if api_id == "pdb":
        url = (
            "https://files.rcsb.org/ligands/download/" + ligand_code.upper() + "_ideal.sdf"
        )
        text = requests.get(url, verify=True).content.decode("utf-8")
    elif api_id == "pdbe":
        url = (
            "https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/" + ligand_code.upper() + "_ideal.sdf"
        )
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")
    else:
        # Without this branch `url` and `text` would be unbound and the
        # function would fail later with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r, expected 'pdb' or 'pdbe'" % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    return text

242 

243 

def download_str_info(
    pdb_code,
    url="https://mdb-login.bsc.es/api/pdb/%s.json",
    out_log=None,
    global_log=None,
):
    """Download the structure information of a PDB entry as JSON text.

    Args:
        pdb_code: PDB code substituted into ``url``.
        url: URL template containing one ``%s`` placeholder for the code.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: The downloaded JSON document re-serialised with an indent of 4
        and sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting structure info from: %s" % (url), out_log, global_log)

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        text = response.read()
    json_obj = json.loads(text)

    return json.dumps(json_obj, indent=4, sort_keys=True)

264 

265 

def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=None):
    """Write PDB text to a file, optionally keeping only selected records.

    Args:
        pdb_string: Content to write.
        output_pdb_path: Destination path, opened in text write mode.
        filt: Optional container of record names. When given, a line is
            written only if the first six characters of its first
            whitespace-separated token are in ``filt``.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
    """
    fu.log("Writting pdb to: %s" % (output_pdb_path), out_log, global_log)
    with open(output_pdb_path, "w") as output_pdb_file:
        if not filt:
            output_pdb_file.write(pdb_string)
            return

        fu.log(
            "Filtering lines NOT starting with one of these words: %s" % str(filt),
            out_log,
            global_log,
        )
        for line in pdb_string.splitlines(True):
            tokens = line.split()
            # Blank or whitespace-only lines have no first token; indexing
            # it used to raise IndexError, so such lines are skipped.
            if tokens and tokens[0][0:6] in filt:
                output_pdb_file.write(line)

281 

282 

def write_bin(bin_string, output_bin_path, out_log=None, global_log=None):
    """Log the destination and write binary content to it.

    Args:
        bin_string: Bytes-like content to write.
        output_bin_path: Destination path, opened in binary write mode.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
    """
    message = "Writting bin to: %s" % (output_bin_path)
    fu.log(message, out_log, global_log)
    with open(output_bin_path, "wb") as handle:
        handle.write(bin_string)

288 

289 

def write_mmcif(mmcif_string, output_mmcif_path, out_log=None, global_log=None):
    """Log the destination and write mmCIF text to it.

    Args:
        mmcif_string: Text content to write.
        output_mmcif_path: Destination path, opened in text write mode.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
    """
    message = "Writting mmcif to: %s" % (output_mmcif_path)
    fu.log(message, out_log, global_log)
    with open(output_mmcif_path, "w") as handle:
        handle.write(mmcif_string)

295 

296 

def write_fasta(fasta_string, output_fasta_path, out_log=None, global_log=None):
    """Log the destination and write FASTA text to it.

    Args:
        fasta_string: Text content to write.
        output_fasta_path: Destination path, opened in text write mode.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
    """
    message = "Writting FASTA to: %s" % (output_fasta_path)
    fu.log(message, out_log, global_log)
    with open(output_fasta_path, "w") as handle:
        handle.write(fasta_string)

302 

303 

def write_sdf(sdf_string, output_sdf_path, out_log=None, global_log=None):
    """Log the destination and write SDF text to it.

    Args:
        sdf_string: Text content to write.
        output_sdf_path: Destination path, opened in text write mode.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
    """
    message = "Writting sdf to: %s" % (output_sdf_path)
    fu.log(message, out_log, global_log)
    with open(output_sdf_path, "w") as handle:
        handle.write(sdf_string)

309 

310 

def get_cluster_pdb_codes(pdb_code, cluster, out_log=None, global_log=None):
    """Fetch the PDB codes that belong to a cluster of ``pdb_code``.

    Args:
        pdb_code: PDB code; lower-cased before being placed in the URL.
        cluster: Cluster identifier; converted with ``str`` for the URL.
        out_log: Optional logger; its ``info`` method receives a summary.
        global_log: Optional logger; its ``info`` method receives the same
            summary prefixed with ``fu.get_logs_prefix()``.

    Returns:
        set: Lower-cased ``_id`` values of the ``clusterMembers`` entries in
        the JSON response.
    """
    url = (
        "https://mdb-login.bsc.es/api/pdb/"
        + pdb_code.lower()
        + "/clusters/cl-"
        + str(cluster)
        + ".json"
    )
    payload = requests.get(url, verify=True).content.decode("utf-8")
    members = json.loads(payload)["clusterMembers"]
    pdb_codes = {member["_id"].lower() for member in members}

    summary = (
        "Cluster: " + str(cluster) + " of pdb_code: " + pdb_code + "\n List: " + str(pdb_codes)
    )
    if out_log:
        out_log.info(summary)
    if global_log:
        global_log.info(fu.get_logs_prefix() + summary)

    return pdb_codes

336 

337 

def get_uniprot(pdb_code, url, out_log=None, global_log=None):
    """Return the first UniProt id referenced by ``pdb_code``.

    Args:
        pdb_code: PDB code; lower-cased before being placed in the URL.
        url: Base URL of the API; ``/pdb/<code>/entry/uniprotRefs/_id`` is
            appended to it.
        out_log: Optional logger; its ``info`` method receives a summary.
        global_log: Optional logger; its ``info`` method receives the same
            summary.

    Returns:
        str: First element of the ``uniprotRefs._id`` field of the JSON
        response.
    """
    endpoint = url + "/pdb/" + pdb_code.lower() + "/entry/uniprotRefs/_id"
    payload = requests.get(endpoint, verify=True).json()
    uniprot_id = payload["uniprotRefs._id"][0]

    # Same message for both loggers, emitted only to the ones provided.
    for logger in (out_log, global_log):
        if logger:
            logger.info(
                "PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
            )

    return uniprot_id

357 

358 

def get_variants(
    uniprot_id, url="https://mdb-login.bsc.es/api", out_log=None, global_log=None
):
    """Return the variants registered for ``uniprot_id``.

    Args:
        uniprot_id: UniProt id; concatenated verbatim into the URL.
        url: Base URL of the API.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        The ``variants.vardata.mut`` field of the JSON response, or an empty
        list when that field is empty or null.
    """
    endpoint = (
        url + "/uniprot/" + uniprot_id + "/entry/variants/vardata/mut/?varorig=humsavar"
    )
    payload = requests.get(endpoint, verify=True).json()
    # Normalise a null/empty field to an empty list once.
    variants = payload["variants.vardata.mut"] or []

    fu.log(
        "Found: %d variants for uniprot id: %s" % (len(variants), uniprot_id),
        out_log,
        global_log,
    )
    return variants

379 

380 

def write_json(json_string, output_json_path, out_log=None, global_log=None):
    """Log the destination and write JSON text to it.

    Args:
        json_string: Already serialised JSON text to write.
        output_json_path: Destination path, opened in text write mode.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.
    """
    message = "Writting json to: %s" % (output_json_path)
    fu.log(message, out_log, global_log)
    with open(output_json_path, "w") as handle:
        handle.write(json_string)

386 

387 

def get_memprotmd_sim_list(out_log=None, global_log=None):
    """Fetch the list of all simulations from the MemProtMD REST API.

    Args:
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: JSON response re-serialised with an indent of 4.
    """
    fu.log(
        "Getting all available membrane-protein systems (simulations) from the MemProtMD REST API",
        out_log,
        global_log,
    )

    # The endpoint is queried with POST and no payload.
    simulations = requests.post(
        "http://memprotmd.bioch.ox.ac.uk/api/simulations/all"
    ).json()
    serialised = json.dumps(simulations, indent=4)

    fu.log("Total number of simulations: %d" % (len(simulations)), out_log, global_log)

    return serialised

404 

405 

def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=None):
    """Run an advanced keyword search against the MemProtMD REST API.

    Args:
        collection_name: Value sent as ``collectionName`` in the query.
        keyword: Value sent as ``query.keywords`` in the query.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Returns:
        str: JSON response re-serialised with an indent of 4.
    """
    fu.log(
        "Getting search results from the MemProtMD REST API. Collection name: %s, keyword: %s"
        % (collection_name, keyword),
        out_log,
        global_log,
    )

    search_request = {
        "collectionName": collection_name,
        "query": {"keywords": keyword},
        "projection": {"simulations": 1},
        "options": {},
    }
    results = requests.post(
        "http://memprotmd.bioch.ox.ac.uk/api/search/advanced", json=search_request
    ).json()
    serialised = json.dumps(results, indent=4)

    # Total number of simulations across every returned entry.
    total = sum(1 for entry in results for _ in entry["simulations"])

    fu.log("Total number of simulations: %d" % (total), out_log, global_log)

    return serialised

436 

437 

def get_memprotmd_sim(pdb_code, output_file, out_log=None, global_log=None):
    """Download the ``at.zip`` archive of a MemProtMD simulation to a file.

    Args:
        pdb_code: PDB code; concatenated verbatim into the URL.
        output_file: Destination path, opened in binary write mode.
        out_log: Logger handed to ``fu.log``.
        global_log: Global logger handed to ``fu.log``.

    Note:
        The HTTP status is not inspected; whatever body the server returns
        is written to ``output_file``.
    """
    fu.log("Getting simulation file from pdb code %s" % (pdb_code), out_log, global_log)

    url = (
        "http://memprotmd.bioch.ox.ac.uk/data/memprotmd/simulations/" + pdb_code + "_default_dppc/files/run/at.zip"
    )
    response = requests.get(url)

    # A context manager guarantees the handle is flushed and closed; the
    # bare open(...).write(...) left that to garbage collection.
    with open(output_file, "wb") as output_handle:
        output_handle.write(response.content)

    fu.log("Saving output %s file" % (output_file), out_log, global_log)

451 

452 

def check_mandatory_property(property, name, out_log, classname):
    """Return ``property`` when it is set, otherwise abort.

    Args:
        property: Value of the property to check.
        name: Property name used in the messages.
        out_log: Logger handed to ``fu.log``.
        classname: Caller name, used as the prefix of the messages.

    Returns:
        ``property`` unchanged when it is truthy.

    Raises:
        SystemExit: If ``property`` is falsy.
    """
    if property:
        return property

    fu.log(classname + ": Unexisting %s property, exiting" % name, out_log)
    raise SystemExit(classname + ": Unexisting %s property" % name)

460 

461 

def check_uniprot_code(code, out_log, classname):
    """Check that ``code`` starts with a UniProt accession pattern.

    The pattern is applied with ``re.match``, so it is anchored at the start
    of ``code`` only; characters after a matching prefix are not checked.

    Args:
        code: Candidate UniProt code.
        out_log: Logger handed to ``fu.log``.
        classname: Caller name, used as the prefix of the messages.

    Returns:
        bool: Always True when the check passes.

    Raises:
        SystemExit: If ``code`` does not match the pattern.
    """
    accession_regex = (
        r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
    )

    if re.match(accession_regex, code):
        return True

    fu.log(classname + ": Incorrect uniprot code for %s" % code, out_log)
    raise SystemExit(classname + ": Incorrect uniprot code for %s" % code)