Coverage for biobb_io/api/common.py: 61%

205 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-04 08:31 +0000

1"""Common functions for package api""" 

2 

3import json 

4import os 

5import re 

6import urllib.request 

7import urllib.parse 

8from pathlib import Path, PurePath 

9 

10import requests 

11from biobb_common.tools import file_utils as fu 

12 

13 

def check_output_path(path, argument, optional, out_log, classname) -> str:
    """Validate an output file path and return it unchanged.

    An empty ``path`` is accepted (and ``""`` returned) when ``optional`` is
    truthy. Otherwise the parent folder of ``path`` must exist and the file
    extension must be one that ``is_valid_file`` accepts for ``argument``.

    Args:
        path: Output file path to validate.
        argument: Name of the output argument; used in the messages and passed
            on to ``is_valid_file``.
        optional: Whether an empty ``path`` is allowed.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix used in log and error messages.

    Returns:
        str: ``path`` itself, or ``""`` for an omitted optional path.

    Raises:
        SystemExit: If the parent folder does not exist or the extension is
            not accepted for ``argument``.
    """
    if optional and not path:
        return ""

    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ": Unexisting %s folder, exiting" % argument, out_log)
        raise SystemExit(classname + ": Unexisting %s folder" % argument)

    # Extension without its leading dot, e.g. "pdb" for "out.pdb".
    extension = PurePath(path).suffix[1:]
    if is_valid_file(extension, argument):
        return path

    incompatible = classname + ": Format %s in %s file is not compatible" % (
        extension,
        argument,
    )
    fu.log(incompatible, out_log)
    raise SystemExit(incompatible)

33 

34 

def is_valid_file(ext, argument):
    """Tell whether extension ``ext`` is accepted for output ``argument``.

    Args:
        ext: File extension without the leading dot.
        argument: Name of the output argument to look up.

    Returns:
        bool: True when ``ext`` is one of the extensions listed for
        ``argument``.

    Raises:
        KeyError: If ``argument`` is not one of the known output arguments.
    """
    accepted_extensions = {
        "output_sdf_path": ("sdf",),
        "output_pdb_path": ("pdb",),
        "output_simulations": ("json",),
        "output_simulation": ("zip",),
        "output_pdb_zip_path": ("zip",),
        "output_mutations_list_txt": ("txt",),
        "output_json_path": ("json",),
        "output_fasta_path": ("fasta",),
        "output_mmcif_path": ("mmcif", "cif"),
        "output_top_path": ("pdb",),
        "output_trj_path": ("mdcrd", "trr", "xtc"),
    }
    candidates = accepted_extensions[argument]
    return ext in candidates

51 

52 

def download_pdb(pdb_code, api_id, out_log=None, global_log=None):
    """Download the PDB-format file of an entry from one of the known APIs.

    Args:
        pdb_code: Entry code inserted verbatim into the download URL.
        api_id: Source selector; one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "https://mmb.irbbarcelona.org/api/pdb/" + pdb_code + "/coords/?"
    elif api_id == "pdb":
        url = "https://files.rcsb.org/download/" + pdb_code + ".pdb"
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/entry-files/download/pdb" + pdb_code + ".ent"
    else:
        # Without this branch `url` stayed unbound and the function failed
        # later with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r for PDB download; expected 'mmb', 'pdb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    # NOTE(review): the HTTP status is not checked, so an error page body is
    # returned as if it were the file content.
    return requests.get(url).content.decode("utf-8")

68 

69 

def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
    """Download a PDB-format model from the AlphaFold file server.

    Args:
        uniprot_code: Code inserted verbatim into the download URL.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://alphafold.ebi.ac.uk/files/AF-" + uniprot_code + "-F1-model_v3.pdb"

    fu.log("Downloading %s from: %s" % (uniprot_code, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly raised a
        # TypeError here instead of the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        message = prefix + "Incorrect Uniprot Code: %s" % (uniprot_code)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content.decode("utf-8")

86 

87 

def download_mddb_top(project_id, node_id, selection, out_log=None, global_log=None, classname=None):
    """Download the structure of an MDDB project from the given node.

    Args:
        project_id: Project identifier inserted into the URL path.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        selection: Selection expression; stringified and URL-quoted into the
            ``selection`` query parameter.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = (
        "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id
        + "/structure?selection=" + urllib.parse.quote(str(selection))
    )

    fu.log("Downloading %s topology from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly raised a
        # TypeError here instead of the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        message = prefix + "Incorrect url, check project_id, node_id and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content.decode("utf-8")

104 

105 

def download_mddb_trj(project_id, node_id, trj_format, frames, selection, out_log=None, global_log=None, classname=None):
    """Download the trajectory of an MDDB project from the given node.

    Args:
        project_id: Project identifier inserted into the URL path.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        trj_format: Value of the ``format`` query parameter (string).
        frames: Value of the ``frames`` query parameter (string).
        selection: Selection expression; stringified and URL-quoted into the
            ``selection`` query parameter.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        bytes: Raw response body (not decoded).

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = (
        "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id
        + "/trajectory?format=" + trj_format
        + "&frames=" + frames
        + "&selection=" + urllib.parse.quote(str(selection))
    )

    fu.log("Downloading %s trajectory from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly raised a
        # TypeError here instead of the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        message = prefix + "Incorrect url, check project_id, node_id, trj_format, frames and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content

122 

123 

def download_mddb_file(project_id, node_id, file_name, out_log=None, global_log=None, classname=None):
    """Download a named file of an MDDB project from the given node.

    Args:
        project_id: Project identifier inserted into the URL path.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        file_name: Name of the file, appended verbatim to the URL path.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        bytes: Raw response body (not decoded).

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id + "/files/" + file_name

    fu.log("Downloading %s file from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly raised a
        # TypeError here instead of the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        # The message now names the parameters this function actually takes
        # (it previously listed trj_format, frames and selection).
        message = prefix + "Incorrect url, check project_id, node_id and file_name: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content

140 

141 

def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
    """Download the mmCIF file of an entry from one of the known APIs.

    Args:
        pdb_code: Entry code inserted verbatim into the download URL.
        api_id: Source selector; one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".cif"
    elif api_id == "pdb":
        url = "https://files.rcsb.org/download/" + pdb_code + ".cif"
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/entry-files/download/" + pdb_code + ".cif"
    else:
        # Without this branch `url` stayed unbound and the function failed
        # later with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r for mmCIF download; expected 'mmb', 'pdb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    return requests.get(url, verify=True).content.decode("utf-8")

157 

158 

def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
    """Download the PDB-format file of a ligand from one of the known APIs.

    Args:
        ligand_code: Ligand code; lower-cased for ``"mmb"`` and upper-cased
            for ``"pdbe"`` when building the URL.
        api_id: Source selector; ``"mmb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: Downloaded text with every empty line removed, lines joined
        with ``os.linesep``.

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "http://mmb.irbbarcelona.org/api/pdbMonomer/" + ligand_code.lower()
    elif api_id == "pdbe":
        url = (
            "https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/" + ligand_code.upper() + "_ideal.pdb"
        )
    else:
        # Without this branch `url` and `text` stayed unbound and the function
        # failed later with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r for ligand download; expected 'mmb' or 'pdbe'"
            % (api_id,)
        )

    # Logged before the request, like the other download helpers, so the URL
    # is recorded even when the download itself fails.
    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    if api_id == "mmb":
        text = requests.get(url, verify=True).content.decode("utf-8")
    else:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")

    # Drop every empty line (anywhere in the file, not only trailing ones).
    return os.linesep.join([s for s in text.splitlines() if s])

180 

181 

def download_fasta(pdb_code, api_id, out_log=None, global_log=None):
    """Download the FASTA file of an entry from one of the known APIs.

    Args:
        pdb_code: Entry code inserted verbatim into the download URL.
        api_id: Source selector; one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".fasta"
    elif api_id == "pdb":
        url = "https://www.rcsb.org/fasta/entry/" + pdb_code
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/entry/pdb/" + pdb_code + "/fasta"
    else:
        # Without this branch `url` stayed unbound and the function failed
        # later with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r for FASTA download; expected 'mmb', 'pdb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    return requests.get(url, verify=True).content.decode("utf-8")

197 

198 

def download_binding_site(
    pdb_code,
    url="https://www.ebi.ac.uk/pdbe/api/pdb/entry/binding_sites/%s",
    out_log=None,
    global_log=None,
):
    """Fetch the binding-site JSON document of an entry and pretty-print it.

    Args:
        pdb_code: Entry code substituted into ``url``.
        url: URL template containing one ``%s`` placeholder for ``pdb_code``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: The parsed JSON re-serialised with 4-space indentation and
        sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting binding sites from: %s" % (url), out_log, global_log)

    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as response:
        payload = response.read()

    return json.dumps(json.loads(payload), indent=4, sort_keys=True)

219 

220 

def download_ideal_sdf(ligand_code, api_id, out_log=None, global_log=None):
    """Download the ideal SDF file of a ligand from one of the known APIs.

    Args:
        ligand_code: Ligand code; upper-cased when building the URL.
        api_id: Source selector; ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "pdb":
        url = (
            "https://files.rcsb.org/ligands/download/" + ligand_code.upper() + "_ideal.sdf"
        )
    elif api_id == "pdbe":
        url = (
            "https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/" + ligand_code.upper() + "_ideal.sdf"
        )
    else:
        # Without this branch `url` and `text` stayed unbound and the function
        # failed later with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported api_id %r for ideal SDF download; expected 'pdb' or 'pdbe'"
            % (api_id,)
        )

    # Logged before the request, like the other download helpers, so the URL
    # is recorded even when the download itself fails.
    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    if api_id == "pdb":
        text = requests.get(url, verify=True).content.decode("utf-8")
    else:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")

    return text

241 

242 

def download_str_info(
    pdb_code,
    url="http://mmb.irbbarcelona.org/api/pdb/%s.json",
    out_log=None,
    global_log=None,
):
    """Fetch the structure-info JSON document of an entry and pretty-print it.

    Args:
        pdb_code: Entry code substituted into ``url``.
        url: URL template containing one ``%s`` placeholder for ``pdb_code``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: The parsed JSON re-serialised with 4-space indentation and
        sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting structure info from: %s" % (url), out_log, global_log)

    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as response:
        payload = response.read()

    return json.dumps(json.loads(payload), indent=4, sort_keys=True)

263 

264 

def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=None):
    """Write PDB text to a file, optionally keeping only selected records.

    Args:
        pdb_string: Full PDB text.
        output_pdb_path: Destination file path (opened in text write mode).
        filt: Optional container of record names. When truthy, only lines
            whose first whitespace-delimited token, cut to six characters,
            is in ``filt`` are written.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
    """
    fu.log("Writting pdb to: %s" % (output_pdb_path), out_log, global_log)
    with open(output_pdb_path, "w") as output_pdb_file:
        if not filt:
            output_pdb_file.write(pdb_string)
            return

        fu.log(
            "Filtering lines NOT starting with one of these words: %s" % str(filt),
            out_log,
            global_log,
        )
        for line in pdb_string.splitlines(True):
            tokens = line.split()
            # Blank or whitespace-only lines have no record name; indexing
            # the empty token list used to raise IndexError, so skip them.
            if tokens and tokens[0][0:6] in filt:
                output_pdb_file.write(line)

280 

281 

def write_bin(bin_string, output_bin_path, out_log=None, global_log=None):
    """Log the destination and write binary content to it.

    Args:
        bin_string: Bytes to write.
        output_bin_path: Destination file path (opened in binary write mode).
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
    """
    announcement = "Writting bin to: %s" % (output_bin_path)
    fu.log(announcement, out_log, global_log)
    with open(output_bin_path, "wb") as sink:
        sink.write(bin_string)

287 

288 

def write_mmcif(mmcif_string, output_mmcif_path, out_log=None, global_log=None):
    """Log the destination and write mmCIF text to it.

    Args:
        mmcif_string: Text to write.
        output_mmcif_path: Destination file path (opened in text write mode).
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
    """
    announcement = "Writting mmcif to: %s" % (output_mmcif_path)
    fu.log(announcement, out_log, global_log)
    with open(output_mmcif_path, "w") as sink:
        sink.write(mmcif_string)

294 

295 

def write_fasta(fasta_string, output_fasta_path, out_log=None, global_log=None):
    """Log the destination and write FASTA text to it.

    Args:
        fasta_string: Text to write.
        output_fasta_path: Destination file path (opened in text write mode).
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
    """
    announcement = "Writting FASTA to: %s" % (output_fasta_path)
    fu.log(announcement, out_log, global_log)
    with open(output_fasta_path, "w") as sink:
        sink.write(fasta_string)

301 

302 

def write_sdf(sdf_string, output_sdf_path, out_log=None, global_log=None):
    """Log the destination and write SDF text to it.

    Args:
        sdf_string: Text to write.
        output_sdf_path: Destination file path (opened in text write mode).
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
    """
    announcement = "Writting sdf to: %s" % (output_sdf_path)
    fu.log(announcement, out_log, global_log)
    with open(output_sdf_path, "w") as sink:
        sink.write(sdf_string)

308 

309 

def get_cluster_pdb_codes(pdb_code, cluster, out_log=None, global_log=None):
    """Collect the lower-cased PDB codes that belong to a cluster of an entry.

    Args:
        pdb_code: Entry code; lower-cased when building the request URL.
        cluster: Cluster identifier, stringified into the URL.
        out_log: Optional logger; receives an ``info`` summary when truthy.
        global_log: Optional logger; receives the same summary prefixed with
            ``fu.get_logs_prefix()`` when truthy.

    Returns:
        set: Lower-cased ``_id`` values of the ``clusterMembers`` entries in
        the JSON response.
    """
    endpoint = (
        "http://mmb.irbbarcelona.org/api/pdb/"
        + pdb_code.lower()
        + "/clusters/cl-"
        + str(cluster)
        + ".json"
    )
    raw_body = requests.get(endpoint, verify=True).content.decode("utf-8")
    members = json.loads(raw_body)["clusterMembers"]
    pdb_codes = {member["_id"].lower() for member in members}

    summary = "Cluster: " + str(cluster) + " of pdb_code: " + pdb_code + "\n List: " + str(pdb_codes)
    if out_log:
        out_log.info(summary)
    if global_log:
        global_log.info(fu.get_logs_prefix() + summary)

    return pdb_codes

335 

336 

def get_uniprot(pdb_code, url, out_log=None, global_log=None):
    """Look up the first UniProt id referenced by a PDB entry.

    Args:
        pdb_code: Entry code; lower-cased when building the request URL.
        url: Base URL of the API; the entry path is appended to it.
        out_log: Optional logger; receives an ``info`` message when truthy.
        global_log: Optional logger; receives the same message when truthy.

    Returns:
        str: First element of the ``uniprotRefs._id`` list in the JSON
        response.
    """
    endpoint = url + "/pdb/" + pdb_code.lower() + "/entry/uniprotRefs/_id"
    payload = requests.get(endpoint, verify=True).json()
    uniprot_id = payload["uniprotRefs._id"][0]

    for logger in (out_log, global_log):
        if logger:
            logger.info(
                "PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
            )

    return uniprot_id

356 

357 

def get_variants(
    uniprot_id, url="http://mmb.irbbarcelona.org/api", out_log=None, global_log=None
):
    """Fetch the variants listed for a UniProt id.

    Args:
        uniprot_id: UniProt id inserted into the request URL.
        url: Base URL of the API; the variants path is appended to it.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        The ``variants.vardata.mut`` value of the JSON response, or an empty
        list when that value is falsy.
    """
    endpoint = url + "/uniprot/" + uniprot_id + "/entry/variants/vardata/mut/?varorig=humsavar"
    payload = requests.get(endpoint, verify=True).json()
    # A falsy value (e.g. null in the response) is normalised to an empty list.
    variants = payload["variants.vardata.mut"] or []

    fu.log(
        "Found: %d variants for uniprot id: %s" % (len(variants), uniprot_id),
        out_log,
        global_log,
    )
    return variants

378 

379 

def write_json(json_string, output_json_path, out_log=None, global_log=None):
    """Log the destination and write JSON text to it.

    Args:
        json_string: Text to write.
        output_json_path: Destination file path (opened in text write mode).
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
    """
    announcement = "Writting json to: %s" % (output_json_path)
    fu.log(announcement, out_log, global_log)
    with open(output_json_path, "w") as sink:
        sink.write(json_string)

385 

386 

def get_memprotmd_sim_list(out_log=None, global_log=None):
    """Fetch the list of all simulations from the MemProtMD REST API.

    Args:
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: The JSON response re-serialised with 4-space indentation.
    """
    fu.log(
        "Getting all available membrane-protein systems (simulations) from the MemProtMD REST API",
        out_log,
        global_log,
    )

    # The endpoint is queried with POST and no request body.
    simulations = requests.post("http://memprotmd.bioch.ox.ac.uk/api/simulations/all").json()
    pretty = json.dumps(simulations, indent=4)

    simulation_count = len(simulations)
    fu.log("Total number of simulations: %d" % (simulation_count), out_log, global_log)

    return pretty

403 

404 

def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=None):
    """Run a keyword search against the MemProtMD advanced-search endpoint.

    Args:
        collection_name: Sent as ``collectionName`` in the request body.
        keyword: Sent as the ``keywords`` value of the query.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.

    Returns:
        str: The JSON response re-serialised with 4-space indentation.
    """
    fu.log(
        "Getting search results from the MemProtMD REST API. Collection name: %s, keyword: %s"
        % (collection_name, keyword),
        out_log,
        global_log,
    )

    results = requests.post(
        "http://memprotmd.bioch.ox.ac.uk/api/search/advanced",
        json={
            "collectionName": collection_name,
            "query": {"keywords": keyword},
            "projection": {"simulations": 1},
            "options": {},
        },
    ).json()
    pretty = json.dumps(results, indent=4)

    # Count every simulation across all returned result entries.
    total = sum(1 for entry in results for _ in entry["simulations"])
    fu.log("Total number of simulations: %d" % (total), out_log, global_log)

    return pretty

435 

436 

def get_memprotmd_sim(pdb_code, output_file, out_log=None, global_log=None):
    """Download the simulation archive of an entry from MemProtMD to a file.

    Args:
        pdb_code: Entry code inserted verbatim into the download URL.
        output_file: Destination file path (opened in binary write mode).
        out_log: Logger forwarded to ``fu.log``.
        global_log: Logger forwarded to ``fu.log``.
    """
    fu.log("Getting simulation file from pdb code %s" % (pdb_code), out_log, global_log)

    url = (
        "http://memprotmd.bioch.ox.ac.uk/data/memprotmd/simulations/" + pdb_code + "_default_dppc/files/run/at.zip"
    )
    response = requests.get(url)

    # Use a context manager so the file handle is flushed and closed
    # deterministically; the bare open(...).write(...) never closed it.
    with open(output_file, "wb") as archive:
        archive.write(response.content)

    fu.log("Saving output %s file" % (output_file), out_log, global_log)

450 

451 

def check_mandatory_property(property, name, out_log, classname):
    """Return ``property`` when it is truthy, otherwise log and exit.

    Args:
        property: Value to check.
        name: Property name used in the messages.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix used in log and error messages.

    Returns:
        The ``property`` value unchanged.

    Raises:
        SystemExit: If ``property`` is falsy.
    """
    if property:
        return property

    fu.log(classname + ": Unexisting %s property, exiting" % name, out_log)
    raise SystemExit(classname + ": Unexisting %s property" % name)

459 

460 

def check_uniprot_code(code, out_log, classname):
    """Validate that ``code`` has the shape of a UniProt accession.

    Args:
        code: Candidate accession string.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix used in log and error messages.

    Returns:
        bool: Always True when the code is accepted.

    Raises:
        SystemExit: If ``code`` does not match the accession pattern.
    """
    pattern = re.compile(
        r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
    )

    # fullmatch, not match: match() only anchors at the start, so a valid
    # prefix followed by arbitrary trailing text used to pass validation.
    if not pattern.fullmatch(code):
        fu.log(classname + ": Incorrect uniprot code for %s" % code, out_log)
        raise SystemExit(classname + ": Incorrect uniprot code for %s" % code)

    return True