Coverage for biobb_io / api / common.py: 61%
205 statements
« prev ^ index » next coverage.py v7.13.3, created at 2026-02-04 10:56 +0000
« prev ^ index » next coverage.py v7.13.3, created at 2026-02-04 10:56 +0000
1"""Common functions for package api"""
3import json
4import os
5import re
6import urllib.request
7import urllib.parse
8from pathlib import Path, PurePath
10import requests
11from biobb_common.tools import file_utils as fu
def check_output_path(path, argument, optional, out_log, classname) -> str:
    """Validate an output file path and hand it back unchanged.

    Args:
        path: Output file path to validate.
        argument: Name of the output argument (for example ``"output_pdb_path"``);
            used in the messages and as the lookup key for ``is_valid_file``.
        optional: When truthy, an empty/falsy ``path`` is accepted.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix prepended to every log and error message.

    Returns:
        str: ``""`` when the path is optional and was not given, otherwise ``path``.

    Raises:
        SystemExit: If the parent folder of ``path`` does not exist, or if the
            file extension is not accepted for ``argument``.
    """
    # An optional output that was not provided needs no further checks.
    if optional and not path:
        return ""

    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ": Unexisting %s folder, exiting" % argument, out_log)
        raise SystemExit(classname + ": Unexisting %s folder" % argument)

    # The suffix includes the leading dot; strip it before the format lookup.
    extension = PurePath(path).suffix[1:]
    if is_valid_file(extension, argument):
        return path

    error_message = classname + ": Format %s in %s file is not compatible" % (
        extension,
        argument,
    )
    fu.log(error_message, out_log)
    raise SystemExit(error_message)
def is_valid_file(ext, argument):
    """Tell whether extension ``ext`` is accepted for the output ``argument``.

    Args:
        ext: File extension without the leading dot.
        argument: Name of the output argument, used as lookup key.

    Returns:
        bool: ``True`` when ``ext`` is listed for ``argument``.

    Raises:
        KeyError: If ``argument`` is not one of the known output arguments.
    """
    accepted_by_argument = {
        "output_sdf_path": ("sdf",),
        "output_pdb_path": ("pdb",),
        "output_simulations": ("json",),
        "output_simulation": ("zip",),
        "output_pdb_zip_path": ("zip",),
        "output_mutations_list_txt": ("txt",),
        "output_json_path": ("json",),
        "output_fasta_path": ("fasta",),
        "output_mmcif_path": ("mmcif", "cif"),
        "output_top_path": ("pdb",),
        "output_trj_path": ("mdcrd", "trr", "xtc"),
    }
    accepted = accepted_by_argument[argument]
    return ext in accepted
def download_pdb(pdb_code, api_id, out_log=None, global_log=None):
    """Download a structure in PDB format from one of the supported services.

    Args:
        pdb_code: Entry code, concatenated verbatim into the request URL.
        api_id: Service selector, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        String: Content of the pdb file (response body decoded as UTF-8).

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdb/" + pdb_code + "/coords/?"
    elif api_id == "pdb":
        url = "https://files.rcsb.org/download/" + pdb_code + ".pdb"
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/entry-files/download/pdb" + pdb_code + ".ent"
    else:
        # Without this branch ``url`` stays unbound and the failure surfaces
        # later as an UnboundLocalError that hides the real cause.
        raise ValueError(
            "Unsupported api_id %r for pdb download, expected 'mmb', 'pdb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    # NOTE: the HTTP status is not checked, so an error page is returned as text.
    return requests.get(url).content.decode("utf-8")
def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
    """Download the PDB model published at alphafold.ebi.ac.uk for a code.

    Args:
        uniprot_code: Code concatenated verbatim into the request URL.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        String: Content of the pdb file (response body decoded as UTF-8).

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://alphafold.ebi.ac.uk/files/AF-" + uniprot_code + "-F1-model_v6.pdb"

    fu.log("Downloading %s from: %s" % (uniprot_code, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and mask the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        message = prefix + "Incorrect Uniprot Code: %s" % (uniprot_code)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content.decode("utf-8")
def download_mddb_top(project_id, node_id, selection, out_log=None, global_log=None, classname=None):
    """Download the structure of a project from an ``<node_id>.mddbr.eu`` node.

    Args:
        project_id: Project identifier, concatenated verbatim into the URL.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        selection: Selection expression; converted to ``str`` and URL-quoted.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        String: Content of the pdb file (response body decoded as UTF-8).

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = (
        "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id
        + "/structure?selection=" + urllib.parse.quote(str(selection))
    )

    fu.log("Downloading %s topology from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and mask the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        message = prefix + "Incorrect url, check project_id, node_id and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content.decode("utf-8")
def download_mddb_trj(project_id, node_id, trj_format, frames, selection, out_log=None, global_log=None, classname=None):
    """Download a trajectory of a project from an ``<node_id>.mddbr.eu`` node.

    Args:
        project_id: Project identifier, concatenated verbatim into the URL.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        trj_format: Value of the ``format`` query parameter (must be a string).
        frames: Value of the ``frames`` query parameter (must be a string).
        selection: Selection expression; converted to ``str`` and URL-quoted.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        bytes: Raw content of the trajectory file (not decoded).

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = (
        "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id
        + "/trajectory?format=" + trj_format + "&frames=" + frames
        + "&selection=" + urllib.parse.quote(str(selection))
    )

    fu.log("Downloading %s trajectory from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and mask the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        message = prefix + "Incorrect url, check project_id, node_id, trj_format, frames and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content
def download_mddb_file(project_id, node_id, file_name, out_log=None, global_log=None, classname=None):
    """Download a named file of a project from an ``<node_id>.mddbr.eu`` node.

    Args:
        project_id: Project identifier, concatenated verbatim into the URL.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        file_name: Name of the file, concatenated verbatim into the URL.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        bytes: Raw content of the requested file (not decoded).

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id + "/files/" + file_name

    fu.log("Downloading %s file from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and mask the intended SystemExit.
        prefix = "" if classname is None else classname + ": "
        # The message names the parameters this function really takes (the
        # previous text was copied from the trajectory download).
        message = prefix + "Incorrect url, check project_id, node_id and file_name: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content
def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
    """Download a structure in mmCIF format from one of the supported services.

    Args:
        pdb_code: Entry code, concatenated verbatim into the request URL.
        api_id: Service selector, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        String: Content of the mmcif file (response body decoded as UTF-8).

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdb/" + pdb_code + ".cif"
    elif api_id == "pdb":
        url = "https://files.rcsb.org/download/" + pdb_code + ".cif"
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/entry-files/download/" + pdb_code + ".cif"
    else:
        # Without this branch ``url`` stays unbound and the failure surfaces
        # later as an UnboundLocalError that hides the real cause.
        raise ValueError(
            "Unsupported api_id %r for mmcif download, expected 'mmb', 'pdb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    # NOTE: the HTTP status is not checked, so an error page is returned as text.
    return requests.get(url, verify=True).content.decode("utf-8")
def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
    """Download a ligand file from one of the supported services.

    Args:
        ligand_code: Ligand code; lower-cased for ``"mmb"`` and upper-cased
            for ``"pdbe"`` before being placed in the URL.
        api_id: Service selector, ``"mmb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        String: Content of the ligand file with every empty line removed and
        the remaining lines joined with ``os.linesep``.

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdbMonomer/" + ligand_code.lower()
        text = requests.get(url, verify=True).content.decode("utf-8")
    elif api_id == "pdbe":
        # NOTE: the "pdbe" selector currently fetches the .cif from files.rcsb.org.
        url = "https://files.rcsb.org/ligands/view/" + ligand_code.upper() + ".cif"
        # Context manager so the HTTP response is closed deterministically.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")
    else:
        # Without this branch ``url``/``text`` stay unbound and the failure
        # surfaces later as an UnboundLocalError that hides the real cause.
        raise ValueError(
            "Unsupported api_id %r for ligand download, expected 'mmb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    # Drop empty lines (anywhere in the file, not only trailing ones).
    text = os.linesep.join([s for s in text.splitlines() if s])

    return text
def download_fasta(pdb_code, api_id, out_log=None, global_log=None):
    """Download the FASTA sequence of an entry from one of the supported services.

    Args:
        pdb_code: Entry code, concatenated verbatim into the request URL.
        api_id: Service selector, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        String: Content of the fasta file (response body decoded as UTF-8).

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "mmb":
        url = "https://mdb-login.bsc.es/api/pdb/" + pdb_code + ".fasta"
    elif api_id == "pdb":
        url = "https://www.rcsb.org/fasta/entry/" + pdb_code
    elif api_id == "pdbe":
        url = "https://www.ebi.ac.uk/pdbe/api/v2/pdb/entry/" + pdb_code + "/fasta"
    else:
        # Without this branch ``url`` stays unbound and the failure surfaces
        # later as an UnboundLocalError that hides the real cause.
        raise ValueError(
            "Unsupported api_id %r for fasta download, expected 'mmb', 'pdb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    # NOTE: the HTTP status is not checked, so an error page is returned as text.
    return requests.get(url, verify=True).content.decode("utf-8")
def download_binding_site(
    pdb_code,
    url="https://www.ebi.ac.uk/pdbe/api/v2/pdb/entry/binding_sites/%s/1",
    out_log=None,
    global_log=None,
):
    """Fetch the binding-site JSON document of an entry and pretty-print it.

    Args:
        pdb_code: Entry code substituted into ``url``.
        url: URL template containing one ``%s`` placeholder for ``pdb_code``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        String: The downloaded JSON re-serialised with 4-space indentation
        and sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting binding sites from: %s" % (url), out_log, global_log)

    # Context manager so the HTTP response is closed deterministically.
    with urllib.request.urlopen(url) as response:
        text = response.read()
    json_obj = json.loads(text)
    json_string = json.dumps(json_obj, indent=4, sort_keys=True)

    return json_string
def download_ideal_sdf(ligand_code, api_id, out_log=None, global_log=None):
    """Download the ``<LIGAND>_ideal.sdf`` file of a ligand.

    Args:
        ligand_code: Ligand code; upper-cased before being placed in the URL.
        api_id: Service selector, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        String: Content of the ideal sdf file (decoded as UTF-8).

    Raises:
        ValueError: If ``api_id`` is not one of the supported selectors.
    """
    if api_id == "pdb":
        url = (
            "https://files.rcsb.org/ligands/download/" + ligand_code.upper() + "_ideal.sdf"
        )
        text = requests.get(url, verify=True).content.decode("utf-8")
    elif api_id == "pdbe":
        url = (
            "https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/" + ligand_code.upper() + "_ideal.sdf"
        )
        # Context manager so the HTTP response is closed deterministically.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")
    else:
        # Without this branch ``url``/``text`` stay unbound and the failure
        # surfaces later as an UnboundLocalError that hides the real cause.
        raise ValueError(
            "Unsupported api_id %r for ideal sdf download, expected 'pdb' or 'pdbe'"
            % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    return text
def download_str_info(
    pdb_code,
    url="https://mdb-login.bsc.es/api/pdb/%s.json",
    out_log=None,
    global_log=None,
):
    """Fetch the structure-info JSON document of an entry and pretty-print it.

    Args:
        pdb_code: Entry code substituted into ``url``.
        url: URL template containing one ``%s`` placeholder for ``pdb_code``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        String: Content of the JSON file, re-serialised with 4-space
        indentation and sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting structure info from: %s" % (url), out_log, global_log)

    # Context manager so the HTTP response is closed deterministically.
    with urllib.request.urlopen(url) as response:
        text = response.read()
    json_obj = json.loads(text)
    json_string = json.dumps(json_obj, indent=4, sort_keys=True)

    return json_string
def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=None):
    """Write a PDB string to disk, optionally keeping only selected records.

    Args:
        pdb_string: Text of the PDB file.
        output_pdb_path: Destination path, opened in text write mode.
        filt: Optional container of record words. When truthy, a line is kept
            only if the first six characters of its first whitespace-separated
            token are ``in filt``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Writting pdb to: %s" % (output_pdb_path), out_log, global_log)
    with open(output_pdb_path, "w") as output_pdb_file:
        if not filt:
            output_pdb_file.write(pdb_string)
            return

        fu.log(
            "Filtering lines NOT starting with one of these words: %s" % str(filt),
            out_log,
            global_log,
        )
        # keepends=True so the written lines retain their original terminators.
        for line in pdb_string.splitlines(True):
            tokens = line.split()
            # Blank or whitespace-only lines have no first token; skip them
            # instead of failing with IndexError.
            if tokens and tokens[0][0:6] in filt:
                output_pdb_file.write(line)
def write_bin(bin_string, output_bin_path, out_log=None, global_log=None):
    """Log the destination and write ``bin_string`` to it in binary mode."""
    log_message = "Writting bin to: %s" % (output_bin_path)
    fu.log(log_message, out_log, global_log)
    with open(output_bin_path, "wb") as handle:
        handle.write(bin_string)
def write_mmcif(mmcif_string, output_mmcif_path, out_log=None, global_log=None):
    """Log the destination and write ``mmcif_string`` to it in text mode."""
    log_message = "Writting mmcif to: %s" % (output_mmcif_path)
    fu.log(log_message, out_log, global_log)
    with open(output_mmcif_path, "w") as handle:
        handle.write(mmcif_string)
def write_fasta(fasta_string, output_fasta_path, out_log=None, global_log=None):
    """Log the destination and write ``fasta_string`` to it in text mode."""
    log_message = "Writting FASTA to: %s" % (output_fasta_path)
    fu.log(log_message, out_log, global_log)
    with open(output_fasta_path, "w") as handle:
        handle.write(fasta_string)
def write_sdf(sdf_string, output_sdf_path, out_log=None, global_log=None):
    """Log the destination and write ``sdf_string`` to it in text mode."""
    log_message = "Writting sdf to: %s" % (output_sdf_path)
    fu.log(log_message, out_log, global_log)
    with open(output_sdf_path, "w") as handle:
        handle.write(sdf_string)
def get_cluster_pdb_codes(pdb_code, cluster, out_log=None, global_log=None):
    """Collect the lower-cased pdb codes of the members of one cluster.

    Args:
        pdb_code: Entry code; lower-cased before being placed in the URL.
        cluster: Cluster identifier, converted to ``str`` for the URL.
        out_log: Optional logger; receives the summary via ``info``.
        global_log: Optional logger; receives the summary prefixed with
            ``fu.get_logs_prefix()``.

    Returns:
        set: Lower-cased ``_id`` of every element of ``clusterMembers`` in the
        downloaded JSON document.
    """
    cluster_url = (
        "https://mdb-login.bsc.es/api/pdb/"
        + pdb_code.lower()
        + "/clusters/cl-"
        + str(cluster)
        + ".json"
    )
    response = requests.get(cluster_url, verify=True)
    members = json.loads(response.content.decode("utf-8"))["clusterMembers"]
    pdb_codes = {member["_id"].lower() for member in members}

    summary = "Cluster: " + str(cluster) + " of pdb_code: " + pdb_code + "\n List: " + str(pdb_codes)
    if out_log:
        out_log.info(summary)
    if global_log:
        global_log.info(fu.get_logs_prefix() + summary)

    return pdb_codes
def get_uniprot(pdb_code, url, out_log=None, global_log=None):
    """Look up the first UNIPROT id referenced by ``pdb_code``.

    Args:
        pdb_code: Entry code; lower-cased before being placed in the URL.
        url: Base URL of the REST API.
        out_log: Optional logger; receives the result via ``info``.
        global_log: Optional logger; receives the result via ``info``.

    Returns:
        str: First element of the ``"uniprotRefs._id"`` list in the JSON reply.
    """
    endpoint = url + "/pdb/" + pdb_code.lower() + "/entry/uniprotRefs/_id"
    reply = requests.get(endpoint, verify=True).json()
    uniprot_id = reply["uniprotRefs._id"][0]

    # Both loggers receive the same, un-prefixed message.
    for logger in (out_log, global_log):
        if logger:
            logger.info(
                "PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
            )

    return uniprot_id
def get_variants(
    uniprot_id, url="https://mdb-login.bsc.es/api", out_log=None, global_log=None
):
    """Fetch the ``humsavar`` variants recorded for ``uniprot_id``.

    Args:
        uniprot_id: Identifier concatenated verbatim into the request URL.
        url: Base URL of the REST API.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        The ``"variants.vardata.mut"`` value of the JSON reply, or an empty
        list when that value is falsy.
    """
    endpoint = (
        url + "/uniprot/" + uniprot_id + "/entry/variants/vardata/mut/?varorig=humsavar"
    )
    reply = requests.get(endpoint, verify=True).json()
    # A falsy value (e.g. None) is normalised to an empty list.
    variants = reply["variants.vardata.mut"] or []

    fu.log(
        "Found: %d variants for uniprot id: %s" % (len(variants), uniprot_id),
        out_log,
        global_log,
    )
    return variants
def write_json(json_string, output_json_path, out_log=None, global_log=None):
    """Log the destination and write ``json_string`` to it in text mode."""
    log_message = "Writting json to: %s" % (output_json_path)
    fu.log(log_message, out_log, global_log)
    with open(output_json_path, "w") as handle:
        handle.write(json_string)
def get_memprotmd_sim_list(out_log=None, global_log=None):
    """List every simulation exposed by the MemProtMD REST API.

    Args:
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: JSON reply of the ``simulations/all`` endpoint, serialised with
        4-space indentation.
    """
    fu.log(
        "Getting all available membrane-protein systems (simulations) from the MemProtMD REST API",
        out_log,
        global_log,
    )

    # The endpoint is queried with POST and no payload.
    endpoint = "http://memprotmd.bioch.ox.ac.uk/api/simulations/all"
    simulations = requests.post(endpoint).json()
    pretty = json.dumps(simulations, indent=4)

    fu.log("Total number of simulations: %d" % (len(simulations)), out_log, global_log)

    return pretty
def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=None):
    """Run a keyword search against the MemProtMD advanced-search endpoint.

    Args:
        collection_name: Value sent as ``collectionName`` in the query.
        keyword: Value sent as ``query.keywords`` in the query.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: JSON reply serialised with 4-space indentation.
    """
    fu.log(
        "Getting search results from the MemProtMD REST API. Collection name: %s, keyword: %s"
        % (collection_name, keyword),
        out_log,
        global_log,
    )

    endpoint = "http://memprotmd.bioch.ox.ac.uk/api/search/advanced"
    payload = {
        "collectionName": collection_name,
        "query": {"keywords": keyword},
        "projection": {"simulations": 1},
        "options": {},
    }

    results = requests.post(endpoint, json=payload).json()
    pretty = json.dumps(results, indent=4)

    # Flatten the per-result "simulations" entries to count them all.
    simulations = [sim for entry in results for sim in entry["simulations"]]

    fu.log("Total number of simulations: %d" % (len(simulations)), out_log, global_log)

    return pretty
def get_memprotmd_sim(pdb_code, output_file, out_log=None, global_log=None):
    """Download the ``at.zip`` of one MemProtMD simulation and save it.

    Args:
        pdb_code: Entry code, concatenated verbatim into the request URL.
        output_file: Destination path, opened in binary write mode.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Getting simulation file from pdb code %s" % (pdb_code), out_log, global_log)

    url = (
        "http://memprotmd.bioch.ox.ac.uk/data/memprotmd/simulations/" + pdb_code + "_default_dppc/files/run/at.zip"
    )
    response = requests.get(url)

    # NOTE: the HTTP status is not checked; whatever body came back is saved.
    # Context manager so the file is flushed and closed deterministically.
    with open(output_file, "wb") as output_handle:
        output_handle.write(response.content)

    fu.log("Saving output %s file" % (output_file), out_log, global_log)
def check_mandatory_property(property, name, out_log, classname):
    """Return ``property`` when it is truthy, otherwise log and exit.

    Args:
        property: Value of the mandatory property.
        name: Property name used in the messages.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix prepended to the log and error messages.

    Returns:
        The ``property`` value, unchanged.

    Raises:
        SystemExit: If ``property`` is falsy.
    """
    if property:
        return property

    fu.log(classname + ": Unexisting %s property, exiting" % name, out_log)
    raise SystemExit(classname + ": Unexisting %s property" % name)
def check_uniprot_code(code, out_log, classname):
    """Check that ``code`` starts with a uniprot-accession-shaped string.

    The pattern is applied with ``re.match``, so it is anchored at the start
    of ``code`` only; characters after a valid prefix are not rejected.

    Args:
        code: Candidate uniprot code.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix prepended to the log and error messages.

    Returns:
        bool: Always ``True`` when the check passes.

    Raises:
        SystemExit: If ``code`` does not match the pattern.
    """
    accession_regex = r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"

    if re.match(accession_regex, code) is None:
        fu.log(classname + ": Incorrect uniprot code for %s" % code, out_log)
        raise SystemExit(classname + ": Incorrect uniprot code for %s" % code)

    return True