Coverage for biobb_io/api/common.py: 61%
205 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 08:31 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 08:31 +0000
1"""Common functions for package api"""
3import json
4import os
5import re
6import urllib.request
7import urllib.parse
8from pathlib import Path, PurePath
10import requests
11from biobb_common.tools import file_utils as fu
def check_output_path(path, argument, optional, out_log, classname) -> str:
    """Validate an output file path and return it unchanged.

    Args:
        path: Output file path to validate.
        argument: Name of the output argument (e.g. ``output_pdb_path``);
            used in messages and passed to ``is_valid_file``.
        optional: When true, an empty/falsy ``path`` is accepted.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix for log and error messages.

    Returns:
        str: ``""`` when the argument is optional and no path was given,
        otherwise ``path`` itself.

    Raises:
        SystemExit: If the parent folder does not exist or the file
            extension is rejected by ``is_valid_file``.
    """
    if optional and not path:
        return ""

    target = PurePath(path)
    parent_dir = target.parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ": Unexisting %s folder, exiting" % argument, out_log)
        raise SystemExit(classname + ": Unexisting %s folder" % argument)

    # The suffix includes the leading dot; drop it before the lookup.
    extension = target.suffix[1:]
    if is_valid_file(extension, argument):
        return path

    incompatible = (
        classname + ": Format %s in %s file is not compatible"
        % (extension, argument)
    )
    fu.log(incompatible, out_log)
    raise SystemExit(incompatible)
def is_valid_file(ext, argument):
    """Tell whether extension ``ext`` is accepted for output ``argument``.

    Args:
        ext: File extension without the leading dot.
        argument: Name of the output argument; must be one of the keys of
            the table below.

    Returns:
        bool: True when ``ext`` is listed for ``argument``.

    Raises:
        KeyError: If ``argument`` is not a known output argument.
    """
    accepted_by_argument = dict(
        output_sdf_path=["sdf"],
        output_pdb_path=["pdb"],
        output_simulations=["json"],
        output_simulation=["zip"],
        output_pdb_zip_path=["zip"],
        output_mutations_list_txt=["txt"],
        output_json_path=["json"],
        output_fasta_path=["fasta"],
        output_mmcif_path=["mmcif", "cif"],
        output_top_path=["pdb"],
        output_trj_path=["mdcrd", "trr", "xtc"],
    )
    accepted = accepted_by_argument[argument]
    return ext in accepted
def download_pdb(pdb_code, api_id, out_log=None, global_log=None):
    """Download a structure in PDB format from one of the supported APIs.

    Args:
        pdb_code: PDB identifier, inserted verbatim into the request URL.
        api_id: Source selector, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported sources.
    """
    urls = {
        "mmb": "https://mmb.irbbarcelona.org/api/pdb/" + pdb_code + "/coords/?",
        "pdb": "https://files.rcsb.org/download/" + pdb_code + ".pdb",
        "pdbe": "https://www.ebi.ac.uk/pdbe/entry-files/download/pdb" + pdb_code + ".ent",
    }
    # Fail with a clear message instead of an UnboundLocalError further down.
    if api_id not in urls:
        raise ValueError(
            "Unsupported api_id %r, expected one of: %s"
            % (api_id, ", ".join(sorted(urls)))
        )
    url = urls[api_id]

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    # NOTE: the HTTP status is not checked; whatever body the server sends
    # is decoded and returned.
    return requests.get(url).content.decode("utf-8")
def download_af(uniprot_code, out_log=None, global_log=None, classname=None):
    """Download a model from the AlphaFold database as PDB text.

    Requests ``AF-<uniprot_code>-F1-model_v3.pdb`` from
    ``alphafold.ebi.ac.uk``.

    Args:
        uniprot_code: UniProt accession, inserted verbatim into the URL.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://alphafold.ebi.ac.uk/files/AF-" + uniprot_code + "-F1-model_v3.pdb"

    fu.log("Downloading %s from: %s" % (uniprot_code, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and hide the real error.
        prefix = (classname + ": ") if classname else ""
        message = prefix + "Incorrect Uniprot Code: %s" % (uniprot_code)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content.decode("utf-8")
def download_mddb_top(project_id, node_id, selection, out_log=None, global_log=None, classname=None):
    """Download the structure (topology) of an MDDB project as text.

    Args:
        project_id: Project identifier, inserted verbatim into the URL.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        selection: Atom selection; converted with ``str`` and URL-quoted.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id + "/structure?selection=" + urllib.parse.quote(str(selection))

    fu.log("Downloading %s topology from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and hide the real error.
        prefix = (classname + ": ") if classname else ""
        message = prefix + "Incorrect url, check project_id, node_id and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content.decode("utf-8")
def download_mddb_trj(project_id, node_id, trj_format, frames, selection, out_log=None, global_log=None, classname=None):
    """Download a trajectory of an MDDB project as raw bytes.

    Args:
        project_id: Project identifier, inserted verbatim into the URL.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        trj_format: Value of the ``format`` query parameter (string).
        frames: Value of the ``frames`` query parameter (string).
        selection: Atom selection; converted with ``str`` and URL-quoted.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        bytes: Undecoded response body.

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id + "/trajectory?format=" + trj_format + "&frames=" + frames + "&selection=" + urllib.parse.quote(str(selection))

    fu.log("Downloading %s trajectory from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and hide the real error.
        prefix = (classname + ": ") if classname else ""
        message = prefix + "Incorrect url, check project_id, node_id, trj_format, frames and selection: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content
def download_mddb_file(project_id, node_id, file_name, out_log=None, global_log=None, classname=None):
    """Download a named file of an MDDB project as raw bytes.

    Args:
        project_id: Project identifier, inserted verbatim into the URL.
        node_id: Sub-domain of ``mddbr.eu`` to query.
        file_name: Name of the file, appended verbatim to the URL.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
        classname: Optional prefix for the error message.

    Returns:
        bytes: Undecoded response body.

    Raises:
        SystemExit: If the server answers with HTTP 404.
    """
    url = "https://" + node_id + ".mddbr.eu/api/rest/v1/projects/" + project_id + "/files/" + file_name

    fu.log("Downloading %s file from: %s" % (project_id, url), out_log, global_log)

    r = requests.get(url)
    if r.status_code == 404:
        # classname defaults to None; concatenating it directly would raise
        # TypeError and hide the real error.
        prefix = (classname + ": ") if classname else ""
        # The message names the parameters this function actually takes.
        message = prefix + "Incorrect url, check project_id, node_id and file_name: %s" % (url)
        fu.log(message, out_log)
        raise SystemExit(message)

    return r.content
def download_mmcif(pdb_code, api_id, out_log=None, global_log=None):
    """Download a structure in mmCIF format from one of the supported APIs.

    Args:
        pdb_code: PDB identifier, inserted verbatim into the request URL.
        api_id: Source selector, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported sources.
    """
    urls = {
        "mmb": "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".cif",
        "pdb": "https://files.rcsb.org/download/" + pdb_code + ".cif",
        "pdbe": "https://www.ebi.ac.uk/pdbe/entry-files/download/" + pdb_code + ".cif",
    }
    # Fail with a clear message instead of an UnboundLocalError further down.
    if api_id not in urls:
        raise ValueError(
            "Unsupported api_id %r, expected one of: %s"
            % (api_id, ", ".join(sorted(urls)))
        )
    url = urls[api_id]

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    # NOTE: the HTTP status is not checked; whatever body the server sends
    # is decoded and returned.
    return requests.get(url, verify=True).content.decode("utf-8")
def download_ligand(ligand_code, api_id, out_log=None, global_log=None):
    """Download a ligand in PDB format from one of the supported APIs.

    Args:
        ligand_code: Ligand identifier; lower-cased for ``"mmb"`` and
            upper-cased for ``"pdbe"`` before being put in the URL.
        api_id: Source selector, ``"mmb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: Downloaded text with every empty line removed and the
        remaining lines joined with ``os.linesep``.

    Raises:
        ValueError: If ``api_id`` is not one of the supported sources.
    """
    if api_id == "mmb":
        url = "http://mmb.irbbarcelona.org/api/pdbMonomer/" + ligand_code.lower()
        text = requests.get(url, verify=True).content.decode("utf-8")
    elif api_id == "pdbe":
        url = (
            "https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/" + ligand_code.upper() + "_ideal.pdb"
        )
        # Close the HTTP response deterministically instead of relying on GC.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unsupported api_id %r, expected one of: mmb, pdbe" % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    # Drop every empty line (not only trailing ones); whitespace-only lines
    # are kept because they are truthy.
    text = os.linesep.join([s for s in text.splitlines() if s])

    return text
def download_fasta(pdb_code, api_id, out_log=None, global_log=None):
    """Download the FASTA sequence(s) of an entry from a supported API.

    Args:
        pdb_code: PDB identifier, inserted verbatim into the request URL.
        api_id: Source selector, one of ``"mmb"``, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported sources.
    """
    urls = {
        "mmb": "http://mmb.irbbarcelona.org/api/pdb/" + pdb_code + ".fasta",
        "pdb": "https://www.rcsb.org/fasta/entry/" + pdb_code,
        "pdbe": "https://www.ebi.ac.uk/pdbe/entry/pdb/" + pdb_code + "/fasta",
    }
    # Fail with a clear message instead of an UnboundLocalError further down.
    if api_id not in urls:
        raise ValueError(
            "Unsupported api_id %r, expected one of: %s"
            % (api_id, ", ".join(sorted(urls)))
        )
    url = urls[api_id]

    fu.log("Downloading %s from: %s" % (pdb_code, url), out_log, global_log)
    # NOTE: the HTTP status is not checked; whatever body the server sends
    # is decoded and returned.
    return requests.get(url, verify=True).content.decode("utf-8")
def download_binding_site(
    pdb_code,
    url="https://www.ebi.ac.uk/pdbe/api/pdb/entry/binding_sites/%s",
    out_log=None,
    global_log=None,
):
    """Fetch the binding-site description of an entry as formatted JSON.

    Args:
        pdb_code: PDB identifier substituted into ``url``.
        url: URL template containing one ``%s`` placeholder.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: The JSON response re-serialised with 4-space indentation and
        sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting binding sites from: %s" % (url), out_log, global_log)

    # Close the HTTP response deterministically instead of relying on GC.
    with urllib.request.urlopen(url) as response:
        text = response.read()
    json_obj = json.loads(text)
    json_string = json.dumps(json_obj, indent=4, sort_keys=True)

    return json_string
def download_ideal_sdf(ligand_code, api_id, out_log=None, global_log=None):
    """Download the ``<CODE>_ideal.sdf`` file of a ligand.

    Args:
        ligand_code: Ligand identifier; upper-cased before being put in
            the URL.
        api_id: Source selector, ``"pdb"`` or ``"pdbe"``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: Response body decoded as UTF-8.

    Raises:
        ValueError: If ``api_id`` is not one of the supported sources.
    """
    if api_id == "pdb":
        url = (
            "https://files.rcsb.org/ligands/download/" + ligand_code.upper() + "_ideal.sdf"
        )
        text = requests.get(url, verify=True).content.decode("utf-8")
    elif api_id == "pdbe":
        url = (
            "https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/" + ligand_code.upper() + "_ideal.sdf"
        )
        # Close the HTTP response deterministically instead of relying on GC.
        with urllib.request.urlopen(url) as response:
            text = response.read().decode("utf-8")
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unsupported api_id %r, expected one of: pdb, pdbe" % (api_id,)
        )

    fu.log("Downloading %s from: %s" % (ligand_code, url), out_log, global_log)

    return text
def download_str_info(
    pdb_code,
    url="http://mmb.irbbarcelona.org/api/pdb/%s.json",
    out_log=None,
    global_log=None,
):
    """Fetch the structure information of an entry as formatted JSON.

    Args:
        pdb_code: PDB identifier substituted into ``url``.
        url: URL template containing one ``%s`` placeholder.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: The JSON response re-serialised with 4-space indentation and
        sorted keys.
    """
    url = url % pdb_code

    fu.log("Getting structure info from: %s" % (url), out_log, global_log)

    # Close the HTTP response deterministically instead of relying on GC.
    with urllib.request.urlopen(url) as response:
        text = response.read()
    json_obj = json.loads(text)
    json_string = json.dumps(json_obj, indent=4, sort_keys=True)

    return json_string
def write_pdb(pdb_string, output_pdb_path, filt=None, out_log=None, global_log=None):
    """Write PDB text to a file, optionally keeping only selected records.

    Args:
        pdb_string: Full PDB text.
        output_pdb_path: Destination path, opened in text write mode.
        filt: Optional collection of record names. When given, a line is
            written only if the first six characters of its first
            whitespace-separated token are in ``filt``.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Writting pdb to: %s" % (output_pdb_path), out_log, global_log)
    with open(output_pdb_path, "w") as output_pdb_file:
        if not filt:
            output_pdb_file.write(pdb_string)
            return

        fu.log(
            "Filtering lines NOT starting with one of these words: %s" % str(filt),
            out_log,
            global_log,
        )
        # keepends=True so the written lines retain their original newlines.
        for line in pdb_string.splitlines(True):
            tokens = line.split()
            # Blank or whitespace-only lines have no record name; skip them
            # instead of failing with IndexError on tokens[0].
            if tokens and tokens[0][0:6] in filt:
                output_pdb_file.write(line)
def write_bin(bin_string, output_bin_path, out_log=None, global_log=None):
    """Store binary content in ``output_bin_path``.

    Args:
        bin_string: Bytes-like payload to write.
        output_bin_path: Destination path, opened in binary write mode.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Writting bin to: %s" % (output_bin_path), out_log, global_log)
    with open(output_bin_path, mode="wb") as sink:
        sink.write(bin_string)
def write_mmcif(mmcif_string, output_mmcif_path, out_log=None, global_log=None):
    """Store mmCIF text in ``output_mmcif_path``.

    Args:
        mmcif_string: Text to write.
        output_mmcif_path: Destination path, opened in text write mode.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Writting mmcif to: %s" % (output_mmcif_path), out_log, global_log)
    with open(output_mmcif_path, mode="w") as sink:
        sink.write(mmcif_string)
def write_fasta(fasta_string, output_fasta_path, out_log=None, global_log=None):
    """Store FASTA text in ``output_fasta_path``.

    Args:
        fasta_string: Text to write.
        output_fasta_path: Destination path, opened in text write mode.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Writting FASTA to: %s" % (output_fasta_path), out_log, global_log)
    with open(output_fasta_path, mode="w") as sink:
        sink.write(fasta_string)
def write_sdf(sdf_string, output_sdf_path, out_log=None, global_log=None):
    """Store SDF text in ``output_sdf_path``.

    Args:
        sdf_string: Text to write.
        output_sdf_path: Destination path, opened in text write mode.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Writting sdf to: %s" % (output_sdf_path), out_log, global_log)
    with open(output_sdf_path, mode="w") as sink:
        sink.write(sdf_string)
def get_cluster_pdb_codes(pdb_code, cluster, out_log=None, global_log=None):
    """Collect the PDB codes belonging to a cluster of ``pdb_code``.

    Args:
        pdb_code: Reference PDB identifier (lower-cased for the request).
        cluster: Cluster identifier, converted with ``str`` for the URL.
        out_log: Optional logger; receives one ``info`` message.
        global_log: Optional logger; receives the same message prefixed
            with ``fu.get_logs_prefix()``.

    Returns:
        set of str: Lower-cased ``_id`` of every entry under
        ``clusterMembers`` in the JSON response.
    """
    cluster_url = (
        "http://mmb.irbbarcelona.org/api/pdb/"
        + pdb_code.lower() + "/clusters/cl-" + str(cluster) + ".json"
    )
    response = requests.get(cluster_url, verify=True)
    payload = json.loads(response.content.decode("utf-8"))
    members = {member["_id"].lower() for member in payload["clusterMembers"]}

    summary = (
        "Cluster: " + str(cluster) + " of pdb_code: " + pdb_code + "\n List: " + str(members)
    )
    if out_log:
        out_log.info(summary)
    if global_log:
        global_log.info(fu.get_logs_prefix() + summary)

    return members
def get_uniprot(pdb_code, url, out_log=None, global_log=None):
    """Look up the UniProt code associated with ``pdb_code``.

    Args:
        pdb_code: PDB identifier (lower-cased for the request).
        url: Base URL of the API; ``/pdb/<code>/entry/uniprotRefs/_id`` is
            appended to it.
        out_log: Optional logger; receives one ``info`` message.
        global_log: Optional logger; receives the same ``info`` message.

    Returns:
        str: First element of the ``uniprotRefs._id`` field in the JSON
        response.
    """
    endpoint = url + "/pdb/" + pdb_code.lower() + "/entry/uniprotRefs/_id"
    reply = requests.get(endpoint, verify=True).json()
    uniprot_id = reply["uniprotRefs._id"][0]

    for logger in (out_log, global_log):
        if logger:
            logger.info(
                "PDB code: " + pdb_code + " correspond to uniprot id: " + uniprot_id
            )

    return uniprot_id
def get_variants(
    uniprot_id, url="http://mmb.irbbarcelona.org/api", out_log=None, global_log=None
):
    """Fetch the variants recorded for ``uniprot_id``.

    Args:
        uniprot_id: UniProt code, inserted verbatim into the URL.
        url: Base URL of the API.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        list: Value of ``variants.vardata.mut`` in the JSON response, or
        an empty list when that value is falsy.
    """
    endpoint = (
        url + "/uniprot/" + uniprot_id + "/entry/variants/vardata/mut/?varorig=humsavar"
    )
    payload = requests.get(endpoint, verify=True).json()
    # A falsy value (e.g. null or an empty list) is normalised to [].
    variants = payload["variants.vardata.mut"] or []

    fu.log(
        "Found: %d variants for uniprot id: %s" % (len(variants), uniprot_id),
        out_log,
        global_log,
    )
    return variants
def write_json(json_string, output_json_path, out_log=None, global_log=None):
    """Store an already-serialised JSON string in ``output_json_path``.

    Args:
        json_string: Text to write; it is written as-is.
        output_json_path: Destination path, opened in text write mode.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Writting json to: %s" % (output_json_path), out_log, global_log)
    with open(output_json_path, mode="w") as sink:
        sink.write(json_string)
def get_memprotmd_sim_list(out_log=None, global_log=None):
    """List every simulation exposed by the MemProtMD REST API.

    Sends a POST request to the ``simulations/all`` endpoint and logs how
    many items the response contains.

    Args:
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: The JSON response re-serialised with 4-space indentation.
    """
    fu.log(
        "Getting all available membrane-protein systems (simulations) from the MemProtMD REST API",
        out_log,
        global_log,
    )

    simulations = requests.post(
        "http://memprotmd.bioch.ox.ac.uk/api/simulations/all"
    ).json()
    pretty = json.dumps(simulations, indent=4)

    fu.log("Total number of simulations: %d" % (len(simulations)), out_log, global_log)

    return pretty
def get_memprotmd_sim_search(collection_name, keyword, out_log=None, global_log=None):
    """Run an advanced keyword search against the MemProtMD REST API.

    Args:
        collection_name: Sent as ``collectionName`` in the query.
        keyword: Sent as ``query.keywords`` in the query.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.

    Returns:
        str: The JSON response re-serialised with 4-space indentation.
    """
    fu.log(
        "Getting search results from the MemProtMD REST API. Collection name: %s, keyword: %s"
        % (collection_name, keyword),
        out_log,
        global_log,
    )

    search_request = {
        "collectionName": collection_name,
        "query": {"keywords": keyword},
        "projection": {"simulations": 1},
        "options": {},
    }
    results = requests.post(
        "http://memprotmd.bioch.ox.ac.uk/api/search/advanced", json=search_request
    ).json()
    pretty = json.dumps(results, indent=4)

    # Flatten the per-result "simulations" lists to count all simulations.
    found = [sim for entry in results for sim in entry["simulations"]]

    fu.log("Total number of simulations: %d" % (len(found)), out_log, global_log)

    return pretty
def get_memprotmd_sim(pdb_code, output_file, out_log=None, global_log=None):
    """Download one MemProtMD simulation archive and save it to disk.

    Requests ``<pdb_code>_default_dppc/files/run/at.zip`` from the
    MemProtMD data server and writes the response body to ``output_file``.

    Args:
        pdb_code: PDB identifier, inserted verbatim into the URL.
        output_file: Destination path, opened in binary write mode.
        out_log: Logger forwarded to ``fu.log``.
        global_log: Global logger forwarded to ``fu.log``.
    """
    fu.log("Getting simulation file from pdb code %s" % (pdb_code), out_log, global_log)

    url = (
        "http://memprotmd.bioch.ox.ac.uk/data/memprotmd/simulations/" + pdb_code + "_default_dppc/files/run/at.zip"
    )
    response = requests.get(url)

    # Use a context manager so the file is flushed and closed right away
    # instead of whenever the handle happens to be garbage-collected.
    with open(output_file, "wb") as archive:
        archive.write(response.content)

    fu.log("Saving output %s file" % (output_file), out_log, global_log)
def check_mandatory_property(property, name, out_log, classname):
    """Ensure a mandatory property has a truthy value.

    Args:
        property: Value to check.
        name: Property name used in the messages.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix for log and error messages.

    Returns:
        The ``property`` value itself when it is truthy.

    Raises:
        SystemExit: If ``property`` is falsy.
    """
    if property:
        return property

    fu.log(classname + ": Unexisting %s property, exiting" % name, out_log)
    raise SystemExit(classname + ": Unexisting %s property" % name)
def check_uniprot_code(code, out_log, classname):
    """Validate that ``code`` is a UniProt accession.

    Args:
        code: String to validate.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix for log and error messages.

    Returns:
        bool: Always True when the code is accepted.

    Raises:
        SystemExit: If ``code`` does not match the accession pattern.
    """
    pattern = re.compile(
        (r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
    )

    # fullmatch: the whole string must be an accession. match() only anchors
    # the start, so a valid prefix followed by arbitrary text was accepted.
    if not pattern.fullmatch(code):
        fu.log(classname + ": Incorrect uniprot code for %s" % code, out_log)
        raise SystemExit(classname + ": Incorrect uniprot code for %s" % code)

    return True