Coverage for biobb_io/api/pdb_variants.py: 25%
63 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 08:31 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 08:31 +0000
1#!/usr/bin/env python
3"""PdbVariants Module"""
5import re
6import requests
7from typing import Optional
8from biobb_common.generic.biobb_object import BiobbObject
9from biobb_common.tools import file_utils as fu
10from biobb_common.tools.file_utils import launchlogger
12from biobb_io.api.common import (
13 check_mandatory_property,
14 check_output_path,
15 get_uniprot,
16 get_variants,
17)
class PdbVariants(BiobbObject):
    """
    | biobb_io PdbVariants
    | This class creates a text file containing a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.
    | Wrapper for the `UNIPROT <http://www.uniprot.org/>`_ mirror of the `MMB group REST API <http://mmb.irbbarcelona.org/api/>`_ for creating a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.

    Args:
        output_mutations_list_txt (str): Path to the TXT file containing an ASCII comma separated values of the mutations. File type: output. `Sample file <https://github.com/bioexcel/biobb_io/raw/master/biobb_io/test/reference/api/output_pdb_variants.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **pdb_code** (*str*) - (None) RCSB PDB four letter code.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the PdbVariants module from Python

            from biobb_io.api.pdb_variants import pdb_variants
            prop = {
                'pdb_code': '2VGB'
            }
            pdb_variants(output_mutations_list_txt='/path/to/newMutationslist.txt',
                         properties=prop)

    Info:
        * wrapped_software:
            * name: UNIPROT
            * license: Creative Commons
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, output_mutations_list_txt, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # further local names are introduced in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {"out": {"output_mutations_list_txt": output_mutations_list_txt}}

        # Properties specific for BB
        self.pdb_code = properties.get("pdb_code", None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters"""
        self.output_mutations_list_txt = check_output_path(
            self.io_dict["out"]["output_mutations_list_txt"],
            "output_mutations_list_txt",
            False,
            out_log,
            self.__class__.__name__,
        )

    @staticmethod
    def _map_variants_to_pdb(variants, mapdic):
        """Translate UniProt variant strings into PDB-numbered mutation strings.

        Args:
            variants: Iterable of variant strings; only those starting with
                ``p.`` followed by a three-letter code, a residue number and a
                second three-letter code (e.g. ``p.Arg123Cys``) are used, the
                rest are skipped.
            mapdic: Mapping whose values are lists of fragments, each fragment
                providing ``unp_start``, ``unp_end`` and ``pdb_start``.

        Returns:
            list: One ``<c>.<wt><pdb_resnum><mt>`` string per (variant,
            fragment) pair whose UniProt range contains the variant residue,
            where ``<c>`` is the last character of the mapping key
            (presumably the chain identifier -- confirm against the API).
        """
        # The dot after "p" is escaped so that only a literal "p." prefix
        # is accepted instead of "p" followed by any character.
        pattern = re.compile(
            r"p\.(?P<wt>[a-zA-Z]{3})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{3})"
        )
        mutations = []
        for var in variants:
            match = pattern.match(var)
            if not match:
                continue
            wild_type = match.group("wt")
            mutant = match.group("mt")
            unp_resnum = int(match.group("resnum"))
            for key, fragments in mapdic.items():
                for fragment in fragments:
                    unp_start = int(fragment["unp_start"])
                    if unp_start <= unp_resnum <= int(fragment["unp_end"]):
                        # Shift the UniProt numbering to the PDB numbering
                        # of the fragment that contains the residue.
                        pdb_resnum = (
                            unp_resnum + int(fragment["pdb_start"]) - unp_start
                        )
                        mutations.append(
                            key[-1] + "." + wild_type + str(pdb_resnum) + mutant
                        )
        return mutations

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`PdbVariants <api.pdb_variants.PdbVariants>` api.pdb_variants.PdbVariants object.

        Returns:
            int: ``0`` when the output file was written or the step was skipped
            by the restart check, ``1`` when the residue mapping request
            returned an empty result.

        Raises:
            ValueError: If the output path is empty after validation.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0

        check_mandatory_property(
            self.pdb_code, "pdb_code", self.out_log, self.__class__.__name__
        )

        self.pdb_code = self.pdb_code.strip().lower()

        url = "http://mmb.irbbarcelona.org/api"
        uniprot_id = get_uniprot(self.pdb_code, url, self.out_log, self.global_log)
        url_mapPDBRes = (
            url + "/uniprot/" + uniprot_id + "/mapPDBRes?pdbId=" + self.pdb_code
        )

        fu.log(
            "Fetching variants for uniprot_id: %s and pdb_code: %s"
            % (uniprot_id, self.pdb_code),
            self.out_log,
            self.global_log,
        )
        # The mapping is fetched once and reused both for the emptiness check
        # and for the residue renumbering below.
        # NOTE(review): no timeout is set on this request -- consider adding one.
        mapdic = requests.get(url_mapPDBRes, verify=True).json()
        if not mapdic:
            fu.log("No mutation found", self.out_log, self.global_log)
            return 1

        uniprot_var_list = get_variants(uniprot_id, url, self.out_log, self.global_log)
        mutations = self._map_variants_to_pdb(uniprot_var_list, mapdic)

        fu.log(
            "Found %d mutations mapped to PDB: %s" % (len(mutations), self.pdb_code),
            self.out_log,
            self.global_log,
        )
        fu.log(
            "Writting mutations to: %s" % self.output_mutations_list_txt,
            self.out_log,
            self.global_log,
        )

        if not self.output_mutations_list_txt:
            raise ValueError("Output mutations list file path is not specified.")

        with open(self.output_mutations_list_txt, "w") as mut_file:
            mut_file.write(",".join(sorted(mutations)))

        self.check_arguments(output_files_created=True, raise_exception=False)

        return 0
def pdb_variants(
    output_mutations_list_txt: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Build a :class:`PdbVariants <api.pdb_variants.PdbVariants>` object and
    run its :meth:`launch() <api.pdb_variants.PdbVariants.launch>` method,
    returning the exit code it produces."""
    # Extra keyword arguments are handed over bundled under a single
    # ``kwargs`` keyword rather than being unpacked.
    building_block = PdbVariants(
        output_mutations_list_txt=output_mutations_list_txt,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()
# The functional wrapper exposes the same documentation as the class.
pdb_variants.__doc__ = PdbVariants.__doc__

# Command-line entry point produced by the class-level ``get_main`` helper.
main = PdbVariants.get_main(
    pdb_variants,
    "Wrapper for the UNIPROT (http://www.uniprot.org/) mirror of the MMB group "
    "REST API (http://mmb.irbbarcelona.org/api/) for creating a list of all the "
    "variants mapped to a PDB code from the corresponding UNIPROT entries.",
)

if __name__ == "__main__":
    main()