⬅ biobb_io/api/pdb_variants.py source

1 #!/usr/bin/env python
2  
3 """PdbVariants Module"""
4  
5 import argparse
6 import re
7 from typing import Optional
8  
9 import requests
10 from biobb_common.configuration import settings
11 from biobb_common.generic.biobb_object import BiobbObject
12 from biobb_common.tools import file_utils as fu
13 from biobb_common.tools.file_utils import launchlogger
14  
15 from biobb_io.api.common import (
16 check_mandatory_property,
17 check_output_path,
18 get_uniprot,
19 get_variants,
20 )
21  
22  
class PdbVariants(BiobbObject):
    """
    | biobb_io PdbVariants
    | This class creates a text file containing a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.
    | Wrapper for the `UNIPROT <http://www.uniprot.org/>`_ mirror of the `MMB group REST API <http://mmb.irbbarcelona.org/api/>`_ for creating a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.

    Args:
        output_mutations_list_txt (str): Path to the TXT file containing an ASCII comma separated values of the mutations. File type: output. `Sample file <https://github.com/bioexcel/biobb_io/raw/master/biobb_io/test/reference/api/output_pdb_variants.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **pdb_code** (*str*) - (None) RCSB PDB four letter code.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the PdbVariants module from Python::

            from biobb_io.api.pdb_variants import pdb_variants
            prop = {
                'pdb_code': '2VGB'
            }
            pdb_variants(output_mutations_list_txt='/path/to/newMutationslist.txt',
                        properties=prop)

    Info:
        * wrapped_software:
            * name: UNIPROT
            * license: Creative Commons
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, output_mutations_list_txt, properties=None, **kwargs) -> None:
        """Store the output path and the tool properties, then validate them.

        Args:
            output_mutations_list_txt: Path of the TXT file to be written.
            properties: Optional dictionary with the tool parameters; an
                empty dictionary is used when it is ``None`` or empty.
            **kwargs: Accepted for interface compatibility; not used here.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken here, before any other
        # local name is created, so it only contains the call arguments.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {"out": {"output_mutations_list_txt": output_mutations_list_txt}}

        # Properties specific for BB
        self.pdb_code = properties.get("pdb_code", None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters"""
        self.output_mutations_list_txt = check_output_path(
            self.io_dict["out"]["output_mutations_list_txt"],
            "output_mutations_list_txt",
            False,
            out_log,
            self.__class__.__name__,
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`PdbVariants <api.pdb_variants.PdbVariants>` api.pdb_variants.PdbVariants object.

        Returns:
            int: ``0`` when the mutations file has been written or when
            ``check_restart()`` short-circuits the run; ``1`` when the residue
            mapping request returns an empty result.

        Raises:
            ValueError: If no output file path is available at write time.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0

        check_mandatory_property(
            self.pdb_code, "pdb_code", self.out_log, self.__class__.__name__
        )

        self.pdb_code = self.pdb_code.strip().lower()

        url = "http://mmb.irbbarcelona.org/api"
        uniprot_id = get_uniprot(self.pdb_code, url, self.out_log, self.global_log)
        url_mapPDBRes = (
            url + "/uniprot/" + uniprot_id + "/mapPDBRes?pdbId=" + self.pdb_code
        )
        # NOTE(review): the "." after "p" is unescaped, so it matches any
        # character and not only a literal dot - confirm whether that is intended.
        pattern = re.compile(
            (r"p.(?P<wt>[a-zA-Z]{3})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{3})")
        )

        fu.log(
            "Fetching variants for uniprot_id: %s and pdb_code: %s"
            % (uniprot_id, self.pdb_code),
            self.out_log,
            self.global_log,
        )

        # Fetch the UniProt -> PDB residue mapping once and reuse it both for
        # the emptiness check and for the renumbering below; requesting the
        # same URL a second time only added latency and could return data that
        # differs from what was checked.
        mapdic = requests.get(url_mapPDBRes, verify=True).json()
        if not mapdic:
            fu.log("No mutation found", self.out_log, self.global_log)
            return 1

        mutations = []
        uniprot_var_list = get_variants(uniprot_id, url, self.out_log, self.global_log)
        for var in uniprot_var_list:
            match = pattern.match(var)
            if not match:
                # Skip variants that do not follow the expected notation.
                continue
            uni_mut = match.groupdict()
            unp_resnum = int(uni_mut["resnum"])

            for map_key, fragments in mapdic.items():
                for fragment in fragments:
                    unp_start = int(fragment["unp_start"])
                    if unp_start <= unp_resnum <= int(fragment["unp_end"]):
                        # Shift the UniProt residue number by the offset
                        # between the fragment's PDB and UniProt start.
                        resnum = unp_resnum + int(fragment["pdb_start"]) - unp_start
                        # The last character of the mapping key is used as the
                        # prefix (presumably the chain identifier - confirm
                        # against the API response format).
                        mutations.append(
                            map_key[-1]
                            + "."
                            + uni_mut["wt"]
                            + str(resnum)
                            + uni_mut["mt"]
                        )

        fu.log(
            "Found %d mutations mapped to PDB: %s" % (len(mutations), self.pdb_code),
            self.out_log,
            self.global_log,
        )
        fu.log(
            "Writting mutations to: %s" % self.output_mutations_list_txt,
            self.out_log,
            self.global_log,
        )

        if not self.output_mutations_list_txt:
            raise ValueError("Output mutations list file path is not specified.")

        with open(self.output_mutations_list_txt, "w") as mut_file:
            # Sorted, comma separated, single line without trailing newline.
            mutations.sort()
            mut_file.write(",".join(mutations))

        self.check_arguments(output_files_created=True, raise_exception=False)

        return 0
168  
169  
def pdb_variants(
    output_mutations_list_txt: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Create a :class:`PdbVariants <api.pdb_variants.PdbVariants>` object and
    run its :meth:`launch() <api.pdb_variants.PdbVariants.launch>` method.

    Args:
        output_mutations_list_txt: Path of the TXT file to be written.
        properties: Optional dictionary with the tool parameters.
        **kwargs: Forwarded unchanged to the ``PdbVariants`` constructor.

    Returns:
        int: The value returned by ``launch()``.
    """
    building_block = PdbVariants(
        output_mutations_list_txt=output_mutations_list_txt,
        properties=properties,
        **kwargs,
    )
    return building_block.launch()
181  
182  
def main():
    """Command line execution of this building block. Please check the command line documentation."""
    cli = argparse.ArgumentParser(
        description="Wrapper for the UNIPROT (http://www.uniprot.org/) mirror of the MMB group REST API (http://mmb.irbbarcelona.org/api/) for creating a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )

    # Optional configuration source shared by every building block
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Arguments specific to this building block
    required_group = cli.add_argument_group("required arguments")
    required_group.add_argument(
        "-o",
        "--output_mutations_list_txt",
        required=True,
        help="Path to the TXT file containing an ASCII comma separated values of the mutations. Accepted formats: txt.",
    )

    parsed = cli.parse_args()
    # A missing or empty --config value is passed on as None.
    config_source = parsed.config or None
    tool_properties = settings.ConfReader(config=config_source).get_prop_dic()

    # Run the building block with the parsed output path and properties
    pdb_variants(
        output_mutations_list_txt=parsed.output_mutations_list_txt,
        properties=tool_properties,
    )
213  
214  
# Invoke the command line entry point only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()