Coverage for biobb_io/api/pdb_variants.py: 81%

72 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-21 16:46 +0000

1#!/usr/bin/env python 

2 

3"""PdbVariants Module""" 

4 

5import argparse 

6import re 

7from typing import Optional 

8 

9import requests 

10from biobb_common.configuration import settings 

11from biobb_common.generic.biobb_object import BiobbObject 

12from biobb_common.tools import file_utils as fu 

13from biobb_common.tools.file_utils import launchlogger 

14 

15from biobb_io.api.common import ( 

16 check_mandatory_property, 

17 check_output_path, 

18 get_uniprot, 

19 get_variants, 

20) 

21 

22 

class PdbVariants(BiobbObject):
    """
    | biobb_io PdbVariants
    | This class creates a text file containing a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.
    | Wrapper for the `UNIPROT <http://www.uniprot.org/>`_ mirror of the `MMB group REST API <http://mmb.irbbarcelona.org/api/>`_ for creating a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.

    Args:
        output_mutations_list_txt (str): Path to the TXT file containing an ASCII comma separated values of the mutations. File type: output. `Sample file <https://github.com/bioexcel/biobb_io/raw/master/biobb_io/test/reference/api/output_pdb_variants.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **pdb_code** (*str*) - (None) RSCB PDB four letter code.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the PdbVariants module from Python

            from biobb_io.api.pdb_variants import pdb_variants
            prop = {
                'pdb_code': '2VGB'
            }
            pdb_variants(output_mutations_list_txt='/path/to/newMutationslist.txt',
                        properties=prop)

    Info:
        * wrapped_software:
            * name: UNIPROT
            * license: Creative Commons
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, output_mutations_list_txt, properties=None, **kwargs) -> None:
        # Normalise a missing properties argument to an empty dict so the
        # ``.get`` lookup below is always valid.
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # further local name is bound in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {"out": {"output_mutations_list_txt": output_mutations_list_txt}}

        # Properties specific for BB
        self.pdb_code = properties.get("pdb_code", None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters

        Passes the configured output path through ``check_output_path`` and
        stores the returned value in ``self.output_mutations_list_txt``.
        ``err_log`` is accepted but not used by this method.
        """
        self.output_mutations_list_txt = check_output_path(
            self.io_dict["out"]["output_mutations_list_txt"],
            "output_mutations_list_txt",
            False,
            out_log,
            self.__class__.__name__,
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`PdbVariants <api.pdb_variants.PdbVariants>` api.pdb_variants.PdbVariants object.

        Resolves the UniProt id of ``pdb_code``, downloads the UniProt-to-PDB
        residue mapping and the UniProt variant list, translates every variant
        that falls inside a mapped fragment to PDB numbering and writes the
        sorted, comma separated result to ``output_mutations_list_txt``.

        Returns:
            int: ``0`` when the run is skipped because ``check_restart()`` is
            true or when the output file has been written; ``1`` when the
            residue mapping returned by the REST API is empty.

        Raises:
            ValueError: If the checked output path is empty.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0

        check_mandatory_property(
            self.pdb_code, "pdb_code", self.out_log, self.__class__.__name__
        )

        self.pdb_code = self.pdb_code.strip().lower()

        url = "http://mmb.irbbarcelona.org/api"
        uniprot_id = get_uniprot(self.pdb_code, url, self.out_log, self.global_log)
        url_mapPDBRes = (
            url + "/uniprot/" + uniprot_id + "/mapPDBRes?pdbId=" + self.pdb_code
        )
        # Variant notation: "p." + 3-letter wild type + residue number +
        # 3-letter mutant. The dot is escaped so that only a literal "p."
        # prefix is accepted (an unescaped dot would match any character).
        pattern = re.compile(
            r"p\.(?P<wt>[a-zA-Z]{3})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{3})"
        )

        fu.log(
            "Fetching variants for uniprot_id: %s and pdb_code: %s"
            % (uniprot_id, self.pdb_code),
            self.out_log,
            self.global_log,
        )
        # Fetch the residue mapping once and reuse it for both the emptiness
        # check and the mapping loop; a second identical request is wasted
        # network traffic and could return something different from what was
        # just validated.
        mapdic = requests.get(url_mapPDBRes, verify=True).json()
        if not mapdic:
            fu.log("No mutation found", self.out_log, self.global_log)
            return 1

        mutations = []
        uniprot_var_list = get_variants(uniprot_id, url, self.out_log, self.global_log)
        for var in uniprot_var_list:
            match = pattern.match(var)
            if not match:
                continue
            uni_mut = match.groupdict()
            unp_resnum = int(uni_mut["resnum"])
            for key, fragments in mapdic.items():
                for fragment in fragments:
                    unp_start = int(fragment["unp_start"])
                    if unp_start <= unp_resnum <= int(fragment["unp_end"]):
                        # Shift the UniProt residue number by the offset
                        # between the fragment's PDB and UniProt start.
                        resnum = unp_resnum + int(fragment["pdb_start"]) - unp_start
                        # The last character of the mapping key is used as the
                        # prefix (presumably the chain id - confirm against
                        # the REST API response format).
                        mutations.append(
                            key[-1] + "." + uni_mut["wt"] + str(resnum) + uni_mut["mt"]
                        )

        fu.log(
            "Found %d mutations mapped to PDB: %s" % (len(mutations), self.pdb_code),
            self.out_log,
            self.global_log,
        )
        fu.log(
            "Writting mutations to: %s" % self.output_mutations_list_txt,
            self.out_log,
            self.global_log,
        )

        if not self.output_mutations_list_txt:
            raise ValueError("Output mutations list file path is not specified.")

        with open(self.output_mutations_list_txt, "w") as mut_file:
            mutations.sort()
            mut_file.write(",".join(mutations))

        self.check_arguments(output_files_created=True, raise_exception=False)

        return 0

164 

165 

def pdb_variants(
    output_mutations_list_txt: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Execute the :class:`PdbVariants <api.pdb_variants.PdbVariants>` class and
    execute the :meth:`launch() <api.pdb_variants.PdbVariants.launch>` method.

    All arguments are forwarded unchanged to the ``PdbVariants`` constructor;
    the value returned by ``launch()`` is returned to the caller.
    """
    building_block = PdbVariants(
        output_mutations_list_txt=output_mutations_list_txt,
        properties=properties,
        **kwargs,
    )
    return building_block.launch()

177 

178 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses ``-c/--config`` (optional) and ``-o/--output_mutations_list_txt``
    (required), builds the properties dictionary from the configuration and
    calls ``pdb_variants`` with them.
    """

    def _wide_raw_text_formatter(prog):
        # Very large width so argparse does not wrap the long help strings.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    parser = argparse.ArgumentParser(
        description="Wrapper for the UNIPROT (http://www.uniprot.org/) mirror of the MMB group REST API (http://mmb.irbbarcelona.org/api/) for creating a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.",
        formatter_class=_wide_raw_text_formatter,
    )
    parser.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Arguments specific to this building block
    required_group = parser.add_argument_group("required arguments")
    required_group.add_argument(
        "-o",
        "--output_mutations_list_txt",
        required=True,
        help="Path to the TXT file containing an ASCII comma separated values of the mutations. Accepted formats: txt.",
    )

    cli_args = parser.parse_args()
    # A missing or empty --config value is passed on as None.
    config = cli_args.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Run the building block with the parsed arguments
    pdb_variants(
        output_mutations_list_txt=cli_args.output_mutations_list_txt,
        properties=properties,
    )

209 

210 

# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()