Coverage for biobb_io/api/pdb_variants.py: 25%

63 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-04 08:31 +0000

1#!/usr/bin/env python 

2 

3"""PdbVariants Module""" 

4 

5import re 

6import requests 

7from typing import Optional 

8from biobb_common.generic.biobb_object import BiobbObject 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.tools.file_utils import launchlogger 

11 

12from biobb_io.api.common import ( 

13 check_mandatory_property, 

14 check_output_path, 

15 get_uniprot, 

16 get_variants, 

17) 

18 

19 

class PdbVariants(BiobbObject):
    """
    | biobb_io PdbVariants
    | This class creates a text file containing a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.
    | Wrapper for the `UNIPROT <http://www.uniprot.org/>`_ mirror of the `MMB group REST API <http://mmb.irbbarcelona.org/api/>`_ for creating a list of all the variants mapped to a PDB code from the corresponding UNIPROT entries.

    Args:
        output_mutations_list_txt (str): Path to the TXT file containing an ASCII comma separated values of the mutations. File type: output. `Sample file <https://github.com/bioexcel/biobb_io/raw/master/biobb_io/test/reference/api/output_pdb_variants.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **pdb_code** (*str*) - (None) RCSB PDB four letter code.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the PdbVariants module from Python

            from biobb_io.api.pdb_variants import pdb_variants
            prop = {
                'pdb_code': '2VGB'
            }
            pdb_variants(output_mutations_list_txt='/path/to/newMutationslist.txt',
                        properties=prop)

    Info:
        * wrapped_software:
            * name: UNIPROT
            * license: Creative Commons
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    # Variant descriptors are expected to look like "p.Ala115Pro": a literal
    # "p." prefix, a three-letter wild-type residue, the UniProt residue
    # number and a three-letter mutated residue. The dot is escaped so that
    # only the literal prefix is accepted.
    _VARIANT_PATTERN = re.compile(
        r"p\.(?P<wt>[a-zA-Z]{3})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{3})"
    )

    def __init__(self, output_mutations_list_txt, properties=None, **kwargs) -> None:
        """Store the output path and the tool properties.

        Args:
            output_mutations_list_txt: Path of the TXT file to be written.
            properties: Dictionary with the tool parameters (``pdb_code`` and
                the workflow properties); ``None`` is treated as empty.
            **kwargs: Accepted for interface compatibility; only captured in
                ``locals_var_dict``.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; no extra local may be
        # defined before this line or it would leak into the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {"out": {"output_mutations_list_txt": output_mutations_list_txt}}

        # Properties specific for BB
        self.pdb_code = properties.get("pdb_code", None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters"""
        self.output_mutations_list_txt = check_output_path(
            self.io_dict["out"]["output_mutations_list_txt"],
            "output_mutations_list_txt",
            False,
            out_log,
            self.__class__.__name__,
        )

    @classmethod
    def _map_variants_to_pdb(cls, variants, mapdic):
        """Translate UniProt variant descriptors into PDB-numbered mutations.

        Args:
            variants: Iterable of variant strings; entries that do not match
                ``_VARIANT_PATTERN`` are skipped.
            mapdic: Mapping whose values are lists of fragment dictionaries
                with ``unp_start``, ``unp_end`` and ``pdb_start`` entries.

        Returns:
            Unsorted list of strings ``<c>.<wt><pdb_resnum><mt>`` where ``<c>``
            is the last character of the mapping key (presumably the chain
            identifier -- confirm against the REST API).
        """
        mutations = []
        for variant in variants:
            match = cls._VARIANT_PATTERN.match(variant)
            if not match:
                continue
            wild_type = match.group("wt")
            mutated = match.group("mt")
            unp_resnum = int(match.group("resnum"))
            for key, fragments in mapdic.items():
                for fragment in fragments:
                    unp_start = int(fragment["unp_start"])
                    if unp_start <= unp_resnum <= int(fragment["unp_end"]):
                        # Shift the UniProt numbering by the fragment offset
                        # to obtain the residue number used in the PDB file.
                        pdb_resnum = unp_resnum + int(fragment["pdb_start"]) - unp_start
                        mutations.append(
                            key[-1] + "." + wild_type + str(pdb_resnum) + mutated
                        )
        return mutations

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`PdbVariants <api.pdb_variants.PdbVariants>` api.pdb_variants.PdbVariants object.

        Returns:
            ``0`` when the mutation list has been written (or the step is
            skipped by ``check_restart``); ``1`` when the residue mapping
            returned by the REST API is empty.

        Raises:
            ValueError: If the output file path is empty.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0

        check_mandatory_property(
            self.pdb_code, "pdb_code", self.out_log, self.__class__.__name__
        )

        self.pdb_code = self.pdb_code.strip().lower()

        url = "http://mmb.irbbarcelona.org/api"
        uniprot_id = get_uniprot(self.pdb_code, url, self.out_log, self.global_log)
        url_mapPDBRes = (
            url + "/uniprot/" + uniprot_id + "/mapPDBRes?pdbId=" + self.pdb_code
        )

        fu.log(
            "Fetching variants for uniprot_id: %s and pdb_code: %s"
            % (uniprot_id, self.pdb_code),
            self.out_log,
            self.global_log,
        )
        # Fetch the UniProt <-> PDB residue mapping once and reuse it both for
        # the emptiness check and for the actual mapping.
        mapdic = requests.get(url_mapPDBRes, verify=True).json()
        if not mapdic:
            fu.log("No mutation found", self.out_log, self.global_log)
            return 1

        uniprot_var_list = get_variants(uniprot_id, url, self.out_log, self.global_log)
        mutations = self._map_variants_to_pdb(uniprot_var_list, mapdic)

        fu.log(
            "Found %d mutations mapped to PDB: %s" % (len(mutations), self.pdb_code),
            self.out_log,
            self.global_log,
        )
        fu.log(
            "Writting mutations to: %s" % self.output_mutations_list_txt,
            self.out_log,
            self.global_log,
        )

        if not self.output_mutations_list_txt:
            raise ValueError("Output mutations list file path is not specified.")

        with open(self.output_mutations_list_txt, "w") as mut_file:
            mut_file.write(",".join(sorted(mutations)))

        self.check_arguments(output_files_created=True, raise_exception=False)

        return 0

161 

162 

def pdb_variants(
    output_mutations_list_txt: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Build a :class:`PdbVariants <api.pdb_variants.PdbVariants>` object and
    run its :meth:`launch() <api.pdb_variants.PdbVariants.launch>` method.

    The extra keyword arguments are handed over packed under a single
    ``kwargs`` keyword, exactly as they are received here.
    """
    building_block = PdbVariants(
        output_mutations_list_txt=output_mutations_list_txt,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()

169 

170 

# The functional wrapper shares the class documentation.
pdb_variants.__doc__ = PdbVariants.__doc__

# Command line entry point built by the PdbVariants.get_main helper from the
# functional wrapper and a description string.
main = PdbVariants.get_main(
    pdb_variants,
    "Wrapper for the UNIPROT (http://www.uniprot.org/) mirror of the MMB group "
    "REST API (http://mmb.irbbarcelona.org/api/) for creating a list of all the "
    "variants mapped to a PDB code from the corresponding UNIPROT entries.",
)

if __name__ == "__main__":
    main()