Coverage for biobb_structure_utils/utils/extract_heteroatoms.py: 74%

69 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 11:54 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ExtractHeteroAtoms class and the command line interface.""" 

4 

5import argparse 

6from typing import Optional 

7 

8from Bio.PDB.PDBParser import PDBParser 

9from biobb_common.configuration import settings 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13 

14from biobb_structure_utils.utils.common import ( 

15 _from_string_to_list, 

16 check_format_heteroatoms, 

17 check_input_path, 

18 check_output_path, 

19 create_biopython_residue, 

20 create_output_file, 

21) 

22 

23 

class ExtractHeteroAtoms(BiobbObject):
    """
    | biobb_structure_utils ExtractHeteroAtoms
    | Class to extract hetero-atoms from a 3D structure using Biopython.
    | Extracts a list of heteroatoms from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_heteroatom_path (str): Output heteroatom file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **heteroatoms** (*list*) - (None) List of dictionaries with the name | res_id | chain | model of the heteroatoms to be extracted. Format: [{"name": "ZZ7", "res_id": "302", "chain": "B", "model": "1"}]. If empty, all the heteroatoms of the structure will be returned.
            * **water** (*bool*) - (False) Add or not waters.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_heteroatoms import extract_heteroatoms
            prop = {
                'heteroatoms': [
                    {
                        'name': 'ZZ7',
                        'res_id': '302',
                        'chain': 'B',
                        'model': '1'
                    }
                ]
            }
            extract_heteroatoms(input_structure_path='/path/to/myStructure.pdb',
                                output_heteroatom_path='/path/to/newHeteroatom.pdb',
                                properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_heteroatom_path, properties=None, **kwargs
    ) -> None:
        # A missing/None properties argument is treated as "no properties".
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any further
        # local is created so that it only contains the call parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_heteroatom_path": output_heteroatom_path},
        }

        # Properties specific for BB
        self.heteroatoms = _from_string_to_list(properties.get("heteroatoms", []))
        self.water = properties.get("water", False)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractHeteroAtoms <utils.extract_heteroatoms.ExtractHeteroAtoms>` utils.extract_heteroatoms.ExtractHeteroAtoms object.

        Returns:
            int: ``0`` when ``check_restart()`` is truthy (nothing is executed),
            otherwise ``self.return_code``, which is set to ``0`` once the
            output file has been created.

        Raises:
            SystemExit: If no residue of the input structure is selected.
        """
        class_name = self.__class__.__name__

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            class_name,
        )
        self.io_dict["out"]["output_heteroatom_path"] = check_output_path(
            self.io_dict["out"]["output_heteroatom_path"],
            self.out_log,
            class_name,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of heteroatoms from properties
        list_heteroatoms = check_format_heteroatoms(self.heteroatoms, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # Residue names treated as water; dropped unless the `water` property is set.
        water_names = ("HOH", "SOL", "WAT")

        new_structure = []
        # get desired heteroatoms
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)

            # With an explicit selection, keep the residue only if at least one
            # requested entry matches on every field listed in its "code".
            # any() stops at the first matching entry, so a residue that
            # satisfies several entries is still selected exactly once.
            if list_heteroatoms and not any(
                all(het[code].strip() == r[code].strip() for code in het["code"])
                for het in list_heteroatoms
            ):
                continue

            if not self.water and r["name"] in water_names:
                continue

            new_structure.append(r)

        # if not heteroatoms found in structure, raise exit
        if not new_structure:
            message = (
                class_name + ": The heteroatoms given by user were not found in input structure"
            )
            fu.log(message, self.out_log)
            raise SystemExit(message)

        create_output_file(
            1,
            self.stage_io_dict["in"]["input_structure_path"],
            new_structure,
            self.stage_io_dict["out"]["output_heteroatom_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

179 

180 

def extract_heteroatoms(
    input_structure_path: str,
    output_heteroatom_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`ExtractHeteroAtoms <utils.extract_heteroatoms.ExtractHeteroAtoms>` class and
    execute the :meth:`launch() <utils.extract_heteroatoms.ExtractHeteroAtoms.launch>` method."""

    return ExtractHeteroAtoms(
        input_structure_path=input_structure_path,
        output_heteroatom_path=output_heteroatom_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the full class documentation on the functional wrapper. This has to
# run at module level: placed after the `return` inside the function it is
# unreachable and the wrapper keeps only its short docstring.
extract_heteroatoms.__doc__ = ExtractHeteroAtoms.__doc__

198 

199 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses ``-c/--config`` (optional), ``-i/--input_structure_path`` and
    ``-o/--output_heteroatom_path`` (both required), reads the properties
    through ``settings.ConfReader`` and calls :func:`extract_heteroatoms`.
    """
    parser = argparse.ArgumentParser(
        description="Extract a list of heteroatoms from a 3D structure.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    parser.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Specific args of each building block
    required_args = parser.add_argument_group("required arguments")
    required_args.add_argument(
        "-i",
        "--input_structure_path",
        required=True,
        # Kept in sync with the formats listed in the ExtractHeteroAtoms docs.
        help="Input structure file path. Accepted formats: pdb, pdbqt.",
    )
    required_args.add_argument(
        "-o",
        "--output_heteroatom_path",
        required=True,
        help="Output heteroatom file path. Accepted formats: pdb, pdbqt.",
    )

    args = parser.parse_args()
    # An absent or empty --config is passed on as None.
    properties = settings.ConfReader(config=args.config or None).get_prop_dic()

    # Specific call of each building block
    extract_heteroatoms(
        input_structure_path=args.input_structure_path,
        output_heteroatom_path=args.output_heteroatom_path,
        properties=properties,
    )


if __name__ == "__main__":
    main()