Coverage for biobb_structure_utils / utils / extract_heteroatoms.py: 86%

58 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-22 13:23 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ExtractHeteroAtoms class and the command line interface.""" 

4from typing import Optional 

5from Bio.PDB.PDBParser import PDBParser 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.tools.file_utils import launchlogger 

9 

10from biobb_structure_utils.utils.common import ( 

11 _from_string_to_list, 

12 check_format_heteroatoms, 

13 check_input_path, 

14 check_output_path, 

15 create_biopython_residue, 

16 create_output_file, 

17) 

18 

19 

class ExtractHeteroAtoms(BiobbObject):
    """
    | biobb_structure_utils ExtractHeteroAtoms
    | Class to extract hetero-atoms from a 3D structure using Biopython.
    | Extracts a list of heteroatoms from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_heteroatom_path (str): Output heteroatom file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **heteroatoms** (*list*) - (None) List of dictionaries with the name | res_id | chain | model of the heteroatoms to be extracted. Format: [{"name": "ZZ7", "res_id": "302", "chain": "B", "model": "1"}]. If empty, all the heteroatoms of the structure will be returned.
            * **water** (*bool*) - (False) Add or not waters.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_heteroatoms import extract_heteroatoms
            prop = {
                'heteroatoms': [
                    {
                        'name': 'ZZ7',
                        'res_id': '302',
                        'chain': 'B',
                        'model': '1'
                    }
                ]
            }
            extract_heteroatoms(input_structure_path='/path/to/myStructure.pdb',
                                output_heteroatom_path='/path/to/newHeteroatom.pdb',
                                properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    # Residue names that are treated as water and dropped unless the
    # ``water`` property is truthy.
    _WATER_NAMES = ("HOH", "SOL", "WAT")

    def __init__(
        self, input_structure_path, output_heteroatom_path, properties=None, **kwargs
    ) -> None:
        """Store the input/output paths and the block-specific properties.

        Args:
            input_structure_path: Path of the structure file to read.
            output_heteroatom_path: Path of the file to be written.
            properties: Optional dictionary of tool parameters; ``None`` is
                treated as an empty dictionary.
            **kwargs: Accepted and captured in ``locals_var_dict`` only.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # other local name is created in this scope.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_heteroatom_path": output_heteroatom_path},
        }

        # Properties specific for BB
        self.heteroatoms = _from_string_to_list(properties.get("heteroatoms", []))
        self.water = properties.get("water", False)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _is_water(self, residue) -> bool:
        """Return True when the residue name is one of the known water names."""
        return residue["name"] in self._WATER_NAMES

    @staticmethod
    def _matches_selector(selector, residue) -> bool:
        """Return True when every field listed in ``selector["code"]`` is equal
        (ignoring surrounding whitespace) in the selector and the residue."""
        return all(
            selector[code].strip() == residue[code].strip()
            for code in selector["code"]
        )

    def _is_selected(self, residue, selectors) -> bool:
        """Decide whether a residue belongs in the output.

        Waters are rejected unless ``self.water`` is truthy. With no selectors
        every remaining residue is accepted; otherwise the residue must match
        at least one selector.
        """
        if not self.water and self._is_water(residue):
            return False
        if not selectors:
            return True
        # any() stops at the first matching selector, so a residue that
        # satisfies several selectors is still reported only once.
        return any(self._matches_selector(sel, residue) for sel in selectors)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractHeteroAtoms <utils.extract_heteroatoms.ExtractHeteroAtoms>` utils.extract_heteroatoms.ExtractHeteroAtoms object.

        Returns:
            int: ``0`` both when the restart check short-circuits the run and
            when the extraction completes.

        Raises:
            SystemExit: If no residue of the input structure is selected.
        """

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_heteroatom_path"] = check_output_path(
            self.io_dict["out"]["output_heteroatom_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of heteroatoms from properties
        list_heteroatoms = check_format_heteroatoms(self.heteroatoms, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # get desired heteroatoms (each residue at most once)
        new_structure = []
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            if self._is_selected(r, list_heteroatoms):
                new_structure.append(r)

        # if not heteroatoms found in structure, raise exit
        if not new_structure:
            message = (
                self.__class__.__name__ + ": The heteroatoms given by user were not found in input structure"
            )
            fu.log(message, self.out_log)
            raise SystemExit(message)

        create_output_file(
            1,
            self.stage_io_dict["in"]["input_structure_path"],
            new_structure,
            self.stage_io_dict["out"]["output_heteroatom_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

174 

175 

def extract_heteroatoms(
    input_structure_path: str,
    output_heteroatom_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Create the :class:`ExtractHeteroAtoms <utils.extract_heteroatoms.ExtractHeteroAtoms>` class and
    execute the :meth:`launch() <utils.extract_heteroatoms.ExtractHeteroAtoms.launch>` method."""
    # The extra keyword arguments are forwarded as a single argument named
    # "kwargs" (not unpacked), exactly as a snapshot of this function's
    # local namespace would forward them.
    block = ExtractHeteroAtoms(
        input_structure_path=input_structure_path,
        output_heteroatom_path=output_heteroatom_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

185 

186 

# Give the functional wrapper the same documentation as the class.
extract_heteroatoms.__doc__ = ExtractHeteroAtoms.__doc__

# Command line entry point, built by the class-level ``get_main`` factory.
# Kept after the ``__doc__`` assignment above, as in the original ordering.
main = ExtractHeteroAtoms.get_main(
    extract_heteroatoms,
    "Extract a list of heteroatoms from a 3D structure.",
)

if __name__ == "__main__":
    main()