Coverage for biobb_structure_utils / utils / closest_residues.py: 89%

74 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-16 14:59 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ClosestResidues class and the command line interface.""" 

4from typing import Optional 

5import Bio.PDB 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.tools.file_utils import launchlogger 

9 

10from biobb_structure_utils.utils.common import ( 

11 _from_string_to_list, 

12 check_input_path, 

13 check_output_path, 

14 create_biopython_residue, 

15 create_output_file, 

16 create_residues_list, 

17) 

18 

19 

class ClosestResidues(BiobbObject):
    """
    | biobb_structure_utils ClosestResidues
    | Class to search closest residues from a 3D structure using Biopython.
    | Return all residues that have at least one atom within radius of center from a list of given residues.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_residues_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_closest_residues.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **residues** (*list*) - (None) List of comma separated res_id or list of dictionaries with the name | res_id | chain | model of the residues to find the closest neighbours. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **radius** (*float*) - (5) Distance in Ångströms to neighbours of the given list of residues.
            * **preserve_target** (*bool*) - (True) Whether or not to preserve the target residues in the output structure.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.closest_residues import closest_residues
            prop = {
                'residues': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ],
                'radius': 5,
                'preserve_target': False
            }
            closest_residues(input_structure_path='/path/to/myStructure.pdb',
                             output_residues_path='/path/to/newResidues.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_residues_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must stay right after the
        # parent constructor call so no extra local names leak into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_residues_path": output_residues_path},
        }

        # Properties specific for BB
        self.residues = _from_string_to_list(properties.get("residues", []))
        self.radius = properties.get("radius", 5)
        self.preserve_target = properties.get("preserve_target", True)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _matches_selection(selector, candidate) -> bool:
        """Return True when every field listed in ``selector["code"]`` is equal
        (ignoring surrounding whitespace) in ``selector`` and ``candidate``."""
        return all(
            selector[code].strip() == candidate[code].strip()
            for code in selector["code"]
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` utils.closest_residues.ClosestResidues object.

        Returns:
            int: ``0`` when the restart check short-circuits the run, otherwise
            ``self.return_code`` (set to ``0`` once the output file is written).

        Raises:
            SystemExit: If no neighbour residue is left to write.
        """

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_residues_path"] = check_output_path(
            self.io_dict["out"]["output_residues_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        list_residues = create_residues_list(self.residues, self.out_log)

        # load input into BioPython structure
        structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # format selected residues; with no selection every residue is a target.
        # any() guarantees a residue matched by several selectors is kept once.
        str_residues = []
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            if not list_residues or any(
                self._matches_selection(res, r) for res in list_residues
            ):
                str_residues.append(r)

        # get target residues in BioPython format
        target_residues = []
        for sr in str_residues:
            try:
                chain = structure[int(sr["model"]) - 1][sr["chain"]]
                res_id = int(sr["res_id"])
                try:
                    target = chain[res_id]
                except KeyError:
                    # not found as a standard residue: retry as HETATM. This
                    # lookup lives in its own nested try so that a second
                    # KeyError is still handled by the outer handler below.
                    target = chain["H_" + sr["name"], res_id, " "]
            except Exception:
                fu.log(
                    self.__class__.__name__
                    + ": Unable to find residue %s" % sr["res_id"],
                    self.out_log,
                )
                continue
            target_residues.append(target)

        # get all atoms from target_residues
        target_atoms = Bio.PDB.Selection.unfold_entities(target_residues, "A")
        # get all atoms of input structure
        all_atoms = Bio.PDB.Selection.unfold_entities(structure, "A")
        # generate NeighborSearch object
        ns = Bio.PDB.NeighborSearch(all_atoms)
        # set comprehension: each residue within radius of any target atom, once
        nearby_residues = {
            res
            for center_atom in target_atoms
            for res in ns.search(center_atom.coord, self.radius, "R")
        }

        # format nearby residues to pure python objects
        neighbor_residues = [
            create_biopython_residue(residue) for residue in nearby_residues
        ]

        # if preserve_target == False, don't add the residues of self.residues to the final structure
        if not self.preserve_target:
            neighbor_residues = [x for x in neighbor_residues if x not in str_residues]

        fu.log("Found %d nearby residues" % len(neighbor_residues), self.out_log)

        if not neighbor_residues:
            fu.log(
                self.__class__.__name__ + ": No neighbour residues found, exiting",
                self.out_log,
            )
            raise SystemExit(
                self.__class__.__name__ + ": No neighbour residues found, exiting"
            )

        create_output_file(
            0,
            self.stage_io_dict["in"]["input_structure_path"],
            neighbor_residues,
            self.stage_io_dict["out"]["output_residues_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

212 

213 

def closest_residues(
    input_structure_path: str,
    output_residues_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` object
    from the given arguments and run its
    :meth:`launch() <utils.closest_residues.ClosestResidues.launch>` method,
    returning whatever ``launch()`` returns."""
    # Extra keyword arguments are handed over packed under the single keyword
    # ``kwargs`` (not unpacked), which is what forwarding locals() amounts to.
    building_block = ClosestResidues(
        input_structure_path=input_structure_path,
        output_residues_path=output_residues_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()

223 

224 

# The function wrapper shares the class documentation.
closest_residues.__doc__ = ClosestResidues.__doc__

# NOTE(review): get_main is not defined in this file; presumably it builds the
# command line entry point from the wrapper and its description -- confirm.
main = ClosestResidues.get_main(
    closest_residues,
    "Search closest residues to a list of given residues.",
)

if __name__ == "__main__":
    main()