Coverage for biobb_structure_utils/utils/closest_residues.py: 79%

85 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 11:54 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ClosestResidues class and the command line interface.""" 

4 

5import argparse 

6from typing import Optional 

7 

8import Bio.PDB 

9from biobb_common.configuration import settings 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13 

14from biobb_structure_utils.utils.common import ( 

15 _from_string_to_list, 

16 check_input_path, 

17 check_output_path, 

18 create_biopython_residue, 

19 create_output_file, 

20 create_residues_list, 

21) 

22 

23 

class ClosestResidues(BiobbObject):
    """
    | biobb_structure_utils ClosestResidues
    | Class to search closest residues from a 3D structure using Biopython.
    | Return all residues that have at least one atom within radius of center from a list of given residues.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_residues_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_closest_residues.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **residues** (*list*) - (None) List of comma separated res_id or list of dictionaries with the name | res_id | chain | model of the residues to find the closest neighbours. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **radius** (*float*) - (5) Distance in Ångströms to neighbours of the given list of residues.
            * **preserve_target** (*bool*) - (True) Whether or not to preserve the target residues in the output structure.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.closest_residues import closest_residues
            prop = {
                'residues': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ],
                'radius': 5,
                'preserve_target': False
            }
            closest_residues(input_structure_path='/path/to/myStructure.pdb',
                             output_residues_path='/path/to/newResidues.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_residues_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # further local is created so that it only holds the call parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_residues_path": output_residues_path},
        }

        # Properties specific for BB
        self.residues = _from_string_to_list(properties.get("residues", []))
        self.radius = properties.get("radius", 5)
        self.preserve_target = properties.get("preserve_target", True)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _residue_matches(selection, candidate) -> bool:
        """Return True when ``candidate`` agrees with ``selection`` on every key listed in ``selection["code"]``.

        Values are compared after stripping surrounding whitespace.
        """
        return all(
            selection[code].strip() == candidate[code].strip()
            for code in selection["code"]
        )

    @staticmethod
    def _find_target_residue(structure, sr):
        """Look up in ``structure`` the Biopython residue described by ``sr``.

        The residue is searched first by its plain integer id and, if that
        fails, as a hetero residue (``"H_" + name``).

        Returns:
            The residue found, or None when model, chain or residue can not
            be resolved.
        """
        try:
            # Model numbers in ``sr`` are 1-based, structure children are
            # indexed from 0.
            chain = structure[int(sr["model"]) - 1][sr["chain"]]
            res_id = int(sr["res_id"])
        except (KeyError, TypeError, ValueError):
            return None

        try:
            return chain[res_id]
        except KeyError:
            pass

        # Each fallback needs its own try block: an exception raised inside an
        # ``except`` handler is not caught by the sibling handlers.
        try:
            return chain["H_" + sr["name"], res_id, " "]
        except KeyError:
            return None

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` utils.closest_residues.ClosestResidues object.

        Returns:
            int: 0 when the run is skipped by the restart check, otherwise
            ``self.return_code``.

        Raises:
            SystemExit: When no neighbour residue is left to write.
        """

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_residues_path"] = check_output_path(
            self.io_dict["out"]["output_residues_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        list_residues = create_residues_list(self.residues, self.out_log)

        # load input into BioPython structure
        structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # format selected residues; an empty selection targets every residue.
        # any() keeps each residue at most once even if several selections
        # match it.
        str_residues = []
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            if not list_residues or any(
                self._residue_matches(res, r) for res in list_residues
            ):
                str_residues.append(r)

        # get target residues in BioPython format
        target_residues = []
        for sr in str_residues:
            target = self._find_target_residue(structure, sr)
            if target is None:
                # Skip residues that can not be resolved instead of aborting.
                fu.log(
                    "%s: Unable to find residue %s"
                    % (self.__class__.__name__, sr["res_id"]),
                    self.out_log,
                )
                continue
            target_residues.append(target)

        # get all atoms from target_residues
        target_atoms = Bio.PDB.Selection.unfold_entities(target_residues, "A")
        # get all atoms of input structure
        all_atoms = Bio.PDB.Selection.unfold_entities(structure, "A")
        # generate NeighborSearch object
        ns = Bio.PDB.NeighborSearch(all_atoms)
        # a set, so a residue close to several target atoms is kept only once
        nearby_residues = {
            res
            for center_atom in target_atoms
            for res in ns.search(center_atom.coord, self.radius, "R")
        }

        # format nearby residues to pure python objects
        neighbor_residues = [
            create_biopython_residue(residue) for residue in nearby_residues
        ]

        # if preserve_target == False, don't add the residues of self.residues to the final structure
        if not self.preserve_target:
            neighbor_residues = [x for x in neighbor_residues if x not in str_residues]

        fu.log("Found %d nearby residues" % len(neighbor_residues), self.out_log)

        if not neighbor_residues:
            fu.log(
                self.__class__.__name__ + ": No neighbour residues found, exiting",
                self.out_log,
            )
            raise SystemExit(
                self.__class__.__name__ + ": No neighbour residues found, exiting"
            )

        create_output_file(
            0,
            self.stage_io_dict["in"]["input_structure_path"],
            neighbor_residues,
            self.stage_io_dict["out"]["output_residues_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

217 

218 

def closest_residues(
    input_structure_path: str,
    output_residues_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`ClosestResidues <utils.closest_residues.ClosestResidues>` class and
    execute the :meth:`launch() <utils.closest_residues.ClosestResidues.launch>` method."""

    return ClosestResidues(
        input_structure_path=input_structure_path,
        output_residues_path=output_residues_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the class documentation on the function wrapper. This has to run at
# module level: placed after the ``return`` inside the function it is
# unreachable and the assignment never happens.
closest_residues.__doc__ = ClosestResidues.__doc__

236 

237 

def main():
    """Command line entry point: read the arguments and run the building block.

    Parses the optional configuration and the two required file paths, loads
    the properties through ``settings.ConfReader`` and calls
    ``closest_residues`` with them.
    """
    cli = argparse.ArgumentParser(
        description="Search closest residues to a list of given residues.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Specific args of each building block, declared as (short, long, help)
    mandatory_options = (
        (
            "-i",
            "--input_structure_path",
            "Input structure file path. Accepted formats: pdb.",
        ),
        (
            "-o",
            "--output_residues_path",
            "Output residues file path. Accepted formats: pdb.",
        ),
    )
    mandatory_group = cli.add_argument_group("required arguments")
    for short_flag, long_flag, help_text in mandatory_options:
        mandatory_group.add_argument(
            short_flag, long_flag, required=True, help=help_text
        )

    parsed = cli.parse_args()
    # A missing or empty --config is handed to the reader as None
    reader = settings.ConfReader(config=parsed.config or None)
    properties = reader.get_prop_dic()

    # Specific call of each building block
    closest_residues(
        input_structure_path=parsed.input_structure_path,
        output_residues_path=parsed.output_residues_path,
        properties=properties,
    )


# Run the command line interface only when executed as a script
if __name__ == "__main__":
    main()