Coverage for biobb_structure_utils/utils/remove_molecules.py: 78%

67 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 11:54 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the RemoveMolecules class and the command line interface.""" 

4 

5import argparse 

6from typing import Optional 

7 

8from Bio.PDB.PDBParser import PDBParser 

9from biobb_common.configuration import settings 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13 

14from biobb_structure_utils.utils.common import ( 

15 _from_string_to_list, 

16 check_input_path, 

17 check_output_path, 

18 create_biopython_residue, 

19 create_output_file, 

20 create_residues_list, 

21) 

22 

23 

class RemoveMolecules(BiobbObject):
    """
    | biobb_structure_utils RemoveMolecules
    | Class to remove molecules from a 3D structure using Biopython.
    | Remove a list of molecules from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_molecules_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_remove_molecules.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **molecules** (*list*) - (None) List of comma separated res_id (will remove all molecules that match the res_id) or list of dictionaries with the name | res_id | chain | model of the molecules to be removed. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.remove_molecules import remove_molecules
            prop = {
                'molecules': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ]
            }
            remove_molecules(input_structure_path='/path/to/myStructure.pdb',
                             output_molecules_path='/path/to/newMolecules.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_molecules_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. It must be taken before any
        # further local variable is created, otherwise the snapshot changes.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_molecules_path": output_molecules_path},
        }

        # Properties specific for BB
        self.molecules = _from_string_to_list(properties.get("molecules", []))
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _matches_any_selector(residue, selectors) -> bool:
        """Tell whether ``residue`` matches at least one entry of ``selectors``.

        Each selector lists, under its ``"code"`` entry, the field names that
        have to be compared; a selector matches when every listed field is
        equal (ignoring surrounding whitespace) in selector and residue.
        Evaluation stops at the first matching selector.
        """
        return any(
            all(
                selector[code].strip() == residue[code].strip()
                for code in selector["code"]
            )
            for selector in selectors
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` utils.remove_molecules.RemoveMolecules object.

        Returns:
            int: ``0`` when the run is skipped by the restart check, otherwise
            the value of ``self.return_code`` (set to ``0`` once the output
            file has been written).

        Raises:
            SystemExit: If no residue of the input structure is selected for
                removal.
        """

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_molecules_path"] = check_output_path(
            self.io_dict["out"]["output_molecules_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        selectors = create_residues_list(self.molecules, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # Single pass over the structure: a residue is either selected for
        # removal or kept. Selection depends only on the residue's own field
        # values, so partitioning here yields the same kept residues as
        # collecting the removed ones first and subtracting them afterwards,
        # without the quadratic list-membership scan.
        kept_residues = []
        removed_any = False
        for residue in structure.get_residues():
            candidate = create_biopython_residue(residue)
            # With no selectors at all, every residue is selected for removal.
            if not selectors or self._matches_any_selector(candidate, selectors):
                removed_any = True
            else:
                kept_residues.append(candidate)

        # if no residues were selected in the structure, log and exit
        if not removed_any:
            message = (
                self.__class__.__name__
                + ": The residues given by user were not found in input structure"
            )
            fu.log(message, self.out_log)
            raise SystemExit(message)

        create_output_file(
            0,
            self.stage_io_dict["in"]["input_structure_path"],
            kept_residues,
            self.stage_io_dict["out"]["output_molecules_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporary files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

171 

172 

def remove_molecules(
    input_structure_path: str,
    output_molecules_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` class and
    execute the :meth:`launch() <utils.remove_molecules.RemoveMolecules.launch>` method."""

    return RemoveMolecules(
        input_structure_path=input_structure_path,
        output_molecules_path=output_molecules_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the full building-block documentation on the function as well.
# This assignment has to live at module level: placed inside the function
# body after the ``return`` it can never execute.
remove_molecules.__doc__ = RemoveMolecules.__doc__

190 

191 

def main():
    """Command line entry point: parse the arguments and run the building block.

    Reads the optional configuration (``-c``) plus the required input and
    output paths (``-i`` / ``-o``), turns the configuration into a properties
    dictionary and hands everything to :func:`remove_molecules`.
    """

    def _wide_help_formatter(prog):
        # Very large width so help strings are not wrapped.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description="Removes a list of molecules from a 3D structure.",
        formatter_class=_wide_help_formatter,
    )
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Arguments specific to this building block
    io_group = cli.add_argument_group("required arguments")
    io_group.add_argument(
        "-i",
        "--input_structure_path",
        required=True,
        help="Input structure file path. Accepted formats: pdb.",
    )
    io_group.add_argument(
        "-o",
        "--output_molecules_path",
        required=True,
        help="Output molecules file path. Accepted formats: pdb.",
    )

    parsed = cli.parse_args()
    # An absent or empty --config value is normalised to None.
    config = parsed.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Run the building block with the parsed paths and properties
    remove_molecules(
        input_structure_path=parsed.input_structure_path,
        output_molecules_path=parsed.output_molecules_path,
        properties=properties,
    )


if __name__ == "__main__":
    main()