Coverage for biobb_structure_utils / utils / remove_molecules.py: 91%

56 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-22 13:23 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the RemoveMolecules class and the command line interface.""" 

4from typing import Optional 

5from Bio.PDB.PDBParser import PDBParser 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.tools.file_utils import launchlogger 

9 

10from biobb_structure_utils.utils.common import ( 

11 _from_string_to_list, 

12 check_input_path, 

13 check_output_path, 

14 create_biopython_residue, 

15 create_output_file, 

16 create_residues_list, 

17) 

18 

19 

class RemoveMolecules(BiobbObject):
    """
    | biobb_structure_utils RemoveMolecules
    | Class to remove molecules from a 3D structure using Biopython.
    | Remove a list of molecules from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_molecules_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_remove_molecules.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **molecules** (*list*) - (None) List of comma separated res_id (will remove all molecules that match the res_id) or list of dictionaries with the name | res_id | chain | model of the molecules to be removed. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.remove_molecules import remove_molecules
            prop = {
                'molecules': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ]
            }
            remove_molecules(input_structure_path='/path/to/myStructure.pdb',
                             output_molecules_path='/path/to/newMolecules.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_structure_path: str,
        output_molecules_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the local names bound at this point, i.e. the
        # constructor arguments; taken before anything else is assigned.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_molecules_path": output_molecules_path},
        }

        # Properties specific for BB
        self.molecules = _from_string_to_list(properties.get("molecules", []))
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _is_selected(residue: dict, selectors) -> bool:
        """Tell whether ``residue`` is targeted for removal by ``selectors``.

        Args:
            residue: Residue description built by ``create_biopython_residue``.
            selectors: Value returned by ``create_residues_list``; each entry
                lists under its ``"code"`` key the fields that are compared.

        Returns:
            ``True`` when ``selectors`` is empty (every residue is selected)
            or when, for at least one selector, all the compared fields equal
            the residue ones once surrounding whitespace is stripped.
        """
        if not selectors:
            return True
        return any(
            all(
                selector[code].strip() == residue[code].strip()
                for code in selector["code"]
            )
            for selector in selectors
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` utils.remove_molecules.RemoveMolecules object.

        Returns:
            ``0`` when a restart is detected by ``check_restart()``, otherwise
            ``self.return_code`` (set to ``0`` once the output is written).

        Raises:
            SystemExit: If no residue of the input structure is selected for
                removal.
        """

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_molecules_path"] = check_output_path(
            self.io_dict["out"]["output_molecules_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        list_residues = create_residues_list(self.molecules, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # Single pass over the structure: residues selected for removal are
        # dropped, the others are kept in their original order. Selection
        # depends only on the residue fields, so this yields the same result
        # as building both lists and subtracting one from the other, without
        # the quadratic membership scan.
        # NOTE: when no molecules are given every residue is selected, hence
        # the output holds no residue at all.
        new_structure = []
        any_removed = False
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            if self._is_selected(r, list_residues):
                any_removed = True
            else:
                new_structure.append(r)

        # if no residue was found in the structure, log the problem and exit
        if not any_removed:
            message = (
                self.__class__.__name__ + ": The residues given by user were not found in input structure"
            )
            fu.log(message, self.out_log)
            raise SystemExit(message)

        create_output_file(
            0,
            self.stage_io_dict["in"]["input_structure_path"],
            new_structure,
            self.stage_io_dict["out"]["output_molecules_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

166 

167 

def remove_molecules(
    input_structure_path: str,
    output_molecules_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` object from the
    given arguments and run its :meth:`launch() <utils.remove_molecules.RemoveMolecules.launch>` method.

    Returns:
        The value returned by ``launch()``.
    """
    # Read locals() first, while only the call arguments are bound, so that
    # exactly those names are forwarded to the constructor as keywords.
    call_arguments = dict(locals())
    building_block = RemoveMolecules(**call_arguments)
    return building_block.launch()

177 

178 

# The functional wrapper exposes the same documentation as the class.
remove_molecules.__doc__ = RemoveMolecules.__doc__

# Entry point obtained from RemoveMolecules.get_main (defined outside this
# file; presumably it wraps remove_molecules as a command line interface).
main = RemoveMolecules.get_main(
    remove_molecules,
    "Removes a list of molecules from a 3D structure.",
)

# Run the entry point only when the module is executed as a script.
if __name__ == "__main__":
    main()