Coverage for biobb_structure_utils/utils/remove_molecules.py: 79%

66 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2024-06-14 19:03 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the RemoveMolecules class and the command line interface.""" 

4import argparse 

5from biobb_common.configuration import settings 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.tools.file_utils import launchlogger 

9from Bio.PDB.PDBParser import PDBParser 

10from biobb_structure_utils.utils.common import check_input_path, check_output_path, create_residues_list, create_biopython_residue, create_output_file 

11 

12 

class RemoveMolecules(BiobbObject):
    """
    | biobb_structure_utils RemoveMolecules
    | Class to remove molecules from a 3D structure using Biopython.
    | Remove a list of molecules from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_molecules_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_remove_molecules.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **molecules** (*list*) - (None) List of comma separated res_id (will remove all molecules that match the res_id) or list of dictionaries with the name | res_id | chain | model of the molecules to be removed. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.remove_molecules import remove_molecules
            prop = {
                'molecules': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ]
            }
            remove_molecules(input_structure_path='/path/to/myStructure.pdb',
                             output_molecules_path='/path/to/newMolecules.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_structure_path, output_molecules_path, properties=None, **kwargs) -> None:
        """Store the I/O paths and the ``molecules`` property, then validate them.

        Args:
            input_structure_path: Path of the structure file to read.
            output_molecules_path: Path of the structure file to write.
            properties: Tool parameters; ``None`` is treated as an empty dict.
            **kwargs: Accepted for interface compatibility; only captured in
                ``locals_var_dict``.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; no extra locals may be
        # introduced before this line or they would end up in the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_molecules_path": output_molecules_path}
        }

        # Properties specific for BB
        self.molecules = properties.get('molecules', [])
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _residue_matches(selector, residue) -> bool:
        """Return True when ``residue`` agrees with ``selector`` on every field listed in ``selector['code']``.

        Values are compared after stripping surrounding whitespace. A selector
        with an empty ``code`` list matches every residue.
        """
        return all(selector[code].strip() == residue[code].strip() for code in selector['code'])

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` utils.remove_molecules.RemoveMolecules object.

        Returns:
            int: ``0`` immediately when ``check_restart()`` is true, otherwise
            ``self.return_code`` (set to ``0`` once the output file is written).

        Raises:
            SystemExit: If no residue of the input structure was selected for removal.
        """

        class_name = self.__class__.__name__
        self.io_dict['in']['input_structure_path'] = check_input_path(self.io_dict['in']['input_structure_path'],
                                                                      self.out_log, class_name)
        self.io_dict['out']['output_molecules_path'] = check_output_path(self.io_dict['out']['output_molecules_path'],
                                                                         self.out_log, class_name)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        # NOTE(review): each entry is used below as a mapping with a 'code'
        # list naming the fields to compare -- confirm against create_residues_list.
        list_residues = create_residues_list(self.molecules, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure('structure', self.stage_io_dict['in']['input_structure_path'])

        remove_structure = []
        whole_structure = []
        # get desired residues
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            whole_structure.append(r)
            # With no selectors every residue is marked for removal (unchanged
            # behaviour). Otherwise a residue is marked exactly once as soon as
            # any selector matches, instead of once per matching selector.
            if not list_residues or any(self._residue_matches(sel, r) for sel in list_residues):
                remove_structure.append(r)

        # if not residues found in structure, raise exit
        if not remove_structure:
            fu.log(class_name + ': The residues given by user were not found in input structure', self.out_log)
            raise SystemExit(class_name + ': The residues given by user were not found in input structure')

        # substract residues (remove_structure) from whole_structure
        new_structure = [x for x in whole_structure if x not in remove_structure]

        create_output_file(0, self.stage_io_dict['in']['input_structure_path'], new_structure, self.stage_io_dict['out']['output_molecules_path'], self.out_log)

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.tmp_files.append(self.stage_io_dict.get("unique_dir"))
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

139 

140 

def remove_molecules(input_structure_path: str, output_molecules_path: str, properties: dict = None, **kwargs) -> int:
    """Build a :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` object from the
    given arguments and run its :meth:`launch() <utils.remove_molecules.RemoveMolecules.launch>` method.

    All arguments are forwarded unchanged to the class constructor; the value
    returned by ``launch()`` is returned to the caller.
    """

    building_block = RemoveMolecules(
        input_structure_path=input_structure_path,
        output_molecules_path=output_molecules_path,
        properties=properties,
        **kwargs
    )
    return building_block.launch()

148 

149 

def main():
    """Command line entry point: parse the arguments, read the configuration and run the building block."""

    def wide_formatter(prog):
        # Very large width so argparse does not re-wrap the help text.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    parser = argparse.ArgumentParser(
        description="Removes a list of molecules from a 3D structure.",
        formatter_class=wide_formatter
    )
    parser.add_argument('-c', '--config', required=False,
                        help="This file can be a YAML file, JSON file or JSON string")

    # Arguments specific to this building block
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('-i', '--input_structure_path', required=True,
                               help="Input structure file path. Accepted formats: pdb.")
    required_args.add_argument('-o', '--output_molecules_path', required=True,
                               help="Output molecules file path. Accepted formats: pdb.")

    args = parser.parse_args()

    # A missing or empty --config value is passed on as None
    config = args.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Run the building block with the parsed paths and properties
    remove_molecules(
        input_structure_path=args.input_structure_path,
        output_molecules_path=args.output_molecules_path,
        properties=properties
    )

168 

169 

# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()