Coverage for biobb_structure_utils/utils/extract_residues.py: 78%

63 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2024-06-14 19:03 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ExtractResidues class and the command line interface.""" 

4import argparse 

5from biobb_common.configuration import settings 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.tools.file_utils import launchlogger 

9from Bio.PDB.PDBParser import PDBParser 

10from biobb_structure_utils.utils.common import check_input_path, check_output_path, create_residues_list, create_biopython_residue, create_output_file 

11 

12 

class ExtractResidues(BiobbObject):
    """
    | biobb_structure_utils ExtractResidues
    | Class to extract residues from a 3D structure using Biopython.
    | Extracts a list of residues from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_residues_path (str): Output residues file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_extract_residues.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **residues** (*list*) - (None) List of comma separated res_id (will extract all residues that match the res_id) or list of dictionaries with the name | res_id | chain | model of the residues to be extracted. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_residues import extract_residues
            prop = {
                'residues': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ]
            }
            extract_residues(input_structure_path='/path/to/myStructure.pdb',
                             output_residues_path='/path/to/newResidues.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_structure_path, output_residues_path, properties=None, **kwargs) -> None:
        """Store the input/output paths and the ``residues`` property.

        Args:
            input_structure_path: Path of the structure file to read.
            output_residues_path: Path of the file the selected residues are written to.
            properties: Optional dictionary of tool parameters; ``None`` is treated as ``{}``.
            **kwargs: Accepted for call compatibility; only captured through ``locals()`` below.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. Taken right after the parent
        # constructor so that no extra local names leak into the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_residues_path": output_residues_path}
        }

        # Properties specific for BB
        self.residues = properties.get('residues', [])
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _matches(selector, residue) -> bool:
        """Return True when ``residue`` agrees with ``selector`` on every field named in ``selector['code']``."""
        return all(selector[code].strip() == residue[code].strip() for code in selector['code'])

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractResidues <utils.extract_residues.ExtractResidues>` utils.extract_residues.ExtractResidues object.

        Returns:
            int: The return code of the block; ``0`` on success or when the
            restart check decides the step must be skipped.

        Raises:
            SystemExit: If none of the residues of the input structure is selected.
        """

        self.io_dict['in']['input_structure_path'] = check_input_path(self.io_dict['in']['input_structure_path'],
                                                                      self.out_log, self.__class__.__name__)
        self.io_dict['out']['output_residues_path'] = check_output_path(self.io_dict['out']['output_residues_path'],
                                                                        self.out_log, self.__class__.__name__)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        list_residues = create_residues_list(self.residues, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure('structure', self.stage_io_dict['in']['input_structure_path'])

        # get desired residues
        new_structure = []
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            # An empty selector list keeps every residue. Otherwise a residue
            # is kept exactly once as soon as any selector matches it; any()
            # short-circuits, so overlapping or repeated selectors no longer
            # append the same residue several times.
            if not list_residues or any(self._matches(res, r) for res in list_residues):
                new_structure.append(r)

        # if not residues found in structure, raise exit
        if not new_structure:
            not_found_msg = self.__class__.__name__ + ': The residues given by user were not found in input structure'
            fu.log(not_found_msg, self.out_log)
            raise SystemExit(not_found_msg)

        create_output_file(2, self.stage_io_dict['in']['input_structure_path'], new_structure, self.stage_io_dict['out']['output_residues_path'], self.out_log)

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.tmp_files.append(self.stage_io_dict.get("unique_dir"))
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

134 

135 

def extract_residues(input_structure_path: str, output_residues_path: str, properties: dict = None, **kwargs) -> int:
    """Build an :class:`ExtractResidues <utils.extract_residues.ExtractResidues>` object from the
    given arguments and run its :meth:`launch() <utils.extract_residues.ExtractResidues.launch>` method.

    Returns:
        int: The value returned by ``launch()``.
    """
    building_block = ExtractResidues(
        input_structure_path=input_structure_path,
        output_residues_path=output_residues_path,
        properties=properties,
        **kwargs
    )
    return building_block.launch()

143 

144 

def main():
    """Command line entry point: parse the arguments, read the configuration and run the building block."""

    def _wide_help_formatter(prog):
        # Very large width so argparse does not wrap the help strings.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    parser = argparse.ArgumentParser(
        description="Extract a list of residues from a 3D structure.",
        formatter_class=_wide_help_formatter
    )
    parser.add_argument(
        '-c', '--config',
        required=False,
        help="This file can be a YAML file, JSON file or JSON string"
    )

    # Arguments specific to this building block
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument(
        '-i', '--input_structure_path',
        required=True,
        help="Input structure file path. Accepted formats: pdb."
    )
    required_args.add_argument(
        '-o', '--output_residues_path',
        required=True,
        help="Output residues file path. Accepted formats: pdb."
    )

    args = parser.parse_args()
    # A missing or empty --config value is handed to the reader as None.
    config = args.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Run the building block with the parsed paths and properties
    extract_residues(
        input_structure_path=args.input_structure_path,
        output_residues_path=args.output_residues_path,
        properties=properties
    )

163 

164 

# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()