Coverage for biobb_structure_utils/utils/extract_molecule.py: 76%

59 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-03 15:30 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ExtractMolecule class and the command line interface.""" 

4import argparse 

5from biobb_common.configuration import settings 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools.file_utils import launchlogger 

8from biobb_common.tools import file_utils as fu 

9from biobb_structure_utils.utils.common import check_input_path, check_output_path 

10 

11 

class ExtractMolecule(BiobbObject):
    """
    | biobb_structure_utils ExtractMolecule
    | This class is a wrapper of the Structure Checking tool to extract a molecule from a 3D structure.
    | Wrapper for the `Structure Checking <https://github.com/bioexcel/biobb_structure_checking>`_ tool to extract a molecule from a 3D structure.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/extract_molecule.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_molecule_path (str): Output molecule file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_extract_molecule.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **molecule_type** (*string*) - ("all") type of molecule to be extracted. If all, only waters and ligands will be removed from the original structure. Values: all, protein, na, dna, rna, chains.
            * **chains** (*list*) - (None) if chains selected in **molecule_type**, specify them here, e.g: ["A", "C", "N"].
            * **binary_path** (*string*) - ("check_structure") path to the check_structure application
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_molecule import extract_molecule
            prop = {
                'molecule_type': 'chains',
                'chains': ['A', 'N', 'F']
            }
            extract_molecule(input_structure_path='/path/to/myStructure.pdb',
                             output_molecule_path='/path/to/newMolecule.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: Structure Checking from MDWeb
            * version: >=3.0.3
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_structure_path, output_molecule_path, properties=None, **kwargs) -> None:
        # A missing properties argument is treated as an empty dictionary
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments (presumably consumed by the
        # parent class helpers -- confirm against BiobbObject)
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_molecule_path": output_molecule_path}
        }

        # Properties specific for BB
        self.molecule_type = properties.get('molecule_type', 'all')
        self.chains = properties.get('chains', [])
        self.binary_path = properties.get('binary_path', 'check_structure')
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def create_command_list(self, command_list_path):
        """ Creates a command list file as a input for structure checking

        The file always contains the instructions that remove ligands and
        waters. When ``molecule_type`` is not ``'all'`` a ``chains --select``
        instruction is appended, built either from ``self.chains`` (for
        ``molecule_type == 'chains'``) or from ``molecule_type`` itself.

        ``self.chains`` may be a list of chain ids, a single comma separated
        string, or ``None``. Ids are converted with ``str()`` so that values
        parsed as numbers from a configuration file are accepted. If no chain
        is given the ``chains --select`` instruction is omitted instead of
        writing an instruction without a value.

        Args:
            command_list_path (str): Path of the command list file to write.

        Returns:
            str: The same ``command_list_path`` that was received.
        """
        instructions_list = ['ligands --remove All', 'water --remove Yes']

        if self.molecule_type == 'chains':
            chains = self.chains
            if chains is None:
                chains = []
            elif isinstance(chains, str):
                # joining a bare string would split it character by character
                chains = chains.split(',')
            selection = ','.join(str(chain) for chain in chains)
            if selection:
                instructions_list.append('chains --select ' + selection)
        elif self.molecule_type != 'all':
            instructions_list.append('chains --select ' + self.molecule_type)

        with open(command_list_path, 'w') as clp:
            for line in instructions_list:
                clp.write(line.strip() + '\n')

        return command_list_path

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractMolecule <utils.extract_molecule.ExtractMolecule>` utils.extract_molecule.ExtractMolecule object."""

        # Both paths are replaced by whatever the check helpers return
        self.io_dict['in']['input_structure_path'] = check_input_path(self.io_dict['in']['input_structure_path'], self.out_log, self.__class__.__name__)
        self.io_dict['out']['output_molecule_path'] = check_output_path(self.io_dict['out']['output_molecule_path'], self.out_log, self.__class__.__name__)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # create temporary folder
        tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % tmp_folder, self.out_log)

        # create command list file
        command_list_file = self.create_command_list(tmp_folder + '/extract_prot.lst')

        # run command line
        self.cmd = [self.binary_path,
                    '-i', self.io_dict['in']['input_structure_path'],
                    '-o', self.io_dict['out']['output_molecule_path'],
                    '--force_save',
                    '--non_interactive',
                    'command_list', '--list', command_list_file]

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.tmp_files.extend([
            self.stage_io_dict.get("unique_dir"),
            tmp_folder
        ])
        self.remove_tmp_files()

        # Final check of the outputs; asked not to raise on failure
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

134 

135 

def extract_molecule(input_structure_path: str, output_molecule_path: str, properties: dict = None, **kwargs) -> int:
    """Build an :class:`ExtractMolecule <utils.extract_molecule.ExtractMolecule>` object from the
    given arguments and run its :meth:`launch() <utils.extract_molecule.ExtractMolecule.launch>` method,
    returning the value that ``launch()`` returns."""

    building_block = ExtractMolecule(
        input_structure_path=input_structure_path,
        output_molecule_path=output_molecule_path,
        properties=properties,
        **kwargs
    )
    return building_block.launch()

143 

144 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses ``-c/--config`` (optional) plus the required
    ``-i/--input_structure_path`` and ``-o/--output_molecule_path`` arguments,
    reads the properties through ``settings.ConfReader`` and calls
    :func:`extract_molecule` with them.
    """
    parser = argparse.ArgumentParser(description="Extract a molecule from a 3D structure.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('-c', '--config', required=False, help="This file can be a YAML file, JSON file or JSON string")

    # Specific args of each building block
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('-i', '--input_structure_path', required=True, help="Input structure file path. Accepted formats: pdb, pdbqt.")
    # The help text describes the molecule written by this block (it used to
    # mention a heteroatom file, wording that belongs to another block)
    required_args.add_argument('-o', '--output_molecule_path', required=True, help="Output molecule file path. Accepted formats: pdb, pdbqt.")

    args = parser.parse_args()
    # An empty or missing --config value is passed on as None
    config = args.config if args.config else None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Specific call of each building block
    extract_molecule(input_structure_path=args.input_structure_path,
                     output_molecule_path=args.output_molecule_path,
                     properties=properties)

163 

164 

# Run the command line interface only when the module is executed as a script
if __name__ == "__main__":
    main()