Coverage for biobb_structure_utils/utils/extract_atoms.py: 79%

67 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-03 15:30 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ExtractAtoms class and the command line interface.""" 

4import argparse 

5import re 

6from pathlib import Path 

7from biobb_common.configuration import settings 

8from biobb_common.generic.biobb_object import BiobbObject 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.tools.file_utils import launchlogger 

11from biobb_structure_utils.gro_lib.gro import Gro 

12from biobb_structure_utils.utils.common import PDB_SERIAL_RECORDS 

13 

14 

class ExtractAtoms(BiobbObject):
    """
    | biobb_structure_utils ExtractAtoms
    | Class to extract atoms from a 3D structure.
    | Extracts all atoms from a 3D structure that match a regular expression pattern.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        output_structure_path (str): Output structure file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/OE2_atoms.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **regular_expression_pattern** (*str*) - ("^D") Python style regular expression matching the selected atom names.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_atoms import extract_atoms
            prop = {
                'regular_expression_pattern': '^D'
            }
            extract_atoms(input_structure_path='/path/to/myStructure.pdb',
                          output_structure_path='/path/to/newStructure.pdb',
                          properties=prop)

    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_structure_path, output_structure_path, properties=None, **kwargs) -> None:
        """Store the input/output paths and the tool properties.

        Args:
            input_structure_path: Path of the structure file to read.
            output_structure_path: Path of the structure file to write.
            properties: Optional dictionary of tool parameters; ``None`` is
                treated as an empty dictionary.
            **kwargs: Accepted for interface compatibility; only captured in
                the ``locals()`` snapshot below.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the local names at this point; no extra locals must be
        # created above this line or they would end up in the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_structure_path": output_structure_path}
        }

        # Properties specific for BB
        self.regular_expression_pattern = properties.get('regular_expression_pattern', '^D')

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` utils.extract_atoms.ExtractAtoms object.

        Files with a ``.gro`` suffix (case-insensitive) are handled through
        :class:`Gro`; any other suffix is processed line by line as PDB text.

        Returns:
            int: ``0`` when ``check_restart()`` is truthy, otherwise
            ``self.return_code`` (set to ``0`` once the extraction finished).
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # NOTE(review): paths are taken from self.io_dict (as in the original
        # code), not from self.stage_io_dict - confirm this is intended.
        input_path = self.io_dict['in']['input_structure_path']
        output_path = self.io_dict['out']['output_structure_path']

        if Path(input_path).suffix.lower() == '.gro':
            fu.log('GRO format detected, extracting all atoms matching %s' % self.regular_expression_pattern, self.out_log)
            gro_st = Gro()
            gro_st.read_gro_file(input_path)
            gro_st.select_atoms(self.regular_expression_pattern)
            if gro_st.num_of_atoms:
                fu.log('%d atoms found writting GRO file' % gro_st.num_of_atoms, self.out_log, self.global_log)
                gro_st.write_gro_file(output_path)
            else:
                fu.log('No matching atoms found writting empty GRO file', self.out_log, self.global_log)
                # Create (or truncate) the output so an empty file always exists.
                with open(output_path, 'w'):
                    pass

        else:
            fu.log('PDB format detected, extracting all atoms matching %s' % self.regular_expression_pattern, self.out_log)
            # Direct approach solution implemented to avoid the
            # issues presented in commit message (c92aab9604a6a31d13f4170ff47b231df0a588ef)
            # with the Biopython library

            # Compile once: the pattern is loop-invariant, and an invalid
            # pattern now fails before the output file is created.
            atom_name_regex = re.compile(self.regular_expression_pattern)
            atoms_match_cont = 0
            with open(input_path, "r") as input_pdb, open(output_path, "w") as output_pdb:
                for line in input_pdb:
                    record = line[:6].upper().strip()
                    # Avoid MODEL, ENDMDL records and empty lines
                    if len(line) > 10 and record in PDB_SERIAL_RECORDS:
                        # Atom name field of the record line (characters 12..15).
                        pdb_atom_name = line[12:16].strip()
                        if atom_name_regex.search(pdb_atom_name):
                            atoms_match_cont += 1
                            output_pdb.write(line)
            if atoms_match_cont:
                fu.log('%d atoms found writting PDB file' % atoms_match_cont, self.out_log, self.global_log)
            else:
                fu.log('No matching atoms found writting empty PDB file', self.out_log, self.global_log)
        self.return_code = 0
        ##########

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.tmp_files.append(self.stage_io_dict.get("unique_dir"))
        self.remove_tmp_files()

        # Only report (do not raise) if the expected output is missing.
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

125 

126 

def extract_atoms(input_structure_path: str, output_structure_path: str, properties: dict = None, **kwargs) -> int:
    """Build an :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` object from the
    given arguments and run its :meth:`launch() <utils.extract_atoms.ExtractAtoms.launch>` method.

    Returns:
        int: The value returned by ``launch()``.
    """

    building_block = ExtractAtoms(
        input_structure_path=input_structure_path,
        output_structure_path=output_structure_path,
        properties=properties,
        **kwargs
    )
    return building_block.launch()

134 

135 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses ``-c/--config`` (optional), ``-i/--input_structure_path`` and
    ``-o/--output_structure_path`` (both required), reads the properties
    through ``settings.ConfReader`` and calls :func:`extract_atoms`.
    """
    # The description previously read "Remove the selected ligand atoms ...",
    # which does not match this tool: it extracts atoms matching a pattern.
    parser = argparse.ArgumentParser(description="Extract all atoms from a 3D structure that match a regular expression pattern.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('-c', '--config', required=False, help="This file can be a YAML file, JSON file or JSON string")

    # Specific args of each building block
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('-i', '--input_structure_path', required=True, help="Input structure file name")
    required_args.add_argument('-o', '--output_structure_path', required=True, help="Output structure file name")

    args = parser.parse_args()
    # Any falsy --config value (missing or empty string) is normalised to None.
    config = args.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Specific call of each building block
    extract_atoms(input_structure_path=args.input_structure_path,
                  output_structure_path=args.output_structure_path,
                  properties=properties)

154 

155 

# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()