Coverage for biobb_structure_utils / utils / extract_atoms.py: 91%

57 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-22 13:23 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the ExtractAtoms class and the command line interface.""" 

4import re 

5from pathlib import Path 

6from typing import Optional 

7from biobb_common.generic.biobb_object import BiobbObject 

8from biobb_common.tools import file_utils as fu 

9from biobb_common.tools.file_utils import launchlogger 

10 

11from biobb_structure_utils.gro_lib.gro import Gro 

12from biobb_structure_utils.utils.common import PDB_SERIAL_RECORDS 

13 

14 

class ExtractAtoms(BiobbObject):
    """
    | biobb_structure_utils ExtractAtoms
    | Class to extract atoms from a 3D structure.
    | Extracts all atoms from a 3D structure that match a regular expression pattern.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        output_structure_path (str): Output structure file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/OE2_atoms.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **regular_expression_pattern** (*str*) - ("^D") Python style regular expression matching the selected atom names.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_atoms import extract_atoms
            prop = {
                'regular_expression_pattern': '^D'
            }
            extract_atoms(input_structure_path='/path/to/myStructure.pdb',
                          output_structure_path='/path/to/newStructure.pdb',
                          properties=prop)

    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_structure_path: str,
        output_structure_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        # A missing (or otherwise falsy) properties argument means "use defaults".
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. Keep this line before any
        # other local variable is bound, otherwise the snapshot would grow.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_structure_path": output_structure_path},
        }

        # Properties specific for BB
        self.regular_expression_pattern = properties.get(
            "regular_expression_pattern", "^D"
        )

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` utils.extract_atoms.ExtractAtoms object.

        The input is handled as GRO when its file extension is ``.gro``
        (case-insensitive); any other extension is handled as PDB. When no
        atom matches, an empty output file is still written.

        Returns:
            int: ``0`` when :meth:`check_restart` reports that execution can
            be skipped, otherwise ``self.return_code`` (set to ``0`` once the
            extraction has finished).
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # Paths are read after stage_files(), exactly where the lookups used to happen.
        input_path = self.io_dict["in"]["input_structure_path"]
        output_path = self.io_dict["out"]["output_structure_path"]
        extension = Path(input_path).suffix.lower()

        if extension == ".gro":
            fu.log(
                "GRO format detected, extracting all atoms matching %s"
                % self.regular_expression_pattern,
                self.out_log,
            )
            gro_st = Gro()
            gro_st.read_gro_file(input_path)
            gro_st.select_atoms(self.regular_expression_pattern)
            if gro_st.num_of_atoms:
                fu.log(
                    "%d atoms found writting GRO file" % gro_st.num_of_atoms,
                    self.out_log,
                    self.global_log,
                )
                gro_st.write_gro_file(output_path)
            else:
                fu.log(
                    "No matching atoms found writting empty GRO file",
                    self.out_log,
                    self.global_log,
                )
                # Create (or truncate) the output so an empty file always exists.
                with open(output_path, "w"):
                    pass

        else:
            fu.log(
                "PDB format detected, extracting all atoms matching %s"
                % self.regular_expression_pattern,
                self.out_log,
            )
            # Direct aproach solution implemented to avoid the
            # issues presented in commit message (c92aab9604a6a31d13f4170ff47b231df0a588ef)
            # with the Biopython library

            # Compile once instead of handing the raw pattern to re.search()
            # for every line; an invalid pattern now fails before the output
            # file is opened (and truncated).
            atom_name_regex = re.compile(self.regular_expression_pattern)
            atoms_match_cont = 0
            with open(input_path, "r") as input_pdb, open(
                output_path, "w"
            ) as output_pdb:
                for line in input_pdb:
                    record = line[:6].upper().strip()
                    # Avoid MODEL, ENDMDL records and empty lines
                    if len(line) > 10 and record in PDB_SERIAL_RECORDS:
                        # Atom name is read from the fixed slice 12:16 of the record.
                        pdb_atom_name = line[12:16].strip()
                        if atom_name_regex.search(pdb_atom_name):
                            atoms_match_cont += 1
                            output_pdb.write(line)
            if atoms_match_cont:
                fu.log(
                    "%d atoms found writting PDB file" % atoms_match_cont,
                    self.out_log,
                    self.global_log,
                )
            else:
                fu.log(
                    "No matching atoms found writting empty PDB file",
                    self.out_log,
                    self.global_log,
                )
        self.return_code = 0
        ##########

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

158 

159 

def extract_atoms(
    input_structure_path: str,
    output_structure_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build an :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` object and
    run its :meth:`launch() <utils.extract_atoms.ExtractAtoms.launch>` method,
    returning whatever ``launch()`` returns."""
    # Extra keyword arguments are deliberately forwarded as ONE keyword named
    # "kwargs" (not unpacked), which is what expanding locals() produced.
    block = ExtractAtoms(
        input_structure_path=input_structure_path,
        output_structure_path=output_structure_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

169 

170 

# Reuse the class docstring so the function exposes the same argument and
# property documentation as ExtractAtoms.
extract_atoms.__doc__ = ExtractAtoms.__doc__
# Command line entry point built by BiobbObject.get_main. The second argument
# is presumably the description shown in the CLI help (confirm against
# biobb_common); it previously described ligand removal, which is not what
# this tool does.
main = ExtractAtoms.get_main(
    extract_atoms,
    "Extract all atoms from a 3D structure that match a regular expression pattern.",
)

if __name__ == "__main__":
    main()