Coverage for biobb_structure_utils/utils/extract_atoms.py: 78%
68 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
1#!/usr/bin/env python3
3"""Module containing the ExtractAtoms class and the command line interface."""
5import argparse
6import re
7from pathlib import Path
8from typing import Optional
10from biobb_common.configuration import settings
11from biobb_common.generic.biobb_object import BiobbObject
12from biobb_common.tools import file_utils as fu
13from biobb_common.tools.file_utils import launchlogger
15from biobb_structure_utils.gro_lib.gro import Gro
16from biobb_structure_utils.utils.common import PDB_SERIAL_RECORDS
class ExtractAtoms(BiobbObject):
    """
    | biobb_structure_utils ExtractAtoms
    | Class to extract atoms from a 3D structure.
    | Extracts all atoms from a 3D structure that match a regular expression pattern.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        output_structure_path (str): Output structure file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/OE2_atoms.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **regular_expression_pattern** (*str*) - ("^D") Python style regular expression matching the selected atom names.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_atoms import extract_atoms
            prop = {
                'regular_expression_pattern': '^D'
            }
            extract_atoms(input_structure_path='/path/to/myStructure.pdb',
                          output_structure_path='/path/to/newStructure.pdb',
                          properties=prop)

    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_structure_path: str,
        output_structure_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the local namespace at this point; keep it right after
        # the parent constructor so no extra local names leak into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_structure_path": output_structure_path},
        }

        # Properties specific for BB
        self.regular_expression_pattern = properties.get(
            "regular_expression_pattern", "^D"
        )

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` utils.extract_atoms.ExtractAtoms object.

        The input format is chosen from the input file suffix: ``.gro``
        (case-insensitive) is handled as GRO, anything else as PDB.

        Returns:
            int: 0 when the step is skipped because of a restart, otherwise
            ``self.return_code`` (set to 0 once the extraction has finished).
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        input_path = self.io_dict["in"]["input_structure_path"]
        output_path = self.io_dict["out"]["output_structure_path"]
        if Path(input_path).suffix.lower() == ".gro":
            self._extract_gro(input_path, output_path)
        else:
            self._extract_pdb(input_path, output_path)
        self.return_code = 0
        ##########

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        # self.tmp_files.append(self.stage_io_dict.get("unique_dir", ""))
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

    def _extract_gro(self, input_path: str, output_path: str) -> None:
        """Write to ``output_path`` the atoms of the GRO file ``input_path`` selected by the pattern."""
        fu.log(
            "GRO format detected, extracting all atoms matching %s"
            % self.regular_expression_pattern,
            self.out_log,
        )
        gro_st = Gro()
        gro_st.read_gro_file(input_path)
        gro_st.select_atoms(self.regular_expression_pattern)
        if gro_st.num_of_atoms:
            fu.log(
                "%d atoms found writting GRO file" % gro_st.num_of_atoms,
                self.out_log,
                self.global_log,
            )
            gro_st.write_gro_file(output_path)
        else:
            fu.log(
                "No matching atoms found writting empty GRO file",
                self.out_log,
                self.global_log,
            )
            # Create (or truncate) the output so an empty file always exists
            with open(output_path, "w"):
                pass

    def _extract_pdb(self, input_path: str, output_path: str) -> None:
        """Copy to ``output_path`` the PDB records of ``input_path`` whose atom name matches the pattern."""
        fu.log(
            "PDB format detected, extracting all atoms matching %s"
            % self.regular_expression_pattern,
            self.out_log,
        )
        # Direct approach solution implemented to avoid the
        # issues presented in commit message (c92aab9604a6a31d13f4170ff47b231df0a588ef)
        # with the Biopython library

        # Compile once, before the output file is opened: the pattern is loop
        # invariant, and an invalid expression now fails before the output
        # file gets truncated.
        atom_name_regex = re.compile(self.regular_expression_pattern)
        atoms_match_cont = 0
        with open(input_path, "r") as input_pdb, open(output_path, "w") as output_pdb:
            for line in input_pdb:
                record = line[:6].upper().strip()
                # Avoid MODEL, ENDMDL records and empty lines
                if len(line) > 10 and record in PDB_SERIAL_RECORDS:
                    # The atom name is read from columns 13-16 of the record
                    pdb_atom_name = line[12:16].strip()
                    if atom_name_regex.search(pdb_atom_name):
                        atoms_match_cont += 1
                        output_pdb.write(line)
        if atoms_match_cont:
            fu.log(
                "%d atoms found writting PDB file" % atoms_match_cont,
                self.out_log,
                self.global_log,
            )
        else:
            fu.log(
                "No matching atoms found writting empty PDB file",
                self.out_log,
                self.global_log,
            )
def extract_atoms(
    input_structure_path: str,
    output_structure_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` class and
    execute the :meth:`launch() <utils.extract_atoms.ExtractAtoms.launch>` method."""

    return ExtractAtoms(
        input_structure_path=input_structure_path,
        output_structure_path=output_structure_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the full class documentation on the function wrapper. This assignment
# used to sit after the ``return`` inside the function body, where it could
# never run.
extract_atoms.__doc__ = ExtractAtoms.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses ``-c/--config`` (optional) plus the required
    ``-i/--input_structure_path`` and ``-o/--output_structure_path`` options,
    reads the properties through ``settings.ConfReader`` and calls
    :func:`extract_atoms` with them.
    """
    parser = argparse.ArgumentParser(
        # The description previously advertised ligand removal, which is not
        # what this building block does.
        description="Extract the atoms matching a regular expression pattern from a 3D structure.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    parser.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Specific args of each building block
    required_args = parser.add_argument_group("required arguments")
    required_args.add_argument(
        "-i", "--input_structure_path", required=True, help="Input structure file name"
    )
    required_args.add_argument(
        "-o",
        "--output_structure_path",
        required=True,
        help="Output structure file name",
    )

    args = parser.parse_args()
    # Normalise a missing or empty --config value to None
    config = args.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Specific call of each building block
    extract_atoms(
        input_structure_path=args.input_structure_path,
        output_structure_path=args.output_structure_path,
        properties=properties,
    )


if __name__ == "__main__":
    main()