Coverage for biobb_structure_utils / utils / extract_atoms.py: 91%
57 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-22 13:23 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-22 13:23 +0000
1#!/usr/bin/env python3
3"""Module containing the ExtractAtoms class and the command line interface."""
4import re
5from pathlib import Path
6from typing import Optional
7from biobb_common.generic.biobb_object import BiobbObject
8from biobb_common.tools import file_utils as fu
9from biobb_common.tools.file_utils import launchlogger
11from biobb_structure_utils.gro_lib.gro import Gro
12from biobb_structure_utils.utils.common import PDB_SERIAL_RECORDS
class ExtractAtoms(BiobbObject):
    """
    | biobb_structure_utils ExtractAtoms
    | Class to extract atoms from a 3D structure.
    | Extracts all atoms from a 3D structure that match a regular expression pattern.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        output_structure_path (str): Output structure file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/OE2_atoms.pdb>`_. Accepted formats: pdb (edam:format_1476), gro (edam:format_2033).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **regular_expression_pattern** (*str*) - ("^D") Python style regular expression matching the selected atom names.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_atoms import extract_atoms
            prop = {
                'regular_expression_pattern': '^D'
            }
            extract_atoms(input_structure_path='/path/to/myStructure.pdb',
                          output_structure_path='/path/to/newStructure.pdb',
                          properties=prop)

    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_structure_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any other local
        # is created so that it only contains the call parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_structure_path": output_structure_path},
        }

        # Properties specific for BB
        self.regular_expression_pattern = properties.get(
            "regular_expression_pattern", "^D"
        )

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` utils.extract_atoms.ExtractAtoms object.

        Returns:
            int: ``0`` when the restart check short-circuits the run,
            otherwise ``self.return_code`` (set to ``0`` after extraction).
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code: the input extension (compared case-insensitively)
        # selects the GRO path; any other extension is handled as PDB.
        input_path = self.io_dict["in"]["input_structure_path"]
        output_path = self.io_dict["out"]["output_structure_path"]
        if Path(input_path).suffix.lower() == ".gro":
            self._extract_gro_atoms(input_path, output_path)
        else:
            self._extract_pdb_atoms(input_path, output_path)
        self.return_code = 0
        ##########

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

    def _extract_gro_atoms(self, input_path: str, output_path: str) -> None:
        """Write to ``output_path`` the atoms of the GRO file ``input_path`` selected by the pattern.

        When the selection leaves no atoms an empty output file is created.
        """
        fu.log(
            "GRO format detected, extracting all atoms matching %s"
            % self.regular_expression_pattern,
            self.out_log,
        )
        gro_st = Gro()
        gro_st.read_gro_file(input_path)
        # NOTE(review): select_atoms presumably filters the structure in
        # place, since num_of_atoms is inspected right afterwards.
        gro_st.select_atoms(self.regular_expression_pattern)
        if gro_st.num_of_atoms:
            fu.log(
                "%d atoms found writting GRO file" % gro_st.num_of_atoms,
                self.out_log,
                self.global_log,
            )
            gro_st.write_gro_file(output_path)
            return

        fu.log(
            "No matching atoms found writting empty GRO file",
            self.out_log,
            self.global_log,
        )
        # Create (or truncate) the output so that an output file always exists.
        with open(output_path, "w"):
            pass

    def _extract_pdb_atoms(self, input_path: str, output_path: str) -> None:
        """Copy to ``output_path`` the PDB records of ``input_path`` whose atom name matches the pattern.

        Raises:
            re.error: If ``regular_expression_pattern`` is not a valid regular
                expression. The pattern is compiled before the output file is
                opened, so an invalid pattern does not create or truncate it.
        """
        fu.log(
            "PDB format detected, extracting all atoms matching %s"
            % self.regular_expression_pattern,
            self.out_log,
        )
        # Direct approach solution implemented to avoid the
        # issues presented in commit message (c92aab9604a6a31d13f4170ff47b231df0a588ef)
        # with the Biopython library
        # Compile once: the pattern is invariant across all lines of the file.
        atom_name_regex = re.compile(self.regular_expression_pattern)
        atoms_match_cont = 0
        with open(input_path, "r") as input_pdb, open(output_path, "w") as output_pdb:
            for line in input_pdb:
                record = line[:6].upper().strip()
                # Avoid MODEL, ENDMDL records and empty lines
                if len(line) <= 10 or record not in PDB_SERIAL_RECORDS:
                    continue
                # Characters 12-15 (0-based) of the record hold the atom name.
                if atom_name_regex.search(line[12:16].strip()):
                    atoms_match_cont += 1
                    output_pdb.write(line)

        if atoms_match_cont:
            fu.log(
                "%d atoms found writting PDB file" % atoms_match_cont,
                self.out_log,
                self.global_log,
            )
        else:
            fu.log(
                "No matching atoms found writting empty PDB file",
                self.out_log,
                self.global_log,
            )
def extract_atoms(
    input_structure_path: str,
    output_structure_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Create the :class:`ExtractAtoms <utils.extract_atoms.ExtractAtoms>` class and
    execute the :meth:`launch() <utils.extract_atoms.ExtractAtoms.launch>` method."""
    # Every argument is forwarded by keyword; the extra keyword arguments are
    # handed over as one mapping under the keyword ``kwargs`` (not unpacked).
    building_block = ExtractAtoms(
        input_structure_path=input_structure_path,
        output_structure_path=output_structure_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()
# Expose the full class documentation on the functional wrapper as well.
extract_atoms.__doc__ = ExtractAtoms.__doc__
# The second argument is presumably the command line description (confirm
# against BiobbObject.get_main); it now describes atom extraction, matching
# the class docstring, instead of text copied from the ligand removal tool.
main = ExtractAtoms.get_main(extract_atoms, "Extract all atoms from a 3D structure that match a regular expression pattern.")

if __name__ == "__main__":
    main()