Coverage for biobb_structure_utils / utils / extract_heteroatoms.py: 86%
58 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-22 13:23 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-22 13:23 +0000
1#!/usr/bin/env python3
3"""Module containing the ExtractHeteroAtoms class and the command line interface."""
4from typing import Optional
5from Bio.PDB.PDBParser import PDBParser
6from biobb_common.generic.biobb_object import BiobbObject
7from biobb_common.tools import file_utils as fu
8from biobb_common.tools.file_utils import launchlogger
10from biobb_structure_utils.utils.common import (
11 _from_string_to_list,
12 check_format_heteroatoms,
13 check_input_path,
14 check_output_path,
15 create_biopython_residue,
16 create_output_file,
17)
class ExtractHeteroAtoms(BiobbObject):
    """
    | biobb_structure_utils ExtractHeteroAtoms
    | Class to extract hetero-atoms from a 3D structure using Biopython.
    | Extracts a list of heteroatoms from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_heteroatom_path (str): Output heteroatom file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **heteroatoms** (*list*) - (None) List of dictionaries with the name | res_id | chain | model of the heteroatoms to be extracted. Format: [{"name": "ZZ7", "res_id": "302", "chain": "B", "model": "1"}]. If empty, all the heteroatoms of the structure will be returned.
            * **water** (*bool*) - (False) Add or not waters.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_heteroatoms import extract_heteroatoms
            prop = {
                'heteroatoms': [
                    {
                        'name': 'ZZ7',
                        'res_id': '302',
                        'chain': 'B',
                        'model': '1'
                    }
                ]
            }
            extract_heteroatoms(input_structure_path='/path/to/myStructure.pdb',
                                output_heteroatom_path='/path/to/newHeteroatom.pdb',
                                properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.76
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_heteroatom_path, properties=None, **kwargs
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any other local
        # name is created so that the captured dictionary stays unchanged.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_heteroatom_path": output_heteroatom_path},
        }

        # Properties specific for BB
        self.heteroatoms = _from_string_to_list(properties.get("heteroatoms", []))
        self.water = properties.get("water", False)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _matches_selector(selector, residue) -> bool:
        """Return True when ``residue`` agrees with ``selector`` on every field listed in ``selector["code"]``.

        Values are compared after stripping surrounding whitespace. A selector
        whose ``"code"`` list is empty matches every residue.
        """
        return all(
            selector[code].strip() == residue[code].strip()
            for code in selector["code"]
        )

    def _is_excluded_water(self, residue) -> bool:
        """Return True when ``residue`` is named HOH, SOL or WAT and the ``water`` property is falsy."""
        return not self.water and residue["name"] in ("HOH", "SOL", "WAT")

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractHeteroAtoms <utils.extract_heteroatoms.ExtractHeteroAtoms>` utils.extract_heteroatoms.ExtractHeteroAtoms object.

        Validates the input and output paths, selects the residues of the input
        structure that match the requested heteroatoms (every residue when no
        heteroatom was requested), leaves out waters unless the ``water``
        property is set, and hands the selection to ``create_output_file``.

        Returns:
            int: 0 when the restart check short-circuits the execution,
            otherwise ``self.return_code`` (set to 0 once the output has been
            written).

        Raises:
            SystemExit: If no residue of the input structure was selected.
        """
        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_heteroatom_path"] = check_output_path(
            self.io_dict["out"]["output_heteroatom_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of heteroatoms from properties
        list_heteroatoms = check_format_heteroatoms(self.heteroatoms, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # get desired heteroatoms
        new_structure = []
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            # With an empty selector list every residue is a candidate;
            # otherwise at least one selector has to match.
            if list_heteroatoms and not any(
                self._matches_selector(het, r) for het in list_heteroatoms
            ):
                continue
            if self._is_excluded_water(r):
                continue
            # Appended at most once, even when several selectors match the
            # same residue.
            new_structure.append(r)

        # if not heteroatoms found in structure, raise exit
        if not new_structure:
            not_found_msg = (
                self.__class__.__name__
                + ": The heteroatoms given by user were not found in input structure"
            )
            fu.log(not_found_msg, self.out_log)
            raise SystemExit(not_found_msg)

        create_output_file(
            1,
            self.stage_io_dict["in"]["input_structure_path"],
            new_structure,
            self.stage_io_dict["out"]["output_heteroatom_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def extract_heteroatoms(
    input_structure_path: str,
    output_heteroatom_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build an :class:`ExtractHeteroAtoms <utils.extract_heteroatoms.ExtractHeteroAtoms>` object from the given
    arguments and run its :meth:`launch() <utils.extract_heteroatoms.ExtractHeteroAtoms.launch>` method,
    returning the value that ``launch()`` returns."""
    # Extra keyword arguments are forwarded bundled under the single keyword
    # ``kwargs`` (not unpacked), together with the three named arguments.
    block = ExtractHeteroAtoms(
        input_structure_path=input_structure_path,
        output_heteroatom_path=output_heteroatom_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()
# Reuse the class docstring as the documentation of the function wrapper.
extract_heteroatoms.__doc__ = ExtractHeteroAtoms.__doc__

# Entry point obtained from the class for the function wrapper; presumably the
# command line interface mentioned in the module docstring.
main = ExtractHeteroAtoms.get_main(
    extract_heteroatoms,
    "Extract a list of heteroatoms from a 3D structure.",
)

if __name__ == "__main__":
    main()