Coverage for biobb_structure_utils/utils/extract_residues.py: 77%
64 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
1#!/usr/bin/env python3
3"""Module containing the ExtractResidues class and the command line interface."""
5import argparse
6from typing import Optional
8from Bio.PDB.PDBParser import PDBParser
9from biobb_common.configuration import settings
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.tools import file_utils as fu
12from biobb_common.tools.file_utils import launchlogger
14from biobb_structure_utils.utils.common import (
15 _from_string_to_list,
16 check_input_path,
17 check_output_path,
18 create_biopython_residue,
19 create_output_file,
20 create_residues_list,
21)
class ExtractResidues(BiobbObject):
    """
    | biobb_structure_utils ExtractResidues
    | Class to extract residues from a 3D structure using Biopython.
    | Extracts a list of residues from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/extract_heteroatom.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_residues_path (str): Output residues file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_extract_residues.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **residues** (*list*) - (None) List of comma separated res_id (will extract all residues that match the res_id) or list of dictionaries with the name | res_id | chain | model of the residues to be extracted. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_residues import extract_residues
            prop = {
                'residues': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ]
            }
            extract_residues(input_structure_path='/path/to/myStructure.pdb',
                             output_residues_path='/path/to/newResidues.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_structure_path: str,
        output_residues_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        """Store the input/output paths and the ``residues`` property.

        Args:
            input_structure_path: Path of the structure file to read.
            output_residues_path: Path of the file the selected residues are written to.
            properties: Tool parameters; ``None`` is treated as an empty dictionary.
            **kwargs: Accepted for call compatibility; only captured in
                ``locals_var_dict``.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. Taken before any other local
        # is created so that it contains exactly the call arguments.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_residues_path": output_residues_path},
        }

        # Properties specific for BB
        self.residues = _from_string_to_list(properties.get("residues", []))
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _matches_selector(selector, residue) -> bool:
        """Return True when ``residue`` agrees with ``selector`` on every field in ``selector["code"]``.

        Values are compared after stripping surrounding whitespace. A selector
        whose ``"code"`` entry is empty matches every residue.
        """
        return all(
            selector[code].strip() == residue[code].strip()
            for code in selector["code"]
        )

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractResidues <utils.extract_residues.ExtractResidues>` utils.extract_residues.ExtractResidues object."""

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_residues_path"] = check_output_path(
            self.io_dict["out"]["output_residues_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # get list of Residues from properties
        list_residues = create_residues_list(self.residues, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # get desired residues: with no selectors every residue is kept,
        # otherwise a residue is kept when at least one selector matches it.
        # ``any`` stops at the first hit, so a residue that satisfies several
        # overlapping selectors is recorded once instead of once per selector.
        new_structure = []
        for residue in structure.get_residues():
            r = create_biopython_residue(residue)
            if not list_residues or any(
                self._matches_selector(selector, r) for selector in list_residues
            ):
                new_structure.append(r)

        # if not residues found in structure, raise exit
        if not new_structure:
            message = (
                self.__class__.__name__
                + ": The residues given by user were not found in input structure"
            )
            fu.log(message, self.out_log)
            raise SystemExit(message)

        # NOTE(review): the leading ``2`` is an output-type selector understood
        # by ``create_output_file`` -- presumably "residues"; confirm in common.py.
        create_output_file(
            2,
            self.stage_io_dict["in"]["input_structure_path"],
            new_structure,
            self.stage_io_dict["out"]["output_residues_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def extract_residues(
    input_structure_path: str,
    output_residues_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`ExtractResidues <utils.extract_residues.ExtractResidues>` class and
    execute the :meth:`launch() <utils.extract_residues.ExtractResidues.launch>` method."""

    return ExtractResidues(
        input_structure_path=input_structure_path,
        output_residues_path=output_residues_path,
        properties=properties,
        **kwargs,
    ).launch()


# Reuse the class documentation for the functional wrapper. This has to be a
# module-level statement: placed after the ``return`` inside the function body
# it is unreachable and the docstring is never assigned.
extract_residues.__doc__ = ExtractResidues.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses ``-c/--config`` (optional) plus the required
    ``-i/--input_structure_path`` and ``-o/--output_residues_path`` options,
    obtains the properties dictionary from ``settings.ConfReader`` and passes
    everything to ``extract_residues``.
    """

    def _wide_help_formatter(prog):
        # A very large width keeps argparse from wrapping the help text.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description="Extract a list of residues from a 3D structure.",
        formatter_class=_wide_help_formatter,
    )
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Specific args of each building block
    mandatory = cli.add_argument_group("required arguments")
    mandatory.add_argument(
        "-i",
        "--input_structure_path",
        required=True,
        help="Input structure file path. Accepted formats: pdb.",
    )
    mandatory.add_argument(
        "-o",
        "--output_residues_path",
        required=True,
        help="Output residues file path. Accepted formats: pdb.",
    )

    parsed = cli.parse_args()
    # Any falsy --config value (missing or empty) is passed on as None.
    config_source = parsed.config or None
    tool_properties = settings.ConfReader(config=config_source).get_prop_dic()

    # Specific call of each building block
    extract_residues(
        input_structure_path=parsed.input_structure_path,
        output_residues_path=parsed.output_residues_path,
        properties=tool_properties,
    )
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()