Coverage for biobb_structure_utils/utils/remove_molecules.py: 78%
67 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
1#!/usr/bin/env python3
3"""Module containing the RemoveMolecules class and the command line interface."""
5import argparse
6from typing import Optional
8from Bio.PDB.PDBParser import PDBParser
9from biobb_common.configuration import settings
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.tools import file_utils as fu
12from biobb_common.tools.file_utils import launchlogger
14from biobb_structure_utils.utils.common import (
15 _from_string_to_list,
16 check_input_path,
17 check_output_path,
18 create_biopython_residue,
19 create_output_file,
20 create_residues_list,
21)
class RemoveMolecules(BiobbObject):
    """
    | biobb_structure_utils RemoveMolecules
    | Class to remove molecules from a 3D structure using Biopython.
    | Remove a list of molecules from a 3D structure using Biopython.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/2vgb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_molecules_path (str): Output molecules file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_remove_molecules.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **molecules** (*list*) - (None) List of comma separated res_id (will remove all molecules that match the res_id) or list of dictionaries with the name | res_id | chain | model of the molecules to be removed. Format: [{"name": "HIS", "res_id": "72", "chain": "A", "model": "1"}].
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.remove_molecules import remove_molecules
            prop = {
                'molecules': [
                    {
                        'name': 'HIS',
                        'res_id': '72',
                        'chain': 'A',
                        'model': '1'
                    }
                ]
            }
            remove_molecules(input_structure_path='/path/to/myStructure.pdb',
                             output_molecules_path='/path/to/newMolecules.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: In house using Biopython
            * version: >=1.79
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_structure_path, output_molecules_path, properties=None, **kwargs
    ) -> None:
        """Store the input/output paths and the ``molecules`` selection.

        Args:
            input_structure_path: Path of the structure file to read.
            output_molecules_path: Path of the structure file to write.
            properties: Optional dictionary of tool properties; ``None`` is
                treated as an empty dictionary.
            **kwargs: Accepted for call compatibility; only captured through
                ``locals_var_dict`` in this constructor.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must stay right after the
        # parent constructor call so that no extra local names leak into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_molecules_path": output_molecules_path},
        }

        # Properties specific for BB
        self.molecules = _from_string_to_list(properties.get("molecules", []))
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` utils.remove_molecules.RemoveMolecules object.

        Returns:
            int: ``self.return_code`` (set to 0 after the output file is
            written), or 0 directly when ``check_restart()`` is true.

        Raises:
            SystemExit: If no residue of the input structure is selected for
                removal.
        """

        self.io_dict["in"]["input_structure_path"] = check_input_path(
            self.io_dict["in"]["input_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_molecules_path"] = check_output_path(
            self.io_dict["out"]["output_molecules_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        # Selectors built from the ``molecules`` property. Each selector is
        # indexed by field name and lists the fields to compare under "code".
        list_residues = create_residues_list(self.molecules, self.out_log)

        # load input into BioPython structure
        structure = PDBParser(QUIET=True).get_structure(
            "structure", self.stage_io_dict["in"]["input_structure_path"]
        )

        # Single pass over the structure: keep every residue that matches no
        # selector. This replaces the former "collect everything, collect the
        # matches, then subtract by list membership" approach, which was
        # quadratic and appended a residue once per matching selector.
        new_structure = []
        removed_any = False
        for residue in structure.get_residues():
            candidate = create_biopython_residue(residue)
            # An empty selector list selects every residue for removal.
            if not list_residues or any(
                all(
                    selector[code].strip() == candidate[code].strip()
                    for code in selector["code"]
                )
                for selector in list_residues
            ):
                removed_any = True
            else:
                new_structure.append(candidate)

        # if not residues found in structure, raise exit
        if not removed_any:
            message = (
                self.__class__.__name__
                + ": The residues given by user were not found in input structure"
            )
            fu.log(message, self.out_log)
            raise SystemExit(message)

        # Write the remaining residues; the leading 0 is forwarded as-is to
        # create_output_file (its meaning is defined by that helper).
        create_output_file(
            0,
            self.stage_io_dict["in"]["input_structure_path"],
            new_structure,
            self.stage_io_dict["out"]["output_molecules_path"],
            self.out_log,
        )

        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def remove_molecules(
    input_structure_path: str,
    output_molecules_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`RemoveMolecules <utils.remove_molecules.RemoveMolecules>` class and
    execute the :meth:`launch() <utils.remove_molecules.RemoveMolecules.launch>` method."""

    return RemoveMolecules(
        input_structure_path=input_structure_path,
        output_molecules_path=output_molecules_path,
        properties=properties,
        **kwargs,
    ).launch()


# Reuse the class documentation for the function wrapper. This assignment
# must live at module level: placed after the ``return`` inside the function
# body it is unreachable and never takes effect.
remove_molecules.__doc__ = RemoveMolecules.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation."""

    def wide_formatter(prog):
        # Very large width so help strings are never wrapped.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description="Removes a list of molecules from a 3D structure.",
        formatter_class=wide_formatter,
    )
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Specific args of each building block: (short flag, long flag, help text)
    mandatory_options = (
        (
            "-i",
            "--input_structure_path",
            "Input structure file path. Accepted formats: pdb.",
        ),
        (
            "-o",
            "--output_molecules_path",
            "Output molecules file path. Accepted formats: pdb.",
        ),
    )
    mandatory_group = cli.add_argument_group("required arguments")
    for short_flag, long_flag, help_text in mandatory_options:
        mandatory_group.add_argument(
            short_flag, long_flag, required=True, help=help_text
        )

    parsed = cli.parse_args()
    # A missing or empty --config value is normalised to None.
    config_source = parsed.config or None
    tool_properties = settings.ConfReader(config=config_source).get_prop_dic()

    # Specific call of each building block
    remove_molecules(
        input_structure_path=parsed.input_structure_path,
        output_molecules_path=parsed.output_molecules_path,
        properties=tool_properties,
    )
if __name__ == "__main__":
    # Run the command line interface only when executed as a script.
    main()