Coverage for biobb_structure_utils/utils/extract_molecule.py: 76%
59 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-03 15:30 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-03 15:30 +0000
#!/usr/bin/env python3

"""Module containing the ExtractMolecule class and the command line interface."""
import argparse
from typing import Optional

from biobb_common.configuration import settings
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools import file_utils as fu
from biobb_common.tools.file_utils import launchlogger

from biobb_structure_utils.utils.common import check_input_path, check_output_path
class ExtractMolecule(BiobbObject):
    """
    | biobb_structure_utils ExtractMolecule
    | This class is a wrapper of the Structure Checking tool to extract a molecule from a 3D structure.
    | Wrapper for the `Structure Checking <https://github.com/bioexcel/biobb_structure_checking>`_ tool to extract a molecule from a 3D structure.

    Args:
        input_structure_path (str): Input structure file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/extract_molecule.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_molecule_path (str): Output molecule file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_extract_molecule.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **molecule_type** (*string*) - ("all") type of molecule to be extracted. If all, only waters and ligands will be removed from the original structure. Values: all, protein, na, dna, rna, chains.
            * **chains** (*list*) - (None) if chains selected in **molecule_type**, specify them here, e.g: ["A", "C", "N"].
            * **binary_path** (*string*) - ("check_structure") path to the check_structure application
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.extract_molecule import extract_molecule
            prop = {
                'molecule_type': 'chains',
                'chains': ['A', 'N', 'F']
            }
            extract_molecule(input_structure_path='/path/to/myStructure.pdb',
                             output_molecule_path='/path/to/newMolecule.pdb',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: Structure Checking from MDWeb
            * version: >=3.0.3
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_structure_path, output_molecule_path, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor's local names, taken immediately after
        # the parent constructor and before any further local is bound.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_structure_path": input_structure_path},
            "out": {"output_molecule_path": output_molecule_path}
        }

        # Properties specific for BB
        self.molecule_type = properties.get('molecule_type', 'all')
        # 'chains' may be absent or explicitly None (its documented default);
        # both are normalised to an empty list so it can always be joined.
        self.chains = properties.get('chains') or []
        self.binary_path = properties.get('binary_path', 'check_structure')
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def create_command_list(self, command_list_path):
        """Write the command list file used as input for structure checking.

        The file always contains the instructions that remove ligands and
        waters. Unless ``molecule_type`` is ``'all'``, a ``chains --select``
        instruction is appended: with the comma-joined ``chains`` when
        ``molecule_type`` is ``'chains'``, otherwise with ``molecule_type``
        itself.

        Args:
            command_list_path (str): Path of the file to be written.

        Returns:
            str: The same ``command_list_path`` that was received.
        """
        instructions_list = ['ligands --remove All', 'water --remove Yes']

        if self.molecule_type != 'all':
            if self.molecule_type == 'chains':
                # Tolerate chains=None and chain ids that are not strings
                # (both made str.join raise TypeError before).
                selection = ','.join(str(chain) for chain in (self.chains or []))
            else:
                selection = self.molecule_type
            instructions_list.append('chains --select ' + selection)

        with open(command_list_path, 'w') as clp:
            for line in instructions_list:
                clp.write(line.strip() + '\n')

        return command_list_path

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`ExtractMolecule <utils.extract_molecule.ExtractMolecule>` utils.extract_molecule.ExtractMolecule object."""

        # Run the path checks and keep whatever paths the helpers return
        self.io_dict['in']['input_structure_path'] = check_input_path(self.io_dict['in']['input_structure_path'], self.out_log, self.__class__.__name__)
        self.io_dict['out']['output_molecule_path'] = check_output_path(self.io_dict['out']['output_molecule_path'], self.out_log, self.__class__.__name__)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # create temporary folder
        tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % tmp_folder, self.out_log)

        # create command list file
        command_list_file = self.create_command_list(tmp_folder + '/extract_prot.lst')

        # run command line
        # NOTE(review): the command is built from io_dict paths rather than
        # stage_io_dict, although stage_files() was called above - confirm
        # this is intended.
        self.cmd = [self.binary_path,
                    '-i', self.io_dict['in']['input_structure_path'],
                    '-o', self.io_dict['out']['output_molecule_path'],
                    '--force_save',
                    '--non_interactive',
                    'command_list', '--list', command_list_file]

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.tmp_files.extend([
            self.stage_io_dict.get("unique_dir"),
            tmp_folder
        ])
        self.remove_tmp_files()

        # Final check of the outputs; asked not to raise on failure
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def extract_molecule(input_structure_path: str, output_molecule_path: str, properties: Optional[dict] = None, **kwargs) -> int:
    """Execute the :class:`ExtractMolecule <utils.extract_molecule.ExtractMolecule>` class and
    execute the :meth:`launch() <utils.extract_molecule.ExtractMolecule.launch>` method.

    Args:
        input_structure_path (str): Input structure file path.
        output_molecule_path (str): Output molecule file path.
        properties (dict, optional): Tool parameters; ``None`` is forwarded
            unchanged to the class constructor.
        **kwargs: Extra keyword arguments forwarded to the class constructor.

    Returns:
        int: The value returned by ``ExtractMolecule.launch()``.
    """

    return ExtractMolecule(input_structure_path=input_structure_path,
                           output_molecule_path=output_molecule_path,
                           properties=properties, **kwargs).launch()
def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses the optional ``-c/--config`` argument and the required
    ``-i/--input_structure_path`` and ``-o/--output_molecule_path`` arguments,
    reads the properties through ``settings.ConfReader`` and calls
    ``extract_molecule`` with them.
    """
    parser = argparse.ArgumentParser(
        description="Extract a molecule from a 3D structure.",
        # Very large width so help strings are never wrapped
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    parser.add_argument('-c', '--config', required=False, help="This file can be a YAML file, JSON file or JSON string")

    # Specific args of each building block
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('-i', '--input_structure_path', required=True, help="Input structure file path. Accepted formats: pdb, pdbqt.")
    # The output is a molecule file; the help text previously said "heteroatom"
    required_args.add_argument('-o', '--output_molecule_path', required=True, help="Output molecule file path. Accepted formats: pdb, pdbqt.")

    args = parser.parse_args()
    # A missing or empty --config is passed on as None
    config = args.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Specific call of each building block
    extract_molecule(input_structure_path=args.input_structure_path,
                     output_molecule_path=args.output_molecule_path,
                     properties=properties)
# Script entry point: run the command line interface only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()