Coverage for biobb_chemistry/babelm/babel_add_hydrogens.py: 84%
87 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-12 09:28 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-12 09:28 +0000
1#!/usr/bin/env python3
3"""Module containing the BabelAddHydrogens class and the command line interface."""
5import argparse
6from typing import Optional
8from biobb_common.configuration import settings
9from biobb_common.generic.biobb_object import BiobbObject
10from biobb_common.tools.file_utils import launchlogger
12from biobb_chemistry.babelm.common import (
13 _from_string_to_list,
14 check_input_path,
15 check_output_path,
16 get_coordinates,
17 get_input_format,
18 get_output_format,
19 get_ph,
20)
class BabelAddHydrogens(BiobbObject):
    """
    | biobb_chemistry BabelAddHydrogens
    | This class is a wrapper of the Open Babel tool.
    | Adds hydrogens to a given structure or trajectory. Open Babel is a chemical toolbox designed to speak the many languages of chemical data. It's an open, collaborative project allowing anyone to search, convert, analyze, or store data from molecular modeling, chemistry, solid-state materials, biochemistry, or related areas. `Visit the official page <http://openbabel.org/wiki/Main_Page>`_.

    Args:
        input_path (str): Path to the input file. File type: input. `Sample file <https://github.com/bioexcel/biobb_chemistry/raw/master/biobb_chemistry/test/data/babel/babel.no.H.pdb>`_. Accepted formats: dat (edam:format_1637), ent (edam:format_1476), fa (edam:format_1929), fasta (edam:format_1929), gro (edam:format_2033), inp (edam:format_3878), log (edam:format_2030), mcif (edam:format_1477), mdl (edam:format_3815), mmcif (edam:format_1477), mol (edam:format_3815), mol2 (edam:format_3816), pdb (edam:format_1476), pdbqt (edam:format_1476), png (edam:format_3603), sdf (edam:format_3814), smi (edam:format_1196), smiles (edam:format_1196), txt (edam:format_2033), xml (edam:format_2332), xtc (edam:format_3875).
        output_path (str): Path to the output file. File type: output. `Sample file <https://github.com/bioexcel/biobb_chemistry/raw/master/biobb_chemistry/test/reference/babel/ref_babel.hydrogens.pdb>`_. Accepted formats: ent (edam:format_1476), fa (edam:format_1929), fasta (edam:format_1929), gro (edam:format_2033), inp (edam:format_3878), mcif (edam:format_1477), mdl (edam:format_3815), mmcif (edam:format_1477), mol (edam:format_3815), mol2 (edam:format_3816), pdb (edam:format_1476), pdbqt (edam:format_1476), png (edam:format_3603), sdf (edam:format_3814), smi (edam:format_1196), smiles (edam:format_1196), txt (edam:format_2033).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **input_format** (*str*) - (None) Format of input file. If not provided, input_path extension will be taken. Values: dat (Information represented in a data record), ent (Protein Data Bank format), fa (FASTA sequence format), fasta (FASTA sequence format), gro (GROMACS structure), inp (AMBER trajectory format), log (Events file), mcif (Entry format of PDB database in mmCIF format), mdl (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mmcif (Entry format of PDB database in mmCIF format), mol (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mol2 (Complete and portable representation of a SYBYL molecule), pdb (Protein Data Bank format), pdbqt (Protein Data Bank format with charges), png (File format for image compression), sdf (One of a family of chemical-data file formats developed by MDL Information Systems), smi (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), smiles (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), txt (Textual format), xml (eXtensible Markup Language), xtc (Portable binary format for trajectories produced by GROMACS package).
            * **output_format** (*str*) - (None) Format of output file. If not provided, output_path extension will be taken. Values: ent (Protein Data Bank format), fa (FASTA sequence format), fasta (FASTA sequence format), gro (GROMACS structure), inp (AMBER trajectory format), mcif (Entry format of PDB database in mmCIF format), mdl (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mmcif (Entry format of PDB database in mmCIF format), mol (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mol2 (Complete and portable representation of a SYBYL molecule), pdb (Protein Data Bank format), pdbqt (Protein Data Bank format with charges), png (File format for image compression), sdf (One of a family of chemical-data file formats developed by MDL Information Systems), smi (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), smiles (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), txt (Textual format), xtc (Portable binary format for trajectories produced by GROMACS package).
            * **fs_input** (*list*) - (None) Format-specific input options. Values: b (disable automatic bonding), d (input file is in dlg -AutoDock docking log- format).
            * **fs_output** (*list*) - (["h"]) Format-specific output options. Values: b (enable automatic bonding), r (output as a rigid molecule), c (combine separate molecular pieces of input into a single rigid molecule), s (output as a flexible residue), p (preserve atom indices from input file), h (preserve hydrogens), n (preserve atom names).
            * **coordinates** (*int*) - (None) Type of coordinates: 2D or 3D. Values: 2 (2D coordinates), 3 (3D coordinates).
            * **effort** (*str*) - ("medium") Computational effort wanted to dedicate for the conformer generation coordinates calculations, only for 3D coordinates. Values: fastest (only generate coordinates, no force field or conformer search), fast (perform quick forcefield optimization), medium (forcefield optimization + fast conformer search), better (more optimization + fast conformer search), best (more optimization + significant conformer search).
            * **ph** (*float*) - (7.4) [0~14|0.1] Add hydrogens appropriate for pH.
            * **binary_path** (*str*) - ("obabel") Path to the obabel executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('informaticsmatters/obabel:latest') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_chemistry.babelm.babel_add_hydrogens import babel_add_hydrogens
            prop = {
                'input_format': 'pdb',
                'output_format': 'pdb',
                'coordinates': 3,
                'ph': 7.4
            }
            babel_add_hydrogens(input_path='/path/to/myStructure.pdb',
                                output_path='/path/to/newStructure.pdb',
                                properties=prop)

    Info:
        * wrapped_software:
            * name: Open Babel
            * version: 2.4.1
            * license: GNU
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_path, output_path, properties=None, **kwargs) -> None:
        # A missing/None properties argument is treated as "no properties".
        properties = properties or {}

        # Parent constructor first, then snapshot the constructor arguments.
        # The snapshot must be taken before any other local name is created.
        super().__init__(properties)
        self.locals_var_dict = locals().copy()

        # Input/output file registry, keyed by direction.
        self.io_dict = {
            "in": {"input_path": input_path},
            "out": {"output_path": output_path},
        }

        # Tool-specific properties; each falls back to the default given here
        # when the key is absent from ``properties``.
        self.input_format = properties.get("input_format", "")
        self.output_format = properties.get("output_format", "")
        self.fs_input = _from_string_to_list(properties.get("fs_input", None))
        self.fs_output = _from_string_to_list(properties.get("fs_output", ["h"]))
        self.coordinates = properties.get("coordinates", "")
        self.effort = properties.get("effort", "medium")
        # NOTE(review): the class docstring advertises 7.4 as the default pH,
        # but an absent key yields "" here -- confirm which one is intended.
        self.ph = properties.get("ph", "")
        self.binary_path = properties.get("binary_path", "obabel")
        self.properties = properties

        # Validate the properties and the constructor arguments.
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Run the input and output paths through their checkers and store the results."""
        owner = self.__class__.__name__
        inputs = self.io_dict["in"]
        outputs = self.io_dict["out"]

        inputs["input_path"] = check_input_path(inputs["input_path"], out_log, owner)
        outputs["output_path"] = check_output_path(
            outputs["output_path"], out_log, owner
        )

    def create_cmd(self, container_io_dict, out_log, err_log):
        """Build the obabel argument list from the staged paths and the stored properties.

        Returns the list of command-line tokens, starting with the binary path.
        """
        # Input: "-i<format>" followed by the input file path.
        src_path = container_io_dict["in"]["input_path"]
        src_format = get_input_format(self.input_format, src_path, out_log)

        # Output: "-o<format>" followed by "-O<path>".
        dst_path = container_io_dict["out"]["output_path"]
        dst_format = get_output_format(self.output_format, dst_path, out_log)

        cmd = [
            self.binary_path,
            "-i" + src_format,
            src_path,
            "-o" + dst_format,
            "-O" + dst_path,
        ]

        # Coordinate generation flag, only when the helper yields a value.
        dimension = get_coordinates(self.coordinates, out_log)
        if dimension:
            cmd.append("--gen" + dimension + "d")

        # Hydrogens: pH-dependent ("-p <pH>") when the helper yields a pH
        # value, plain "-h" otherwise.
        ph_value = get_ph(self.ph, out_log)
        cmd.append(("-p " + ph_value) if ph_value else "-h")

        # Format-specific input options ("-a<opt>").
        if self.fs_input is not None:
            cmd.extend("-a" + option for option in self.fs_input)

        # Format-specific output options ("-x<opt>").
        if self.fs_output is not None:
            cmd.extend("-x" + option for option in self.fs_output)

        # The effort flag is only emitted together with 3D coordinates.
        if dimension == "3":
            cmd.append("--" + self.effort)

        return cmd

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`BabelAddHydrogens <babelm.babel_add_hydrogens.BabelAddHydrogens>` babelm.babel_add_hydrogens.BabelAddHydrogens object."""

        # Validate input/output paths.
        self.check_data_params(self.out_log, self.err_log)

        # Nothing to do when the restart check says so; otherwise stage files.
        if self.check_restart():
            return 0
        self.stage_files()

        # Assemble the command line from the staged paths.
        self.cmd = self.create_cmd(self.stage_io_dict, self.out_log, self.err_log)

        # Execute, bring results back to the host, and clean up.
        self.run_biobb()
        self.copy_to_host()
        self.remove_tmp_files()

        # Post-run argument check; asked not to raise.
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def babel_add_hydrogens(
    input_path: str, output_path: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Execute the :class:`BabelAddHydrogens <babelm.babel_add_hydrogens.BabelAddHydrogens>` class and
    execute the :meth:`launch() <babelm.babel_add_hydrogens.BabelAddHydrogens.launch>` method.

    Args:
        input_path: Path to the input file.
        output_path: Path to the output file.
        properties: Optional dictionary of tool parameters.
        **kwargs: Forwarded unchanged to the ``BabelAddHydrogens`` constructor.

    Returns:
        The value returned by ``BabelAddHydrogens.launch()``.
    """
    return BabelAddHydrogens(
        input_path=input_path, output_path=output_path, properties=properties, **kwargs
    ).launch()


# Give the function wrapper the same documentation as the class it wraps.
# This assignment previously sat inside the function body after the ``return``
# statement, where it could never execute; it has to run at module level.
babel_add_hydrogens.__doc__ = BabelAddHydrogens.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation."""

    def _wide_raw_formatter(prog):
        # Very large width so argparse does not re-wrap the long help strings.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    parser = argparse.ArgumentParser(
        description="Adds hydrogen atoms to small molecules.",
        formatter_class=_wide_raw_formatter,
    )
    parser.add_argument("--config", required=False, help="Configuration file")

    # Arguments specific to this building block: both paths are mandatory.
    required_specs = (
        (
            "--input_path",
            "Path to the input file. Accepted formats: dat, ent, fa, fasta, gro, inp, log, mcif, mdl, mmcif, mol, mol2, pdb, pdbqt, png, sdf, smi, smiles, txt, xml, xtc.",
        ),
        (
            "--output_path",
            "Path to the output file. Accepted formats: ent, fa, fasta, gro, inp, mcif, mdl, mmcif, mol, mol2, pdb, pdbqt, png, sdf, smi, smiles, txt.",
        ),
    )
    required_group = parser.add_argument_group("required arguments")
    for flag, help_text in required_specs:
        required_group.add_argument(flag, required=True, help=help_text)

    cli_args = parser.parse_args()

    # With no --config given, fall back to the literal "{}".
    config = cli_args.config or "{}"
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Run the building block with the parsed paths and properties.
    babel_add_hydrogens(
        input_path=cli_args.input_path,
        output_path=cli_args.output_path,
        properties=properties,
    )


# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()