Coverage for biobb_chemistry / babelm / babel_add_hydrogens.py: 95%

76 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-22 12:49 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the BabelAddHydrogens class and the command line interface.""" 

4 

5from typing import Optional 

6 

7from biobb_common.generic.biobb_object import BiobbObject 

8from biobb_common.tools.file_utils import launchlogger 

9 

10from biobb_chemistry.babelm.common import ( 

11 _from_string_to_list, 

12 check_input_path, 

13 check_output_path, 

14 get_coordinates, 

15 get_input_format, 

16 get_output_format, 

17 get_ph, 

18) 

19 

20 

class BabelAddHydrogens(BiobbObject):
    """
    | biobb_chemistry BabelAddHydrogens
    | This class is a wrapper of the Open Babel tool.
    | Adds hydrogens to a given structure or trajectory. Open Babel is a chemical toolbox designed to speak the many languages of chemical data. It's an open, collaborative project allowing anyone to search, convert, analyze, or store data from molecular modeling, chemistry, solid-state materials, biochemistry, or related areas. `Visit the official page <http://openbabel.org/wiki/Main_Page>`_.

    Args:
        input_path (str): Path to the input file. File type: input. `Sample file <https://github.com/bioexcel/biobb_chemistry/raw/master/biobb_chemistry/test/data/babelm/babel.no.H.pdb>`_. Accepted formats: dat (edam:format_1637), ent (edam:format_1476), fa (edam:format_1929), fasta (edam:format_1929), gro (edam:format_2033), inp (edam:format_3878), log (edam:format_2030), mcif (edam:format_1477), mdl (edam:format_3815), mmcif (edam:format_1477), mol (edam:format_3815), mol2 (edam:format_3816), pdb (edam:format_1476), pdbqt (edam:format_1476), png (edam:format_3603), sdf (edam:format_3814), smi (edam:format_1196), smiles (edam:format_1196), txt (edam:format_2033), xml (edam:format_2332), xtc (edam:format_3875).
        output_path (str): Path to the output file. File type: output. `Sample file <https://github.com/bioexcel/biobb_chemistry/raw/master/biobb_chemistry/test/reference/babelm/ref_babel.hydrogens.pdb>`_. Accepted formats: ent (edam:format_1476), fa (edam:format_1929), fasta (edam:format_1929), gro (edam:format_2033), inp (edam:format_3878), mcif (edam:format_1477), mdl (edam:format_3815), mmcif (edam:format_1477), mol (edam:format_3815), mol2 (edam:format_3816), pdb (edam:format_1476), pdbqt (edam:format_1476), png (edam:format_3603), sdf (edam:format_3814), smi (edam:format_1196), smiles (edam:format_1196), txt (edam:format_2033).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **input_format** (*str*) - (None) Format of input file. If not provided, input_path extension will be taken. Values: dat (Information represented in a data record), ent (Protein Data Bank format), fa (FASTA sequence format), fasta (FASTA sequence format), gro (GROMACS structure), inp (AMBER trajectory format), log (Events file), mcif (Entry format of PDB database in mmCIF format), mdl (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mmcif (Entry format of PDB database in mmCIF format), mol (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mol2 (Complete and portable representation of a SYBYL molecule), pdb (Protein Data Bank format), pdbqt (Protein Data Bank format with charges), png (File format for image compression), sdf (One of a family of chemical-data file formats developed by MDL Information Systems), smi (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), smiles (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), txt (Textual format), xml (eXtensible Markup Language), xtc (Portable binary format for trajectories produced by GROMACS package).
            * **output_format** (*str*) - (None) Format of output file. If not provided, output_path extension will be taken. Values: ent (Protein Data Bank format), fa (FASTA sequence format), fasta (FASTA sequence format), gro (GROMACS structure), inp (AMBER trajectory format), mcif (Entry format of PDB database in mmCIF format), mdl (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mmcif (Entry format of PDB database in mmCIF format), mol (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mol2 (Complete and portable representation of a SYBYL molecule), pdb (Protein Data Bank format), pdbqt (Protein Data Bank format with charges), png (File format for image compression), sdf (One of a family of chemical-data file formats developed by MDL Information Systems), smi (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), smiles (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), txt (Textual format), xtc (Portable binary format for trajectories produced by GROMACS package).
            * **fs_input** (*list*) - (None) Format-specific input options. Values: b (disable automatic bonding), d (input file is in dlg -AutoDock docking log- format).
            * **fs_output** (*list*) - (["h"]) Format-specific output options. Values: b (enable automatic bonding), r (output as a rigid molecule), c (combine separate molecular pieces of input into a single rigid molecule), s (output as a flexible residue), p (preserve atom indices from input file), h (preserve hydrogens), n (preserve atom names).
            * **coordinates** (*int*) - (None) Type of coordinates: 2D or 3D. Values: 2 (2D coordinates), 3 (3D coordinates).
            * **effort** (*str*) - ("medium") Computational effort wanted to dedicate for the conformer generation coordinates calculations, only for 3D coordinates. Values: fastest (only generate coordinates, no force field or conformer search), fast (perform quick forcefield optimization), medium (forcefield optimization + fast conformer search), better (more optimization + fast conformer search), best (more optimization + significant conformer search).
            * **ph** (*float*) - (7.4) [0~14|0.1] Add hydrogens appropriate for pH.
            * **binary_path** (*str*) - ("obabel") Path to the obabel executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('informaticsmatters/obabel:latest') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_chemistry.babelm.babel_add_hydrogens import babel_add_hydrogens
            prop = {
                'input_format': 'pdb',
                'output_format': 'pdb',
                'coordinates': 3,
                'ph': 7.4
            }
            babel_add_hydrogens(input_path='/path/to/myStructure.pdb',
                                output_path='/path/to/newStructure.pdb',
                                properties=prop)

    Info:
        * wrapped_software:
            * name: Open Babel
            * version: 2.4.1
            * license: GNU
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_path, output_path, properties=None, **kwargs) -> None:
        """Store the file paths and the tool settings read from ``properties``."""
        # A missing (or empty) properties argument is treated as an empty dict.
        properties = properties or {}

        # Parent initialiser first, then a snapshot of the constructor's local
        # names. The snapshot must stay ahead of any new local binding below,
        # otherwise its contents would change.
        super().__init__(properties)
        self.locals_var_dict = locals().copy()

        # Files handled by this building block, grouped by direction.
        staged_inputs = {"input_path": input_path}
        staged_outputs = {"output_path": output_path}
        self.io_dict = {"in": staged_inputs, "out": staged_outputs}

        # Tool-specific settings, each with its fallback value.
        read = properties.get
        self.input_format = read("input_format", "")
        self.output_format = read("output_format", "")
        self.fs_input = _from_string_to_list(read("fs_input", None))
        self.fs_output = _from_string_to_list(read("fs_output", ["h"]))
        self.coordinates = read("coordinates", "")
        self.effort = read("effort", "medium")
        # NOTE(review): the class docstring lists 7.4 as the default pH, but the
        # fallback used here is an empty string -- confirm which one is intended.
        self.ph = read("ph", "")
        self.binary_path = read("binary_path", "obabel")
        self.properties = properties

        # Validation hooks (not defined in this class).
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Run both I/O paths through the path checkers and store the results.

        ``err_log`` is accepted but not used by this method.
        """
        caller = self.__class__.__name__

        inputs = self.io_dict["in"]
        inputs["input_path"] = check_input_path(inputs["input_path"], out_log, caller)

        outputs = self.io_dict["out"]
        outputs["output_path"] = check_output_path(
            outputs["output_path"], out_log, caller
        )

    def create_cmd(self, container_io_dict, out_log, err_log):
        """Assemble the obabel invocation as a list of command tokens.

        Args:
            container_io_dict: mapping with ``["in"]["input_path"]`` and
                ``["out"]["output_path"]`` entries.
            out_log: logger handed to the format/coordinates/pH helpers.
            err_log: accepted but not used by this method.

        Returns:
            list: the executable followed by its arguments.
        """
        # The executable always comes first.
        cmd = [self.binary_path]

        # Input: "-i<format>" followed by the input file itself.
        in_file = container_io_dict["in"]["input_path"]
        cmd += ["-i" + get_input_format(self.input_format, in_file, out_log), in_file]

        # Output: "-o<format>" followed by "-O<file>".
        out_file = container_io_dict["out"]["output_path"]
        cmd += [
            "-o" + get_output_format(self.output_format, out_file, out_log),
            "-O" + out_file,
        ]

        # Coordinate generation flag, emitted only when the helper yields a value.
        dims = get_coordinates(self.coordinates, out_log)
        if dims:
            cmd.append("".join(["--gen", dims, "d"]))

        # Hydrogens: pH-specific flag when a pH value comes back, plain "-h" otherwise.
        ph_value = get_ph(self.ph, out_log)
        cmd.append(("-p " + ph_value) if ph_value else "-h")

        # Format-specific input options are passed as "-a<option>".
        if self.fs_input is not None:
            cmd.extend("-a" + option for option in self.fs_input)

        # Format-specific output options are passed as "-x<option>".
        if self.fs_output is not None:
            cmd.extend("-x" + option for option in self.fs_output)

        # The effort switch is added only together with 3D coordinates.
        if dims == "3":
            cmd.append("--" + self.effort)

        return cmd

    @launchlogger
    def launch(self) -> int:
        """Run the :class:`BabelAddHydrogens <babelm.babel_add_hydrogens.BabelAddHydrogens>` building block.

        Returns:
            int: ``0`` when ``check_restart()`` reports that nothing has to be
            executed, otherwise ``self.return_code``.
        """
        # Validate the paths before doing anything else.
        self.check_data_params(self.out_log, self.err_log)

        # Nothing to do when the restart check succeeds.
        if self.check_restart():
            return 0

        self.stage_files()

        # Build the command from the staged file paths.
        self.cmd = self.create_cmd(self.stage_io_dict, self.out_log, self.err_log)

        # Execute, bring the results back and clean up, in that order.
        self.run_biobb()
        self.copy_to_host()
        self.remove_tmp_files()

        # Final argument check with exceptions disabled.
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

200 

201 

def babel_add_hydrogens(
    input_path: str, output_path: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Build a :class:`BabelAddHydrogens <babelm.babel_add_hydrogens.BabelAddHydrogens>` instance
    from the given arguments and return the result of its
    :meth:`launch() <babelm.babel_add_hydrogens.BabelAddHydrogens.launch>` call."""
    # Snapshot the call arguments before any other local name is bound, so the
    # mapping holds exactly input_path, output_path, properties and kwargs.
    call_args = dict(locals())
    building_block = BabelAddHydrogens(**call_args)
    return building_block.launch()

208 

209 

# The functional wrapper shares the class documentation.
babel_add_hydrogens.__doc__ = BabelAddHydrogens.__doc__

# Entry point produced by get_main from the wrapper and a short description.
main = BabelAddHydrogens.get_main(
    babel_add_hydrogens,
    "Adds hydrogen atoms to small molecules.",
)


if __name__ == "__main__":
    main()