Coverage for biobb_chemistry / babelm / babel_remove_hydrogens.py: 95%

76 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-22 12:49 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the BabelRemoveHydrogens class and the command line interface.""" 

4from typing import Optional 

5from biobb_common.generic.biobb_object import BiobbObject 

6from biobb_common.tools.file_utils import launchlogger 

7 

8from biobb_chemistry.babelm.common import ( 

9 _from_string_to_list, 

10 check_input_path, 

11 check_output_path, 

12 get_coordinates, 

13 get_input_format, 

14 get_output_format, 

15 get_ph, 

16) 

17 

18 

class BabelRemoveHydrogens(BiobbObject):
    """
    | biobb_chemistry BabelRemoveHydrogens
    | This class is a wrapper of the Open Babel tool.
    | Removes hydrogens from a given structure or trajectory. Open Babel is a chemical toolbox designed to speak the many languages of chemical data. It's an open, collaborative project allowing anyone to search, convert, analyze, or store data from molecular modeling, chemistry, solid-state materials, biochemistry, or related areas. `Visit the official page <http://openbabel.org/wiki/Main_Page>`_.

    Args:
        input_path (str): Path to the input file. File type: input. `Sample file <https://github.com/bioexcel/biobb_chemistry/raw/master/biobb_chemistry/test/data/babelm/babel.H.pdb>`_. Accepted formats: dat (edam:format_1637), ent (edam:format_1476), fa (edam:format_1929), fasta (edam:format_1929), gro (edam:format_2033), inp (edam:format_3878), log (edam:format_2030), mcif (edam:format_1477), mdl (edam:format_3815), mmcif (edam:format_1477), mol (edam:format_3815), mol2 (edam:format_3816), pdb (edam:format_1476), pdbqt (edam:format_1476), png (edam:format_3603), sdf (edam:format_3814), smi (edam:format_1196), smiles (edam:format_1196), txt (edam:format_2033), xml (edam:format_2332), xtc (edam:format_3875).
        output_path (str): Path to the output file. File type: output. `Sample file <https://github.com/bioexcel/biobb_chemistry/raw/master/biobb_chemistry/test/reference/babelm/ref_babel.nohydrogens.pdb>`_. Accepted formats: ent (edam:format_1476), fa (edam:format_1929), fasta (edam:format_1929), gro (edam:format_2033), inp (edam:format_3878), mcif (edam:format_1477), mdl (edam:format_3815), mmcif (edam:format_1477), mol (edam:format_3815), mol2 (edam:format_3816), pdb (edam:format_1476), pdbqt (edam:format_1476), png (edam:format_3603), sdf (edam:format_3814), smi (edam:format_1196), smiles (edam:format_1196), txt (edam:format_2033).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **input_format** (*str*) - (None) Format of input file. If not provided, input_path extension will be taken. Values: dat (Information represented in a data record), ent (Protein Data Bank format), fa (FASTA sequence format), fasta (FASTA sequence format), gro (GROMACS structure), inp (AMBER trajectory format), log (Events file), mcif (Entry format of PDB database in mmCIF format), mdl (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mmcif (Entry format of PDB database in mmCIF format), mol (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mol2 (Complete and portable representation of a SYBYL molecule), pdb (Protein Data Bank format), pdbqt (Protein Data Bank format with charges), png (File format for image compression), sdf (One of a family of chemical-data file formats developed by MDL Information Systems), smi (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), smiles (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), txt (Textual format), xml (eXtensible Markup Language), xtc (Portable binary format for trajectories produced by GROMACS package).
            * **output_format** (*str*) - (None) Format of output file. If not provided, output_path extension will be taken. Values: ent (Protein Data Bank format), fa (FASTA sequence format), fasta (FASTA sequence format), gro (GROMACS structure), inp (AMBER trajectory format), mcif (Entry format of PDB database in mmCIF format), mdl (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mmcif (Entry format of PDB database in mmCIF format), mol (file format for holding information about the atoms; bonds; connectivity and coordinates of a molecule), mol2 (Complete and portable representation of a SYBYL molecule), pdb (Protein Data Bank format), pdbqt (Protein Data Bank format with charges), png (File format for image compression), sdf (One of a family of chemical-data file formats developed by MDL Information Systems), smi (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), smiles (Chemical structure specified in Simplified Molecular Input Line Entry System line notation.), txt (Textual format), xtc (Portable binary format for trajectories produced by GROMACS package).
            * **fs_input** (*list*) - (None) Format-specific input options. Values: b (disable automatic bonding), d (input file is in dlg -AutoDock docking log- format).
            * **fs_output** (*list*) - (None) Format-specific output options. Values: b (enable automatic bonding), r (output as a rigid molecule), c (combine separate molecular pieces of input into a single rigid molecule), s (output as a flexible residue), p (preserve atom indices from input file), h (preserve hydrogens), n (preserve atom names).
            * **coordinates** (*int*) - (None) Type of coordinates: 2D or 3D. Values: 2 (2D coordinates), 3 (3D coordinates).
            * **effort** (*str*) - ("medium") Computational effort wanted to dedicate for the conformer generation coordinates calculations, only for 3D coordinates. Values: fastest (only generate coordinates, no force field or conformer search), fast (perform quick forcefield optimization), medium (forcefield optimization + fast conformer search), better (more optimization + fast conformer search), best (more optimization + significant conformer search).
            * **ph** (*float*) - (7.4) [0~14|0.1] Add hydrogens appropriate for pH.
            * **binary_path** (*str*) - ("obabel") Path to the obabel executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('informaticsmatters/obabel:latest') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_chemistry.babelm.babel_remove_hydrogens import babel_remove_hydrogens
            prop = {
                'input_format': 'pdb',
                'output_format': 'pdb',
                'coordinates': 3,
                'ph': 7.4
            }
            babel_remove_hydrogens(input_path='/path/to/myStructure.pdb',
                                    output_path='/path/to/newStructure.pdb',
                                    properties=prop)

    Info:
        * wrapped_software:
            * name: Open Babel
            * version: 2.4.1
            * license: GNU
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_path, output_path, properties=None, **kwargs) -> None:
        """Store the I/O paths and the tool-specific properties.

        Args:
            input_path: Path to the input file (see the class docstring).
            output_path: Path to the output file (see the class docstring).
            properties: Optional dictionary of tool parameters; a missing or
                falsy value is replaced by an empty dictionary.
            **kwargs: Extra keyword arguments. They are not used directly
                here, but they are part of the ``locals()`` snapshot below.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. Keep this call right here:
        # any local variable created before it would end up in the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_path": input_path},
            "out": {"output_path": output_path},
        }

        # Properties specific for BB
        # NOTE(review): unset formats/coordinates/pH default to "" here, while
        # the class docstring lists None (and 7.4 for pH) as defaults.
        self.input_format = properties.get("input_format", "")
        self.output_format = properties.get("output_format", "")
        self.fs_input = _from_string_to_list(properties.get("fs_input", None))
        self.fs_output = _from_string_to_list(properties.get("fs_output", None))
        self.coordinates = properties.get("coordinates", "")
        self.effort = properties.get("effort", "medium")
        self.ph = properties.get("ph", "")
        self.binary_path = properties.get("binary_path", "obabel")
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks all the input/output paths and parameters.

        Both paths stored in ``self.io_dict`` are replaced by the values
        returned from ``check_input_path`` / ``check_output_path``.

        Args:
            out_log: Logger handed to the path-checking helpers.
            err_log: Accepted for signature symmetry; not used in this method.
        """
        self.io_dict["in"]["input_path"] = check_input_path(
            self.io_dict["in"]["input_path"], out_log, self.__class__.__name__
        )
        self.io_dict["out"]["output_path"] = check_output_path(
            self.io_dict["out"]["output_path"], out_log, self.__class__.__name__
        )

    def create_cmd(self, container_io_dict, out_log, err_log):
        """Creates the command line instruction using the properties file settings.

        Args:
            container_io_dict: Dictionary providing ``["in"]["input_path"]``
                and ``["out"]["output_path"]``.
            out_log: Logger handed to the format/coordinates/pH helpers.
            err_log: Accepted for signature symmetry; not used in this method.

        Returns:
            list: Command line tokens, starting with ``self.binary_path``.
            Optional flags (coordinates, pH) are only present when the
            corresponding helper returns a non-empty value, so the list never
            contains empty-string arguments.
        """
        input_path = container_io_dict["in"]["input_path"]
        output_path = container_io_dict["out"]["output_path"]

        # Resolve the formats (input first, then output, as each helper
        # receives the logger).
        input_format = get_input_format(self.input_format, input_path, out_log)
        output_format = get_output_format(self.output_format, output_path, out_log)

        instructions_list = [
            # executable path
            self.binary_path,
            # generating input
            "-i" + input_format,
            input_path,
            # generating output
            "-o" + output_format,
            "-O" + output_path,
        ]

        # adding coordinates (skipped when no value is available)
        crd = get_coordinates(self.coordinates, out_log)
        if crd:
            instructions_list.append("--gen" + crd + "d")

        # Open Babel flag that deletes hydrogens: the purpose of this block.
        instructions_list.append("-d")

        # adding pH (skipped when no value is available)
        p = get_ph(self.ph, out_log)
        if p:
            instructions_list.append("-p " + p)

        # fs_input
        if self.fs_input is not None:
            instructions_list.extend("-a" + fsi for fsi in self.fs_input)

        # fs_output
        if self.fs_output is not None:
            instructions_list.extend("-x" + fso for fso in self.fs_output)

        # adding effort (only for 3D coordinates)
        if crd == "3":
            instructions_list.append("--" + self.effort)

        return instructions_list

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`BabelRemoveHydrogens <babelm.babel_remove_hydrogens.BabelRemoveHydrogens>` babelm.babel_remove_hydrogens.BabelRemoveHydrogens object.

        Returns:
            int: ``0`` when ``check_restart()`` reports that execution can be
            skipped, otherwise ``self.return_code`` after the run.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        # (check_restart, stage_files, run_biobb, copy_to_host and
        # remove_tmp_files are not defined in this class; presumably they are
        # inherited from BiobbObject.)
        if self.check_restart():
            return 0
        self.stage_files()

        # create command line instruction
        self.cmd = self.create_cmd(self.stage_io_dict, self.out_log, self.err_log)

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s)
        self.remove_tmp_files()

        # Re-check the arguments now that outputs should exist; a failure
        # here does not raise (raise_exception=False).
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

195 

196 

def babel_remove_hydrogens(
    input_path: str, output_path: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Create the :class:`BabelRemoveHydrogens <babelm.babel_remove_hydrogens.BabelRemoveHydrogens>` class and
    execute the :meth:`launch() <babelm.babel_remove_hydrogens.BabelRemoveHydrogens.launch>` method."""
    # The collected extra keyword arguments are forwarded as one keyword
    # literally named "kwargs" (not unpacked), so the constructor receives
    # exactly the four arguments of this function by name.
    building_block = BabelRemoveHydrogens(
        input_path=input_path,
        output_path=output_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()

203 

204 

# The functional wrapper exposes the same documentation as the class.
babel_remove_hydrogens.__doc__ = BabelRemoveHydrogens.__doc__

# Command line entry point, built by BabelRemoveHydrogens.get_main (not
# defined in this file; presumably inherited from BiobbObject) from the
# wrapper function and a short description string.
main = BabelRemoveHydrogens.get_main(
    babel_remove_hydrogens,
    "Removes hydrogen atoms to small molecules.",
)


if __name__ == "__main__":
    main()