Coverage for biobb_gromacs / gromacs / pdb2gmx.py: 81%

124 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-05 08:26 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Pdb2gmx class and the command line interface.""" 

4import os 

5from typing import Optional 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.tools.file_utils import launchlogger 

9from biobb_gromacs.gromacs.common import get_gromacs_version 

10 

11 

class Pdb2gmx(BiobbObject):
    """
    | biobb_gromacs Pdb2gmx
    | Wrapper class for the `GROMACS pdb2gmx <http://manual.gromacs.org/current/onlinehelp/gmx-pdb2gmx.html>`_ module.
    | The GROMACS pdb2gmx module, reads a .pdb (or .gro) file, reads some database files, adds hydrogens to the molecules and generates coordinates in GROMACS (GROMOS), or optionally .pdb, format and a topology in GROMACS format. These files can subsequently be processed to generate a run input file.

    Args:
        input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs/egfr.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_gro_path (str): Path to the output GRO file. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.gro>`_. Accepted formats: gro (edam:format_2033).
        output_top_zip_path (str): Path to the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **water_type** (*str*) - ("spce") Water molecule type. Values: spc, spce, tip3p, tip4p, tip5p, tips3p.
            * **force_field** (*str*) - ("amber99sb-ildn") Force field to be used during the conversion. Values: gromos45a3, charmm27, gromos53a6, amber96, amber99, gromos43a2, gromos54a7, gromos43a1, amberGS, gromos53a5, amber99sb, amber03, amber99sb-ildn, oplsaa, amber94, amber99sb-star-ildn-mut.
            * **ignh** (*bool*) - (False) Should pdb2gmx ignore the hydrogens in the original structure.
            * **lys** (*list*) - (None) Lysine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **arg** (*list*) - (None) Arginine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **asp** (*list*) - (None) Aspartic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **glu** (*list*) - (None) Glutamic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **gln** (*list*) - (None) Glutamine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **his** (*list*) - (None) Histidine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain. Make sure residues are named HIS (0: HID, 1: HIE, 2: HIP, 3: HIS1).
            * **merge** (*bool*) - (False) Merge all chains into a single molecule.
            * **gmx_lib** (*str*) - (None) Path set GROMACS GMXLIB environment variable.
            * **binary_path** (*str*) - ("gmx") Path to the GROMACS executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_gromacs.gromacs.pdb2gmx import pdb2gmx
            prop = { 'his': ['0 0 1 1 0 0 0', '1 1 0 1'] }
            pdb2gmx(input_pdb_path='/path/to/myStructure.pdb',
                    output_gro_path='/path/to/newStructure.gro',
                    output_top_zip_path='/path/to/newTopology.zip',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: GROMACS Pdb2gmx
            * version: 2025.2
            * license: LGPL 2.1
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_pdb_path: str, output_gro_path: str, output_top_zip_path: str, properties: Optional[dict] = None,
                 **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any other local is bound.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_gro_path": output_gro_path, "output_top_zip_path": output_top_zip_path}
        }

        # Properties specific for BB
        self.internal_top_name = properties.get('internal_top_name', 'p2g.top')  # Excluded from documentation for simplicity
        self.internal_itp_name = properties.get('internal_itp_name', 'posre.itp')  # Excluded from documentation for simplicity
        self.water_type = properties.get('water_type', 'spce')
        self.force_field = properties.get('force_field', 'amber99sb-ildn')
        self.ignh = properties.get('ignh', False)
        # Protonation states: one string per chain. A bare string is accepted as
        # shorthand for a single-chain structure and wrapped in a list.
        self.lys = self._as_chain_list(properties.get('lys', None))
        self.arg = self._as_chain_list(properties.get('arg', None))
        self.asp = self._as_chain_list(properties.get('asp', None))
        self.glu = self._as_chain_list(properties.get('glu', None))
        self.gln = self._as_chain_list(properties.get('gln', None))
        self.his = self._as_chain_list(properties.get('his', None))
        self.merge = properties.get('merge', False)

        # Properties common in all GROMACS BB
        self.gmx_lib = properties.get('gmx_lib', None)
        self.binary_path: str = properties.get('binary_path', 'gmx')
        self.gmx_nobackup = properties.get('gmx_nobackup', True)
        self.gmx_nocopyright = properties.get('gmx_nocopyright', True)
        if self.gmx_nobackup:
            self.binary_path += ' -nobackup'
        if self.gmx_nocopyright:
            self.binary_path += ' -nocopyright'
        # The version is only queried when no container is configured.
        if not self.container_path:
            self.gmx_version = get_gromacs_version(self.binary_path)

        # Make sure all have the same length
        self.check_lengths(self.lys, self.arg, self.asp, self.glu, self.gln, self.his)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _as_chain_list(states):
        """Wrap a single string in a one-item list; return lists and None unchanged."""
        return [states] if isinstance(states, str) else states

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` object.

        Returns:
            int: ``0`` when the restart check short-circuits the run, otherwise ``self.return_code``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Create stdin file if needed. The answers are written chain by chain,
        # and within each chain in the order lys, arg, asp, glu, gln, his,
        # skipping the residues for which no states were supplied.
        protonation = (self.lys, self.arg, self.asp, self.glu, self.gln, self.his)
        num_chains = self.find_length(*protonation)
        stdin_content = ''.join(
            f' {states[chain]}'
            for chain in range(num_chains)
            for states in protonation
            if states is not None
        )

        if stdin_content:
            self.io_dict['in']['stdin_file_path'] = fu.create_stdin_file(stdin_content)
        self.stage_files()

        internal_top_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_top_name)
        internal_itp_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_itp_name)

        # Create command line
        self.cmd = [self.binary_path, "pdb2gmx",
                    "-f", self.stage_io_dict["in"]["input_pdb_path"],
                    "-o", self.stage_io_dict["out"]["output_gro_path"],
                    "-p", internal_top_name,
                    "-water", self.water_type,
                    "-ff", self.force_field,
                    "-i", internal_itp_name]

        if self.ignh:
            self.cmd.append("-ignh")
        if self.merge:
            self.cmd.append("-merge")
            self.cmd.append("all")

        # One flag per residue type with a non-empty list of states.
        residue_flags = ("-lys", "-arg", "-asp", "-glu", "-gln", "-his")
        for flag, states in zip(residue_flags, protonation):
            if states:
                self.cmd.append(flag)

        # The prepared answers are handed to gmx through a stdin redirection.
        if stdin_content:
            self.cmd.append('<')
            self.cmd.append(self.stage_io_dict["in"]["stdin_file_path"])

        if self.gmx_lib:
            self.env_vars_dict['GMXLIB'] = self.gmx_lib

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # When running in a container the topology is looked up inside the staging directory.
        top_path = internal_top_name
        if self.container_path:
            top_path = os.path.join(self.stage_io_dict.get("unique_dir", ""), internal_top_name)

        # zip topology
        fu.log('Compressing topology to: %s' % self.io_dict["out"]["output_top_zip_path"], self.out_log,
               self.global_log)
        fu.zip_top(zip_file=self.io_dict["out"]["output_top_zip_path"], top_file=top_path, out_log=self.out_log, remove_original_files=self.remove_tmp)

        # Remove temporal files. Register the file names actually handed to gmx
        # (they carry the prefix/step decoration added by fu.create_name) rather
        # than the undecorated templates, and only list the stdin file if one
        # was created.
        self.tmp_files.extend([top_path, internal_itp_name])
        stdin_file_path = self.io_dict['in'].get("stdin_file_path")
        if stdin_file_path:
            self.tmp_files.append(stdin_file_path)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)
        return self.return_code

    def check_lengths(self, *lists):
        """
        Make sure all lists are the same length

        ``None`` entries are ignored; with no list supplied the check passes.

        Raises:
            ValueError: If the supplied lists do not all have the same length.
        """
        lengths = [len(lst) for lst in lists if lst is not None]

        # More than one distinct length means at least two lists disagree.
        if len(set(lengths)) > 1:
            raise ValueError(
                "All protonation arrays (lys, arg, asp, glu, gln, his) must have the same length "
                "(one string per chain and empty string if residue is not present in that chain). "
                f"Found lengths: {lengths}"
            )

    def find_length(self, *lists) -> int:
        """
        Find length of the first list

        Returns:
            int: Length of the first argument that is not ``None``, or 0 when every argument is ``None``.
        """
        return next((len(lst) for lst in lists if lst is not None), 0)

245 

246 

def pdb2gmx(input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` class and
    execute the :meth:`launch() <gromacs.pdb2gmx.Pdb2gmx.launch>` method."""
    # Extra keyword arguments are forwarded as one dict under the literal
    # keyword ``kwargs`` (not unpacked), so the constructor receives them
    # nested inside its own ``**kwargs``.
    building_block = Pdb2gmx(
        input_pdb_path=input_pdb_path,
        output_gro_path=output_gro_path,
        output_top_zip_path=output_top_zip_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()

252 

253 

# The function-style entry point shares the class documentation.
pdb2gmx.__doc__ = Pdb2gmx.__doc__

# Command line entry point, built by Pdb2gmx.get_main around the pdb2gmx function.
main = Pdb2gmx.get_main(pdb2gmx, "Wrapper for the GROMACS pdb2gmx module.")


if __name__ == "__main__":
    main()