Coverage for biobb_gromacs/gromacs/pdb2gmx.py: 81%

126 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-05-28 06:50 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Pdb2gmx class and the command line interface.""" 

4from typing import Optional 

5from pathlib import Path, PurePath 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.tools.file_utils import launchlogger 

9from biobb_gromacs.gromacs.common import get_gromacs_version 

10 

11 

12class Pdb2gmx(BiobbObject): 

13 """ 

14 | biobb_gromacs Pdb2gmx 

15 | Wrapper class for the `GROMACS pdb2gmx <http://manual.gromacs.org/current/onlinehelp/gmx-pdb2gmx.html>`_ module. 

16 | The GROMACS pdb2gmx module reads a .pdb (or .gro) file, reads some database files, adds hydrogens to the molecules and generates coordinates in GROMACS (GROMOS), or optionally .pdb, format and a topology in GROMACS format. These files can subsequently be processed to generate a run input file.

17 

18 Args: 

19 input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs/egfr.pdb>`_. Accepted formats: pdb (edam:format_1476). 

20 output_gro_path (str): Path to the output GRO file. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.gro>`_. Accepted formats: gro (edam:format_2033). 

21 output_top_zip_path (str): Path to the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.zip>`_. Accepted formats: zip (edam:format_3987).

22 properties (dict - Python dictionary object containing the tool parameters, not input/output files): 

23 * **water_type** (*str*) - ("spce") Water molecule type. Values: spc, spce, tip3p, tip4p, tip5p, tips3p. 

24 * **force_field** (*str*) - ("amber99sb-ildn") Force field to be used during the conversion. Values: gromos45a3, charmm27, gromos53a6, amber96, amber99, gromos43a2, gromos54a7, gromos43a1, amberGS, gromos53a5, amber99sb, amber03, amber99sb-ildn, oplsaa, amber94, amber99sb-star-ildn-mut. 

25 * **ignh** (*bool*) - (False) Should pdb2gmx ignore the hydrogens in the original structure. 

26 * **lys** (*list*) - (None) Lysine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

27 * **arg** (*list*) - (None) Arginine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

28 * **asp** (*list*) - (None) Aspartic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

29 * **glu** (*list*) - (None) Glutamic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

30 * **gln** (*list*) - (None) Glutamine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

31 * **his** (*list*) - (None) Histidine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain. Make sure residues are named HIS (0: HID, 1: HIE, 2: HIP, 3: HIS1). 

32 * **merge** (*bool*) - (False) Merge all chains into a single molecule. 

33 * **gmx_lib** (*str*) - (None) Path to set as the GROMACS GMXLIB environment variable.

34 * **binary_path** (*str*) - ("gmx") Path to the GROMACS executable binary. 

35 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporary files.

36 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

37 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

38 * **container_path** (*str*) - (None) Path to the binary executable of your container. 

39 * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier. 

40 * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container. 

41 * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container. 

42 * **container_user_id** (*str*) - (None) User number id to be mapped inside the container. 

43 * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell. 

44 

45 Examples: 

46 This is a use example of how to use the building block from Python:: 

47 

48 from biobb_gromacs.gromacs.pdb2gmx import pdb2gmx 

49 prop = { 'his': ['0 0 1 1 0 0 0', '1 1 0 1'] } 

50 pdb2gmx(input_pdb_path='/path/to/myStructure.pdb', 

51 output_gro_path='/path/to/newStructure.gro', 

52 output_top_zip_path='/path/to/newTopology.zip', 

53 properties=prop) 

54 

55 Info: 

56 * wrapped_software: 

57 * name: GROMACS Pdb2gmx 

58 * version: 2025.2 

59 * license: LGPL 2.1 

60 * ontology: 

61 * name: EDAM 

62 * schema: http://edamontology.org/EDAM.owl 

63 """ 

64 

def __init__(self, input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
             properties: Optional[dict] = None, **kwargs) -> None:
    """Store the file paths and read the tool settings from ``properties``.

    Args:
        input_pdb_path: Path registered as the ``input_pdb_path`` input file.
        output_gro_path: Path registered as the ``output_gro_path`` output file.
        output_top_zip_path: Path registered as the ``output_top_zip_path`` output file.
        properties: Tool parameters; an empty dict is used when omitted.
        **kwargs: Only captured in ``locals_var_dict``; not read directly here.

    Raises:
        ValueError: Raised by ``check_lengths`` when the protonation lists
            differ in length.
    """
    properties = properties or {}

    # Call parent class constructor
    super().__init__(properties)
    # Snapshot taken before any further local name is created in this method
    self.locals_var_dict = locals().copy()

    # Input/Output files
    self.io_dict = {
        "in": {"input_pdb_path": input_pdb_path},
        "out": {
            "output_gro_path": output_gro_path,
            "output_top_zip_path": output_top_zip_path,
        },
    }

    # Properties specific for BB
    # The two internal names are excluded from the class documentation for simplicity
    self.internal_top_name = properties.get('internal_top_name', 'p2g.top')
    self.internal_itp_name = properties.get('internal_itp_name', 'posre.itp')
    self.water_type = properties.get('water_type', 'spce')
    self.force_field = properties.get('force_field', 'amber99sb-ildn')
    self.ignh = properties.get('ignh', False)
    residue_keys = ('lys', 'arg', 'asp', 'glu', 'gln', 'his')
    for key in residue_keys:
        setattr(self, key, properties.get(key, None))
    self.merge = properties.get('merge', False)

    # Properties common in all GROMACS BB
    self.gmx_lib = properties.get('gmx_lib', None)
    self.binary_path: str = properties.get('binary_path', 'gmx')
    self.gmx_nobackup = properties.get('gmx_nobackup', True)
    self.gmx_nocopyright = properties.get('gmx_nocopyright', True)
    global_flags = (
        (self.gmx_nobackup, ' -nobackup'),
        (self.gmx_nocopyright, ' -nocopyright'),
    )
    for enabled, flag in global_flags:
        if enabled:
            self.binary_path += flag
    # self.container_path is not assigned in this method; presumably the
    # parent constructor sets it from the properties -- verify in BiobbObject.
    if not self.container_path:
        self.gmx_version = get_gromacs_version(self.binary_path)

    # Support string for single chain: a bare string becomes a one-item list
    for key in residue_keys:
        states = getattr(self, key)
        if isinstance(states, str):
            setattr(self, key, [states])

    # Make sure all have the same length
    self.check_lengths(*[getattr(self, key) for key in residue_keys])

    # Check the properties
    self.check_properties(properties)
    self.check_arguments()

125 

@launchlogger
def launch(self) -> int:
    """Execute the :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` object.

    Builds the optional stdin answers for the protonation prompts, stages
    the files, assembles and runs the ``pdb2gmx`` command line, zips the
    resulting topology and cleans up.

    Returns:
        int: ``0`` when ``check_restart()`` is true, otherwise ``self.return_code``.
    """
    # Setup Biobb
    if self.check_restart():
        return 0

    # Command line flag paired with the per-chain answers, in prompt order
    protonation = (
        ("-lys", self.lys),
        ("-arg", self.arg),
        ("-asp", self.asp),
        ("-glu", self.glu),
        ("-gln", self.gln),
        ("-his", self.his),
    )
    selections = [states for _, states in protonation]

    # Create stdin file if needed: chain by chain, residue type by residue type
    num_chains = self.find_length(*selections)
    stdin_content = ''.join(
        f' {states[chain]}'
        for chain in range(num_chains)
        for states in selections
        if states is not None
    )

    if stdin_content:
        self.io_dict['in']['stdin_file_path'] = fu.create_stdin_file(stdin_content)
    self.stage_files()

    top_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_top_name)
    itp_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_itp_name)

    if self.container_path:
        working_dir = self.container_volume_path or "/data"
    else:
        working_dir = self.stage_io_dict.get('unique_dir', '')

    # Create command line (only file names are used because of the leading "cd")
    pdb_name = PurePath(self.stage_io_dict["in"]["input_pdb_path"]).name
    gro_name = PurePath(self.stage_io_dict["out"]["output_gro_path"]).name
    self.cmd = [
        "cd", working_dir, ";",
        self.binary_path, "pdb2gmx",
        "-f", pdb_name,
        "-o", gro_name,
        "-p", top_name,
        "-water", self.water_type,
        "-ff", self.force_field,
        "-i", itp_name,
    ]

    if self.ignh:
        self.cmd.append("-ignh")
    if self.merge:
        self.cmd.extend(["-merge", "all"])
    # A flag is added only for residue types with a non-empty selection
    self.cmd.extend(flag for flag, states in protonation if states)

    if stdin_content:
        self.cmd.extend(['<', PurePath(self.stage_io_dict["in"]["stdin_file_path"]).name])

    if self.gmx_lib:
        self.env_vars_dict['GMXLIB'] = self.gmx_lib

    # Run Biobb block
    self.run_biobb()

    # Copy files to host
    self.copy_to_host()

    top_file_path = str(Path(self.stage_io_dict.get("unique_dir", "")) / top_name)

    # zip topology
    fu.log('Compressing topology to: %s' % self.io_dict["out"]["output_top_zip_path"],
           self.out_log, self.global_log)
    fu.zip_top(
        zip_file=self.io_dict["out"]["output_top_zip_path"],
        top_file=top_file_path,
        out_log=self.out_log,
        remove_original_files=self.remove_tmp,
    )

    # Remove temporal files
    self.tmp_files.extend([
        self.internal_top_name,
        self.internal_itp_name,
        self.io_dict['in'].get("stdin_file_path", ""),
    ])
    self.remove_tmp_files()

    self.check_arguments(output_files_created=True, raise_exception=False)
    return self.return_code

221 

def check_lengths(self, *lists) -> None:
    """Ensure every provided protonation list has the same number of items.

    Args:
        *lists: Protonation lists (one item per chain). ``None`` entries
            are ignored.

    Raises:
        ValueError: If the lists that are not ``None`` differ in length.
    """
    # Find length of each list that was actually provided
    lengths = [len(lst) for lst in lists if lst is not None]

    # Zero or one distinct length means there is nothing to disagree about
    if len(set(lengths)) > 1:
        # Adjacent literals keep the message on one line; a triple-quoted
        # string would leak a newline and source indentation into it.
        raise ValueError(
            "All protonation arrays (lys, arg, asp, glu, gln, his) must have the same length "
            "(one string per chain and empty string if residue is not present in that chain). "
            f"Found lengths: {lengths}"
        )

237 

def find_length(self, *lists) -> int:
    """Return the length of the first list that is not ``None``.

    Args:
        *lists: Candidate lists; ``None`` entries are skipped.

    Returns:
        int: Length of the first list that is not ``None``, or ``0`` when
        there is none.
    """
    # Every provided list is sized up front, not just the first one
    sizes = [len(candidate) for candidate in lists if candidate is not None]
    return sizes[0] if sizes else 0

250 

251 

def pdb2gmx(input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` class and
    execute the :meth:`launch() <gromacs.pdb2gmx.Pdb2gmx.launch>` method."""
    # Extra keyword arguments are handed over as one ``kwargs`` entry, not
    # unpacked: this is what expanding ``locals()`` yields for this signature.
    block = Pdb2gmx(
        input_pdb_path=input_pdb_path,
        output_gro_path=output_gro_path,
        output_top_zip_path=output_top_zip_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

257 

258 

# Give the functional wrapper the same documentation as the class it wraps
pdb2gmx.__doc__ = Pdb2gmx.__doc__

# Entry point returned by Pdb2gmx.get_main (not defined in this file);
# presumably the command line interface -- confirm in BiobbObject.
main = Pdb2gmx.get_main(pdb2gmx, "Wrapper for the GROMACS pdb2gmx module.")


if __name__ == "__main__":
    main()