Coverage for biobb_gromacs/gromacs/pdb2gmx.py: 75%

136 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-25 09:23 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Pdb2gmx class and the command line interface.""" 

4import os 

5import argparse 

6from typing import Optional 

7from biobb_common.generic.biobb_object import BiobbObject 

8from biobb_common.configuration import settings 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.tools.file_utils import launchlogger 

11from biobb_gromacs.gromacs.common import get_gromacs_version 

12 

13 

14class Pdb2gmx(BiobbObject): 

15 """ 

16 | biobb_gromacs Pdb2gmx 

17 | Wrapper class for the `GROMACS pdb2gmx <http://manual.gromacs.org/current/onlinehelp/gmx-pdb2gmx.html>`_ module. 

18 | The GROMACS pdb2gmx module, reads a .pdb (or .gro) file, reads some database files, adds hydrogens to the molecules and generates coordinates in GROMACS (GROMOS), or optionally .pdb, format and a topology in GROMACS format. These files can subsequently be processed to generate a run input file. 

19 

20 Args: 

21 input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs/egfr.pdb>`_. Accepted formats: pdb (edam:format_1476). 

22 output_gro_path (str): Path to the output GRO file. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.gro>`_. Accepted formats: gro (edam:format_2033). 

23 output_top_zip_path (str): Path the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.zip>`_. Accepted formats: zip (edam:format_3987). 

24 properties (dict - Python dictionary object containing the tool parameters, not input/output files): 

25 * **water_type** (*str*) - ("spce") Water molecule type. Values: spc, spce, tip3p, tip4p, tip5p, tips3p. 

26 * **force_field** (*str*) - ("amber99sb-ildn") Force field to be used during the conversion. Values: gromos45a3, charmm27, gromos53a6, amber96, amber99, gromos43a2, gromos54a7, gromos43a1, amberGS, gromos53a5, amber99sb, amber03, amber99sb-ildn, oplsaa, amber94, amber99sb-star-ildn-mut. 

27 * **ignh** (*bool*) - (False) Should pdb2gmx ignore the hidrogens in the original structure. 

28 * **lys** (*list*) - (None) Lysine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

29 * **arg** (*list*) - (None) Arginine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

30 * **asp** (*list*) - (None) Aspartic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

31 * **glu** (*list*) - (None) Glutamic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

32 * **gln** (*list*) - (None) Glutamine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated). 

33 * **his** (*list*) - (None) Histidine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain. Make sure residues are named HIS (0: HID, 1: HIE, 2: HIP, 3: HIS1). 

34 * **merge** (*bool*) - (False) Merge all chains into a single molecule. 

35 * **gmx_lib** (*str*) - (None) Path set GROMACS GMXLIB environment variable. 

36 * **binary_path** (*str*) - ("gmx") Path to the GROMACS executable binary. 

37 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

38 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

39 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

40 * **container_path** (*str*) - (None) Path to the binary executable of your container. 

41 * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier. 

42 * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container. 

43 * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container. 

44 * **container_user_id** (*str*) - (None) User number id to be mapped inside the container. 

45 * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell. 

46 

47 Examples: 

48 This is a use example of how to use the building block from Python:: 

49 

50 from biobb_gromacs.gromacs.pdb2gmx import pdb2gmx 

51 prop = { 'his': ['0 0 1 1 0 0 0', '1 1 0 1'] } 

52 pdb2gmx(input_pdb_path='/path/to/myStructure.pdb', 

53 output_gro_path='/path/to/newStructure.gro', 

54 output_top_zip_path='/path/to/newTopology.zip', 

55 properties=prop) 

56 

57 Info: 

58 * wrapped_software: 

59 * name: GROMACS Pdb2gmx 

60 * version: 2024.5 

61 * license: LGPL 2.1 

62 * ontology: 

63 * name: EDAM 

64 * schema: http://edamontology.org/EDAM.owl 

65 """ 

66 

67 def __init__(self, input_pdb_path: str, output_gro_path: str, output_top_zip_path: str, properties: Optional[dict] = None, 

68 **kwargs) -> None: 

69 properties = properties or {} 

70 

71 # Call parent class constructor 

72 super().__init__(properties) 

73 self.locals_var_dict = locals().copy() 

74 

75 # Input/Output files 

76 self.io_dict = { 

77 "in": {"input_pdb_path": input_pdb_path}, 

78 "out": {"output_gro_path": output_gro_path, "output_top_zip_path": output_top_zip_path} 

79 } 

80 

81 # Properties specific for BB 

82 self.internal_top_name = properties.get('internal_top_name', 'p2g.top') # Excluded from documentation for simplicity 

83 self.internal_itp_name = properties.get('internal_itp_name', 'posre.itp') # Excluded from documentation for simplicity 

84 self.water_type = properties.get('water_type', 'spce') 

85 self.force_field = properties.get('force_field', 'amber99sb-ildn') 

86 self.ignh = properties.get('ignh', False) 

87 self.lys = properties.get('lys', None) 

88 self.arg = properties.get('arg', None) 

89 self.asp = properties.get('asp', None) 

90 self.glu = properties.get('glu', None) 

91 self.gln = properties.get('gln', None) 

92 self.his = properties.get('his', None) 

93 self.merge = properties.get('merge', False) 

94 

95 # Properties common in all GROMACS BB 

96 self.gmx_lib = properties.get('gmx_lib', None) 

97 self.binary_path: str = properties.get('binary_path', 'gmx') 

98 self.gmx_nobackup = properties.get('gmx_nobackup', True) 

99 self.gmx_nocopyright = properties.get('gmx_nocopyright', True) 

100 if self.gmx_nobackup: 

101 self.binary_path += ' -nobackup' 

102 if self.gmx_nocopyright: 

103 self.binary_path += ' -nocopyright' 

104 if not self.container_path: 

105 self.gmx_version = get_gromacs_version(self.binary_path) 

106 

107 # Support string for single chain 

108 if isinstance(self.lys, str): 

109 self.lys = [self.lys] 

110 if isinstance(self.arg, str): 

111 self.arg = [self.arg] 

112 if isinstance(self.asp, str): 

113 self.asp = [self.asp] 

114 if isinstance(self.glu, str): 

115 self.glu = [self.glu] 

116 if isinstance(self.gln, str): 

117 self.gln = [self.gln] 

118 if isinstance(self.his, str): 

119 self.his = [self.his] 

120 

121 # Make sure all have the same length 

122 self.check_lengths(self.lys, self.arg, self.asp, self.glu, self.gln, self.his) 

123 

124 # Check the properties 

125 self.check_properties(properties) 

126 self.check_arguments() 

127 

128 @launchlogger 

129 def launch(self) -> int: 

130 """Execute the :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` object.""" 

131 

132 # Setup Biobb 

133 if self.check_restart(): 

134 return 0 

135 

136 # Create stdin file if needed 

137 stdin_content = '' 

138 num_chains = self.find_length(self.lys, self.arg, self.asp, self.glu, self.gln, self.his) 

139 for i in range(num_chains): 

140 if self.lys is not None: 

141 stdin_content += f' {self.lys[i]}' 

142 if self.arg is not None: 

143 stdin_content += f' {self.arg[i]}' 

144 if self.asp is not None: 

145 stdin_content += f' {self.asp[i]}' 

146 if self.glu is not None: 

147 stdin_content += f' {self.glu[i]}' 

148 if self.gln is not None: 

149 stdin_content += f' {self.gln[i]}' 

150 if self.his is not None: 

151 stdin_content += f' {self.his[i]}' 

152 

153 if stdin_content: 

154 self.io_dict['in']['stdin_file_path'] = fu.create_stdin_file(stdin_content) 

155 self.stage_files() 

156 

157 internal_top_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_top_name) 

158 internal_itp_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_itp_name) 

159 

160 # Create command line 

161 self.cmd = [self.binary_path, "pdb2gmx", 

162 "-f", self.stage_io_dict["in"]["input_pdb_path"], 

163 "-o", self.stage_io_dict["out"]["output_gro_path"], 

164 "-p", internal_top_name, 

165 "-water", self.water_type, 

166 "-ff", self.force_field, 

167 "-i", internal_itp_name] 

168 

169 if self.ignh: 

170 self.cmd.append("-ignh") 

171 if self.merge: 

172 self.cmd.append("-merge") 

173 self.cmd.append("all") 

174 if self.lys: 

175 self.cmd.append("-lys") 

176 if self.arg: 

177 self.cmd.append("-arg") 

178 if self.asp: 

179 self.cmd.append("-asp") 

180 if self.glu: 

181 self.cmd.append("-glu") 

182 if self.gln: 

183 self.cmd.append("-gln") 

184 if self.his: 

185 self.cmd.append("-his") 

186 

187 if stdin_content: 

188 self.cmd.append('<') 

189 self.cmd.append(self.stage_io_dict["in"]["stdin_file_path"]) 

190 

191 if self.gmx_lib: 

192 self.env_vars_dict['GMXLIB'] = self.gmx_lib 

193 

194 # Run Biobb block 

195 self.run_biobb() 

196 

197 # Copy files to host 

198 self.copy_to_host() 

199 

200 if self.container_path: 

201 internal_top_name = os.path.join(self.stage_io_dict.get("unique_dir", ""), internal_top_name) 

202 

203 # zip topology 

204 fu.log('Compressing topology to: %s' % self.io_dict["out"]["output_top_zip_path"], self.out_log, 

205 self.global_log) 

206 fu.zip_top(zip_file=self.io_dict["out"]["output_top_zip_path"], top_file=internal_top_name, out_log=self.out_log, remove_original_files=self.remove_tmp) 

207 

208 # Remove temporal files 

209 self.tmp_files.extend([ 

210 self.internal_top_name, 

211 self.internal_itp_name, 

212 self.io_dict['in'].get("stdin_file_path", "") 

213 ]) 

214 self.remove_tmp_files() 

215 

216 self.check_arguments(output_files_created=True, raise_exception=False) 

217 return self.return_code 

218 

219 def check_lengths(self, *lists): 

220 """ 

221 Make sure all lists are the same length 

222 """ 

223 # Find length of each list 

224 lengths = [len(lst) for lst in lists if lst is not None] 

225 

226 # Check if all lengths are the same 

227 all_equal = True 

228 if len(lengths) > 0: 

229 all_equal = len(set(lengths)) == 1 

230 

231 if not all_equal: 

232 raise ValueError(f"""All protonation arrays (lys, arg, asp, glu, gln, his) must have the same length 

233 (one string per chain and empty string if residue is not present in that chain). Found lengths: {lengths}""") 

234 

235 def find_length(self, *lists) -> int: 

236 """ 

237 Find length of the first list 

238 """ 

239 # Find length of each list 

240 lengths = [len(lst) for lst in lists if lst is not None] 

241 

242 # Return the length of the first list, if any 

243 if len(lengths) > 0: 

244 return lengths[0] 

245 else: 

246 return 0 

247 

248 

249def pdb2gmx(input_pdb_path: str, output_gro_path: str, output_top_zip_path: str, 

250 properties: Optional[dict] = None, **kwargs) -> int: 

251 """Create :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` class and 

252 execute the :meth:`launch() <gromacs.pdb2gmx.Pdb2gmx.launch>` method.""" 

253 

254 return Pdb2gmx(input_pdb_path=input_pdb_path, output_gro_path=output_gro_path, 

255 output_top_zip_path=output_top_zip_path, properties=properties, 

256 **kwargs).launch() 

257 

258 

259pdb2gmx.__doc__ = Pdb2gmx.__doc__ 

260 

261 

262def main(): 

263 """Command line execution of this building block. Please check the command line documentation.""" 

264 parser = argparse.ArgumentParser(description="Wrapper of the GROMACS pdb2gmx module.", 

265 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)) 

266 parser.add_argument('-c', '--config', required=False, help="This file can be a YAML file, JSON file or JSON string") 

267 

268 # Specific args of each building block 

269 required_args = parser.add_argument_group('required arguments') 

270 required_args.add_argument('--input_pdb_path', required=True) 

271 required_args.add_argument('--output_gro_path', required=True) 

272 required_args.add_argument('--output_top_zip_path', required=True) 

273 

274 args = parser.parse_args() 

275 config = args.config if args.config else None 

276 properties = settings.ConfReader(config=config).get_prop_dic() 

277 

278 # Specific call of each building block 

279 pdb2gmx(input_pdb_path=args.input_pdb_path, output_gro_path=args.output_gro_path, 

280 output_top_zip_path=args.output_top_zip_path, properties=properties) 

281 

282 

283if __name__ == '__main__': 

284 main()