Coverage for biobb_gromacs/gromacs/pdb2gmx.py: 81%
126 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-05-28 06:50 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-05-28 06:50 +0000
1#!/usr/bin/env python3
3"""Module containing the Pdb2gmx class and the command line interface."""
4from typing import Optional
5from pathlib import Path, PurePath
6from biobb_common.generic.biobb_object import BiobbObject
7from biobb_common.tools import file_utils as fu
8from biobb_common.tools.file_utils import launchlogger
9from biobb_gromacs.gromacs.common import get_gromacs_version
class Pdb2gmx(BiobbObject):
    """
    | biobb_gromacs Pdb2gmx
    | Wrapper class for the `GROMACS pdb2gmx <http://manual.gromacs.org/current/onlinehelp/gmx-pdb2gmx.html>`_ module.
    | The GROMACS pdb2gmx module, reads a .pdb (or .gro) file, reads some database files, adds hydrogens to the molecules and generates coordinates in GROMACS (GROMOS), or optionally .pdb, format and a topology in GROMACS format. These files can subsequently be processed to generate a run input file.

    Args:
        input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs/egfr.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_gro_path (str): Path to the output GRO file. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.gro>`_. Accepted formats: gro (edam:format_2033).
        output_top_zip_path (str): Path the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **water_type** (*str*) - ("spce") Water molecule type. Values: spc, spce, tip3p, tip4p, tip5p, tips3p.
            * **force_field** (*str*) - ("amber99sb-ildn") Force field to be used during the conversion. Values: gromos45a3, charmm27, gromos53a6, amber96, amber99, gromos43a2, gromos54a7, gromos43a1, amberGS, gromos53a5, amber99sb, amber03, amber99sb-ildn, oplsaa, amber94, amber99sb-star-ildn-mut.
            * **ignh** (*bool*) - (False) Should pdb2gmx ignore the hydrogens in the original structure.
            * **lys** (*list*) - (None) Lysine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **arg** (*list*) - (None) Arginine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **asp** (*list*) - (None) Aspartic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **glu** (*list*) - (None) Glutamic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **gln** (*list*) - (None) Glutamine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **his** (*list*) - (None) Histidine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain. Make sure residues are named HIS (0: HID, 1: HIE, 2: HIP, 3: HIS1).
            * **merge** (*bool*) - (False) Merge all chains into a single molecule.
            * **gmx_lib** (*str*) - (None) Path set GROMACS GMXLIB environment variable.
            * **binary_path** (*str*) - ("gmx") Path to the GROMACS executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_gromacs.gromacs.pdb2gmx import pdb2gmx
            prop = { 'his': ['0 0 1 1 0 0 0', '1 1 0 1'] }
            pdb2gmx(input_pdb_path='/path/to/myStructure.pdb',
                    output_gro_path='/path/to/newStructure.gro',
                    output_top_zip_path='/path/to/newTopology.zip',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: GROMACS Pdb2gmx
            * version: 2025.2
            * license: LGPL 2.1
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
                 properties: Optional[dict] = None, **kwargs) -> None:
        """Store the I/O paths and tool properties, then validate them.

        Raises:
            ValueError: If the protonation lists that were provided do not
                all have the same number of items.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any further
        # local name is bound so that only the call arguments are captured.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {
                "output_gro_path": output_gro_path,
                "output_top_zip_path": output_top_zip_path,
            },
        }

        # Properties specific for BB
        # The two internal file names are excluded from documentation for simplicity
        self.internal_top_name = properties.get('internal_top_name', 'p2g.top')
        self.internal_itp_name = properties.get('internal_itp_name', 'posre.itp')
        self.water_type = properties.get('water_type', 'spce')
        self.force_field = properties.get('force_field', 'amber99sb-ildn')
        self.ignh = properties.get('ignh', False)
        self.lys = properties.get('lys')
        self.arg = properties.get('arg')
        self.asp = properties.get('asp')
        self.glu = properties.get('glu')
        self.gln = properties.get('gln')
        self.his = properties.get('his')
        self.merge = properties.get('merge', False)

        # Properties common in all GROMACS BB
        self.gmx_lib = properties.get('gmx_lib')
        self.binary_path: str = properties.get('binary_path', 'gmx')
        self.gmx_nobackup = properties.get('gmx_nobackup', True)
        self.gmx_nocopyright = properties.get('gmx_nocopyright', True)
        # Each enabled switch is appended to the binary string itself
        for enabled, suffix in ((self.gmx_nobackup, ' -nobackup'),
                                (self.gmx_nocopyright, ' -nocopyright')):
            if enabled:
                self.binary_path += suffix
        # The GROMACS version is only queried when no container is configured
        if not self.container_path:
            self.gmx_version = get_gromacs_version(self.binary_path)

        # Support string for single chain: wrap a bare string in a list
        for residue in ('lys', 'arg', 'asp', 'glu', 'gln', 'his'):
            states = getattr(self, residue)
            if isinstance(states, str):
                setattr(self, residue, [states])

        # Make sure all have the same length
        self.check_lengths(self.lys, self.arg, self.asp, self.glu, self.gln, self.his)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` object.

        Returns:
            int: ``0`` when ``check_restart()`` is truthy, otherwise
            ``self.return_code`` after the block has been run.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Create stdin file if needed: one answer per provided residue type,
        # chain by chain, each answer preceded by a single space
        protonation = (self.lys, self.arg, self.asp, self.glu, self.gln, self.his)
        num_chains = self.find_length(*protonation)
        provided = [states for states in protonation if states is not None]
        stdin_content = ''.join(
            f' {states[chain]}'
            for chain in range(num_chains)
            for states in provided
        )

        if stdin_content:
            self.io_dict['in']['stdin_file_path'] = fu.create_stdin_file(stdin_content)
        self.stage_files()

        internal_top_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_top_name)
        internal_itp_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_itp_name)

        if self.container_path:
            working_dir = self.container_volume_path or "/data"
        else:
            working_dir = self.stage_io_dict.get('unique_dir', '')

        # Create command line
        staged_pdb = PurePath(self.stage_io_dict["in"]["input_pdb_path"]).name
        staged_gro = PurePath(self.stage_io_dict["out"]["output_gro_path"]).name
        self.cmd = [
            "cd", working_dir, ";",
            self.binary_path, "pdb2gmx",
            "-f", staged_pdb,
            "-o", staged_gro,
            "-p", internal_top_name,
            "-water", self.water_type,
            "-ff", self.force_field,
            "-i", internal_itp_name,
        ]

        if self.ignh:
            self.cmd.append("-ignh")
        if self.merge:
            self.cmd.extend(["-merge", "all"])
        # A flag is added only for residue types given as a non-empty value
        residue_flags = (
            ("-lys", self.lys),
            ("-arg", self.arg),
            ("-asp", self.asp),
            ("-glu", self.glu),
            ("-gln", self.gln),
            ("-his", self.his),
        )
        for flag, states in residue_flags:
            if states:
                self.cmd.append(flag)

        # Feed the prepared answers to the command through input redirection
        if stdin_content:
            self.cmd.extend(['<', PurePath(self.stage_io_dict["in"]["stdin_file_path"]).name])

        if self.gmx_lib:
            self.env_vars_dict['GMXLIB'] = self.gmx_lib

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        top_file = str(Path(self.stage_io_dict.get("unique_dir", "")) / internal_top_name)

        # zip topology
        fu.log('Compressing topology to: %s' % self.io_dict["out"]["output_top_zip_path"], self.out_log,
               self.global_log)
        fu.zip_top(
            zip_file=self.io_dict["out"]["output_top_zip_path"],
            top_file=top_file,
            out_log=self.out_log,
            remove_original_files=self.remove_tmp,
        )

        # Remove temporal files
        self.tmp_files.extend([
            self.internal_top_name,
            self.internal_itp_name,
            self.io_dict['in'].get("stdin_file_path", ""),
        ])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)
        return self.return_code

    def check_lengths(self, *lists) -> None:
        """
        Make sure all lists are the same length

        Arguments that are ``None`` are ignored.

        Raises:
            ValueError: If the remaining lists differ in length.
        """
        # Lengths of the lists that were actually provided
        lengths = [len(lst) for lst in lists if lst is not None]

        # More than one distinct length means the lists disagree
        if lengths and len(set(lengths)) != 1:
            raise ValueError(f"""All protonation arrays (lys, arg, asp, glu, gln, his) must have the same length
                             (one string per chain and empty string if residue is not present in that chain). Found lengths: {lengths}""")

    def find_length(self, *lists) -> int:
        """
        Find length of the first list

        Arguments that are ``None`` are skipped; ``0`` is returned when no
        list is left.
        """
        # Lengths of the lists that were actually provided
        lengths = [len(lst) for lst in lists if lst is not None]

        # Length of the first provided list, if any
        return lengths[0] if lengths else 0
def pdb2gmx(input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` class and
    execute the :meth:`launch() <gromacs.pdb2gmx.Pdb2gmx.launch>` method.

    Returns:
        int: The value returned by ``Pdb2gmx.launch()``.
    """
    # Every argument is passed by name. Extra keyword arguments are handed
    # over bundled under a single ``kwargs`` keyword, not unpacked.
    block = Pdb2gmx(
        input_pdb_path=input_pdb_path,
        output_gro_path=output_gro_path,
        output_top_zip_path=output_top_zip_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()
# Give the function wrapper the same documentation as the class it wraps.
pdb2gmx.__doc__ = Pdb2gmx.__doc__
# Entry point used when the module is run as a script. get_main is not defined
# in this file; presumably it is inherited from BiobbObject — confirm.
main = Pdb2gmx.get_main(pdb2gmx, "Wrapper for the GROMACS pdb2gmx module.")


# Only call main() when executed directly, not when imported.
if __name__ == '__main__':
    main()