Coverage for biobb_gromacs / gromacs / pdb2gmx.py: 81%
124 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-05 08:26 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-05 08:26 +0000
1#!/usr/bin/env python3
3"""Module containing the Pdb2gmx class and the command line interface."""
4import os
5from typing import Optional
6from biobb_common.generic.biobb_object import BiobbObject
7from biobb_common.tools import file_utils as fu
8from biobb_common.tools.file_utils import launchlogger
9from biobb_gromacs.gromacs.common import get_gromacs_version
class Pdb2gmx(BiobbObject):
    """
    | biobb_gromacs Pdb2gmx
    | Wrapper class for the `GROMACS pdb2gmx <http://manual.gromacs.org/current/onlinehelp/gmx-pdb2gmx.html>`_ module.
    | The GROMACS pdb2gmx module, reads a .pdb (or .gro) file, reads some database files, adds hydrogens to the molecules and generates coordinates in GROMACS (GROMOS), or optionally .pdb, format and a topology in GROMACS format. These files can subsequently be processed to generate a run input file.

    Args:
        input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs/egfr.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_gro_path (str): Path to the output GRO file. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.gro>`_. Accepted formats: gro (edam:format_2033).
        output_top_zip_path (str): Path to the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **water_type** (*str*) - ("spce") Water molecule type. Values: spc, spce, tip3p, tip4p, tip5p, tips3p.
            * **force_field** (*str*) - ("amber99sb-ildn") Force field to be used during the conversion. Values: gromos45a3, charmm27, gromos53a6, amber96, amber99, gromos43a2, gromos54a7, gromos43a1, amberGS, gromos53a5, amber99sb, amber03, amber99sb-ildn, oplsaa, amber94, amber99sb-star-ildn-mut.
            * **ignh** (*bool*) - (False) Should pdb2gmx ignore the hydrogens in the original structure.
            * **lys** (*list*) - (None) Lysine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **arg** (*list*) - (None) Arginine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **asp** (*list*) - (None) Aspartic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **glu** (*list*) - (None) Glutamic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **gln** (*list*) - (None) Glutamine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
            * **his** (*list*) - (None) Histidine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain. Make sure residues are named HIS (0: HID, 1: HIE, 2: HIP, 3: HIS1).
            * **merge** (*bool*) - (False) Merge all chains into a single molecule.
            * **gmx_lib** (*str*) - (None) Path set GROMACS GMXLIB environment variable.
            * **binary_path** (*str*) - ("gmx") Path to the GROMACS executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_gromacs.gromacs.pdb2gmx import pdb2gmx
            prop = { 'his': ['0 0 1 1 0 0 0', '1 1 0 1'] }
            pdb2gmx(input_pdb_path='/path/to/myStructure.pdb',
                    output_gro_path='/path/to/newStructure.gro',
                    output_top_zip_path='/path/to/newTopology.zip',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: GROMACS Pdb2gmx
            * version: 2025.2
            * license: LGPL 2.1
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_pdb_path: str, output_gro_path: str, output_top_zip_path: str, properties: Optional[dict] = None,
                 **kwargs) -> None:
        """Store the I/O paths and tool properties and validate them.

        Raises:
            ValueError: If the protonation lists that were provided do not
                all have the same length (raised by :meth:`check_lengths`).
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. Keep this line right after the
        # parent constructor: any local bound earlier would leak into the copy.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {"input_pdb_path": input_pdb_path},
            "out": {"output_gro_path": output_gro_path, "output_top_zip_path": output_top_zip_path}
        }

        # Properties specific for BB
        self.internal_top_name = properties.get('internal_top_name', 'p2g.top')  # Excluded from documentation for simplicity
        self.internal_itp_name = properties.get('internal_itp_name', 'posre.itp')  # Excluded from documentation for simplicity
        self.water_type = properties.get('water_type', 'spce')
        self.force_field = properties.get('force_field', 'amber99sb-ildn')
        self.ignh = properties.get('ignh', False)
        self.lys = properties.get('lys', None)
        self.arg = properties.get('arg', None)
        self.asp = properties.get('asp', None)
        self.glu = properties.get('glu', None)
        self.gln = properties.get('gln', None)
        self.his = properties.get('his', None)
        self.merge = properties.get('merge', False)

        # Properties common in all GROMACS BB
        self.gmx_lib = properties.get('gmx_lib', None)
        self.binary_path: str = properties.get('binary_path', 'gmx')
        self.gmx_nobackup = properties.get('gmx_nobackup', True)
        self.gmx_nocopyright = properties.get('gmx_nocopyright', True)
        # The global gmx flags are appended to the binary string itself, so they
        # travel with it wherever binary_path is used below.
        if self.gmx_nobackup:
            self.binary_path += ' -nobackup'
        if self.gmx_nocopyright:
            self.binary_path += ' -nocopyright'
        # The version is only queried when no container is configured
        # (container_path is not set in this class; presumably by the parent).
        if not self.container_path:
            self.gmx_version = get_gromacs_version(self.binary_path)

        # Support string for single chain: wrap a bare string into a one-item list
        for residue in ('lys', 'arg', 'asp', 'glu', 'gln', 'his'):
            states = getattr(self, residue)
            if isinstance(states, str):
                setattr(self, residue, [states])

        # Make sure all have the same length
        self.check_lengths(self.lys, self.arg, self.asp, self.glu, self.gln, self.his)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` object.

        Returns:
            int: ``0`` when ``check_restart()`` reports that execution can be
            skipped, otherwise ``self.return_code`` after running the block.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Create stdin file if needed.
        # The answers are written chain by chain and, within each chain, in the
        # fixed order lys, arg, asp, glu, gln, his (only for the lists provided).
        protonation = (self.lys, self.arg, self.asp, self.glu, self.gln, self.his)
        provided = [states for states in protonation if states is not None]
        num_chains = self.find_length(*protonation)
        stdin_content = ''.join(
            f' {states[chain]}'
            for chain in range(num_chains)
            for states in provided
        )

        if stdin_content:
            self.io_dict['in']['stdin_file_path'] = fu.create_stdin_file(stdin_content)
        self.stage_files()

        internal_top_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_top_name)
        internal_itp_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_itp_name)

        # Create command line
        self.cmd = [self.binary_path, "pdb2gmx",
                    "-f", self.stage_io_dict["in"]["input_pdb_path"],
                    "-o", self.stage_io_dict["out"]["output_gro_path"],
                    "-p", internal_top_name,
                    "-water", self.water_type,
                    "-ff", self.force_field,
                    "-i", internal_itp_name]

        if self.ignh:
            self.cmd.append("-ignh")
        if self.merge:
            self.cmd.extend(["-merge", "all"])

        # One interactive-selection flag per non-empty protonation list, in the
        # same order in which the answers were written to the stdin file.
        residue_flags = (
            ("-lys", self.lys),
            ("-arg", self.arg),
            ("-asp", self.asp),
            ("-glu", self.glu),
            ("-gln", self.gln),
            ("-his", self.his),
        )
        self.cmd.extend(flag for flag, states in residue_flags if states)

        # Feed the prepared answers through a stdin redirection
        if stdin_content:
            self.cmd.extend(['<', self.stage_io_dict["in"]["stdin_file_path"]])

        if self.gmx_lib:
            self.env_vars_dict['GMXLIB'] = self.gmx_lib

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # In container runs the topology is looked up inside the staging directory
        if self.container_path:
            internal_top_name = os.path.join(self.stage_io_dict.get("unique_dir", ""), internal_top_name)

        # zip topology
        fu.log(f'Compressing topology to: {self.io_dict["out"]["output_top_zip_path"]}', self.out_log,
               self.global_log)
        fu.zip_top(zip_file=self.io_dict["out"]["output_top_zip_path"], top_file=internal_top_name, out_log=self.out_log, remove_original_files=self.remove_tmp)

        # Remove temporal files
        # NOTE(review): these are the configured names, not the names returned by
        # fu.create_name() above; confirm they match when prefix/step are set.
        self.tmp_files.extend([self.internal_top_name, self.internal_itp_name])
        # Only register the stdin file when one was actually created, instead of
        # handing an empty path to the cleanup step.
        stdin_file_path = self.io_dict['in'].get("stdin_file_path")
        if stdin_file_path:
            self.tmp_files.append(stdin_file_path)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)
        return self.return_code

    def check_lengths(self, *lists) -> None:
        """
        Make sure all lists are the same length

        ``None`` entries are ignored; passing no list (or only ``None``) is valid.

        Raises:
            ValueError: If the non-None lists do not all have the same length.
        """
        # Find length of each list
        lengths = [len(lst) for lst in lists if lst is not None]

        # More than one distinct length means at least two lists disagree
        if len(set(lengths)) > 1:
            raise ValueError(
                "All protonation arrays (lys, arg, asp, glu, gln, his) must have the same length "
                "(one string per chain and empty string if residue is not present in that chain). "
                f"Found lengths: {lengths}"
            )

    def find_length(self, *lists) -> int:
        """
        Find length of the first list that is not None

        Returns:
            int: Length of the first non-None list, or 0 if there is none.
        """
        return next((len(lst) for lst in lists if lst is not None), 0)
def pdb2gmx(input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` class and
    execute the :meth:`launch() <gromacs.pdb2gmx.Pdb2gmx.launch>` method."""
    # locals() is evaluated before `call_arguments` is bound, so the snapshot
    # holds exactly the function arguments (including the `kwargs` entry).
    call_arguments = dict(locals())
    building_block = Pdb2gmx(**call_arguments)
    return building_block.launch()
# The functional wrapper exposes the same documentation as the class.
pdb2gmx.__doc__ = Pdb2gmx.__doc__
# Command line entry point, built by the class-level get_main() factory.
main = Pdb2gmx.get_main(pdb2gmx, "Wrapper for the GROMACS pdb2gmx module.")


# Only run the command line interface when executed as a script.
if __name__ == '__main__':
    main()