Coverage for biobb_gromacs/gromacs/pdb2gmx.py: 75%
136 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-25 09:23 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-25 09:23 +0000
1#!/usr/bin/env python3
3"""Module containing the Pdb2gmx class and the command line interface."""
4import os
5import argparse
6from typing import Optional
7from biobb_common.generic.biobb_object import BiobbObject
8from biobb_common.configuration import settings
9from biobb_common.tools import file_utils as fu
10from biobb_common.tools.file_utils import launchlogger
11from biobb_gromacs.gromacs.common import get_gromacs_version
14class Pdb2gmx(BiobbObject):
15 """
16 | biobb_gromacs Pdb2gmx
17 | Wrapper class for the `GROMACS pdb2gmx <http://manual.gromacs.org/current/onlinehelp/gmx-pdb2gmx.html>`_ module.
18 | The GROMACS pdb2gmx module, reads a .pdb (or .gro) file, reads some database files, adds hydrogens to the molecules and generates coordinates in GROMACS (GROMOS), or optionally .pdb, format and a topology in GROMACS format. These files can subsequently be processed to generate a run input file.
20 Args:
21 input_pdb_path (str): Path to the input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs/egfr.pdb>`_. Accepted formats: pdb (edam:format_1476).
22 output_gro_path (str): Path to the output GRO file. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.gro>`_. Accepted formats: gro (edam:format_2033).
23 output_top_zip_path (str): Path the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs/ref_pdb2gmx.zip>`_. Accepted formats: zip (edam:format_3987).
24 properties (dict - Python dictionary object containing the tool parameters, not input/output files):
25 * **water_type** (*str*) - ("spce") Water molecule type. Values: spc, spce, tip3p, tip4p, tip5p, tips3p.
26 * **force_field** (*str*) - ("amber99sb-ildn") Force field to be used during the conversion. Values: gromos45a3, charmm27, gromos53a6, amber96, amber99, gromos43a2, gromos54a7, gromos43a1, amberGS, gromos53a5, amber99sb, amber03, amber99sb-ildn, oplsaa, amber94, amber99sb-star-ildn-mut.
27 * **ignh** (*bool*) - (False) Should pdb2gmx ignore the hidrogens in the original structure.
28 * **lys** (*list*) - (None) Lysine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
29 * **arg** (*list*) - (None) Arginine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
30 * **asp** (*list*) - (None) Aspartic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
31 * **glu** (*list*) - (None) Glutamic acid protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
32 * **gln** (*list*) - (None) Glutamine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain (0: not protonated, 1: protonated).
33 * **his** (*list*) - (None) Histidine protonation states for each chain in the input pdb. Each item of the list should be a string with the protonation states for that chain or empty if the residue is not present in that chain. Make sure residues are named HIS (0: HID, 1: HIE, 2: HIP, 3: HIS1).
34 * **merge** (*bool*) - (False) Merge all chains into a single molecule.
35 * **gmx_lib** (*str*) - (None) Path set GROMACS GMXLIB environment variable.
36 * **binary_path** (*str*) - ("gmx") Path to the GROMACS executable binary.
37 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
38 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
39 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
40 * **container_path** (*str*) - (None) Path to the binary executable of your container.
41 * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
42 * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
43 * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
44 * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
45 * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.
47 Examples:
48 This is a use example of how to use the building block from Python::
50 from biobb_gromacs.gromacs.pdb2gmx import pdb2gmx
51 prop = { 'his': ['0 0 1 1 0 0 0', '1 1 0 1'] }
52 pdb2gmx(input_pdb_path='/path/to/myStructure.pdb',
53 output_gro_path='/path/to/newStructure.gro',
54 output_top_zip_path='/path/to/newTopology.zip',
55 properties=prop)
57 Info:
58 * wrapped_software:
59 * name: GROMACS Pdb2gmx
60 * version: 2024.5
61 * license: LGPL 2.1
62 * ontology:
63 * name: EDAM
64 * schema: http://edamontology.org/EDAM.owl
65 """
67 def __init__(self, input_pdb_path: str, output_gro_path: str, output_top_zip_path: str, properties: Optional[dict] = None,
68 **kwargs) -> None:
69 properties = properties or {}
71 # Call parent class constructor
72 super().__init__(properties)
73 self.locals_var_dict = locals().copy()
75 # Input/Output files
76 self.io_dict = {
77 "in": {"input_pdb_path": input_pdb_path},
78 "out": {"output_gro_path": output_gro_path, "output_top_zip_path": output_top_zip_path}
79 }
81 # Properties specific for BB
82 self.internal_top_name = properties.get('internal_top_name', 'p2g.top') # Excluded from documentation for simplicity
83 self.internal_itp_name = properties.get('internal_itp_name', 'posre.itp') # Excluded from documentation for simplicity
84 self.water_type = properties.get('water_type', 'spce')
85 self.force_field = properties.get('force_field', 'amber99sb-ildn')
86 self.ignh = properties.get('ignh', False)
87 self.lys = properties.get('lys', None)
88 self.arg = properties.get('arg', None)
89 self.asp = properties.get('asp', None)
90 self.glu = properties.get('glu', None)
91 self.gln = properties.get('gln', None)
92 self.his = properties.get('his', None)
93 self.merge = properties.get('merge', False)
95 # Properties common in all GROMACS BB
96 self.gmx_lib = properties.get('gmx_lib', None)
97 self.binary_path: str = properties.get('binary_path', 'gmx')
98 self.gmx_nobackup = properties.get('gmx_nobackup', True)
99 self.gmx_nocopyright = properties.get('gmx_nocopyright', True)
100 if self.gmx_nobackup:
101 self.binary_path += ' -nobackup'
102 if self.gmx_nocopyright:
103 self.binary_path += ' -nocopyright'
104 if not self.container_path:
105 self.gmx_version = get_gromacs_version(self.binary_path)
107 # Support string for single chain
108 if isinstance(self.lys, str):
109 self.lys = [self.lys]
110 if isinstance(self.arg, str):
111 self.arg = [self.arg]
112 if isinstance(self.asp, str):
113 self.asp = [self.asp]
114 if isinstance(self.glu, str):
115 self.glu = [self.glu]
116 if isinstance(self.gln, str):
117 self.gln = [self.gln]
118 if isinstance(self.his, str):
119 self.his = [self.his]
121 # Make sure all have the same length
122 self.check_lengths(self.lys, self.arg, self.asp, self.glu, self.gln, self.his)
124 # Check the properties
125 self.check_properties(properties)
126 self.check_arguments()
128 @launchlogger
129 def launch(self) -> int:
130 """Execute the :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` object."""
132 # Setup Biobb
133 if self.check_restart():
134 return 0
136 # Create stdin file if needed
137 stdin_content = ''
138 num_chains = self.find_length(self.lys, self.arg, self.asp, self.glu, self.gln, self.his)
139 for i in range(num_chains):
140 if self.lys is not None:
141 stdin_content += f' {self.lys[i]}'
142 if self.arg is not None:
143 stdin_content += f' {self.arg[i]}'
144 if self.asp is not None:
145 stdin_content += f' {self.asp[i]}'
146 if self.glu is not None:
147 stdin_content += f' {self.glu[i]}'
148 if self.gln is not None:
149 stdin_content += f' {self.gln[i]}'
150 if self.his is not None:
151 stdin_content += f' {self.his[i]}'
153 if stdin_content:
154 self.io_dict['in']['stdin_file_path'] = fu.create_stdin_file(stdin_content)
155 self.stage_files()
157 internal_top_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_top_name)
158 internal_itp_name = fu.create_name(prefix=self.prefix, step=self.step, name=self.internal_itp_name)
160 # Create command line
161 self.cmd = [self.binary_path, "pdb2gmx",
162 "-f", self.stage_io_dict["in"]["input_pdb_path"],
163 "-o", self.stage_io_dict["out"]["output_gro_path"],
164 "-p", internal_top_name,
165 "-water", self.water_type,
166 "-ff", self.force_field,
167 "-i", internal_itp_name]
169 if self.ignh:
170 self.cmd.append("-ignh")
171 if self.merge:
172 self.cmd.append("-merge")
173 self.cmd.append("all")
174 if self.lys:
175 self.cmd.append("-lys")
176 if self.arg:
177 self.cmd.append("-arg")
178 if self.asp:
179 self.cmd.append("-asp")
180 if self.glu:
181 self.cmd.append("-glu")
182 if self.gln:
183 self.cmd.append("-gln")
184 if self.his:
185 self.cmd.append("-his")
187 if stdin_content:
188 self.cmd.append('<')
189 self.cmd.append(self.stage_io_dict["in"]["stdin_file_path"])
191 if self.gmx_lib:
192 self.env_vars_dict['GMXLIB'] = self.gmx_lib
194 # Run Biobb block
195 self.run_biobb()
197 # Copy files to host
198 self.copy_to_host()
200 if self.container_path:
201 internal_top_name = os.path.join(self.stage_io_dict.get("unique_dir", ""), internal_top_name)
203 # zip topology
204 fu.log('Compressing topology to: %s' % self.io_dict["out"]["output_top_zip_path"], self.out_log,
205 self.global_log)
206 fu.zip_top(zip_file=self.io_dict["out"]["output_top_zip_path"], top_file=internal_top_name, out_log=self.out_log, remove_original_files=self.remove_tmp)
208 # Remove temporal files
209 self.tmp_files.extend([
210 self.internal_top_name,
211 self.internal_itp_name,
212 self.io_dict['in'].get("stdin_file_path", "")
213 ])
214 self.remove_tmp_files()
216 self.check_arguments(output_files_created=True, raise_exception=False)
217 return self.return_code
219 def check_lengths(self, *lists):
220 """
221 Make sure all lists are the same length
222 """
223 # Find length of each list
224 lengths = [len(lst) for lst in lists if lst is not None]
226 # Check if all lengths are the same
227 all_equal = True
228 if len(lengths) > 0:
229 all_equal = len(set(lengths)) == 1
231 if not all_equal:
232 raise ValueError(f"""All protonation arrays (lys, arg, asp, glu, gln, his) must have the same length
233 (one string per chain and empty string if residue is not present in that chain). Found lengths: {lengths}""")
235 def find_length(self, *lists) -> int:
236 """
237 Find length of the first list
238 """
239 # Find length of each list
240 lengths = [len(lst) for lst in lists if lst is not None]
242 # Return the length of the first list, if any
243 if len(lengths) > 0:
244 return lengths[0]
245 else:
246 return 0
249def pdb2gmx(input_pdb_path: str, output_gro_path: str, output_top_zip_path: str,
250 properties: Optional[dict] = None, **kwargs) -> int:
251 """Create :class:`Pdb2gmx <gromacs.pdb2gmx.Pdb2gmx>` class and
252 execute the :meth:`launch() <gromacs.pdb2gmx.Pdb2gmx.launch>` method."""
254 return Pdb2gmx(input_pdb_path=input_pdb_path, output_gro_path=output_gro_path,
255 output_top_zip_path=output_top_zip_path, properties=properties,
256 **kwargs).launch()
259pdb2gmx.__doc__ = Pdb2gmx.__doc__
262def main():
263 """Command line execution of this building block. Please check the command line documentation."""
264 parser = argparse.ArgumentParser(description="Wrapper of the GROMACS pdb2gmx module.",
265 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
266 parser.add_argument('-c', '--config', required=False, help="This file can be a YAML file, JSON file or JSON string")
268 # Specific args of each building block
269 required_args = parser.add_argument_group('required arguments')
270 required_args.add_argument('--input_pdb_path', required=True)
271 required_args.add_argument('--output_gro_path', required=True)
272 required_args.add_argument('--output_top_zip_path', required=True)
274 args = parser.parse_args()
275 config = args.config if args.config else None
276 properties = settings.ConfReader(config=config).get_prop_dic()
278 # Specific call of each building block
279 pdb2gmx(input_pdb_path=args.input_pdb_path, output_gro_path=args.output_gro_path,
280 output_top_zip_path=args.output_top_zip_path, properties=properties)
283if __name__ == '__main__':
284 main()