Coverage for biobb_gromacs / gromacs_extra / ndx2resttop.py: 72%
137 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-14 10:09 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-14 10:09 +0000
1#!/usr/bin/env python3
3"""Module containing the Ndx2resttop class and the command line interface."""
4import fnmatch
5from typing import Optional
6from typing import Any
7from pathlib import Path
8from biobb_common.generic.biobb_object import BiobbObject
9from biobb_common.tools import file_utils as fu
10from biobb_common.tools.file_utils import launchlogger
13class Ndx2resttop(BiobbObject):
14 """
15 | biobb_gromacs Ndx2resttop
16 | Generate a restrained topology from an index NDX file.
17 | This module automatizes the process of restrained topology generation starting from an index NDX file.
19 Args:
20 input_ndx_path (str): Path to the input NDX index file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/ndx2resttop.ndx>`_. Accepted formats: ndx (edam:format_2033).
21 input_top_zip_path (str): Path the input TOP topology in zip format. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/ndx2resttop.zip>`_. Accepted formats: zip (edam:format_3987).
22 output_top_zip_path (str): Path the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs_extra/ref_ndx2resttop.zip>`_. Accepted formats: zip (edam:format_3987).
23 properties (dict - Python dictionary object containing the tool parameters, not input/output files):
24 * **posres_names** (*str*) - ("CUSTOM_POSRES") Space-separated preprocessor macro names, one per triplet (e.g. ``"CHAIN_A_POSRES CHAIN_B_POSRES"``). Activated at grompp time via ``-D <name>``. Must match the length of ref_rest_mol_triplet_list.
25 * **force_constants** (*str*) - ("500 500 500") Three space-separated force constants Fx Fy Fz in kJ/mol/nm². Written verbatim into each ITP line so the same value is applied to every restrained atom.
26 * **ref_rest_mol_triplet_list** (*str*) - (None) Comma-separated triplets ``(reference_group, restrain_group, molecule_name)``, one per molecule to restrain.``reference_group``: NDX group containing *all* atoms of the molecule in global (system-wide) GROMACS numbering. Used only as a coordinate frame to convert global indices to molecule-local 1-based indices. Example: ``r_1-9280``. ``restrain_group``: NDX group with the subset of atoms to actually restrain (must be a strict subset of reference_group). Example: ``P_&_r_1-9280``. ``molecule_name``: the ``[ moleculetype ]`` name in the topology used to locate where to splice the ``#ifdef`` block. Example: ``1TSR``.
28 Examples:
29 This is a use example of how to use the building block from Python::
31 from biobb_gromacs.gromacs_extra.ndx2resttop import ndx2resttop
32 prop = { 'ref_rest_mol_triplet_list': '( Chain_A, Chain_A_noMut, Protein_chain_A ), ( Chain_B, Chain_B_noMut, Protein_chain_B ), ( Chain_C, Chain_C_noMut, Protein_chain_C ), ( Chain_D, Chain_D_noMut, Protein_chain_D )' }
33 ndx2resttop(input_ndx_path='/path/to/myIndex.ndx',
34 input_top_zip_path='/path/to/myTopology.zip',
35 output_top_zip_path='/path/to/newTopology.zip',
36 properties=prop)
38 Info:
39 * wrapped_software:
40 * name: In house
41 * license: Apache-2.0
42 * ontology:
43 * name: EDAM
44 * schema: http://edamontology.org/EDAM.owl
45 """
47 def __init__(self, input_ndx_path: str, input_top_zip_path: str, output_top_zip_path: str,
48 properties: Optional[dict] = None, **kwargs) -> None:
49 properties = properties or {}
51 # Call parent class constructor
52 super().__init__(properties)
53 self.locals_var_dict = locals().copy()
55 # Input/Output files
56 self.io_dict = {
57 "in": {"input_ndx_path": input_ndx_path, "input_top_zip_path": input_top_zip_path},
58 "out": {"output_top_zip_path": output_top_zip_path}
59 }
61 # Properties specific for BB
62 self.posres_names = properties.get('posres_names')
63 self.force_constants = properties.get('force_constants', '500 500 500')
64 self.ref_rest_mol_triplet_list = properties.get('ref_rest_mol_triplet_list')
66 # Check the properties
67 self.check_properties(properties)
68 self.check_arguments()
70 # --- Private helpers ---
72 def _parse_ndx_groups(self, ndx_lines: list) -> dict:
73 """Parse a GROMACS NDX file into a lookup table of group line ranges.
75 NDX format: each named group starts with a ``[ group_name ]`` header line,
76 followed by one or more rows of whitespace-separated global atom indices.
78 Returns a dict mapping the raw header string (e.g. ``'[ System ]'``) to a
79 two-element list ``[start_line, end_line]`` where the range is **half-open**
80 (matching Python slice semantics): ``ndx_lines[start_line:end_line]`` yields
81 the data rows for that group (the header line itself at ``start_line - 1`` is
82 excluded).
84 Edge cases handled:
86 - **Last group**: the final group has no successor header to trigger the end
87 assignment, so it is closed explicitly with ``index + 1`` (not ``index``)
88 to include the very last data line.
89 - **Single-content-line groups**: when a header is immediately followed by
90 another header (no trailing blank line), the default range would be
91 ``[H, H+1]`` giving an empty slice. The post-loop fix increments end by 1
92 to capture that single data line.
93 """
94 groups_dic: dict[str, Any] = {}
95 current_group = ''
96 previous_group = ''
98 for index, line in enumerate(ndx_lines):
99 if line.startswith('['):
100 current_group = line
101 groups_dic[current_group] = [index, 0]
102 if previous_group:
103 groups_dic[previous_group][1] = index
104 previous_group = current_group
106 if index == len(ndx_lines) - 1:
107 groups_dic[current_group][1] = index + 1
109 # Fix single-content-line groups
110 for group_name in groups_dic:
111 if groups_dic[group_name][0] + 1 == groups_dic[group_name][1]:
112 groups_dic[group_name][1] += 1
114 return groups_dic
116 def _read_ndx_atoms(self, ndx_lines: list, groups_dic: dict, group_name: str) -> list:
117 """Return the flat list of global atom indices for a named NDX group.
119 Slices the group's data rows from ndx_lines using the half-open range
120 stored in groups_dic, then flattens all whitespace-separated tokens into
121 a single list of integers.
123 Returned integers are **global** (system-wide) GROMACS atom numbers, 1-based.
124 """
125 key = f'[ {group_name} ]'
126 start = groups_dic[key][0] + 1
127 stop = groups_dic[key][1]
128 fu.log(f'{group_name}: start_closed={start} stop_open={stop}', self.out_log, self.global_log)
129 atoms = [int(atom) for line in ndx_lines[start:stop] for atom in line.split()]
130 return atoms
132 def _write_posre_itp(self, itp_path: str, selected_atoms: list) -> None:
133 """Write a GROMACS position restraint ITP file.
135 ``selected_atoms`` must contain **molecule-local** 1-based atom indices
136 (not global system-wide indices). The atom type column is hardcoded to
137 ``1`` (GROMACS position restraint type). ``self.force_constants`` is a
138 verbatim string ``"Fx Fy Fz"`` applied uniformly to every restrained atom.
139 """
140 with open(itp_path, 'w') as f:
141 fu.log(f'Creating {itp_path} with selected atoms and force constants', self.out_log, self.global_log)
142 f.write('[ position_restraints ]\n')
143 f.write('; atom type fx fy fz\n')
144 for atom in selected_atoms:
145 f.write(f'{atom} 1 {self.force_constants}\n')
147 def _insert_posres_in_top(self, top_file: str, molecule_name: str, posre_name: str, itp_name: str) -> None:
148 """Splice the #ifdef posres block into the .top file for single-chain topologies.
150 Uses a two-phase scan over the topology lines:
152 Phase 1 – locate the target moleculetype:
153 For each ``[ moleculetype ]`` directive, read the first non-comment
154 content line and compare its first token against ``molecule_name``.
155 Set ``found_molecule = True`` when matched.
157 Phase 2 – find the insertion point (only after found_molecule):
158 Scan forward for the earliest of:
159 - another top-level section (``[ moleculetype ]``, ``[ system ]``,
160 ``[ molecules ]``)
161 - an existing ``#include`` or ``#ifdef POSRES`` line
162 Insert immediately before that line. Fall back to EOF if none found.
164 Important: the insertion-point check (Phase 2) runs *before* the
165 ``[ moleculetype ]`` detector within the same loop iteration. Without this
166 ordering, a new ``[ moleculetype ]`` that should trigger insertion would
167 instead be consumed by the ``continue`` in the detector, causing the block
168 to be placed in the wrong molecule section.
169 """
170 with open(top_file, 'r') as f:
171 lines = f.readlines()
173 in_moleculetype = False
174 found_molecule = False
175 index = None
177 for i, line in enumerate(lines):
178 stripped = line.strip()
180 # Find insertion point: just before the next top-level section or existing restraint block
181 if found_molecule:
182 is_next_section = stripped.startswith('[') and (
183 'system' in stripped or 'molecules' in stripped or 'moleculetype' in stripped)
184 if is_next_section or line.startswith('#include ') or line.startswith('#ifdef POSRES'):
185 index = i
186 break
188 # Detect [ moleculetype ] directive
189 if stripped.startswith('[') and 'moleculetype' in stripped:
190 in_moleculetype = True
191 continue
193 # Find the molecule name line inside the [ moleculetype ] block
194 if in_moleculetype:
195 if stripped and not stripped.startswith(';'):
196 in_moleculetype = False
197 if not stripped.startswith('[') and stripped.split()[0] == molecule_name:
198 found_molecule = True
200 if not found_molecule:
201 raise ValueError(f"Molecule type '{molecule_name}' not found in the topology file")
203 if index is None:
204 index = len(lines)
206 lines.insert(index, '\n')
207 lines.insert(index + 1, '; Include Position restraint file\n')
208 lines.insert(index + 2, f'#ifdef {posre_name}\n')
209 lines.insert(index + 3, f'#include "{itp_name}"\n')
210 lines.insert(index + 4, '#endif\n')
211 lines.insert(index + 5, '\n')
213 with open(top_file, 'w') as f:
214 f.writelines(lines)
216 # --- Main entry point ---
218 @launchlogger
219 def launch(self) -> int:
220 """Execute the :class:`Ndx2resttop <gromacs_extra.ndx2resttop.Ndx2resttop>` object."""
221 if self.check_restart():
222 return 0
224 top_file = fu.unzip_top(zip_file=self.io_dict['in'].get("input_top_zip_path", ""), out_log=self.out_log, unique_dir=self.stage_io_dict.get("unique_dir", ""))
226 ndx_lines = open(self.io_dict['in'].get("input_ndx_path", "")).read().splitlines()
227 groups_dic = self._parse_ndx_groups(ndx_lines)
228 fu.log(f'Parsed NDX groups: {list(groups_dic.keys())}', self.out_log, self.global_log)
230 # Parse triplet string: "(ref, restrain, mol), ..." -> list of 3-tuples
231 raw_triplets = str(self.ref_rest_mol_triplet_list).split('),')
232 self.ref_rest_mol_triplet_list = [
233 tuple(elem.strip(' ()').replace(' ', '').split(','))
234 for elem in raw_triplets
235 ]
236 fu.log('ref_rest_mol_triplet_list: ' + str(self.ref_rest_mol_triplet_list), self.out_log, self.global_log)
238 if self.posres_names:
239 self.posres_names = [elem.strip() for elem in self.posres_names.split()]
240 else:
241 self.posres_names = ['CUSTOM_POSRES'] * len(self.ref_rest_mol_triplet_list)
242 fu.log('posres_names: ' + str(self.posres_names), self.out_log, self.global_log)
244 if len(self.posres_names) != len(self.ref_rest_mol_triplet_list):
245 raise ValueError("posres_names length must match ref_rest_mol_triplet_list length")
247 for triplet, posre_name in zip(self.ref_rest_mol_triplet_list, self.posres_names):
248 # Per-triplet workflow:
249 # 1. Unpack (reference_group, restrain_group, molecule_name)
250 # 2. Read global NDX indices for both groups
251 # 3. Remap restrain atoms to molecule-local 1-based indices
252 # 4. Write the _posre.itp
253 # 5. Inject #ifdef guard — multi-chain (per-chain .itp) or single-chain (.top)
255 reference_group, restrain_group, molecule_name = triplet
256 fu.log(f'Reference group: {reference_group}', self.out_log, self.global_log)
257 fu.log(f'Restrain group: {restrain_group}', self.out_log, self.global_log)
258 fu.log(f'Molecule name: {molecule_name}', self.out_log, self.global_log)
260 itp_path = fu.create_name(path=str(Path(top_file).parent), prefix=self.prefix,
261 step=self.step, name=restrain_group + '_posre.itp')
262 self.io_dict['out']["output_itp_path"] = itp_path
264 # Map global NDX indices -> molecule-local 1-based indices.
265 # reference_atoms.index(atom) gives the 0-based position of each global
266 # restrain atom within the reference group list. Adding 1 converts to
267 # GROMACS 1-based local numbering, which matches the atom order in the
268 # [ atoms ] section of each moleculetype block.
269 reference_atoms = self._read_ndx_atoms(ndx_lines, groups_dic, reference_group)
270 restrain_atoms = self._read_ndx_atoms(ndx_lines, groups_dic, restrain_group)
271 selected_atoms = [reference_atoms.index(atom) + 1 for atom in restrain_atoms]
273 self._write_posre_itp(itp_path, selected_atoms)
274 itp_name = Path(itp_path).name
276 # Multi-chain: append ifdef block to the matching per-chain ITP.
277 # Detection heuristic: if any .itp file in the topology directory has
278 # molecule_name in its filename (and is not already a posre/pr file),
279 # this is a multi-chain topology where each chain has its own .itp
280 # included by the .top. In that case, append directly to that file.
281 # Otherwise fall through to single-chain mode (_insert_posres_in_top).
282 multi_chain = False
283 for file_dir in Path(top_file).parent.iterdir():
284 if "posre" not in file_dir.name and not file_dir.name.endswith("_pr.itp"):
285 match = fnmatch.fnmatch(str(file_dir), "*" + molecule_name + "*.itp")
286 if match:
287 multi_chain = True
288 with open(str(file_dir), 'a') as f:
289 fu.log(f'Opening {file_dir} and adding ifdef include', self.out_log, self.global_log)
290 f.write('\n')
291 f.write('; Include Position restraint file\n')
292 f.write(f'#ifdef {posre_name}\n')
293 f.write(f'#include "{itp_name}"\n')
294 f.write('#endif\n')
295 f.write('\n')
297 # Single-chain: splice ifdef block into the .top file
298 if not multi_chain:
299 self._insert_posres_in_top(top_file, molecule_name, posre_name, itp_name)
301 fu.zip_top(zip_file=self.io_dict['out'].get("output_top_zip_path", ""),
302 top_file=top_file, out_log=self.out_log, remove_original_files=self.remove_tmp)
303 self.remove_tmp_files()
304 self.check_arguments(output_files_created=True, raise_exception=False)
305 return 0
308def ndx2resttop(input_ndx_path: str, input_top_zip_path: str, output_top_zip_path: str,
309 properties: Optional[dict] = None, **kwargs) -> int:
310 """Create :class:`Ndx2resttop <gromacs_extra.ndx2resttop.Ndx2resttop>` class and
311 execute the :meth:`launch() <gromacs_extra.ndx2resttop.Ndx2resttop.launch>` method."""
312 return Ndx2resttop(**dict(locals())).launch()
315ndx2resttop.__doc__ = Ndx2resttop.__doc__
316main = Ndx2resttop.get_main(ndx2resttop, "Generate a restrained topology from an index NDX file.")
319if __name__ == '__main__':
320 main()