Coverage for biobb_gromacs / gromacs_extra / ndx2resttop.py: 72%

137 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-14 10:09 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Ndx2resttop class and the command line interface.""" 

4import fnmatch 

5from typing import Optional 

6from typing import Any 

7from pathlib import Path 

8from biobb_common.generic.biobb_object import BiobbObject 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.tools.file_utils import launchlogger 

11 

12 

13class Ndx2resttop(BiobbObject): 

14 """ 

15 | biobb_gromacs Ndx2resttop 

16 | Generate a restrained topology from an index NDX file. 

17 | This module automatizes the process of restrained topology generation starting from an index NDX file. 

18 

19 Args: 

20 input_ndx_path (str): Path to the input NDX index file. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/ndx2resttop.ndx>`_. Accepted formats: ndx (edam:format_2033). 

21 input_top_zip_path (str): Path the input TOP topology in zip format. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/ndx2resttop.zip>`_. Accepted formats: zip (edam:format_3987). 

22 output_top_zip_path (str): Path the output TOP topology in zip format. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs_extra/ref_ndx2resttop.zip>`_. Accepted formats: zip (edam:format_3987). 

23 properties (dict - Python dictionary object containing the tool parameters, not input/output files): 

24 * **posres_names** (*str*) - ("CUSTOM_POSRES") Space-separated preprocessor macro names, one per triplet (e.g. ``"CHAIN_A_POSRES CHAIN_B_POSRES"``). Activated at grompp time via ``-D <name>``. Must match the length of ref_rest_mol_triplet_list. 

25 * **force_constants** (*str*) - ("500 500 500") Three space-separated force constants Fx Fy Fz in kJ/mol/nm². Written verbatim into each ITP line so the same value is applied to every restrained atom. 

26 * **ref_rest_mol_triplet_list** (*str*) - (None) Comma-separated triplets ``(reference_group, restrain_group, molecule_name)``, one per molecule to restrain.``reference_group``: NDX group containing *all* atoms of the molecule in global (system-wide) GROMACS numbering. Used only as a coordinate frame to convert global indices to molecule-local 1-based indices. Example: ``r_1-9280``. ``restrain_group``: NDX group with the subset of atoms to actually restrain (must be a strict subset of reference_group). Example: ``P_&_r_1-9280``. ``molecule_name``: the ``[ moleculetype ]`` name in the topology used to locate where to splice the ``#ifdef`` block. Example: ``1TSR``. 

27 

28 Examples: 

29 This is a use example of how to use the building block from Python:: 

30 

31 from biobb_gromacs.gromacs_extra.ndx2resttop import ndx2resttop 

32 prop = { 'ref_rest_mol_triplet_list': '( Chain_A, Chain_A_noMut, Protein_chain_A ), ( Chain_B, Chain_B_noMut, Protein_chain_B ), ( Chain_C, Chain_C_noMut, Protein_chain_C ), ( Chain_D, Chain_D_noMut, Protein_chain_D )' } 

33 ndx2resttop(input_ndx_path='/path/to/myIndex.ndx', 

34 input_top_zip_path='/path/to/myTopology.zip', 

35 output_top_zip_path='/path/to/newTopology.zip', 

36 properties=prop) 

37 

38 Info: 

39 * wrapped_software: 

40 * name: In house 

41 * license: Apache-2.0 

42 * ontology: 

43 * name: EDAM 

44 * schema: http://edamontology.org/EDAM.owl 

45 """ 

46 

47 def __init__(self, input_ndx_path: str, input_top_zip_path: str, output_top_zip_path: str, 

48 properties: Optional[dict] = None, **kwargs) -> None: 

49 properties = properties or {} 

50 

51 # Call parent class constructor 

52 super().__init__(properties) 

53 self.locals_var_dict = locals().copy() 

54 

55 # Input/Output files 

56 self.io_dict = { 

57 "in": {"input_ndx_path": input_ndx_path, "input_top_zip_path": input_top_zip_path}, 

58 "out": {"output_top_zip_path": output_top_zip_path} 

59 } 

60 

61 # Properties specific for BB 

62 self.posres_names = properties.get('posres_names') 

63 self.force_constants = properties.get('force_constants', '500 500 500') 

64 self.ref_rest_mol_triplet_list = properties.get('ref_rest_mol_triplet_list') 

65 

66 # Check the properties 

67 self.check_properties(properties) 

68 self.check_arguments() 

69 

70 # --- Private helpers --- 

71 

72 def _parse_ndx_groups(self, ndx_lines: list) -> dict: 

73 """Parse a GROMACS NDX file into a lookup table of group line ranges. 

74 

75 NDX format: each named group starts with a ``[ group_name ]`` header line, 

76 followed by one or more rows of whitespace-separated global atom indices. 

77 

78 Returns a dict mapping the raw header string (e.g. ``'[ System ]'``) to a 

79 two-element list ``[start_line, end_line]`` where the range is **half-open** 

80 (matching Python slice semantics): ``ndx_lines[start_line:end_line]`` yields 

81 the data rows for that group (the header line itself at ``start_line - 1`` is 

82 excluded). 

83 

84 Edge cases handled: 

85 

86 - **Last group**: the final group has no successor header to trigger the end 

87 assignment, so it is closed explicitly with ``index + 1`` (not ``index``) 

88 to include the very last data line. 

89 - **Single-content-line groups**: when a header is immediately followed by 

90 another header (no trailing blank line), the default range would be 

91 ``[H, H+1]`` giving an empty slice. The post-loop fix increments end by 1 

92 to capture that single data line. 

93 """ 

94 groups_dic: dict[str, Any] = {} 

95 current_group = '' 

96 previous_group = '' 

97 

98 for index, line in enumerate(ndx_lines): 

99 if line.startswith('['): 

100 current_group = line 

101 groups_dic[current_group] = [index, 0] 

102 if previous_group: 

103 groups_dic[previous_group][1] = index 

104 previous_group = current_group 

105 

106 if index == len(ndx_lines) - 1: 

107 groups_dic[current_group][1] = index + 1 

108 

109 # Fix single-content-line groups 

110 for group_name in groups_dic: 

111 if groups_dic[group_name][0] + 1 == groups_dic[group_name][1]: 

112 groups_dic[group_name][1] += 1 

113 

114 return groups_dic 

115 

116 def _read_ndx_atoms(self, ndx_lines: list, groups_dic: dict, group_name: str) -> list: 

117 """Return the flat list of global atom indices for a named NDX group. 

118 

119 Slices the group's data rows from ndx_lines using the half-open range 

120 stored in groups_dic, then flattens all whitespace-separated tokens into 

121 a single list of integers. 

122 

123 Returned integers are **global** (system-wide) GROMACS atom numbers, 1-based. 

124 """ 

125 key = f'[ {group_name} ]' 

126 start = groups_dic[key][0] + 1 

127 stop = groups_dic[key][1] 

128 fu.log(f'{group_name}: start_closed={start} stop_open={stop}', self.out_log, self.global_log) 

129 atoms = [int(atom) for line in ndx_lines[start:stop] for atom in line.split()] 

130 return atoms 

131 

132 def _write_posre_itp(self, itp_path: str, selected_atoms: list) -> None: 

133 """Write a GROMACS position restraint ITP file. 

134 

135 ``selected_atoms`` must contain **molecule-local** 1-based atom indices 

136 (not global system-wide indices). The atom type column is hardcoded to 

137 ``1`` (GROMACS position restraint type). ``self.force_constants`` is a 

138 verbatim string ``"Fx Fy Fz"`` applied uniformly to every restrained atom. 

139 """ 

140 with open(itp_path, 'w') as f: 

141 fu.log(f'Creating {itp_path} with selected atoms and force constants', self.out_log, self.global_log) 

142 f.write('[ position_restraints ]\n') 

143 f.write('; atom type fx fy fz\n') 

144 for atom in selected_atoms: 

145 f.write(f'{atom} 1 {self.force_constants}\n') 

146 

147 def _insert_posres_in_top(self, top_file: str, molecule_name: str, posre_name: str, itp_name: str) -> None: 

148 """Splice the #ifdef posres block into the .top file for single-chain topologies. 

149 

150 Uses a two-phase scan over the topology lines: 

151 

152 Phase 1 – locate the target moleculetype: 

153 For each ``[ moleculetype ]`` directive, read the first non-comment 

154 content line and compare its first token against ``molecule_name``. 

155 Set ``found_molecule = True`` when matched. 

156 

157 Phase 2 – find the insertion point (only after found_molecule): 

158 Scan forward for the earliest of: 

159 - another top-level section (``[ moleculetype ]``, ``[ system ]``, 

160 ``[ molecules ]``) 

161 - an existing ``#include`` or ``#ifdef POSRES`` line 

162 Insert immediately before that line. Fall back to EOF if none found. 

163 

164 Important: the insertion-point check (Phase 2) runs *before* the 

165 ``[ moleculetype ]`` detector within the same loop iteration. Without this 

166 ordering, a new ``[ moleculetype ]`` that should trigger insertion would 

167 instead be consumed by the ``continue`` in the detector, causing the block 

168 to be placed in the wrong molecule section. 

169 """ 

170 with open(top_file, 'r') as f: 

171 lines = f.readlines() 

172 

173 in_moleculetype = False 

174 found_molecule = False 

175 index = None 

176 

177 for i, line in enumerate(lines): 

178 stripped = line.strip() 

179 

180 # Find insertion point: just before the next top-level section or existing restraint block 

181 if found_molecule: 

182 is_next_section = stripped.startswith('[') and ( 

183 'system' in stripped or 'molecules' in stripped or 'moleculetype' in stripped) 

184 if is_next_section or line.startswith('#include ') or line.startswith('#ifdef POSRES'): 

185 index = i 

186 break 

187 

188 # Detect [ moleculetype ] directive 

189 if stripped.startswith('[') and 'moleculetype' in stripped: 

190 in_moleculetype = True 

191 continue 

192 

193 # Find the molecule name line inside the [ moleculetype ] block 

194 if in_moleculetype: 

195 if stripped and not stripped.startswith(';'): 

196 in_moleculetype = False 

197 if not stripped.startswith('[') and stripped.split()[0] == molecule_name: 

198 found_molecule = True 

199 

200 if not found_molecule: 

201 raise ValueError(f"Molecule type '{molecule_name}' not found in the topology file") 

202 

203 if index is None: 

204 index = len(lines) 

205 

206 lines.insert(index, '\n') 

207 lines.insert(index + 1, '; Include Position restraint file\n') 

208 lines.insert(index + 2, f'#ifdef {posre_name}\n') 

209 lines.insert(index + 3, f'#include "{itp_name}"\n') 

210 lines.insert(index + 4, '#endif\n') 

211 lines.insert(index + 5, '\n') 

212 

213 with open(top_file, 'w') as f: 

214 f.writelines(lines) 

215 

216 # --- Main entry point --- 

217 

218 @launchlogger 

219 def launch(self) -> int: 

220 """Execute the :class:`Ndx2resttop <gromacs_extra.ndx2resttop.Ndx2resttop>` object.""" 

221 if self.check_restart(): 

222 return 0 

223 

224 top_file = fu.unzip_top(zip_file=self.io_dict['in'].get("input_top_zip_path", ""), out_log=self.out_log, unique_dir=self.stage_io_dict.get("unique_dir", "")) 

225 

226 ndx_lines = open(self.io_dict['in'].get("input_ndx_path", "")).read().splitlines() 

227 groups_dic = self._parse_ndx_groups(ndx_lines) 

228 fu.log(f'Parsed NDX groups: {list(groups_dic.keys())}', self.out_log, self.global_log) 

229 

230 # Parse triplet string: "(ref, restrain, mol), ..." -> list of 3-tuples 

231 raw_triplets = str(self.ref_rest_mol_triplet_list).split('),') 

232 self.ref_rest_mol_triplet_list = [ 

233 tuple(elem.strip(' ()').replace(' ', '').split(',')) 

234 for elem in raw_triplets 

235 ] 

236 fu.log('ref_rest_mol_triplet_list: ' + str(self.ref_rest_mol_triplet_list), self.out_log, self.global_log) 

237 

238 if self.posres_names: 

239 self.posres_names = [elem.strip() for elem in self.posres_names.split()] 

240 else: 

241 self.posres_names = ['CUSTOM_POSRES'] * len(self.ref_rest_mol_triplet_list) 

242 fu.log('posres_names: ' + str(self.posres_names), self.out_log, self.global_log) 

243 

244 if len(self.posres_names) != len(self.ref_rest_mol_triplet_list): 

245 raise ValueError("posres_names length must match ref_rest_mol_triplet_list length") 

246 

247 for triplet, posre_name in zip(self.ref_rest_mol_triplet_list, self.posres_names): 

248 # Per-triplet workflow: 

249 # 1. Unpack (reference_group, restrain_group, molecule_name) 

250 # 2. Read global NDX indices for both groups 

251 # 3. Remap restrain atoms to molecule-local 1-based indices 

252 # 4. Write the _posre.itp 

253 # 5. Inject #ifdef guard — multi-chain (per-chain .itp) or single-chain (.top) 

254 

255 reference_group, restrain_group, molecule_name = triplet 

256 fu.log(f'Reference group: {reference_group}', self.out_log, self.global_log) 

257 fu.log(f'Restrain group: {restrain_group}', self.out_log, self.global_log) 

258 fu.log(f'Molecule name: {molecule_name}', self.out_log, self.global_log) 

259 

260 itp_path = fu.create_name(path=str(Path(top_file).parent), prefix=self.prefix, 

261 step=self.step, name=restrain_group + '_posre.itp') 

262 self.io_dict['out']["output_itp_path"] = itp_path 

263 

264 # Map global NDX indices -> molecule-local 1-based indices. 

265 # reference_atoms.index(atom) gives the 0-based position of each global 

266 # restrain atom within the reference group list. Adding 1 converts to 

267 # GROMACS 1-based local numbering, which matches the atom order in the 

268 # [ atoms ] section of each moleculetype block. 

269 reference_atoms = self._read_ndx_atoms(ndx_lines, groups_dic, reference_group) 

270 restrain_atoms = self._read_ndx_atoms(ndx_lines, groups_dic, restrain_group) 

271 selected_atoms = [reference_atoms.index(atom) + 1 for atom in restrain_atoms] 

272 

273 self._write_posre_itp(itp_path, selected_atoms) 

274 itp_name = Path(itp_path).name 

275 

276 # Multi-chain: append ifdef block to the matching per-chain ITP. 

277 # Detection heuristic: if any .itp file in the topology directory has 

278 # molecule_name in its filename (and is not already a posre/pr file), 

279 # this is a multi-chain topology where each chain has its own .itp 

280 # included by the .top. In that case, append directly to that file. 

281 # Otherwise fall through to single-chain mode (_insert_posres_in_top). 

282 multi_chain = False 

283 for file_dir in Path(top_file).parent.iterdir(): 

284 if "posre" not in file_dir.name and not file_dir.name.endswith("_pr.itp"): 

285 match = fnmatch.fnmatch(str(file_dir), "*" + molecule_name + "*.itp") 

286 if match: 

287 multi_chain = True 

288 with open(str(file_dir), 'a') as f: 

289 fu.log(f'Opening {file_dir} and adding ifdef include', self.out_log, self.global_log) 

290 f.write('\n') 

291 f.write('; Include Position restraint file\n') 

292 f.write(f'#ifdef {posre_name}\n') 

293 f.write(f'#include "{itp_name}"\n') 

294 f.write('#endif\n') 

295 f.write('\n') 

296 

297 # Single-chain: splice ifdef block into the .top file 

298 if not multi_chain: 

299 self._insert_posres_in_top(top_file, molecule_name, posre_name, itp_name) 

300 

301 fu.zip_top(zip_file=self.io_dict['out'].get("output_top_zip_path", ""), 

302 top_file=top_file, out_log=self.out_log, remove_original_files=self.remove_tmp) 

303 self.remove_tmp_files() 

304 self.check_arguments(output_files_created=True, raise_exception=False) 

305 return 0 

306 

307 

308def ndx2resttop(input_ndx_path: str, input_top_zip_path: str, output_top_zip_path: str, 

309 properties: Optional[dict] = None, **kwargs) -> int: 

310 """Create :class:`Ndx2resttop <gromacs_extra.ndx2resttop.Ndx2resttop>` class and 

311 execute the :meth:`launch() <gromacs_extra.ndx2resttop.Ndx2resttop.launch>` method.""" 

312 return Ndx2resttop(**dict(locals())).launch() 

313 

314 

315ndx2resttop.__doc__ = Ndx2resttop.__doc__ 

316main = Ndx2resttop.get_main(ndx2resttop, "Generate a restrained topology from an index NDX file.") 

317 

318 

319if __name__ == '__main__': 

320 main()