Coverage for biobb_gromacs/gromacs_extra/append_ligand.py: 66%

136 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-25 09:23 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the AppendLigand class and the command line interface.""" 

4 

5import argparse 

6import re 

7import shutil 

8from pathlib import Path 

9from typing import Optional 

10 

11from biobb_common.configuration import settings 

12from biobb_common.generic.biobb_object import BiobbObject 

13from biobb_common.tools import file_utils as fu 

14from biobb_common.tools.file_utils import launchlogger 

15 

16 

17class AppendLigand(BiobbObject): 

18 """ 

19 | biobb_gromacs AppendLigand 

20 | This class takes a ligand ITP file and inserts it in a topology. 

21 | This module automatizes the process of inserting a ligand ITP file in a GROMACS topology. 

22 

23 Args: 

24 input_top_zip_path (str): Path the input topology TOP and ITP files zipball. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/ndx2resttop.zip>`_. Accepted formats: zip (edam:format_3987). 

25 input_itp_path (str): Path to the ligand ITP file to be inserted in the topology. File type: input. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/data/gromacs_extra/pep_ligand.itp>`_. Accepted formats: itp (edam:format_3883). 

26 output_top_zip_path (str): Path/Name the output topology TOP and ITP files zipball. File type: output. `Sample file <https://github.com/bioexcel/biobb_gromacs/raw/master/biobb_gromacs/test/reference/gromacs_extra/ref_appendligand.zip>`_. Accepted formats: zip (edam:format_3987). 

27 input_posres_itp_path (str) (Optional): Path to the position restriction ITP file. File type: input. Accepted formats: itp (edam:format_3883). 

28 properties (dic): 

29 * **posres_name** (*str*) - ("POSRES_LIGAND") String to be included in the ifdef clause. 

30 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

31 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

32 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

33 

34 Examples: 

35 This is a use example of how to use the building block from Python:: 

36 

37 from biobb_gromacs.gromacs_extra.append_ligand import append_ligand 

38 prop = { 'posres_name': 'POSRES_LIGAND' } 

39 append_ligand(input_top_zip_path='/path/to/myTopology.zip', 

40 input_itp_path='/path/to/myTopologyAddOn.itp', 

41 output_top_zip_path='/path/to/newTopology.zip', 

42 properties=prop) 

43 

44 Info: 

45 * wrapped_software: 

46 * name: In house 

47 * license: Apache-2.0 

48 * ontology: 

49 * name: EDAM 

50 * schema: http://edamontology.org/EDAM.owl 

51 """ 

52 

53 def __init__( 

54 self, 

55 input_top_zip_path: str, 

56 input_itp_path: str, 

57 output_top_zip_path: str, 

58 input_posres_itp_path: Optional[str] = None, 

59 properties: Optional[dict] = None, 

60 **kwargs, 

61 ) -> None: 

62 properties = properties or {} 

63 

64 # Call parent class constructor 

65 super().__init__(properties) 

66 self.locals_var_dict = locals().copy() 

67 

68 # Input/Output files 

69 self.io_dict = { 

70 "in": { 

71 "input_top_zip_path": input_top_zip_path, 

72 "input_itp_path": input_itp_path, 

73 "input_posres_itp_path": input_posres_itp_path, 

74 }, 

75 "out": {"output_top_zip_path": output_top_zip_path}, 

76 } 

77 

78 # Properties specific for BB 

79 self.posres_name = properties.get("posres_name", "POSRES_LIGAND") 

80 

81 # Check the properties 

82 self.check_properties(properties) 

83 self.check_arguments() 

84 

85 @launchlogger 

86 def launch(self) -> int: 

87 """Execute the :class:`AppendLigand <gromacs_extra.append_ligand.AppendLigand>` object.""" 

88 # Setup Biobb 

89 if self.check_restart(): 

90 return 0 

91 

92 # Unzip topology 

93 top_file = fu.unzip_top( 

94 zip_file=str(self.io_dict["in"].get("input_top_zip_path")), 

95 out_log=self.out_log, 

96 ) 

97 top_dir = str(Path(top_file).parent) 

98 itp_name = str(Path(str(self.io_dict["in"].get("input_itp_path"))).name) 

99 

100 with open(top_file) as top_f: 

101 top_lines = top_f.readlines() 

102 top_f.close() 

103 fu.rm(top_file) 

104 

105 forcefield_pattern = r"#include.*forcefield.itp\"" 

106 if top_lines: 

107 for ff_index, line in enumerate(top_lines): 

108 if re.search(forcefield_pattern, line): 

109 break 

110 else: 

111 fu.log( 

112 f'FATAL: Input topfile {top_file} from input_top_zip_path {self.io_dict["in"].get("input_top_zip_path")} is empty.', 

113 self.out_log, 

114 self.global_log, 

115 ) 

116 return 1 

117 

118 ligand_itp_path = self.io_dict["in"].get("input_itp_path") 

119 

120 # Read ligand itp contents 

121 with open(ligand_itp_path, 'r') as itp_file: 

122 ligand_itp_contents = itp_file.readlines() 

123 

124 # Separate ligand [ atomtypes ] section from the rest 

125 lig_atomtypes_section = [] 

126 remaining_itp_contents = [] 

127 in_atomtypes_section = False 

128 for line in ligand_itp_contents: 

129 if line.strip().startswith("[ atomtypes ]"): 

130 in_atomtypes_section = True 

131 lig_atomtypes_section.append(line) 

132 elif in_atomtypes_section: 

133 if line.strip() == "" or line.startswith("["): 

134 in_atomtypes_section = False 

135 remaining_itp_contents.append(line) 

136 else: 

137 lig_atomtypes_section.append(line) 

138 else: 

139 remaining_itp_contents.append(line) 

140 

141 # If the ligand itp contains an [ atomtypes ] section, merge it into the main topology 

142 if lig_atomtypes_section: 

143 

144 # Look for the [ atomtypes ] section in the main topology 

145 top_atomtypes_section = [] 

146 in_atomtypes_section = False 

147 for line in top_lines: 

148 if line.strip().startswith("[ atomtypes ]"): 

149 in_atomtypes_section = True 

150 top_atomtypes_section.append(line) 

151 elif in_atomtypes_section: 

152 if line.strip() == "" or line.startswith("["): 

153 in_atomtypes_section = False 

154 else: 

155 top_atomtypes_section.append(line) 

156 

157 # If there is already an [ atomtypes ] section in the main topology 

158 if top_atomtypes_section: 

159 

160 # Remove the header and comments of the ligand [ atomtypes ] section 

161 lig_atomtypes_section = lig_atomtypes_section[2:] 

162 

163 # Remove the [ atomtypes ] section from top_lines 

164 top_lines = [line for line in top_lines if line not in top_atomtypes_section] 

165 

166 # NOTE: Check for repeated atoms in the [ atomtypes ] section 

167 # NOTE: raise error if there are conflicts - atoms named equally with different parameters 

168 # NOTE: raise error if there are different number of columns in the atomtypes sections 

169 

170 top_lines.insert(ff_index + 1, "\n") 

171 

172 # Merge both [ atomtypes ] sections 

173 atomtype_section = top_atomtypes_section + lig_atomtypes_section 

174 

175 # Write the merged [ atomtypes ] section into the main topology after the forcefield include 

176 for atomtype_index in range(len(atomtype_section)): 

177 top_lines.insert(ff_index + atomtype_index + 2, atomtype_section[atomtype_index]) 

178 

179 # Update the index for the remaining directives 

180 at_index = ff_index + atomtype_index + 2 

181 else: 

182 at_index = ff_index 

183 

184 top_lines.insert(at_index + 1, "\n") 

185 top_lines.insert(at_index + 2, "; Including ligand ITP\n") 

186 top_lines.insert(at_index + 3, '#include "' + itp_name + '"\n') 

187 top_lines.insert(at_index + 4, "\n") 

188 if self.io_dict["in"].get("input_posres_itp_path"): 

189 top_lines.insert(at_index + 5, "; Ligand position restraints" + "\n") 

190 top_lines.insert(at_index + 6, "#ifdef " + self.posres_name + "\n") 

191 top_lines.insert( 

192 at_index + 7, 

193 '#include "' + str(Path(self.io_dict["in"].get("input_posres_itp_path", "")).name) + '"\n' 

194 ) 

195 top_lines.insert(at_index + 8, "#endif" + "\n") 

196 top_lines.insert(at_index + 9, "\n") 

197 

198 inside_moleculetype_section = False 

199 with open(self.io_dict["in"].get("input_itp_path", "")) as itp_file: 

200 moleculetype_pattern = r"\[ moleculetype \]" 

201 for line in itp_file: 

202 if re.search(moleculetype_pattern, line): 

203 inside_moleculetype_section = True 

204 continue 

205 if inside_moleculetype_section and not line.startswith(";"): 

206 moleculetype = line.strip().split()[0].strip() 

207 break 

208 

209 molecules_pattern = r"\[ molecules \]" 

210 inside_molecules_section = False 

211 index_molecule = None 

212 molecule_string = ( 

213 str(moleculetype) + int(20 - len(moleculetype)) * " " + "1" + "\n" 

214 ) 

215 for index, line in enumerate(top_lines): 

216 if re.search(molecules_pattern, line): 

217 inside_molecules_section = True 

218 continue 

219 if ( 

220 inside_molecules_section and not line.startswith(";") and line.upper().startswith("PROTEIN") 

221 ): 

222 index_molecule = index 

223 

224 if index_molecule: 

225 top_lines.insert(index_molecule + 1, molecule_string) 

226 else: 

227 top_lines.append(molecule_string) 

228 

229 new_top = fu.create_name( 

230 path=top_dir, prefix=self.prefix, step=self.step, name="ligand.top" 

231 ) 

232 

233 with open(new_top, "w") as new_top_f: 

234 new_top_f.write("".join(top_lines)) 

235 

236 # Create a new itp ligand file without the [ atomtypes ] section 

237 new_ligand_tip_path = str(Path(top_dir) / itp_name) 

238 with open(new_ligand_tip_path, 'w') as new_itp_file: 

239 new_itp_file.write("".join(remaining_itp_contents)) 

240 

241 if self.io_dict["in"].get("input_posres_itp_path"): 

242 shutil.copy2(self.io_dict["in"].get("input_posres_itp_path", ""), top_dir) 

243 

244 # zip topology 

245 fu.log( 

246 "Compressing topology to: %s" 

247 % self.io_dict["out"].get("output_top_zip_path"), 

248 self.out_log, 

249 self.global_log, 

250 ) 

251 fu.zip_top( 

252 zip_file=self.io_dict["out"].get("output_top_zip_path", ""), 

253 top_file=new_top, 

254 out_log=self.out_log, 

255 remove_original_files=self.remove_tmp 

256 ) 

257 

258 # Remove temporal files 

259 self.tmp_files.append(top_dir) 

260 self.remove_tmp_files() 

261 

262 self.check_arguments(output_files_created=True, raise_exception=False) 

263 return 0 

264 

265 

266def append_ligand( 

267 input_top_zip_path: str, 

268 input_itp_path: str, 

269 output_top_zip_path: str, 

270 input_posres_itp_path: Optional[str] = None, 

271 properties: Optional[dict] = None, 

272 **kwargs, 

273) -> int: 

274 """Create :class:`AppendLigand <gromacs_extra.append_ligand.AppendLigand>` class and 

275 execute the :meth:`launch() <gromacs_extra.append_ligand.AppendLigand.launch>` method.""" 

276 return AppendLigand( 

277 input_top_zip_path=input_top_zip_path, 

278 input_itp_path=input_itp_path, 

279 output_top_zip_path=output_top_zip_path, 

280 input_posres_itp_path=input_posres_itp_path, 

281 properties=properties, 

282 **kwargs, 

283 ).launch() 

284 

285 

286append_ligand.__doc__ = AppendLigand.__doc__ 

287 

288 

289def main(): 

290 """Command line execution of this building block. Please check the command line documentation.""" 

291 parser = argparse.ArgumentParser( 

292 description="Wrapper of the GROMACS editconf module.", 

293 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999), 

294 ) 

295 parser.add_argument( 

296 "-c", 

297 "--config", 

298 required=False, 

299 help="This file can be a YAML file, JSON file or JSON string", 

300 ) 

301 

302 # Specific args of each building block 

303 required_args = parser.add_argument_group("required arguments") 

304 required_args.add_argument("--input_top_zip_path", required=True) 

305 required_args.add_argument("--input_itp_path", required=True) 

306 required_args.add_argument("--output_top_zip_path", required=True) 

307 parser.add_argument("--input_posres_itp_path", required=False) 

308 

309 args = parser.parse_args() 

310 config = args.config if args.config else None 

311 properties = settings.ConfReader(config=config).get_prop_dic() 

312 

313 # Specific call of each building block 

314 append_ligand( 

315 input_top_zip_path=args.input_top_zip_path, 

316 input_itp_path=args.input_itp_path, 

317 output_top_zip_path=args.output_top_zip_path, 

318 input_posres_itp_path=args.input_posres_itp_path, 

319 properties=properties, 

320 ) 

321 

322 

323if __name__ == "__main__": 

324 main()