Coverage for biobb_flexdyn/flexdyn/concoord_disco.py: 63%

133 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-04 11:11 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the concoord_disco class and the command line interface.""" 

4import argparse 

5from typing import Optional 

6import os 

7import shutil 

8from pathlib import Path 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.configuration import settings 

12from biobb_common.tools.file_utils import launchlogger 

13 

14 

class ConcoordDisco(BiobbObject):
    """
    | biobb_flexdyn ConcoordDisco
    | Wrapper of the Disco tool from the Concoord package.
    | Structure generation based on a set of geometric constraints extracted with the Concoord Dist tool.

    Args:
        input_pdb_path (str): Input structure file in PDB format. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexdyn/raw/master/biobb_flexdyn/test/data/flexdyn/structure.pdb>`_. Accepted formats: pdb (edam:format_1476).
        input_dat_path (str): Input dat with structure interpretation and bond definitions. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexdyn/raw/master/biobb_flexdyn/test/data/flexdyn/dist.dat>`_. Accepted formats: dat (edam:format_1637), txt (edam:format_2330).
        output_traj_path (str): Output trajectory file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexdyn/raw/master/biobb_flexdyn/test/reference/flexdyn/disco_trj.pdb>`_. Accepted formats: pdb (edam:format_1476), xtc (edam:format_3875), gro (edam:format_2033).
        output_rmsd_path (str): Output rmsd file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexdyn/raw/master/biobb_flexdyn/test/reference/flexdyn/disco_rmsd.dat>`_. Accepted formats: dat (edam:format_1637).
        output_bfactor_path (str): Output B-factor file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexdyn/raw/master/biobb_flexdyn/test/reference/flexdyn/disco_bfactor.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("disco") Concoord disco binary path to be used.
            * **vdw** (*int*) - (1) Select a set of Van der Waals parameters. Values: 1 (OPLS-UA -united atoms- parameters), 2 (OPLS-AA -all atoms- parameters), 3 (PROLSQ repel parameters), 4 (Yamber2 parameters), 5 (Li et al. parameters), 6 (OPLS-X parameters -recommended for NMR structure determination-)
            * **num_structs** (*int*) - (500) Number of structures to be generated
            * **num_iterations** (*int*) - (2500) Maximum number of iterations per structure
            * **chirality_check** (*int*) - (2) Chirality check. Values: 0 (no chirality checks), 1 (only check afterwards), 2 (check on the fly)
            * **bs** (*int*) - (0) Number of rounds of triangular bound smoothing (default 0), (if >= 6, tetragonal BS is activated)
            * **nofit** (*bool*) - (False) Do not fit generated structures to reference
            * **seed** (*int*) - (741265) Initial random seed
            * **violation** (*float*) - (1.0) Maximal acceptable sum of violations (nm)
            * **convergence** (*int*) - (50) Consider convergence failed after this number of non-productive iterations
            * **trials** (*int*) - (25) Maximum number of trials per run
            * **damp** (*int*) - (1) Damping factor for distance corrections. Values: 1 (default), 2 (for cases with convergence problems)
            * **dyn** (*int*) - (1) Number of rounds to dynamically set tolerances
            * **bump** (*bool*) - (False) Do extra bump check
            * **pairlist_freq** (*int*) - (10) Pairlist update frequency in steps (only valid together with bump)
            * **cutoff** (*float*) - (0.5) Cut-off radius for pairlist (nm) (only valid together with bump)
            * **ref** (*bool*) - (False) Use input coordinates instead of random starting coordinates
            * **scale** (*int*) - (1) Pre-scale coordinates with this factor
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexdyn.flexdyn.concoord_disco import concoord_disco
            prop = {
                'vdw' : 4,
                'num_structs' : 20
            }
            concoord_disco( input_pdb_path='/path/to/dist_input.pdb',
                            input_dat_path='/path/to/dist_input.dat',
                            output_traj_path='/path/to/disco_out_traj.pdb',
                            output_rmsd_path='/path/to/disco_out_rmsd.dat',
                            output_bfactor_path='/path/to/disco_out_bfactor.pdb',
                            properties=prop)

    Info:
        * wrapped_software:
            * name: Concoord
            * version: >=2.1.2
            * license: other
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    # 'vdw' property value -> suffix of the MARGINS_<suffix>.DAT / ATOMS_<suffix>.DAT
    # parameter files looked up in $CONCOORDLIB.
    _VDW_PARAMETER_SETS = {
        1: "oplsua",
        2: "oplsaa",
        3: "repel",
        4: "yamber2",
        5: "li",
        6: "oplsx",
    }

    # Trajectory file extension -> disco output flag.
    _TRAJ_FORMAT_FLAGS = {
        ".pdb": "-on",  # NMR-PDB format (multi-model)
        ".gro": "-ot",
        ".xtc": "-ox",
    }

    # (attribute name, disco flag) for options that carry a value. The order is
    # the order in which they are appended to the command line.
    _VALUE_OPTIONS = (
        ("num_structs", "-n"),
        ("num_iterations", "-i"),
        ("chirality_check", "-c"),
        ("bs", "-bs"),
        ("cutoff", "-rc"),
        ("seed", "-s"),
        ("damp", "-damp"),
        ("violation", "-viol"),
        ("convergence", "-con"),
        ("trials", "-t"),
        ("dyn", "-dyn"),
        ("pairlist_freq", "-l"),
        ("scale", "-is"),
    )

    # (attribute name, disco flag) for boolean switches (flag present == enabled).
    _SWITCH_OPTIONS = (
        ("nofit", "-f"),
        ("bump", "-bump"),
        ("ref", "-ref"),
    )

    def __init__(self, input_pdb_path: str, input_dat_path: str, output_traj_path: str,
                 output_rmsd_path: str, output_bfactor_path: str, properties: Optional[dict] = None, **kwargs) -> None:

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pdb_path': input_pdb_path,
                   'input_dat_path': input_dat_path},
            'out': {'output_traj_path': output_traj_path,
                    'output_rmsd_path': output_rmsd_path,
                    'output_bfactor_path': output_bfactor_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'disco')

        # Tool parameters: a value left as None is simply not passed on the
        # command line (see launch()).
        self.vdw = properties.get('vdw')
        self.num_structs = properties.get('num_structs')
        self.num_iterations = properties.get('num_iterations')
        self.chirality_check = properties.get('chirality_check')
        self.bs = properties.get('bs')
        self.nofit = properties.get('nofit')
        self.seed = properties.get('seed')
        self.violation = properties.get('violation')
        self.convergence = properties.get('convergence')
        self.trials = properties.get('trials')
        self.damp = properties.get('damp')
        self.dyn = properties.get('dyn')
        self.bump = properties.get('bump')
        self.pairlist_freq = properties.get('pairlist_freq')
        self.cutoff = properties.get('cutoff')
        self.ref = properties.get('ref')
        self.scale = properties.get('scale')

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _parameter_files(self):
        """Return the (margins, atoms, bonds) library file paths for the selected 'vdw' set.

        Raises:
            FileNotFoundError: if the CONCOORDLIB environment variable is not set.
            ValueError: if 'vdw' is not one of the documented values (1-6).
        """
        concoord_lib = os.getenv("CONCOORDLIB")
        if not concoord_lib:
            # Without this check the path would silently become "None/MARGINS_...".
            raise FileNotFoundError(
                "The CONCOORDLIB environment variable is not set: "
                "cannot locate the Concoord MARGINS/ATOMS/BONDS parameter files")

        # Fall back to the documented default (1) when the property is not given.
        vdw_index = 1 if self.vdw is None else int(self.vdw)
        if vdw_index not in self._VDW_PARAMETER_SETS:
            raise ValueError(
                "The 'vdw' property must be an integer between 1 and 6 (got {!r})".format(self.vdw))

        vdw_name = self._VDW_PARAMETER_SETS[vdw_index]
        return (os.path.join(concoord_lib, "MARGINS_" + vdw_name + ".DAT"),
                os.path.join(concoord_lib, "ATOMS_" + vdw_name + ".DAT"),
                os.path.join(concoord_lib, "BONDS.DAT"))

    @staticmethod
    def _relative_to_sandbox(file_path, unique_dir):
        """Return file_path expressed relative to unique_dir (the command runs after `cd unique_dir`)."""
        return str(Path(file_path).relative_to(Path(unique_dir)))

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexDyn ConcoordDisco module.

        Returns:
            int: 0 when execution is skipped by the restart check, otherwise
            the value of ``self.return_code`` after running the command.

        Raises:
            FileNotFoundError: if CONCOORDLIB is unset (or a library file is missing when copied).
            ValueError: if the 'vdw' property is outside 1-6.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Resolve the library files before staging so that a configuration
        # error does not leave a half-prepared sandbox behind.
        margins_file, atoms_file, bonds_file = self._parameter_files()

        self.stage_files()
        unique_dir = self.stage_io_dict.get("unique_dir", "")

        # Copy auxiliary files (MARGINS, ATOMS, BONDS) according to the VdW property to the working dir.
        # The margins file is copied both under its own name and as MARGINS.DAT.
        shutil.copy2(margins_file, unique_dir)
        shutil.copy2(margins_file, unique_dir + "/MARGINS.DAT")
        shutil.copy2(atoms_file, unique_dir)
        shutil.copy2(bonds_file, unique_dir)

        staged_in = self.stage_io_dict["in"]
        staged_out = self.stage_io_dict["out"]

        # Command line, e.g.:
        #   disco -d dist.dat -p dist.pdb -on out.pdb
        # The tool is run from inside the sandbox, so every path is made relative to it.
        self.cmd = ["cd ", unique_dir, ";", self.binary_path,
                    "-p", self._relative_to_sandbox(staged_in["input_pdb_path"], unique_dir),
                    "-d", self._relative_to_sandbox(staged_in["input_dat_path"], unique_dir),
                    "-or", self._relative_to_sandbox(staged_out["output_rmsd_path"], unique_dir),
                    "-of", self._relative_to_sandbox(staged_out["output_bfactor_path"], unique_dir)
                    ]

        # Output structure formats: the flag is chosen from the trajectory file extension.
        file_extension = Path(staged_out["output_traj_path"]).suffix
        traj_flag = self._TRAJ_FORMAT_FLAGS.get(file_extension)
        if traj_flag is not None:
            self.cmd.append(traj_flag)
            self.cmd.append(self._relative_to_sandbox(staged_out["output_traj_path"], unique_dir))
        else:
            # Unsupported extension: report it and carry on without a trajectory flag.
            fu.log("ERROR: output_traj_path ({}) must be a PDB, GRO or XTC formatted file ({})".format(self.io_dict["out"]["output_traj_path"], file_extension), self.out_log, self.global_log)

        # Properties with a value. Compare against None rather than truthiness:
        # 0 / 0.0 are legitimate settings (e.g. chirality_check=0 disables the
        # chirality checks) and must reach the command line.
        for attribute, flag in self._VALUE_OPTIONS:
            value = getattr(self, attribute)
            if value is not None:
                self.cmd.append(flag)
                self.cmd.append(str(value))

        # Boolean switches: present only when enabled.
        for attribute, flag in self._SWITCH_OPTIONS:
            if getattr(self, attribute):
                self.cmd.append(flag)

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s)
        self.tmp_files.extend([
            unique_dir
        ])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

255 

256 

def concoord_disco(input_pdb_path: str, input_dat_path: str,
                   output_traj_path: str, output_rmsd_path: str, output_bfactor_path: str,
                   properties: Optional[dict] = None, **kwargs) -> int:
    """Create the :class:`ConcoordDisco <flexdyn.concoord_disco.ConcoordDisco>` class and
    execute the :meth:`launch() <flexdyn.concoord_disco.ConcoordDisco.launch>` method.

    All arguments, including any extra keyword arguments, are handed to the
    ``ConcoordDisco`` constructor; the value returned by ``launch()`` is returned.
    """

    # Forward **kwargs instead of dropping them: the constructor accepts them.
    return ConcoordDisco(input_pdb_path=input_pdb_path,
                         input_dat_path=input_dat_path,
                         output_traj_path=output_traj_path,
                         output_rmsd_path=output_rmsd_path,
                         output_bfactor_path=output_bfactor_path,
                         properties=properties,
                         **kwargs).launch()

269 

270 

def main():
    """Command line entry point: parse the arguments, load the configuration and run concoord_disco."""
    cli = argparse.ArgumentParser(
        description='Structure generation based on a set of geometric constraints extracted with the Concoord Dist tool.',
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    cli.add_argument('--config', required=False, help='Configuration file')

    # Specific args: every input/output path is mandatory.
    io_group = cli.add_argument_group('required arguments')
    io_arguments = (
        ('--input_pdb_path', 'Input structure file in PDB format. Accepted formats: pdb'),
        ('--input_dat_path', 'Input dat with structure interpretation and bond definitions. Accepted formats: dat, txt'),
        ('--output_traj_path', 'Output trajectory file. Accepted formats: pdb, gro, xtc.'),
        ('--output_rmsd_path', 'Output RMSd file. Accepted formats: dat.'),
        ('--output_bfactor_path', 'Output B-factor file. Accepted formats: pdb.'),
    )
    for option, description in io_arguments:
        io_group.add_argument(option, required=True, help=description)

    parsed = cli.parse_args()
    # When no --config is given, the literal "{}" is handed to the configuration reader.
    parsed.config = parsed.config or "{}"
    properties = settings.ConfReader(config=parsed.config).get_prop_dic()

    # Specific call
    concoord_disco(
        input_pdb_path=parsed.input_pdb_path,
        input_dat_path=parsed.input_dat_path,
        output_traj_path=parsed.output_traj_path,
        output_rmsd_path=parsed.output_rmsd_path,
        output_bfactor_path=parsed.output_bfactor_path,
        properties=properties,
    )

294 

295 

# Run the command line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()