Coverage for biobb_pmx/pmxbiobb/pmxanalyse.py: 75%

120 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-23 10:10 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PMX analyse class and the command line interface.""" 

4 

5import argparse 

6import shutil 

7from pathlib import Path 

8from typing import Optional 

9 

10from biobb_common.configuration import settings 

11from biobb_common.generic.biobb_object import BiobbObject 

12from biobb_common.tools import file_utils as fu 

13from biobb_common.tools.file_utils import launchlogger 

14 

15 

class Pmxanalyse(BiobbObject):
    """
    | biobb_pmx Pmxanalyse
    | Wrapper class for the `PMX analyse <https://github.com/deGrootLab/pmx>`_ module.
    | Analyze the work values from the dgdl.xvg files of the A and B states to calculate the free energy difference between two states.

    Args:
        input_a_xvg_zip_path (str): Path the zip file containing the dgdl.xvg files of the A state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_A.zip>`_. Accepted formats: zip (edam:format_3987).
        input_b_xvg_zip_path (str): Path the zip file containing the dgdl.xvg files of the B state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_B.zip>`_. Accepted formats: zip (edam:format_3987).
        output_result_path (str): Path to the TXT results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_result.txt>`_. Accepted formats: txt (edam:format_2330).
        output_work_plot_path (str): Path to the PNG plot results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_plot.png>`_. Accepted formats: png (edam:format_3603).
        properties (dic):
            * **method** (*str*) - ("CGI BAR JARZ") Choose one or more estimators to use. Values: CGI (Crooks Gaussian Intersection), BAR (Bennet Acceptance Ratio), JARZ (Jarzynski's estimator).
            * **temperature** (*float*) - (298.15) [0~1000|0.05] Temperature in Kelvin.
            * **nboots** (*int*) - (0) [0~1000|1] Number of bootstrap samples to use for the bootstrap estimate of the standard errors.
            * **nblocks** (*int*) - (1) [0~1000|1] Number of blocks to divide the data into for an estimate of the standard error.
            * **integ_only** (*bool*) - (False) Whether to do integration only.
            * **reverseB** (*bool*) - (False) Whether to reverse the work values for the backward (B->A) transformation.
            * **skip** (*int*) - (1) [0~1000|1] Skip files.
            * **slice** (*str*) - (None) Subset of trajectories to analyze. Provide list slice, e.g. "10 50" will result in selecting dhdl_files[10:50].
            * **rand** (*int*) - (None) [0~1000|1] Take a random subset of trajectories. Default is None (do not take random subset).
            * **index** (*str*) - (None) Zero-based index of files to analyze (e.g. "0 10 20 50 60"). It keeps the dhdl.xvg files according to their position in the list, sorted according to the filenames.
            * **prec** (*int*) - (2) [0~100|1] The decimal precision of the screen/file output.
            * **units** (*str*) - ("kJ") The units of the output. Values: kJ (Kilojoules), kcal (Kilocalories), kT (the product of the Boltzmann constant k and the temperature).
            * **no_ks** (*bool*) - (False) Whether to do a Kolmogorov-Smirnov test to check whether the Gaussian assumption for CGI holds.
            * **nbins** (*int*) - (20) [0~1000|1] Number of histograms bins for the plot.
            * **dpi** (*int*) - (300) [72~2048|1] Resolution of the plot.
            * **binary_path** (*str*) - ("pmx") Path to the PMX command line interface.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_pmx.pmxbiobb.pmxanalyse import pmxanalyse
            prop = {
                'method': 'CGI BAR JARZ',
                'temperature': 298.15,
                'dpi': 600
            }
            pmxanalyse(input_a_xvg_zip_path='/path/to/myAStateFiles.zip',
                       input_b_xvg_zip_path='/path/to/myBStateFiles.zip',
                       output_result_path='/path/to/newResults.txt',
                       output_work_plot_path='/path/to/newResults.png',
                       properties=prop)

    Info:
        * wrapped_software:
            * name: PMX analyse
            * version: >=1.0.1
            * license: GNU
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_a_xvg_zip_path: str,
        input_b_xvg_zip_path: str,
        output_result_path: str,
        output_work_plot_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        """Store the file paths and the analysis properties of the building block.

        Args:
            input_a_xvg_zip_path (str): Zip file with the A state xvg files.
            input_b_xvg_zip_path (str): Zip file with the B state xvg files.
            output_result_path (str): Destination of the TXT results file.
            output_work_plot_path (str): Destination of the PNG plot file.
            properties (dict): Optional property overrides; ``None`` is treated as ``{}``.
            **kwargs: Accepted and captured in ``locals_var_dict``; not used directly here.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any new
        # local variable is introduced in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {},
            "out": {
                "output_result_path": output_result_path,
                "output_work_plot_path": output_work_plot_path,
            },
        }
        # Should not be copied inside container: the zips are unpacked on the
        # host in launch(), so they are kept out of io_dict["in"].
        self.input_a_xvg_zip_path = input_a_xvg_zip_path
        self.input_b_xvg_zip_path = input_b_xvg_zip_path

        # Properties specific for BB
        self.method = properties.get("method", "CGI BAR JARZ")
        self.temperature = properties.get("temperature", 298.15)
        self.nboots = properties.get("nboots", 0)
        self.nblocks = properties.get("nblocks", 1)
        self.integ_only = properties.get("integ_only", False)
        self.reverseB = properties.get("reverseB", False)
        self.skip = properties.get("skip", 1)
        self.slice = properties.get("slice", None)
        self.rand = properties.get("rand", None)
        self.index = properties.get("index", None)
        self.prec = properties.get("prec", 2)
        self.units = properties.get("units", "kJ")
        self.no_ks = properties.get("no_ks", False)
        self.nbins = properties.get("nbins", 20)
        self.dpi = properties.get("dpi", 300)

        # Properties common in all PMX BB
        self.binary_path = properties.get("binary_path", "pmx")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _extract_xvg_files(self, zip_path: str, dest_dir: str) -> list:
        """Unzip ``zip_path`` into ``dest_dir`` and return the usable files.

        Only entries that exist on disk and are larger than 10 bytes are kept,
        in the order returned by ``fu.unzip_list``.
        """
        return [
            xvg_file
            for xvg_file in fu.unzip_list(zip_path, dest_dir, self.out_log)
            if Path(xvg_file).exists() and Path(xvg_file).stat().st_size > 10
        ]

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` pmx.pmxanalyse.Pmxanalyse object.

        Returns:
            int: ``0`` when ``check_restart()`` is truthy (nothing is executed),
            otherwise ``self.return_code``.

        Raises:
            FileNotFoundError: If no container is configured and ``binary_path``
                is neither an existing file nor found by ``shutil.which``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Check that the executable exists (only relevant outside a container)
        if not self.container_path:
            if not Path(self.binary_path).is_file():
                if not shutil.which(self.binary_path):
                    raise FileNotFoundError(
                        "Executable %s not found. Check if it is installed in your system and correctly defined in the properties"
                        % self.binary_path
                    )

        # Unpack both states into their own temporary directories
        list_a_dir = fu.create_unique_dir()
        list_b_dir = fu.create_unique_dir()
        list_a = self._extract_xvg_files(self.input_a_xvg_zip_path, list_a_dir)
        list_b = self._extract_xvg_files(self.input_b_xvg_zip_path, list_b_dir)
        string_a = " ".join(list_a)
        string_b = " ".join(list_b)

        # Copy extra files to container: two directories containing the xvg files
        if self.container_path:
            staged_dir = Path(self.stage_io_dict.get("unique_dir", ""))
            shutil.copytree(list_a_dir, staged_dir.joinpath(Path(list_a_dir).name))
            shutil.copytree(list_b_dir, staged_dir.joinpath(Path(list_b_dir).name))
            # Prefix every file with the container volume path
            container_volume = " " + self.container_volume_path + "/"
            string_a = self.container_volume_path + "/" + container_volume.join(list_a)
            string_b = self.container_volume_path + "/" + container_volume.join(list_b)

        self.cmd = [
            self.binary_path,
            "analyse",
            "-fA",
            string_a,
            "-fB",
            string_b,
            "-o",
            self.stage_io_dict["out"]["output_result_path"],
            "-w",
            self.stage_io_dict["out"]["output_work_plot_path"],
        ]

        if self.method:
            self.cmd.extend(["-m", self.method])
        if self.temperature:
            self.cmd.extend(["-t", str(self.temperature)])
        if self.nboots:
            self.cmd.extend(["-b", str(self.nboots)])
        if self.nblocks:
            self.cmd.extend(["-n", str(self.nblocks)])
        if self.integ_only:
            self.cmd.append("--integ_only")
        if self.reverseB:
            self.cmd.append("--reverseB")
        if self.skip:
            self.cmd.extend(["--skip", str(self.skip)])
        if self.slice:
            self.cmd.extend(["--slice", self.slice])
        if self.rand:
            # Fix: the subset size must follow the flag; previously only
            # "--rand" was emitted and the configured value was lost.
            self.cmd.extend(["--rand", str(self.rand)])
        if self.index:
            self.cmd.extend(["--index", self.index])
        # Fix: 0 is a valid precision, so test against None instead of
        # truthiness to avoid silently dropping prec=0.
        if self.prec is not None:
            self.cmd.extend(["--prec", str(self.prec)])
        if self.units:
            self.cmd.extend(["--units", self.units])
        if self.no_ks:
            self.cmd.append("--no_ks")
        if self.nbins:
            self.cmd.extend(["--nbins", str(self.nbins)])
        if self.dpi:
            self.cmd.extend(["--dpi", str(self.dpi)])

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Only the two extraction directories are queued for removal here;
        # the staged "unique_dir" is intentionally not added to this list.
        self.tmp_files.extend([list_a_dir, list_b_dir])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)
        return self.return_code

253 

254 

def pmxanalyse(
    input_a_xvg_zip_path: str,
    input_b_xvg_zip_path: str,
    output_result_path: str,
    output_work_plot_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` class and
    execute the :meth:`launch() <pmx.pmxanalyse.Pmxanalyse.launch>` method.

    Args:
        input_a_xvg_zip_path (str): Zip file with the A state xvg files.
        input_b_xvg_zip_path (str): Zip file with the B state xvg files.
        output_result_path (str): Destination of the TXT results file.
        output_work_plot_path (str): Destination of the PNG plot file.
        properties (dict): Optional properties dictionary passed to the class.
        **kwargs: Extra keyword arguments, forwarded to the class constructor.

    Returns:
        int: The value returned by ``Pmxanalyse.launch()``.
    """

    # Forward **kwargs as well: the constructor accepts them, and dropping
    # them here silently discarded whatever the caller supplied.
    return Pmxanalyse(
        input_a_xvg_zip_path=input_a_xvg_zip_path,
        input_b_xvg_zip_path=input_b_xvg_zip_path,
        output_result_path=output_result_path,
        output_work_plot_path=output_work_plot_path,
        properties=properties,
        **kwargs,
    ).launch()


# Expose the class documentation on the function wrapper. This assignment was
# previously placed after the ``return`` inside the function body, where it
# could never execute; it belongs at module level.
pmxanalyse.__doc__ = Pmxanalyse.__doc__

275 

276 

def main():
    """Parse the command line and run the PMX analyse building block.

    Reads the optional ``-c/--config`` value and the four required file path
    options, loads the properties through ``settings.ConfReader`` and hands
    everything to ``pmxanalyse``.
    """

    def wide_help_formatter(prog):
        # Raw-text help formatter configured with a very large line width.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description="Wrapper class for the PMX analyse module.",
        formatter_class=wide_help_formatter,
    )
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Specific args of each building block: (option, help text), registered
    # in this order inside the "required arguments" group.
    file_options = (
        (
            "--input_a_xvg_zip_path",
            "Path the zip file containing the dgdl.xvg files of the A state. Accepted formats: zip.",
        ),
        (
            "--input_b_xvg_zip_path",
            "Path the zip file containing the dgdl.xvg files of the B state. Accepted formats: zip.",
        ),
        (
            "--output_result_path",
            "Path to the TXT results file. Accepted formats: txt.",
        ),
        (
            "--output_work_plot_path",
            "Path to the PNG plot results file. Accepted formats: png.",
        ),
    )
    required_group = cli.add_argument_group("required arguments")
    for option, help_text in file_options:
        required_group.add_argument(option, required=True, help=help_text)

    parsed = cli.parse_args()
    # A missing or empty --config value is normalised to None.
    config_source = parsed.config or None
    properties = settings.ConfReader(config=config_source).get_prop_dic()

    # Specific call of each building block
    pmxanalyse(
        input_a_xvg_zip_path=parsed.input_a_xvg_zip_path,
        input_b_xvg_zip_path=parsed.input_b_xvg_zip_path,
        output_result_path=parsed.output_result_path,
        output_work_plot_path=parsed.output_work_plot_path,
        properties=properties,
    )


# Run the command line interface when the module is executed as a script.
if __name__ == "__main__":
    main()