Coverage for biobb_pmx/pmxbiobb/pmxanalyse.py: 87%

108 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-05-29 06:59 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PMX analyse class and the command line interface.""" 

4 

5import shutil 

6from pathlib import Path, PurePath 

7from typing import Optional 

8 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class Pmxanalyse(BiobbObject):
    """
    | biobb_pmx Pmxanalyse
    | Wrapper class for the `PMX analyse <https://github.com/deGrootLab/pmx>`_ module.
    | Analyze the work values from the dgdl.xvg files of the A and B states to calculate the free energy difference between two states.

    Args:
        input_a_xvg_zip_path (str): Path the zip file containing the dgdl.xvg files of the A state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_A.zip>`_. Accepted formats: zip (edam:format_3987).
        input_b_xvg_zip_path (str): Path the zip file containing the dgdl.xvg files of the B state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_B.zip>`_. Accepted formats: zip (edam:format_3987).
        output_result_path (str): Path to the TXT results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_result.txt>`_. Accepted formats: txt (edam:format_2330).
        output_work_plot_path (str): Path to the PNG plot results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_plot.png>`_. Accepted formats: png (edam:format_3603).
        properties (dic):
            * **method** (*str*) - ("CGI BAR JARZ") Choose one or more estimators to use. Values: CGI (Crooks Gaussian Intersection), BAR (Bennet Acceptance Ratio), JARZ (Jarzynski's estimator).
            * **temperature** (*float*) - (298.15) [0~1000|0.05] Temperature in Kelvin.
            * **nboots** (*int*) - (0) [0~1000|1] Number of bootstrap samples to use for the bootstrap estimate of the standard errors.
            * **nblocks** (*int*) - (1) [0~1000|1] Number of blocks to divide the data into for an estimate of the standard error.
            * **integ_only** (*bool*) - (False) Whether to do integration only.
            * **reverseB** (*bool*) - (False) Whether to reverse the work values for the backward (B->A) transformation.
            * **skip** (*int*) - (1) [0~1000|1] Skip files.
            * **slice** (*str*) - (None) Subset of trajectories to analyze. Provide list slice, e.g. "10 50" will result in selecting dhdl_files[10:50].
            * **rand** (*int*) - (None) [0~1000|1] Take a random subset of trajectories. Default is None (do not take random subset).
            * **index** (*str*) - (None) Zero-based index of files to analyze (e.g. "0 10 20 50 60"). It keeps the dhdl.xvg files according to their position in the list, sorted according to the filenames.
            * **prec** (*int*) - (2) [0~100|1] The decimal precision of the screen/file output.
            * **units** (*str*) - ("kJ") The units of the output. Values: kJ (Kilojoules), kcal (Kilocalories), kT (the product of the Boltzmann constant k and the temperature).
            * **no_ks** (*bool*) - (False) Whether to do a Kolmogorov-Smirnov test to check whether the Gaussian assumption for CGI holds.
            * **nbins** (*int*) - (20) [0~1000|1] Number of histograms bins for the plot.
            * **dpi** (*int*) - (300) [72~2048|1] Resolution of the plot.
            * **binary_path** (*str*) - ("pmx") Path to the PMX command line interface.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_pmx.pmxbiobb.pmxanalyse import pmxanalyse
            prop = {
                'method': 'CGI BAR JARZ',
                'temperature': 298.15,
                'dpi': 600
            }
            pmxanalyse(input_a_xvg_zip_path='/path/to/myAStateFiles.zip',
                       input_b_xvg_zip_path='/path/to/myBStateFiles.zip',
                       output_result_path='/path/to/newResults.txt',
                       output_work_plot_path='/path/to/newResults.png',
                       properties=prop)

    Info:
        * wrapped_software:
            * name: PMX analyse
            * version: >=1.0.1
            * license: GNU
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_a_xvg_zip_path: str,
        input_b_xvg_zip_path: str,
        output_result_path: str,
        output_work_plot_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        """Store the file paths and read every supported property, applying its default.

        Args:
            input_a_xvg_zip_path: Zip file with the xvg files of the A state.
            input_b_xvg_zip_path: Zip file with the xvg files of the B state.
            output_result_path: Destination of the text results file.
            output_work_plot_path: Destination of the plot file.
            properties: Optional dictionary of properties; ``None`` is treated as ``{}``.
            **kwargs: Accepted for interface compatibility; not read in this method.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must stay right after the parent
        # constructor call so that no additional local name leaks into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {},
            "out": {
                "output_result_path": output_result_path,
                "output_work_plot_path": output_work_plot_path,
            },
        }
        # Should not be copied inside container: the zip files are kept out of
        # io_dict["in"] and are unpacked explicitly in launch().
        self.input_a_xvg_zip_path = input_a_xvg_zip_path
        self.input_b_xvg_zip_path = input_b_xvg_zip_path

        # Properties specific for BB
        self.method = properties.get("method", "CGI BAR JARZ")
        self.temperature = properties.get("temperature", 298.15)
        self.nboots = properties.get("nboots", 0)
        self.nblocks = properties.get("nblocks", 1)
        self.integ_only = properties.get("integ_only", False)
        self.reverseB = properties.get("reverseB", False)
        self.skip = properties.get("skip", 1)
        self.slice = properties.get("slice", None)
        self.rand = properties.get("rand", None)
        self.index = properties.get("index", None)
        self.prec = properties.get("prec", 2)
        self.units = properties.get("units", "kJ")
        self.no_ks = properties.get("no_ks", False)
        self.nbins = properties.get("nbins", 20)
        self.dpi = properties.get("dpi", 300)

        # Properties common in all PMX BB
        self.binary_path = properties.get("binary_path", "pmx")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _unzip_xvg_files(self, zip_path: str, dest_dir: str) -> list:
        """Unzip ``zip_path`` into ``dest_dir`` and return the usable extracted paths.

        Entries that do not exist on disk or whose size is 10 bytes or less are
        discarded, so that empty or near-empty files are not handed to pmx.
        """
        extracted = fu.unzip_list(zip_path, dest_dir, self.out_log)
        return [
            f for f in extracted
            if Path(f).exists() and Path(f).stat().st_size > 10
        ]

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` pmx.pmxanalyse.Pmxanalyse object.

        Returns:
            ``0`` when ``check_restart()`` reports that nothing has to be run,
            otherwise ``self.return_code`` after the command has been executed.

        Raises:
            FileNotFoundError: If no container is used and ``binary_path`` is neither
                an existing file nor resolvable through ``shutil.which``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        sandbox_dir = self.stage_io_dict.get("unique_dir", "")
        if self.container_path:
            working_dir = self.container_volume_path or "/data"
        else:
            working_dir = sandbox_dir

        # Check that the executable exists (only meaningful outside a container)
        if (
            not self.container_path
            and not Path(self.binary_path).is_file()
            and not shutil.which(self.binary_path)
        ):
            raise FileNotFoundError(
                "Executable %s not found. Check if it is installed in your system and correctly defined in the properties"
                % self.binary_path
            )

        list_a_dir = fu.create_unique_dir()
        list_b_dir = fu.create_unique_dir()
        list_a = self._unzip_xvg_files(self.input_a_xvg_zip_path, list_a_dir)
        list_b = self._unzip_xvg_files(self.input_b_xvg_zip_path, list_b_dir)

        # Copy extra files to sandbox: two directories containing the xvg files
        list_a_dir_in_sandbox = Path(sandbox_dir).joinpath(Path(list_a_dir).name)
        list_b_dir_in_sandbox = Path(sandbox_dir).joinpath(Path(list_b_dir).name)
        shutil.copytree(list_a_dir, list_a_dir_in_sandbox)
        shutil.copytree(list_b_dir, list_b_dir_in_sandbox)

        # Keep the full relative paths returned by unzip_list (including frame*/ subfolders).
        string_a = " ".join(list_a)
        string_b = " ".join(list_b)

        self.cmd = [
            "cd",
            working_dir,
            ";",
            self.binary_path,
            "analyse",
            "-fA",
            string_a,
            "-fB",
            string_b,
            "-o",
            PurePath(self.stage_io_dict["out"]["output_result_path"]).name,
            "-w",
            PurePath(self.stage_io_dict["out"]["output_work_plot_path"]).name,
        ]

        if self.method:
            self.cmd.append("-m")
            self.cmd.append(self.method)
        if self.temperature:
            self.cmd.append("-t")
            self.cmd.append(str(self.temperature))
        if self.nboots:
            self.cmd.append("-b")
            self.cmd.append(str(self.nboots))
        if self.nblocks:
            self.cmd.append("-n")
            self.cmd.append(str(self.nblocks))
        if self.integ_only:
            self.cmd.append("--integ_only")
        if self.reverseB:
            self.cmd.append("--reverseB")
        if self.skip:
            self.cmd.append("--skip")
            self.cmd.append(str(self.skip))
        if self.slice:
            self.cmd.append("--slice")
            self.cmd.append(self.slice)
        if self.rand:
            # rand is documented as an int (size of the random subset), so the
            # value has to follow the flag; a bare "--rand" would otherwise
            # consume the next option as its argument.
            self.cmd.append("--rand")
            self.cmd.append(str(self.rand))
        if self.index:
            self.cmd.append("--index")
            self.cmd.append(self.index)
        if self.prec is not None:
            # 0 is inside the documented [0~100] range, so only None skips the flag.
            self.cmd.append("--prec")
            self.cmd.append(str(self.prec))
        if self.units:
            self.cmd.append("--units")
            self.cmd.append(self.units)
        if self.no_ks:
            self.cmd.append("--no_ks")
        if self.nbins:
            self.cmd.append("--nbins")
            self.cmd.append(str(self.nbins))
        if self.dpi:
            self.cmd.append("--dpi")
            self.cmd.append(str(self.dpi))

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # The two unzip directories live outside the sandbox, so register them
        # explicitly for removal.
        self.tmp_files.extend([list_a_dir, list_b_dir])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)
        return self.return_code

250 

251 

def pmxanalyse(
    input_a_xvg_zip_path: str,
    input_b_xvg_zip_path: str,
    output_result_path: str,
    output_work_plot_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` object from the
    given arguments and return the result of its
    :meth:`launch() <pmx.pmxanalyse.Pmxanalyse.launch>` method."""
    # The extra keyword arguments are handed over as one keyword literally
    # named "kwargs" (not unpacked), mirroring what forwarding locals() does.
    block = Pmxanalyse(
        input_a_xvg_zip_path=input_a_xvg_zip_path,
        input_b_xvg_zip_path=input_b_xvg_zip_path,
        output_result_path=output_result_path,
        output_work_plot_path=output_work_plot_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

263 

264 

# The function form shares the class documentation.
pmxanalyse.__doc__ = Pmxanalyse.__doc__

# Command line entry point, built by the class's get_main helper.
main = Pmxanalyse.get_main(
    pmxanalyse,
    "Wrapper class for the PMX analyse module.",
)

if __name__ == "__main__":
    main()