Coverage for biobb_pmx/pmxbiobb/pmxanalyse.py: 75%
120 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-23 10:10 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-23 10:10 +0000
1#!/usr/bin/env python3
3"""Module containing the PMX analyse class and the command line interface."""
5import argparse
6import shutil
7from pathlib import Path
8from typing import Optional
10from biobb_common.configuration import settings
11from biobb_common.generic.biobb_object import BiobbObject
12from biobb_common.tools import file_utils as fu
13from biobb_common.tools.file_utils import launchlogger
class Pmxanalyse(BiobbObject):
    """
    | biobb_pmx Pmxanalyse
    | Wrapper class for the `PMX analyse <https://github.com/deGrootLab/pmx>`_ module.
    | Analyze the work values from the dgdl.xvg files of the A and B states to calculate the free energy difference between two states.

    Args:
        input_a_xvg_zip_path (str): Path to the zip file containing the dgdl.xvg files of the A state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_A.zip>`_. Accepted formats: zip (edam:format_3987).
        input_b_xvg_zip_path (str): Path to the zip file containing the dgdl.xvg files of the B state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_B.zip>`_. Accepted formats: zip (edam:format_3987).
        output_result_path (str): Path to the TXT results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_result.txt>`_. Accepted formats: txt (edam:format_2330).
        output_work_plot_path (str): Path to the PNG plot results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_plot.png>`_. Accepted formats: png (edam:format_3603).
        properties (dic):
            * **method** (*str*) - ("CGI BAR JARZ") Choose one or more estimators to use. Values: CGI (Crooks Gaussian Intersection), BAR (Bennett Acceptance Ratio), JARZ (Jarzynski's estimator).
            * **temperature** (*float*) - (298.15) [0~1000|0.05] Temperature in Kelvin.
            * **nboots** (*int*) - (0) [0~1000|1] Number of bootstrap samples to use for the bootstrap estimate of the standard errors.
            * **nblocks** (*int*) - (1) [0~1000|1] Number of blocks to divide the data into for an estimate of the standard error.
            * **integ_only** (*bool*) - (False) Whether to do integration only.
            * **reverseB** (*bool*) - (False) Whether to reverse the work values for the backward (B->A) transformation.
            * **skip** (*int*) - (1) [0~1000|1] Skip files.
            * **slice** (*str*) - (None) Subset of trajectories to analyze. Provide list slice, e.g. "10 50" will result in selecting dhdl_files[10:50].
            * **rand** (*int*) - (None) [0~1000|1] Take a random subset of trajectories. Default is None (do not take random subset).
            * **index** (*str*) - (None) Zero-based index of files to analyze (e.g. "0 10 20 50 60"). It keeps the dhdl.xvg files according to their position in the list, sorted according to the filenames.
            * **prec** (*int*) - (2) [0~100|1] The decimal precision of the screen/file output.
            * **units** (*str*) - ("kJ") The units of the output. Values: kJ (Kilojoules), kcal (Kilocalories), kT (the product of the Boltzmann constant k and the temperature).
            * **no_ks** (*bool*) - (False) Whether to skip the Kolmogorov-Smirnov test that checks whether the Gaussian assumption for CGI holds.
            * **nbins** (*int*) - (20) [0~1000|1] Number of histograms bins for the plot.
            * **dpi** (*int*) - (300) [72~2048|1] Resolution of the plot.
            * **binary_path** (*str*) - ("pmx") Path to the PMX command line interface.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_pmx.pmxbiobb.pmxanalyse import pmxanalyse
            prop = {
                'method': 'CGI BAR JARZ',
                'temperature': 298.15,
                'dpi': 600
            }
            pmxanalyse(input_a_xvg_zip_path='/path/to/myAStateFiles.zip',
                       input_b_xvg_zip_path='/path/to/myBStateFiles.zip',
                       output_result_path='/path/to/newResults.txt',
                       output_work_plot_path='/path/to/newResults.png',
                       properties=prop)

    Info:
        * wrapped_software:
            * name: PMX analyse
            * version: >=1.0.1
            * license: GNU
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_a_xvg_zip_path: str,
        input_b_xvg_zip_path: str,
        output_result_path: str,
        output_work_plot_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor locals; must stay before any new local is bound.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {},
            "out": {
                "output_result_path": output_result_path,
                "output_work_plot_path": output_work_plot_path,
            },
        }
        # Should not be copied inside container: the zips are kept out of
        # io_dict["in"] and are unpacked by launch() instead.
        self.input_a_xvg_zip_path = input_a_xvg_zip_path
        self.input_b_xvg_zip_path = input_b_xvg_zip_path

        # Properties specific for BB
        self.method = properties.get("method", "CGI BAR JARZ")
        self.temperature = properties.get("temperature", 298.15)
        self.nboots = properties.get("nboots", 0)
        self.nblocks = properties.get("nblocks", 1)
        self.integ_only = properties.get("integ_only", False)
        self.reverseB = properties.get("reverseB", False)
        self.skip = properties.get("skip", 1)
        self.slice = properties.get("slice", None)
        self.rand = properties.get("rand", None)
        self.index = properties.get("index", None)
        self.prec = properties.get("prec", 2)
        self.units = properties.get("units", "kJ")
        self.no_ks = properties.get("no_ks", False)
        self.nbins = properties.get("nbins", 20)
        self.dpi = properties.get("dpi", 300)

        # Properties common in all PMX BB
        self.binary_path = properties.get("binary_path", "pmx")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _ensure_binary_available(self) -> None:
        """Raise FileNotFoundError when the executable cannot be found on the host.

        The check is skipped when a container is configured (``container_path`` set).
        """
        if self.container_path:
            return
        # Accept either an explicit path to a file or a command resolvable via PATH.
        if Path(self.binary_path).is_file() or shutil.which(self.binary_path):
            return
        raise FileNotFoundError(
            "Executable %s not found. Check if it is installed in your system and correctly defined in the properties"
            % self.binary_path
        )

    def _xvg_files_from_zip(self, zip_path: str, dest_dir: str) -> list:
        """Unzip *zip_path* into *dest_dir* and return the usable extracted files.

        Entries that do not exist on disk after extraction or whose size is
        10 bytes or less are discarded.
        """
        return [
            extracted
            for extracted in fu.unzip_list(zip_path, dest_dir, self.out_log)
            if Path(extracted).exists() and Path(extracted).stat().st_size > 10
        ]

    def _analyse_options(self) -> list:
        """Translate the block properties into ``pmx analyse`` command line tokens.

        Unset or falsy properties are omitted so that pmx falls back to its own
        defaults. The order of the options matches the documented property order.
        """
        options: list = []

        def emit(enabled, *tokens) -> None:
            # Append the given tokens only when the governing property is set.
            if enabled:
                options.extend(tokens)

        emit(self.method, "-m", self.method)
        emit(self.temperature, "-t", str(self.temperature))
        emit(self.nboots, "-b", str(self.nboots))
        emit(self.nblocks, "-n", str(self.nblocks))
        emit(self.integ_only, "--integ_only")
        emit(self.reverseB, "--reverseB")
        emit(self.skip, "--skip", str(self.skip))
        emit(self.slice, "--slice", self.slice)
        # --rand takes an integer argument; passing the bare flag would make the
        # parser consume the following option as its value.
        emit(self.rand, "--rand", str(self.rand))
        emit(self.index, "--index", self.index)
        # A precision of 0 is a legitimate request, so only an unset value is skipped.
        emit(self.prec is not None, "--prec", str(self.prec))
        emit(self.units, "--units", self.units)
        emit(self.no_ks, "--no_ks")
        emit(self.nbins, "--nbins", str(self.nbins))
        emit(self.dpi, "--dpi", str(self.dpi))
        return options

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` pmx.pmxanalyse.Pmxanalyse object.

        Returns:
            int: ``0`` when ``check_restart()`` reports nothing to do, otherwise
            the value of ``self.return_code`` after the run.

        Raises:
            FileNotFoundError: If no container is configured and ``binary_path``
                is neither an existing file nor a command found on the PATH.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Check that the executable exists
        self._ensure_binary_available()

        # Unpack both states into their own temporary directories.
        list_a_dir = fu.create_unique_dir()
        list_b_dir = fu.create_unique_dir()
        list_a = self._xvg_files_from_zip(self.input_a_xvg_zip_path, list_a_dir)
        list_b = self._xvg_files_from_zip(self.input_b_xvg_zip_path, list_b_dir)
        string_a = " ".join(list_a)
        string_b = " ".join(list_b)

        # Copy extra files to container: two directories containing the xvg files
        if self.container_path:
            staging_dir = Path(self.stage_io_dict.get("unique_dir", ""))
            for xvg_dir in (list_a_dir, list_b_dir):
                shutil.copytree(xvg_dir, staging_dir.joinpath(Path(xvg_dir).name))
            # Inside the container every file is addressed below the volume path.
            volume_prefix = self.container_volume_path + "/"
            string_a = volume_prefix + (" " + volume_prefix).join(list_a)
            string_b = volume_prefix + (" " + volume_prefix).join(list_b)

        self.cmd = [
            self.binary_path,
            "analyse",
            "-fA",
            string_a,
            "-fB",
            string_b,
            "-o",
            self.stage_io_dict["out"]["output_result_path"],
            "-w",
            self.stage_io_dict["out"]["output_work_plot_path"],
        ]
        self.cmd.extend(self._analyse_options())

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Only the two unzip directories are registered for cleanup here.
        self.tmp_files.extend([list_a_dir, list_b_dir])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)
        return self.return_code
def pmxanalyse(
    input_a_xvg_zip_path: str,
    input_b_xvg_zip_path: str,
    output_result_path: str,
    output_work_plot_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` class and
    execute the :meth:`launch() <pmx.pmxanalyse.Pmxanalyse.launch>` method."""

    # Extra keyword arguments are forwarded instead of being silently dropped;
    # the Pmxanalyse constructor accepts **kwargs.
    return Pmxanalyse(
        input_a_xvg_zip_path=input_a_xvg_zip_path,
        input_b_xvg_zip_path=input_b_xvg_zip_path,
        output_result_path=output_result_path,
        output_work_plot_path=output_work_plot_path,
        properties=properties,
        **kwargs,
    ).launch()


# Must live at module level: placed after the ``return`` inside the function it
# was unreachable, so the wrapper never received the class documentation.
pmxanalyse.__doc__ = Pmxanalyse.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation."""

    def wide_help_formatter(prog):
        # Very large width so that help strings are never wrapped.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description="Wrapper class for the PMX analyse module.",
        formatter_class=wide_help_formatter,
    )
    cli.add_argument(
        "-c",
        "--config",
        required=False,
        help="This file can be a YAML file, JSON file or JSON string",
    )

    # Specific args of each building block, registered in declaration order
    mandatory_options = (
        (
            "--input_a_xvg_zip_path",
            "Path the zip file containing the dgdl.xvg files of the A state. Accepted formats: zip.",
        ),
        (
            "--input_b_xvg_zip_path",
            "Path the zip file containing the dgdl.xvg files of the B state. Accepted formats: zip.",
        ),
        (
            "--output_result_path",
            "Path to the TXT results file. Accepted formats: txt.",
        ),
        (
            "--output_work_plot_path",
            "Path to the PNG plot results file. Accepted formats: png.",
        ),
    )
    required_group = cli.add_argument_group("required arguments")
    for option_name, help_text in mandatory_options:
        required_group.add_argument(option_name, required=True, help=help_text)

    parsed = cli.parse_args()
    # An absent or empty --config is normalised to None before reading the properties.
    config = parsed.config or None
    properties = settings.ConfReader(config=config).get_prop_dic()

    # Specific call of each building block
    pmxanalyse(
        input_a_xvg_zip_path=parsed.input_a_xvg_zip_path,
        input_b_xvg_zip_path=parsed.input_b_xvg_zip_path,
        output_result_path=parsed.output_result_path,
        output_work_plot_path=parsed.output_work_plot_path,
        properties=properties,
    )


if __name__ == "__main__":
    main()