Coverage for biobb_flexserv/pcasuite/pcz_evecs.py: 84%
74 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-19 15:08 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-19 15:08 +0000
1#!/usr/bin/env python3
3"""Module containing the PCZevecs class and the command line interface."""
4import argparse
5from typing import Optional
6import shutil
7import json
8import math
9from pathlib import PurePath
10from biobb_common.tools import file_utils as fu
11from biobb_common.generic.biobb_object import BiobbObject
12from biobb_common.configuration import settings
13from biobb_common.tools.file_utils import launchlogger
class PCZevecs(BiobbObject):
    """
    | biobb_flexserv PCZevecs
    | Extract PCA Eigen Vectors from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output json file with PCA Eigen Vectors. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcz_evecs.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) from which to extract eigen vectors.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_evecs import pcz_evecs

            prop = {
                'eigenvector': 1
            }

            pcz_evecs( input_pcz_path='/path/to/pcazip_input.pcz',
                       output_json_path='/path/to/pcz_evecs.json',
                       properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_json_path: str, properties: Optional[dict] = None, **kwargs) -> None:

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the local names visible at this point; keep this line
        # ahead of any statement that would bind further locals.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 1)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _parse_evecs(lines) -> list:
        """Collect every whitespace-separated token found in *lines*.

        Args:
            lines: Iterable of text lines, e.g. an open text file.

        Returns:
            list: The tokens in reading order, kept as the strings that were
            read (they are not converted to numbers here).
        """
        # str.split() without arguments treats any run of blanks or tabs as
        # one separator and never yields empty tokens.
        return [token for line in lines for token in line.split()]

    @staticmethod
    def _compute_projections(evecs) -> list:
        """Return the Euclidean norm of each consecutive group of three values.

        Args:
            evecs: Sequence of numeric strings (or numbers).

        Returns:
            list: One float per complete group of three values, rounded to
            four decimals. A trailing incomplete group is ignored.

        Raises:
            ValueError: If any item cannot be converted to ``float``.
        """
        components = [float(token) for token in evecs]
        projections = []
        # NOTE(review): each triple is presumably the x, y, z contribution of
        # one atom in the pczdump output -- confirm against the tool's format.
        for start in range(0, len(components) - 2, 3):
            first, second, third = components[start:start + 3]
            norm = math.sqrt(first * first + second * second + third * third)
            # Round through a formatted string so that the stored value is
            # exactly the four-decimal representation.
            projections.append(float("{:.4f}".format(norm)))
        return projections

    @launchlogger
    def launch(self) -> int:
        """Launches the execution of the FlexServ pcz_evecs module.

        The input file is copied into a freshly created temporary folder, the
        configured binary is run from inside that folder, its text output is
        parsed into a JSON document with the keys ``evecs`` and ``projs``, and
        that document is copied to ``output_json_path``. The temporary folder
        is handed to ``remove_tmp_files()`` whether or not those steps succeed.

        Returns:
            int: ``0`` when ``check_restart()`` is truthy (nothing is
            executed), otherwise ``self.return_code``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        # Long strings defining a file path makes Fortran or C compiled programs crash if the string
        # declared is shorter than the input parameter path (string) length.
        # Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        # The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        try:
            input_pcz = self.io_dict["in"]["input_pcz_path"]
            shutil.copy2(input_pcz, self.tmp_folder)

            # Temporary output, both relative to the sandbox
            temp_out = "output.dat"
            temp_json = "output.json"

            # Command line
            # pczdump -i structure.ca.std.pcz --evecs -o pcz.evecs
            # The command first changes into the sandbox so that the binary only
            # receives short, relative file names. The ';' separator relies on
            # run_biobb() executing the list as one shell command line.
            self.cmd = ['cd', self.tmp_folder, ';',
                        self.binary_path,
                        '-i', PurePath(input_pcz).name,
                        '-o', temp_out,
                        "--evec={}".format(self.eigenvector)
                        ]

            # Run Biobb block
            self.run_biobb()

            # Parse output evecs
            # 0.180  -0.069   0.168   0.204  -0.054   0.235   0.145  -0.001   0.260   0.183
            # -0.041   0.231   0.174  -0.077   0.144   0.097  -0.022   0.143   0.069   0.008
            with open(PurePath(self.tmp_folder).joinpath(temp_out), 'r') as file:
                evecs = self._parse_evecs(file)

            # Computing Projections
            info_dict = {
                'evecs': evecs,
                'projs': self._compute_projections(evecs)
            }

            with open(PurePath(self.tmp_folder).joinpath(temp_json), 'w') as out_file:
                out_file.write(json.dumps(info_dict, indent=4))

            # Copy outputs from temporary folder to output path
            shutil.copy2(PurePath(self.tmp_folder).joinpath(temp_json), PurePath(self.io_dict["out"]["output_json_path"]))
        finally:
            # remove temporary folder(s), also when one of the steps above raised
            self.tmp_files.extend([
                self.tmp_folder
            ])
            self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def pcz_evecs(input_pcz_path: str, output_json_path: str,
              properties: Optional[dict] = None, **kwargs) -> int:
    """Create a :class:`PCZevecs` object and execute its ``launch()`` method.

    Args:
        input_pcz_path (str): Passed to ``PCZevecs`` as ``input_pcz_path``.
        output_json_path (str): Passed to ``PCZevecs`` as ``output_json_path``.
        properties (dict): Passed to ``PCZevecs`` as ``properties``.
        **kwargs: Accepted but not used by this function.

    Returns:
        int: Whatever ``PCZevecs.launch()`` returns.
    """

    return PCZevecs(input_pcz_path=input_pcz_path,
                    output_json_path=output_json_path,
                    properties=properties).launch()


# Give the wrapper the documentation of the class it wraps. This assignment
# must sit at module level: written after the ``return`` inside the function
# body it is unreachable and never takes effect.
pcz_evecs.__doc__ = PCZevecs.__doc__
def main():
    """Command line entry point: parse the arguments and run ``pcz_evecs``.

    Recognised options are ``--config`` (optional) plus the required
    ``--input_pcz_path`` and ``--output_json_path``. When ``--config`` is
    missing or empty, the string ``"{}"`` is given to the configuration reader.
    """
    cli = argparse.ArgumentParser(
        description='Extract PCA Eigen Vectors from a compressed PCZ file.',
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)
    )
    cli.add_argument('--config', required=False, help='Configuration file')

    # Specific args
    io_group = cli.add_argument_group('required arguments')
    io_group.add_argument('--input_pcz_path', required=True,
                          help='Input compressed trajectory file. Accepted formats: pcz.')
    io_group.add_argument('--output_json_path', required=True,
                          help='Output json file with PCA evecs. Accepted formats: json.')

    parsed = cli.parse_args()
    config = parsed.config or "{}"
    tool_properties = settings.ConfReader(config=config).get_prop_dic()

    # Specific call
    pcz_evecs(input_pcz_path=parsed.input_pcz_path,
              output_json_path=parsed.output_json_path,
              properties=tool_properties)
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()