Coverage for biobb_dna/curvesplus/canal_unzip.py: 63%
82 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
1#!/usr/bin/env python3
3"""Module containing the CanalUnzip class and the command line interface."""
4import re
5import zipfile
6import shutil
7import argparse
8from typing import Optional
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.configuration import settings
12from biobb_common.tools import file_utils as fu
13from biobb_common.tools.file_utils import launchlogger
class CanalUnzip(BiobbObject):
    """
    | biobb_dna CanalUnzip
    | Tool for extracting biobb_canal output files.
    | Unzips a Canal output file contained within a zip file.

    Args:
        input_zip_file (str): Zip file with Canal output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/canal_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path (str): Canal output file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.ser>`_. Accepted formats: ser (edam:format_2330), his (edam:format_3905), cor (edam:format_3465).
        output_list_path (str) (Optional): Text file with a list of all Canal output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of file. Values: series, histo, corr.
            * **helpar_name** (*str*) - (None) Helical parameter name, only for 'series' and 'histo' types. Values: alphaC, alphaW, ampC, ampW, ax-bend, betaC, betaW, buckle, chiC, chiW, curv, deltaC, deltaW, epsilC, epsilW, gammaC, gammaW, h-ris, h-twi, inclin, majd, majw, mind, minw, opening, phaseC, phaseW, propel, reg, rise, roll, shear, shift, slide, stagger, stretch, tbend, tilt, tip, twist, xdisp, ydisp, zetaC, zetaW.
            * **correlation** (*str*) - (None) Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.canal_unzip import canal_unzip
            prop = {
                'type': 'series',
                'helpar_name': 'alphaC'
            }
            canal_unzip(
                input_zip_file='/path/to/canal/output.zip',
                output_path='/path/to/output.ser',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    # Maps each accepted 'type' value to the extension of the file to extract.
    _EXTENSIONS = {
        "series": "ser",
        "histo": "his",
        "corr": "cor"
    }

    # Accepted 'helpar_name' values. The order is significant: it is joined
    # verbatim into the "not valid" log message.
    _HELPAR_NAMES = (
        "alphaC", "alphaW", "ampC", "ampW", "ax-bend", "betaC", "betaW", "buckle",
        "chiC", "chiW", "curv", "deltaC", "deltaW", "epsilC", "epsilW", "gammaC",
        "gammaW", "h-ris", "h-twi", "inclin", "majd", "majw", "mind", "minw",
        "opening", "phaseC", "phaseW", "propel", "reg", "rise", "roll", "shear",
        "shift", "slide", "stagger", "stretch", "tbend", "tilt", "tip", "twist",
        "xdisp", "ydisp", "zetaC", "zetaW",
    )

    def __init__(self, input_zip_file: str,
                 output_path: str, output_list_path: Optional[str] = None,
                 properties: Optional[dict] = None, **kwargs) -> None:
        """Store the I/O paths and the block-specific properties.

        Args:
            input_zip_file: Path of the zip file to read.
            output_path: Path where the extracted file is written.
            output_list_path: Optional path for the listing of the zip contents.
            properties: Property dictionary; ``None`` is treated as ``{}``.
            **kwargs: Accepted and captured in ``locals_var_dict`` only.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any other local
        # is created so its content stays limited to them.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path': output_path,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.helpar_name = properties.get('helpar_name', None)
        self.correlation = properties.get('correlation', None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _abort(self, message: str) -> None:
        """Log *message* and stop the program with exit status 1."""
        fu.log(message, self.out_log, self.global_log)
        # SystemExit is raised directly: the ``exit`` builtin is only injected
        # by the ``site`` module and is meant for interactive sessions.
        raise SystemExit(1)

    def _output_filename(self) -> str:
        """Validate the properties and return the member name to extract.

        Returns:
            ``canal_output_<suffix>.<ext>`` where the suffix is ``correlation``
            for type 'corr' and ``helpar_name`` otherwise.

        Raises:
            SystemExit: With status 1 when a property is missing or invalid.
        """
        if self.type is None:
            self._abort("Property 'type' is mandatory to run CanalUnzip. Please set it.")

        # helpar_name is required for the 'series' and 'histo' types
        if self.type in ["series", "histo"] and self.helpar_name is None:
            self._abort("Property 'helpar_name' is mandatory to run CanalUnzip with type 'series' or 'histo'. Please set it.")

        # correlation is required for the 'corr' type
        if self.type == "corr" and self.correlation is None:
            self._abort("Property 'correlation' is mandatory to run CanalUnzip with type 'corr'. Please set it.")

        if self.type not in self._EXTENSIONS:
            self._abort(f"Type {self.type} not valid. Valid types are: {', '.join(self._EXTENSIONS.keys())}.")

        if self.type == "corr":
            # fullmatch: the whole value must be <number>_<number>; a plain
            # match() would also accept trailing garbage such as '98_165abc'.
            if not re.fullmatch(r'\d+_\d+', self.correlation):
                self._abort(f"Correlation {self.correlation} not valid. It should match the pattern <number_number>.")
            suffix = self.correlation
        else:
            if self.helpar_name not in self._HELPAR_NAMES:
                self._abort(f"Parameter {self.helpar_name} not valid. Valid parameters are: {', '.join(self._HELPAR_NAMES)}.")
            suffix = self.helpar_name

        return f"canal_output_{suffix}.{self._EXTENSIONS[self.type]}"

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` object.

        Returns:
            ``0`` when ``check_restart()`` is true, otherwise ``self.return_code``.

        Raises:
            SystemExit: With status 1 when the properties are invalid or the
                requested file is not present in the zip file.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Validate the properties and generate the filename
        filename = self._output_filename()

        staged_in = self.stage_io_dict["in"]
        staged_out = self.stage_io_dict["out"]

        # Unzip the file
        with zipfile.ZipFile(staged_in["input_zip_file"], 'r') as zip_ref:
            names = zip_ref.namelist()

            # Check if the file exists in the zip file
            if filename not in names:
                self._abort(f"File {filename} not found in the zip file.")

            # Extract the file
            fu.log(f'{filename} exists, copying into {staged_out["output_path"]}.',
                   self.out_log, self.global_log)
            with zip_ref.open(filename) as source, open(staged_out["output_path"], "wb") as target:
                shutil.copyfileobj(source, target)

            # Write the list of files (only when a list path was given)
            if staged_out["output_list_path"]:
                with open(staged_out["output_list_path"], "w") as f:
                    f.writelines(f"{name}\n" for name in names)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def canal_unzip(
        input_zip_file: str,
        output_path: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Create :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.canal_unzip.CanalUnzip.launch>` method."""

    return CanalUnzip(
        input_zip_file=input_zip_file,
        output_path=output_path,
        output_list_path=output_list_path,
        properties=properties, **kwargs).launch()


# Must live at module level: when this assignment sat inside the function,
# after the return statement, it was unreachable and the wrapper never
# received the class documentation.
canal_unzip.__doc__ = CanalUnzip.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation."""

    def _wide_help_formatter(prog):
        # Very large width so argparse never wraps the help text.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description='Tool for extracting biobb_canal output files.',
        formatter_class=_wide_help_formatter)
    cli.add_argument('--config', required=False, help='Configuration file')

    # Mandatory arguments are shown under their own heading in the help output.
    mandatory = cli.add_argument_group('required arguments')
    mandatory.add_argument(
        '--input_zip_file',
        required=True,
        help='Zip file with Canal output files. Accepted formats: zip.')
    mandatory.add_argument(
        '--output_path',
        required=True,
        help='Canal output file contained within input_zip_file. Accepted formats: ser, his, cor.')
    cli.add_argument(
        '--output_list_path',
        required=False,
        help='Text file with a list of all Canal output files contained within input_zip_file. Accepted formats: txt.')

    parsed = cli.parse_args()
    # Without --config, an empty JSON object is passed to the configuration reader.
    if not parsed.config:
        parsed.config = "{}"
    prop_dict = settings.ConfReader(config=parsed.config).get_prop_dic()

    canal_unzip(
        input_zip_file=parsed.input_zip_file,
        output_path=parsed.output_path,
        output_list_path=parsed.output_list_path,
        properties=prop_dict)
# Run the command line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()