Coverage for biobb_dna / curvesplus / canal_unzip.py: 73%
70 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 18:49 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 18:49 +0000
1#!/usr/bin/env python3
3"""Module containing the CanalUnzip class and the command line interface."""
4import re
5import zipfile
6import shutil
7from typing import Optional
9from biobb_common.generic.biobb_object import BiobbObject
10from biobb_common.tools import file_utils as fu
11from biobb_common.tools.file_utils import launchlogger
class CanalUnzip(BiobbObject):
    """
    | biobb_dna CanalUnzip
    | Tool for extracting biobb_canal output files.
    | Unzips a Canal output file contained within a zip file.

    Args:
        input_zip_file (str): Zip file with Canal output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/canal_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path (str): Canal output file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.ser>`_. Accepted formats: ser (edam:format_2330), his (edam:format_3905), cor (edam:format_3465).
        output_list_path (str) (Optional): Text file with a list of all Canal output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of file. Values: series, histo, corr.
            * **helpar_name** (*str*) - (None) Helical parameter name, only for 'series' and 'histo' types. Values: alphaC, alphaW, ampC, ampW, ax-bend, betaC, betaW, buckle, chiC, chiW, curv, deltaC, deltaW, epsilC, epsilW, gammaC, gammaW, h-ris, h-twi, inclin, majd, majw, mind, minw, opening, phaseC, phaseW, propel, reg, rise, roll, shear, shift, slide, stagger, stretch, tbend, tilt, tip, twist, xdisp, ydisp, zetaC, zetaW.
            * **correlation** (*str*) - (None) Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.canal_unzip import canal_unzip
            prop = {
                'type': 'series',
                'helpar_name': 'alphaC'
            }
            canal_unzip(
                input_zip_file='/path/to/canal/output.zip',
                output_path='/path/to/output.ser',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_zip_file,
                 output_path, output_list_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the local names at this point (the constructor
        # arguments). Do not define any other local variable above this line,
        # otherwise it would leak into the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path': output_path,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.helpar_name = properties.get('helpar_name', None)
        self.correlation = properties.get('correlation', None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` object.

        Validates the ``type`` / ``helpar_name`` / ``correlation`` properties,
        builds the expected member name ``canal_output_<suffix>.<ext>``, copies
        that member from the input zip to ``output_path`` and, when
        ``output_list_path`` is set, writes the names of all zip members to it
        (one per line).

        Returns:
            int: ``0`` when ``check_restart()`` is true, otherwise
            ``self.return_code``.

        Raises:
            SystemExit: with status 1 (after logging the reason) when a
                mandatory property is missing or invalid, or when the expected
                file is not present in the zip.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # 'type' is always required: it selects the extension of the file
        # to extract and which of the other properties is needed.
        if self.type is None:
            fu.log("Property 'type' is mandatory to run CanalUnzip. Please set it.",
                   self.out_log, self.global_log)
            # SystemExit is raised directly instead of calling the
            # interactive-only ``exit()`` helper installed by ``site``.
            raise SystemExit(1)

        # Check that helpar_name is set if type is series or histo
        if self.type in ["series", "histo"] and self.helpar_name is None:
            fu.log("Property 'helpar_name' is mandatory to run CanalUnzip with type 'series' or 'histo'. Please set it.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that correlation is set if type is corr
        if self.type == "corr" and self.correlation is None:
            fu.log("Property 'correlation' is mandatory to run CanalUnzip with type 'corr'. Please set it.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # File extension used by each type of Canal output
        extensions = {
            "series": "ser",
            "histo": "his",
            "corr": "cor"
        }
        # Check that the type is valid
        if self.type not in extensions:
            fu.log(f"Type {self.type} not valid. Valid types are: {', '.join(extensions.keys())}.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Generate the suffix that identifies the file inside the zip
        if self.type == "corr":
            # The whole value must be <number>_<number>; fullmatch rejects
            # trailing garbage that a start-anchored match would let through.
            # str() makes a non-string value fail validation cleanly instead
            # of raising TypeError inside ``re``.
            if not re.fullmatch(r'\d+_\d+', str(self.correlation)):
                fu.log(f"Correlation {self.correlation} not valid. It should match the pattern <number_number>.",
                       self.out_log, self.global_log)
                raise SystemExit(1)
            suffix = self.correlation
        else:
            # Single source of truth for the accepted helical parameter names;
            # the error message below is built from it.
            valid_helpars = (
                "alphaC", "alphaW", "ampC", "ampW", "ax-bend", "betaC", "betaW", "buckle",
                "chiC", "chiW", "curv", "deltaC", "deltaW", "epsilC", "epsilW", "gammaC",
                "gammaW", "h-ris", "h-twi", "inclin", "majd", "majw", "mind", "minw",
                "opening", "phaseC", "phaseW", "propel", "reg", "rise", "roll", "shear",
                "shift", "slide", "stagger", "stretch", "tbend", "tilt", "tip", "twist",
                "xdisp", "ydisp", "zetaC", "zetaW",
            )
            # Check that the helpar_name is valid
            if self.helpar_name not in valid_helpars:
                fu.log(f"Parameter {self.helpar_name} not valid. Valid parameters are: {', '.join(valid_helpars)}.",
                       self.out_log, self.global_log)
                raise SystemExit(1)
            suffix = self.helpar_name

        # Generate the filename
        filename = f"canal_output_{suffix}.{extensions[self.type]}"

        # Unzip the file
        with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
            member_names = zip_ref.namelist()

            # Check if the file exists in the zip file
            if filename not in member_names:
                fu.log(f"File {filename} not found in the zip file.", self.out_log, self.global_log)
                raise SystemExit(1)

            # Extract the file
            fu.log(f'{filename} exists, copying into {self.stage_io_dict["out"]["output_path"]}.',
                   self.out_log, self.global_log)
            with zip_ref.open(filename) as source, open(self.stage_io_dict["out"]["output_path"], "wb") as target:
                shutil.copyfileobj(source, target)

            # Write the list of files
            if self.stage_io_dict["out"]["output_list_path"]:
                with open(self.stage_io_dict["out"]["output_list_path"], "w") as f:
                    f.writelines(f"{name}\n" for name in member_names)

        # Run Biobb block
        # self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def canal_unzip(
        input_zip_file: str,
        output_path: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Create :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.canal_unzip.CanalUnzip.launch>` method."""
    # Every argument is forwarded by keyword. The extra keyword arguments are
    # deliberately passed as a single dict under the name ``kwargs`` (not
    # unpacked), which is what forwarding ``locals()`` does.
    block = CanalUnzip(
        input_zip_file=input_zip_file,
        output_path=output_path,
        output_list_path=output_list_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()
# The function wrapper exposes the same documentation as the class.
canal_unzip.__doc__ = CanalUnzip.__doc__
# Command line entry point obtained from CanalUnzip.get_main.
main = CanalUnzip.get_main(canal_unzip, "Tool for extracting biobb_canal output files.")

# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    main()