Coverage for biobb_dna / dna / dna_timeseries_unzip.py: 78%
73 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 18:49 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 18:49 +0000
1#!/usr/bin/env python3
3"""Module containing the DnaTimeseriesUnzip class and the command line interface."""
4import re
5import zipfile
6import shutil
7from typing import Optional
9from biobb_dna.utils import constants
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.tools import file_utils as fu
12from biobb_common.tools.file_utils import launchlogger
class DnaTimeseriesUnzip(BiobbObject):
    """
    | biobb_dna DnaTimeseriesUnzip
    | Tool for extracting dna_timeseries output files.
    | Unzips a zip file containing dna_timeseries output files and extracts the csv and jpg files.

    Args:
        input_zip_file (str): Zip file with dna_timeseries output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/timeseries_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path_csv (str): dna_timeseries output csv file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.csv>`_. Accepted formats: csv (edam:format_3752).
        output_path_jpg (str): dna_timeseries output jpg file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.jpg>`_. Accepted formats: jpg (edam:format_3579).
        output_list_path (str) (Optional): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of analysis, series or histogram. Values: series, hist.
            * **parameter** (*str*) - (None) Type of parameter. Values: majd, majw, mind, minw, inclin, tip, xdisp, ydisp, shear, stretch, stagger, buckle, propel, opening, rise, roll, twist, shift, slide, tilt, alphaC, alphaW, betaC, betaW, gammaC, gammaW, deltaC, deltaW, epsilC, epsilW, zetaC, zetaW, chiC, chiW, phaseC, phaseW.
            * **sequence** (*str*) - (None) Nucleic acid sequence used for generating dna_timeseries output file.
            * **index** (*int*) - (1) Base pair index in the parameter 'sequence', starting from 1.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.dna.dna_timeseries_unzip import dna_timeseries_unzip
            prop = {
                'type': 'hist',
                'parameter': 'shift',
                'sequence': 'CGCGAATTCGCG',
                'index': 5
            }
            dna_timeseries_unzip(
                input_zip_file='/path/to/dna_timeseries/output.zip',
                output_path_csv='/path/to/output.csv',
                output_path_jpg='/path/to/output.jpg',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_zip_file,
                 output_path_csv, output_path_jpg, output_list_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must stay right after the
        # parent constructor so that no extra local names leak into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path_csv': output_path_csv,
                'output_path_jpg': output_path_jpg,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.parameter = properties.get('parameter', None)
        self.sequence = properties.get('sequence', None)
        self.index = properties.get('index', 1)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _extract_member(self, zip_ref, member, members, target_path) -> None:
        """Copy ``member`` from the open archive ``zip_ref`` into ``target_path``.

        ``members`` is the list of names contained in the archive. When
        ``member`` is not among them the problem is logged and the process
        exits with status 1.
        """
        if member not in members:
            fu.log(f"File {member} not found in the zip file.", self.out_log, self.global_log)
            raise SystemExit(1)

        fu.log(f'{member} exists, copying into {target_path}.',
               self.out_log, self.global_log)
        # Stream the member straight to disk instead of extracting the whole archive
        with zip_ref.open(member) as source, open(target_path, "wb") as target:
            shutil.copyfileobj(source, target)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` object."""

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Check that the three mandatory properties are set
        if self.type is None or self.parameter is None or self.sequence is None:
            fu.log("Properties 'type', 'parameter' and 'sequence' are mandatory to run DnaTimeseriesUnzip. Please set them.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the type is valid
        if self.type not in ["series", "hist"]:
            fu.log(f"Type {self.type} not valid. Valid types are: series, hist.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the parameter is valid
        if self.parameter not in constants.helical_parameters:
            fu.log(f"Parameter {self.parameter} not valid. Valid parameters are: {constants.helical_parameters}.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the sequence is valid; fullmatch is used because '$' with
        # re.match would also accept a sequence ending in a newline.
        if not re.fullmatch(r'[ACGT]+', self.sequence):
            fu.log(f"Sequence {self.sequence} not valid. Only 'A', 'C', 'G' or 'T' bases allowed.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the index is valid (1-based, upper limit len(sequence) - 2)
        if self.index < 1 or self.index >= len(self.sequence) - 1:
            fu.log(f"Index {self.index} not valid. It should be between 1 and {len(self.sequence) - 2}.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Get index sequence base and next base
        bp = self.sequence[self.index-1] + self.sequence[self.index]

        # Get the filename
        filename = f"{self.type}_{self.parameter}_{self.index}_{bp}"
        csv_file = f"{filename}.csv"
        jpg_file = f"{filename}.jpg"

        # Unzip the file
        with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
            # Read the archive listing once and reuse it for every lookup below
            names = zip_ref.namelist()

            # Extract the csv file, then the jpg file (same order as the outputs)
            self._extract_member(zip_ref, csv_file, names, self.stage_io_dict["out"]["output_path_csv"])
            self._extract_member(zip_ref, jpg_file, names, self.stage_io_dict["out"]["output_path_jpg"])

            # Write the list of files, one archive member name per line
            if self.stage_io_dict["out"]["output_list_path"]:
                with open(self.stage_io_dict["out"]["output_list_path"], "w") as f:
                    f.writelines(f"{name}\n" for name in names)

        # Run Biobb block
        # self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def dna_timeseries_unzip(
        input_zip_file: str,
        output_path_csv: str,
        output_path_jpg: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Build a :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` object from the
    given arguments and return the result of its :meth:`launch() <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip.launch>` method."""
    # Every argument is forwarded by keyword under its own name; the extra
    # keyword arguments travel together as one entry named 'kwargs'.
    launcher_args = {
        'input_zip_file': input_zip_file,
        'output_path_csv': output_path_csv,
        'output_path_jpg': output_path_jpg,
        'output_list_path': output_list_path,
        'properties': properties,
        'kwargs': kwargs,
    }
    block = DnaTimeseriesUnzip(**launcher_args)
    return block.launch()
# The functional wrapper exposes the same documentation as the class.
dna_timeseries_unzip.__doc__ = DnaTimeseriesUnzip.__doc__

# Command line entry point obtained from the class for the wrapper above.
main = DnaTimeseriesUnzip.get_main(
    dna_timeseries_unzip,
    "Tool for extracting dna_timeseries output files.",
)

# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    main()