Coverage for biobb_dna/dna/dna_timeseries_unzip.py: 67%
86 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
1#!/usr/bin/env python3
3"""Module containing the DnaTimeseriesUnzip class and the command line interface."""
4import re
5import zipfile
6import shutil
7import argparse
8from typing import Optional
10from biobb_dna.utils import constants
11from biobb_common.generic.biobb_object import BiobbObject
12from biobb_common.configuration import settings
13from biobb_common.tools import file_utils as fu
14from biobb_common.tools.file_utils import launchlogger
class DnaTimeseriesUnzip(BiobbObject):
    """
    | biobb_dna DnaTimeseriesUnzip
    | Tool for extracting dna_timeseries output files.
    | Unzips a zip file containing dna_timeseries output files and extracts the csv and jpg files.

    Args:
        input_zip_file (str): Zip file with dna_timeseries output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/timeseries_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path_csv (str): dna_timeseries output csv file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.csv>`_. Accepted formats: csv (edam:format_3752).
        output_path_jpg (str): dna_timeseries output jpg file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.jpg>`_. Accepted formats: jpg (edam:format_3579).
        output_list_path (str) (Optional): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of analysis, series or histogram. Values: series, hist.
            * **parameter** (*str*) - (None) Type of parameter. Values: majd, majw, mind, minw, inclin, tip, xdisp, ydisp, shear, stretch, stagger, buckle, propel, opening, rise, roll, twist, shift, slide, tilt, alphaC, alphaW, betaC, betaW, gammaC, gammaW, deltaC, deltaW, epsilC, epsilW, zetaC, zetaW, chiC, chiW, phaseC, phaseW.
            * **sequence** (*str*) - (None) Nucleic acid sequence used for generating dna_timeseries output file.
            * **index** (*int*) - (1) Base pair index in the parameter 'sequence', starting from 1.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.dna.dna_timeseries_unzip import dna_timeseries_unzip
            prop = {
                'type': 'hist',
                'parameter': 'shift',
                'sequence': 'CGCGAATTCGCG',
                'index': 5
            }
            dna_timeseries_unzip(
                input_zip_file='/path/to/dna_timeseries/output.zip',
                output_path_csv='/path/to/output.csv',
                output_path_jpg='/path/to/output.jpg',
                output_list_path='/path/to/output.txt',
                properties=prop)

    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_zip_file,
                 output_path_csv, output_path_jpg, output_list_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # further local variable is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path_csv': output_path_csv,
                'output_path_jpg': output_path_jpg,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.parameter = properties.get('parameter', None)
        self.sequence = properties.get('sequence', None)
        self.index = properties.get('index', 1)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _exit_with_error(self, message):
        """Log *message* and terminate with exit status 1.

        Raises:
            SystemExit: always, with code 1. Raised directly instead of calling
                the ``exit`` helper, which is injected by the ``site`` module
                and is not guaranteed to exist in every interpreter setup.
        """
        fu.log(message, self.out_log, self.global_log)
        raise SystemExit(1)

    def _check_launch_properties(self) -> None:
        """Validate 'type', 'parameter', 'sequence' and 'index'.

        The first invalid property is logged and the process exits with
        status 1 (see :meth:`_exit_with_error`).
        """
        # The three properties without a usable default are mandatory
        if self.type is None or self.parameter is None or self.sequence is None:
            self._exit_with_error(
                "Properties 'type', 'parameter' and 'sequence' are mandatory to run DnaTimeseriesUnzip. Please set them.")

        # Check that the type is valid
        if self.type not in ["series", "hist"]:
            self._exit_with_error(f"Type {self.type} not valid. Valid types are: series, hist.")

        # Check that the parameter is valid
        if self.parameter not in constants.helical_parameters:
            self._exit_with_error(
                f"Parameter {self.parameter} not valid. Valid parameters are: {constants.helical_parameters}.")

        # Check that the sequence is valid. fullmatch (rather than match with
        # '$') also rejects a sequence that ends with a newline character.
        if not re.fullmatch(r'[ACGT]+', self.sequence):
            self._exit_with_error(
                f"Sequence {self.sequence} not valid. Only 'A', 'C', 'G' or 'T' bases allowed.")

        # Check that the index is valid: 1-based, and the last accepted value
        # is len(sequence) - 2.
        if self.index < 1 or self.index >= len(self.sequence) - 1:
            self._exit_with_error(
                f"Index {self.index} not valid. It should be between 1 and {len(self.sequence) - 2}.")

    def _extract_member(self, zip_ref, member, available, out_key) -> None:
        """Copy one archive member to a staged output path.

        Args:
            zip_ref: Open :class:`zipfile.ZipFile` to read from.
            member (str): Name of the file expected inside the archive.
            available: Collection of member names present in the archive.
            out_key (str): Key of the destination in ``self.stage_io_dict["out"]``.

        Logs and exits with status 1 when *member* is not in *available*.
        """
        if member not in available:
            self._exit_with_error(f"File {member} not found in the zip file.")

        destination = self.stage_io_dict["out"][out_key]
        fu.log(f'{member} exists, copying into {destination}.',
               self.out_log, self.global_log)
        with zip_ref.open(member) as source, open(destination, "wb") as target:
            shutil.copyfileobj(source, target)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` object."""

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Abort early on missing or invalid properties
        self._check_launch_properties()

        # Get index sequence base and next base ('index' is 1-based)
        bp = self.sequence[self.index-1] + self.sequence[self.index]

        # Get the filename
        filename = f"{self.type}_{self.parameter}_{self.index}_{bp}"
        csv_file = f"{filename}.csv"
        jpg_file = f"{filename}.jpg"

        # Unzip the file
        with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
            # Read the member list once; it is reused for both lookups and
            # for the optional listing below.
            members = zip_ref.namelist()

            # Extract the csv file first, then the jpg file
            self._extract_member(zip_ref, csv_file, members, "output_path_csv")
            self._extract_member(zip_ref, jpg_file, members, "output_path_jpg")

            # Write the list of files, one archive member per line
            list_path = self.stage_io_dict["out"]["output_list_path"]
            if list_path:
                with open(list_path, "w") as f:
                    f.writelines(f"{name}\n" for name in members)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def dna_timeseries_unzip(
        input_zip_file: str,
        output_path_csv: str,
        output_path_jpg: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Create :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` class and
    execute the :meth:`launch() <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip.launch>` method."""

    return DnaTimeseriesUnzip(
        input_zip_file=input_zip_file,
        output_path_csv=output_path_csv,
        output_path_jpg=output_path_jpg,
        output_list_path=output_list_path,
        properties=properties, **kwargs).launch()


# Expose the class documentation (arguments, properties, example) on the
# function wrapper. This has to be a module-level statement: placed after the
# ``return`` inside the function body it would never execute.
dna_timeseries_unzip.__doc__ = DnaTimeseriesUnzip.__doc__
def main():
    """Command line entry point: parse the arguments and run dna_timeseries_unzip with them."""
    cli = argparse.ArgumentParser(
        description='Tool for extracting dna_timeseries output files.',
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    cli.add_argument('--config', required=False, help='Configuration file')

    # Mandatory options live in their own group so the help output lists them separately.
    mandatory = cli.add_argument_group('required arguments')
    mandatory_options = (
        ('--input_zip_file',
         'Zip file with dna_timeseries output files. Accepted formats: zip.'),
        ('--output_path_csv',
         'dna_timeseries output csv file contained within input_zip_file. Accepted formats: csv.'),
        ('--output_path_jpg',
         'dna_timeseries output jpg file contained within input_zip_file. Accepted formats: jpg.'),
    )
    for flag, description in mandatory_options:
        mandatory.add_argument(flag, required=True, help=description)

    cli.add_argument(
        '--output_list_path', required=False,
        help='Text file with a list of all dna_timeseries output files contained within input_zip_file. Accepted formats: txt.')

    parsed = cli.parse_args()

    # When --config is not given, the literal string "{}" is handed to ConfReader.
    config_source = parsed.config or "{}"
    properties = settings.ConfReader(config=config_source).get_prop_dic()

    dna_timeseries_unzip(
        input_zip_file=parsed.input_zip_file,
        output_path_csv=parsed.output_path_csv,
        output_path_jpg=parsed.output_path_jpg,
        output_list_path=parsed.output_list_path,
        properties=properties)
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()