Coverage for biobb_dna / dna / dna_timeseries_unzip.py: 78%

73 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-15 18:49 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the DnaTimeseriesUnzip class and the command line interface.""" 

4import re 

5import zipfile 

6import shutil 

7from typing import Optional 

8 

9from biobb_dna.utils import constants 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13 

14 

class DnaTimeseriesUnzip(BiobbObject):
    """
    | biobb_dna DnaTimeseriesUnzip
    | Tool for extracting dna_timeseries output files.
    | Unzips a zip file containing dna_timeseries output files and extracts the csv and jpg files.

    Args:
        input_zip_file (str): Zip file with dna_timeseries output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/timeseries_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path_csv (str): dna_timeseries output csv file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.csv>`_. Accepted formats: csv (edam:format_3752).
        output_path_jpg (str): dna_timeseries output jpg file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.jpg>`_. Accepted formats: jpg (edam:format_3579).
        output_list_path (str) (Optional): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of analysis, series or histogram. Values: series, hist.
            * **parameter** (*str*) - (None) Type of parameter. Values: majd, majw, mind, minw, inclin, tip, xdisp, ydisp, shear, stretch, stagger, buckle, propel, opening, rise, roll, twist, shift, slide, tilt, alphaC, alphaW, betaC, betaW, gammaC, gammaW, deltaC, deltaW, epsilC, epsilW, zetaC, zetaW, chiC, chiW, phaseC, phaseW.
            * **sequence** (*str*) - (None) Nucleic acid sequence used for generating dna_timeseries output file.
            * **index** (*int*) - (1) Base pair index in the parameter 'sequence', starting from 1.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.dna.dna_timeseries_unzip import dna_timeseries_unzip
            prop = {
                'type': 'hist',
                'parameter': 'shift',
                'sequence': 'CGCGAATTCGCG',
                'index': 5
            }
            dna_timeseries_unzip(
                input_zip_file='/path/to/dna_timeseries/output.zip',
                output_path_csv='/path/to/output.csv',
                output_path_jpg='/path/to/output.jpg',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_zip_file,
                 output_path_csv, output_path_jpg, output_list_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any other local
        # is bound so that it only contains the call parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path_csv': output_path_csv,
                'output_path_jpg': output_path_jpg,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.parameter = properties.get('parameter', None)
        self.sequence = properties.get('sequence', None)
        self.index = properties.get('index', 1)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _extract_member(self, zip_ref, members, member_name, output_key) -> None:
        """Copy one member of the open zip archive into a staged output file.

        Args:
            zip_ref: Open :class:`zipfile.ZipFile` to read from.
            members: Names contained in the archive (``zip_ref.namelist()``).
            member_name (str): Name of the archive member to extract.
            output_key (str): Key of the destination path in ``self.stage_io_dict["out"]``.

        Raises:
            SystemExit: With exit code 1 when ``member_name`` is not in ``members``.
        """
        if member_name not in members:
            fu.log(f"File {member_name} not found in the zip file.", self.out_log, self.global_log)
            raise SystemExit(1)

        destination = self.stage_io_dict["out"][output_key]
        fu.log(f'{member_name} exists, copying into {destination}.',
               self.out_log, self.global_log)
        # Stream the member straight into the destination file instead of
        # extracting it under its archive name.
        with zip_ref.open(member_name) as source, open(destination, "wb") as target:
            shutil.copyfileobj(source, target)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` object.

        The properties are validated first; any invalid value is logged and the
        process is terminated by raising ``SystemExit(1)``. The same happens when
        the expected csv or jpg file is not found inside the zip file.

        Returns:
            int: 0 when the restart check short-circuits the execution,
            otherwise ``self.return_code``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Check that the three mandatory properties are set
        if self.type is None or self.parameter is None or self.sequence is None:
            fu.log("Properties 'type', 'parameter' and 'sequence' are mandatory to run DnaTimeseriesUnzip. Please set them.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the type is valid
        if self.type not in ["series", "hist"]:
            fu.log(f"Type {self.type} not valid. Valid types are: series, hist.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the parameter is valid
        if self.parameter not in constants.helical_parameters:
            fu.log(f"Parameter {self.parameter} not valid. Valid parameters are: {constants.helical_parameters}.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the sequence is valid. fullmatch is used because '$' in a
        # match() pattern would also accept a sequence ending in a newline.
        if not re.fullmatch(r'[ACGT]+', self.sequence):
            fu.log(f"Sequence {self.sequence} not valid. Only 'A', 'C', 'G' or 'T' bases allowed.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the index is valid (1-based; the accepted range is
        # 1 .. len(sequence) - 2, as enforced by the condition below)
        if self.index < 1 or self.index >= len(self.sequence) - 1:
            fu.log(f"Index {self.index} not valid. It should be between 1 and {len(self.sequence) - 2}.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Get index sequence base and next base
        bp = self.sequence[self.index-1] + self.sequence[self.index]

        # Get the filename
        filename = f"{self.type}_{self.parameter}_{self.index}_{bp}"
        csv_file = f"{filename}.csv"
        jpg_file = f"{filename}.jpg"

        # Unzip the file
        with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
            # Read the archive listing once; it is used for both lookups and
            # for the optional list file.
            members = zip_ref.namelist()

            # Extract the csv file first, then the jpg file
            self._extract_member(zip_ref, members, csv_file, "output_path_csv")
            self._extract_member(zip_ref, members, jpg_file, "output_path_jpg")

            # Write the list of files
            if self.stage_io_dict["out"]["output_list_path"]:
                with open(self.stage_io_dict["out"]["output_list_path"], "w") as f:
                    f.writelines(f"{name}\n" for name in members)

        # Run Biobb block
        # self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

179 

180 

def dna_timeseries_unzip(
        input_zip_file: str,
        output_path_csv: str,
        output_path_jpg: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Build a :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` object from the
    given arguments and return the result of its
    :meth:`launch() <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip.launch>` method."""
    # Snapshot the arguments before any other local name is bound, so that
    # exactly the call parameters (including the ``kwargs`` dict itself, under
    # the key 'kwargs') are forwarded to the constructor.
    call_arguments = dict(locals())
    unzipper = DnaTimeseriesUnzip(**call_arguments)
    return unzipper.launch()

191 

192 

# Reuse the class docstring as the documentation of the function wrapper.
dna_timeseries_unzip.__doc__ = DnaTimeseriesUnzip.__doc__

# Callable invoked below when this module is run as a script.
main = DnaTimeseriesUnzip.get_main(
    dna_timeseries_unzip,
    "Tool for extracting dna_timeseries output files.",
)

if __name__ == "__main__":
    main()