Coverage for biobb_dna/dna/dna_timeseries_unzip.py

1#!/usr/bin/env python3

3"""Module containing the DnaTimeseriesUnzip class and the command line interface."""

4import re

5import zipfile

6import shutil

7import argparse

8from typing import Optional

10from biobb_dna.utils import constants

11from biobb_common.generic.biobb_object import BiobbObject

12from biobb_common.configuration import settings

13from biobb_common.tools import file_utils as fu

14from biobb_common.tools.file_utils import launchlogger

class DnaTimeseriesUnzip(BiobbObject):
    """
    | biobb_dna DnaTimeseriesUnzip
    | Tool for extracting dna_timeseries output files.
    | Unzips a zip file containing dna_timeseries output files and extracts the csv and jpg files.

    Args:
        input_zip_file (str): Zip file with dna_timeseries output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/timeseries_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path_csv (str): dna_timeseries output csv file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.csv>`_. Accepted formats: csv (edam:format_3752).
        output_path_jpg (str): dna_timeseries output jpg file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.jpg>`_. Accepted formats: jpg (edam:format_3579).
        output_list_path (str) (Optional): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of analysis, series or histogram. Values: series, hist.
            * **parameter** (*str*) - (None) Type of parameter. Values: majd, majw, mind, minw, inclin, tip, xdisp, ydisp, shear, stretch, stagger, buckle, propel, opening, rise, roll, twist, shift, slide, tilt, alphaC, alphaW, betaC, betaW, gammaC, gammaW, deltaC, deltaW, epsilC, epsilW, zetaC, zetaW, chiC, chiW, phaseC, phaseW.
            * **sequence** (*str*) - (None) Nucleic acid sequence used for generating dna_timeseries output file.
            * **index** (*int*) - (1) Base pair index in the parameter 'sequence', starting from 1.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.dna.dna_timeseries_unzip import dna_timeseries_unzip
            prop = {
                'type': 'hist',
                'parameter': 'shift',
                'sequence': 'CGCGAATTCGCG',
                'index': 5
            }
            dna_timeseries_unzip(
                input_zip_file='/path/to/dna_timeseries/output.zip',
                output_path_csv='/path/to/output.csv',
                output_path_jpg='/path/to/output.jpg',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(self, input_zip_file: str,
                 output_path_csv: str, output_path_jpg: str,
                 output_list_path: Optional[str] = None,
                 properties: Optional[dict] = None, **kwargs) -> None:
        """Store the input/output paths and the block-specific properties.

        Args:
            input_zip_file: Zip archive holding the dna_timeseries outputs.
            output_path_csv: Destination of the extracted csv file.
            output_path_jpg: Destination of the extracted jpg file.
            output_list_path: Optional destination of the archive listing.
            properties: Property dictionary; ``type``, ``parameter``,
                ``sequence`` and ``index`` are read from it.
            **kwargs: Accepted for interface compatibility; they are only
                captured in ``locals_var_dict``.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must stay right after the
        # parent constructor call so that no extra locals leak into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path_csv': output_path_csv,
                'output_path_jpg': output_path_jpg,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.parameter = properties.get('parameter', None)
        self.sequence = properties.get('sequence', None)
        self.index = properties.get('index', 1)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` object.

        Validates the properties, builds the expected member name
        ``<type>_<parameter>_<index>_<bp>`` and copies the matching csv and
        jpg members of the input zip into the output paths. When
        ``output_list_path`` is set, the names of all archive members are
        written to it, one per line.

        Returns:
            int: ``0`` when ``check_restart()`` is truthy, otherwise
            ``self.return_code``.

        Raises:
            SystemExit: With code 1 when a mandatory property is missing or
                invalid, or when the expected csv/jpg member is not in the
                archive. The reason is logged before exiting.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # 'type', 'parameter' and 'sequence' are all needed to build the
        # name of the archive member to extract.
        if self.type is None or self.parameter is None or self.sequence is None:
            fu.log("Properties 'type', 'parameter' and 'sequence' are mandatory to run DnaTimeseriesUnzip. Please set them.",
                   self.out_log, self.global_log)
            # SystemExit is raised directly: the exit() builtin is injected
            # by the site module and is not guaranteed to exist.
            raise SystemExit(1)

        # Check that the type is valid
        if self.type not in ["series", "hist"]:
            fu.log(f"Type {self.type} not valid. Valid types are: series, hist.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the parameter is valid
        if self.parameter not in constants.helical_parameters:
            fu.log(f"Parameter {self.parameter} not valid. Valid parameters are: {constants.helical_parameters}.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the sequence is valid. fullmatch is used because
        # match() with '$' would also accept a trailing newline.
        if not re.fullmatch(r'[ACGT]+', self.sequence):
            fu.log(f"Sequence {self.sequence} not valid. Only 'A', 'C', 'G' or 'T' bases allowed.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Check that the index is valid (1-based, upper bound len - 2)
        if self.index < 1 or self.index >= len(self.sequence) - 1:
            fu.log(f"Index {self.index} not valid. It should be between 1 and {len(self.sequence) - 2}.",
                   self.out_log, self.global_log)
            raise SystemExit(1)

        # Base at the (1-based) index followed by the next base
        bp = self.sequence[self.index-1] + self.sequence[self.index]

        # Get the filename
        filename = f"{self.type}_{self.parameter}_{self.index}_{bp}"
        csv_file = f"{filename}.csv"
        jpg_file = f"{filename}.jpg"

        # Unzip the file
        with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
            # Read the member list once; it is reused for both lookups and
            # for the optional listing file.
            members = zip_ref.namelist()

            # csv first, then jpg: a missing member aborts at that point.
            for member, out_key in ((csv_file, "output_path_csv"),
                                    (jpg_file, "output_path_jpg")):
                if member not in members:
                    fu.log(f"File {member} not found in the zip file.", self.out_log, self.global_log)
                    raise SystemExit(1)

                target_path = self.stage_io_dict["out"][out_key]
                fu.log(f'{member} exists, copying into {target_path}.',
                       self.out_log, self.global_log)
                with zip_ref.open(member) as source, open(target_path, "wb") as target:
                    shutil.copyfileobj(source, target)

            # Write the list of files
            list_path = self.stage_io_dict["out"]["output_list_path"]
            if list_path:
                with open(list_path, "w") as f:
                    f.writelines(f"{name}\n" for name in members)

        # The extraction above is done in-process, so run_biobb() is
        # intentionally not called.

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

184

185

def dna_timeseries_unzip(
        input_zip_file: str,
        output_path_csv: str,
        output_path_jpg: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Create :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` class and
    execute the :meth:`launch() <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip.launch>` method."""

    return DnaTimeseriesUnzip(
        input_zip_file=input_zip_file,
        output_path_csv=output_path_csv,
        output_path_jpg=output_path_jpg,
        output_list_path=output_list_path,
        properties=properties, **kwargs).launch()


# Expose the full class documentation on the functional wrapper. This must
# live at module level: placed after the ``return`` inside the function it
# would be unreachable and the docstring would never be copied.
dna_timeseries_unzip.__doc__ = DnaTimeseriesUnzip.__doc__

204

205

def main():
    """Parse the command line and run the building block.

    Reads the optional ``--config`` file (an empty JSON object is used when
    it is not given), resolves the properties through ``settings.ConfReader``
    and forwards the input/output paths to ``dna_timeseries_unzip``.
    """
    def _wide_help_formatter(prog):
        # Very large width so that help strings are never wrapped.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description='Tool for extracting dna_timeseries output files.',
        formatter_class=_wide_help_formatter)
    cli.add_argument('--config', required=False, help='Configuration file')

    # Mandatory paths, registered in the order they appear in the help text.
    mandatory = cli.add_argument_group('required arguments')
    mandatory_flags = (
        ('--input_zip_file',
         'Zip file with dna_timeseries output files. Accepted formats: zip.'),
        ('--output_path_csv',
         'dna_timeseries output csv file contained within input_zip_file. Accepted formats: csv.'),
        ('--output_path_jpg',
         'dna_timeseries output jpg file contained within input_zip_file. Accepted formats: jpg.'),
    )
    for flag, help_text in mandatory_flags:
        mandatory.add_argument(flag, required=True, help=help_text)

    cli.add_argument(
        '--output_list_path', required=False,
        help='Text file with a list of all dna_timeseries output files contained within input_zip_file. Accepted formats: txt.')

    options = cli.parse_args()
    # Fall back to an empty JSON document when no configuration is supplied.
    options.config = options.config or "{}"
    block_properties = settings.ConfReader(config=options.config).get_prop_dic()

    dna_timeseries_unzip(
        input_zip_file=options.input_zip_file,
        output_path_csv=options.output_path_csv,
        output_path_jpg=options.output_path_jpg,
        output_list_path=options.output_list_path,
        properties=block_properties)

232

233

# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()

Coverage for biobb_dna/dna/dna_timeseries_unzip.py: 68%

85 statements