Coverage for biobb_dna/curvesplus/canal_unzip.py: 63%

82 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 10:36 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the CanalUnzip class and the command line interface.""" 

4import re 

5import zipfile 

6import shutil 

7import argparse 

8from typing import Optional 

9 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.configuration import settings 

12from biobb_common.tools import file_utils as fu 

13from biobb_common.tools.file_utils import launchlogger 

14 

15 

class CanalUnzip(BiobbObject):
    """
    | biobb_dna CanalUnzip
    | Tool for extracting biobb_canal output files.
    | Unzips a Canal output file contained within a zip file.

    Args:
        input_zip_file (str): Zip file with Canal output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/canal_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path (str): Canal output file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.ser>`_. Accepted formats: ser (edam:format_2330), his (edam:format_3905), cor (edam:format_3465).
        output_list_path (str) (Optional): Text file with a list of all Canal output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of file. Values: series, histo, corr.
            * **helpar_name** (*str*) - (None) Helical parameter name, only for 'series' and 'histo' types. Values: alphaC, alphaW, ampC, ampW, ax-bend, betaC, betaW, buckle, chiC, chiW, curv, deltaC, deltaW, epsilC, epsilW, gammaC, gammaW, h-ris, h-twi, inclin, majd, majw, mind, minw, opening, phaseC, phaseW, propel, reg, rise, roll, shear, shift, slide, stagger, stretch, tbend, tilt, tip, twist, xdisp, ydisp, zetaC, zetaW.
            * **correlation** (*str*) - (None) Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.canal_unzip import canal_unzip
            prop = {
                'type': 'series',
                'helpar_name': 'alphaC'
            }
            canal_unzip(
                input_zip_file='/path/to/canal/output.zip',
                output_path='/path/to/output.ser',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    # Extension of the archive member for each supported value of 'type'.
    _EXTENSIONS = {
        "series": "ser",
        "histo": "his",
        "corr": "cor"
    }

    # Helical parameter names accepted for the 'series' and 'histo' types.
    # Single source of truth for both the membership test and the error text.
    _HELPAR_NAMES = (
        "alphaC", "alphaW", "ampC", "ampW", "ax-bend", "betaC", "betaW", "buckle",
        "chiC", "chiW", "curv", "deltaC", "deltaW", "epsilC", "epsilW", "gammaC",
        "gammaW", "h-ris", "h-twi", "inclin", "majd", "majw", "mind", "minw",
        "opening", "phaseC", "phaseW", "propel", "reg", "rise", "roll", "shear",
        "shift", "slide", "stagger", "stretch", "tbend", "tilt", "tip", "twist",
        "xdisp", "ydisp", "zetaC", "zetaW",
    )

    def __init__(self, input_zip_file,
                 output_path, output_list_path=None, properties=None, **kwargs) -> None:
        """Store the input/output paths and the block-specific properties.

        Args:
            input_zip_file: Path to the zip archive to read from.
            output_path: Path where the selected archive member is written.
            output_list_path: Optional path for a text listing of the archive.
            properties: Optional dict with 'type', 'helpar_name' and
                'correlation'; ``None`` is treated as an empty dict.
            **kwargs: Accepted for call compatibility; not used here.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # additional local variable is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path': output_path,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.helpar_name = properties.get('helpar_name', None)
        self.correlation = properties.get('correlation', None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _log_and_exit(self, message):
        """Log *message* and stop the run by raising ``SystemExit(1)``.

        ``SystemExit`` is raised directly rather than through the ``exit``
        helper, which is injected by the ``site`` module, is not guaranteed
        to exist, and closes ``sys.stdin`` as a side effect.
        """
        fu.log(message, self.out_log, self.global_log)
        raise SystemExit(1)

    def _archive_member_name(self) -> str:
        """Validate the properties and return the archive member to extract.

        Returns:
            The member name ``canal_output_<suffix>.<extension>``, where the
            suffix is the correlation (type 'corr') or the helical parameter
            name (types 'series' and 'histo').

        Raises:
            SystemExit: With code 1, after logging the reason, when a
                required property is missing or has an invalid value.
        """
        # 'type' is always required
        if self.type is None:
            self._log_and_exit("Property 'type' is mandatory to run CanalUnzip. Please set it.")

        # Check that helpar_name is set if type is series or histo
        if self.type in ("series", "histo") and self.helpar_name is None:
            self._log_and_exit("Property 'helpar_name' is mandatory to run CanalUnzip with type 'series' or 'histo'. Please set it.")

        # Check that correlation is set if type is corr
        if self.type == "corr" and self.correlation is None:
            self._log_and_exit("Property 'correlation' is mandatory to run CanalUnzip with type 'corr'. Please set it.")

        # Check that the type is valid
        if self.type not in self._EXTENSIONS:
            self._log_and_exit(f"Type {self.type} not valid. Valid types are: {', '.join(self._EXTENSIONS)}.")

        if self.type == "corr":
            # fullmatch: the whole value must be <number>_<number>; a prefix
            # match would let trailing garbage through into the file name.
            if not re.fullmatch(r'\d+_\d+', self.correlation):
                self._log_and_exit(f"Correlation {self.correlation} not valid. It should match the pattern <number_number>.")
            sufix = self.correlation
        else:
            # Check that the helpar_name is valid
            if self.helpar_name not in self._HELPAR_NAMES:
                self._log_and_exit(f"Parameter {self.helpar_name} not valid. Valid parameters are: {', '.join(self._HELPAR_NAMES)}.")
            sufix = self.helpar_name

        return f"canal_output_{sufix}.{self._EXTENSIONS[self.type]}"

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` object.

        Validates the properties, copies the selected member of the input
        zip archive to ``output_path`` and, when ``output_list_path`` was
        given, writes the names of all archive members to it, one per line.

        Returns:
            int: 0 when ``check_restart()`` is true, otherwise
            ``self.return_code``.

        Raises:
            SystemExit: With code 1 when a property is missing or invalid,
                or when the expected member is not in the archive.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Validate properties and generate the filename
        filename = self._archive_member_name()

        staged_in = self.stage_io_dict["in"]
        staged_out = self.stage_io_dict["out"]
        output_path = staged_out["output_path"]
        # Optional output: tolerate the key being absent as well as falsy.
        output_list_path = staged_out.get("output_list_path")

        # Unzip the file
        with zipfile.ZipFile(staged_in["input_zip_file"], 'r') as zip_ref:
            members = zip_ref.namelist()

            # Check if the file exists in the zip file
            if filename not in members:
                self._log_and_exit(f"File {filename} not found in the zip file.")

            # Extract the file
            fu.log(f'{filename} exists, copying into {output_path}.',
                   self.out_log, self.global_log)
            with zip_ref.open(filename) as source, open(output_path, "wb") as target:
                shutil.copyfileobj(source, target)

            # Write the list of files
            if output_list_path:
                with open(output_list_path, "w") as f:
                    f.writelines(f"{name}\n" for name in members)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

182 

183 

def canal_unzip(
        input_zip_file: str,
        output_path: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Create :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.canal_unzip.CanalUnzip.launch>` method."""

    return CanalUnzip(
        input_zip_file=input_zip_file,
        output_path=output_path,
        output_list_path=output_list_path,
        properties=properties, **kwargs).launch()


# Expose the full class documentation on the functional wrapper. This has to
# run at module level: placed after the ``return`` inside the function body
# it is unreachable and the docstring is never replaced.
canal_unzip.__doc__ = CanalUnzip.__doc__

200 

201 

def main():
    """Run the CanalUnzip building block from the command line.

    Reads ``--config``, ``--input_zip_file``, ``--output_path`` and the
    optional ``--output_list_path``, builds the properties dictionary from
    the configuration and hands everything to :func:`canal_unzip`.
    Please check the command line documentation.
    """
    def wide_raw_help(prog):
        # Very large width so help lines are never wrapped.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description='Tool for extracting biobb_canal output files.',
        formatter_class=wide_raw_help)
    cli.add_argument('--config', required=False, help='Configuration file')

    # Mandatory file arguments, registered in the order they appear in --help.
    mandatory = cli.add_argument_group('required arguments')
    mandatory_flags = (
        ('--input_zip_file',
         'Zip file with Canal output files. Accepted formats: zip.'),
        ('--output_path',
         'Canal output file contained within input_zip_file. Accepted formats: ser, his, cor.'),
    )
    for flag, help_text in mandatory_flags:
        mandatory.add_argument(flag, required=True, help=help_text)

    cli.add_argument(
        '--output_list_path', required=False,
        help='Text file with a list of all Canal output files contained within input_zip_file. Accepted formats: txt.')

    parsed = cli.parse_args()
    # With no --config, fall back to an empty JSON object.
    parsed.config = parsed.config or "{}"
    properties = settings.ConfReader(config=parsed.config).get_prop_dic()

    canal_unzip(
        input_zip_file=parsed.input_zip_file,
        output_path=parsed.output_path,
        output_list_path=parsed.output_list_path,
        properties=properties)


# Script entry point: only run the CLI when executed directly.
if __name__ == '__main__':
    main()