Coverage for biobb_dna / curvesplus / canal_unzip.py: 73%

70 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-15 18:49 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the CanalUnzip class and the command line interface.""" 

4import re 

5import zipfile 

6import shutil 

7from typing import Optional 

8 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class CanalUnzip(BiobbObject):
    """
    | biobb_dna CanalUnzip
    | Tool for extracting biobb_canal output files.
    | Unzips a Canal output file contained within a zip file.

    Args:
        input_zip_file (str): Zip file with Canal output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/canal_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path (str): Canal output file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.ser>`_. Accepted formats: ser (edam:format_2330), his (edam:format_3905), cor (edam:format_3465).
        output_list_path (str) (Optional): Text file with a list of all Canal output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of file. Values: series, histo, corr.
            * **helpar_name** (*str*) - (None) Helical parameter name, only for 'series' and 'histo' types. Values: alphaC, alphaW, ampC, ampW, ax-bend, betaC, betaW, buckle, chiC, chiW, curv, deltaC, deltaW, epsilC, epsilW, gammaC, gammaW, h-ris, h-twi, inclin, majd, majw, mind, minw, opening, phaseC, phaseW, propel, reg, rise, roll, shear, shift, slide, stagger, stretch, tbend, tilt, tip, twist, xdisp, ydisp, zetaC, zetaW.
            * **correlation** (*str*) - (None) Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.canal_unzip import canal_unzip
            prop = {
                'type': 'series',
                'helpar_name': 'alphaC'
            }
            canal_unzip(
                input_zip_file='/path/to/canal/output.zip',
                output_path='/path/to/output.ser',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    # Maps every accepted value of the 'type' property to the extension of
    # the file that is looked up inside the zip archive.
    _EXTENSIONS = {
        "series": "ser",
        "histo": "his",
        "corr": "cor"
    }

    # Helical parameter names accepted for the 'series' and 'histo' types.
    # Single source of truth for both the validation and its error message.
    _HELPAR_NAMES = (
        "alphaC", "alphaW", "ampC", "ampW", "ax-bend", "betaC", "betaW", "buckle",
        "chiC", "chiW", "curv", "deltaC", "deltaW", "epsilC", "epsilW", "gammaC",
        "gammaW", "h-ris", "h-twi", "inclin", "majd", "majw", "mind", "minw",
        "opening", "phaseC", "phaseW", "propel", "reg", "rise", "roll", "shear",
        "shift", "slide", "stagger", "stretch", "tbend", "tilt", "tip", "twist",
        "xdisp", "ydisp", "zetaC", "zetaW",
    )

    def __init__(self, input_zip_file,
                 output_path, output_list_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any other local
        # is created so that it only holds the call parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path': output_path,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.helpar_name = properties.get('helpar_name', None)
        self.correlation = properties.get('correlation', None)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _abort(self, message: str) -> None:
        """Log ``message`` and terminate by raising ``SystemExit(1)``.

        ``SystemExit`` is raised directly instead of calling the ``exit``
        builtin, which is injected by the ``site`` module for interactive
        sessions and is not meant to be used in programs.
        """
        fu.log(message, self.out_log, self.global_log)
        raise SystemExit(1)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` object.

        Validates the 'type', 'helpar_name' and 'correlation' properties,
        builds the expected member name ``canal_output_<suffix>.<ext>``,
        copies that member from the input zip into ``output_path`` and, when
        ``output_list_path`` is set, writes the names of all zip members to
        it (one per line).

        Returns:
            int: 0 when ``check_restart()`` is truthy, otherwise
            ``self.return_code``.

        Raises:
            SystemExit: with code 1 when a property is missing or invalid, or
                when the expected file is not present in the zip archive.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # 'type' is always required
        if self.type is None:
            self._abort("Property 'type' is mandatory to run CanalUnzip. Please set it.")

        # 'helpar_name' is required when type is series or histo
        if self.type in ["series", "histo"] and self.helpar_name is None:
            self._abort("Property 'helpar_name' is mandatory to run CanalUnzip with type 'series' or 'histo'. Please set it.")

        # 'correlation' is required when type is corr
        if self.type == "corr" and self.correlation is None:
            self._abort("Property 'correlation' is mandatory to run CanalUnzip with type 'corr'. Please set it.")

        # Check that the type is valid
        if self.type not in self._EXTENSIONS:
            self._abort(f"Type {self.type} not valid. Valid types are: {', '.join(self._EXTENSIONS)}.")

        # Generate the suffix of the file name
        if self.type == "corr":
            # fullmatch (not match) so that trailing garbage such as
            # '98_165abc' is rejected; the isinstance guard turns a
            # non-string value into the same validation error instead of a
            # TypeError raised by the re module.
            if not isinstance(self.correlation, str) or not re.fullmatch(r'\d+_\d+', self.correlation):
                self._abort(f"Correlation {self.correlation} not valid. It should match the pattern <number_number>.")
            suffix = self.correlation
        else:
            if self.helpar_name not in self._HELPAR_NAMES:
                self._abort(f"Parameter {self.helpar_name} not valid. Valid parameters are: {', '.join(self._HELPAR_NAMES)}.")
            suffix = self.helpar_name

        # Generate the filename
        filename = f"canal_output_{suffix}.{self._EXTENSIONS[self.type]}"

        # Unzip the file
        with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
            members = zip_ref.namelist()

            # The requested file must be a member of the archive
            if filename not in members:
                self._abort(f"File {filename} not found in the zip file.")

            # Stream the member straight into the output path
            output_path = self.stage_io_dict["out"]["output_path"]
            fu.log(f'{filename} exists, copying into {output_path}.',
                   self.out_log, self.global_log)
            with zip_ref.open(filename) as source, open(output_path, "wb") as target:
                shutil.copyfileobj(source, target)

            # Write the list of files (optional output)
            output_list_path = self.stage_io_dict["out"]["output_list_path"]
            if output_list_path:
                with open(output_list_path, "w") as f:
                    f.writelines(f"{name}\n" for name in members)

        # run_biobb() is intentionally not called here: the extraction above
        # is done in-process with the zipfile module.

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

177 

178 

def canal_unzip(
        input_zip_file: str,
        output_path: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Build a :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` object from the
    given arguments and run its :meth:`launch() <biobb_dna.curvesplus.canal_unzip.CanalUnzip.launch>` method,
    returning whatever ``launch()`` returns."""
    building_block = CanalUnzip(
        input_zip_file=input_zip_file,
        output_path=output_path,
        output_list_path=output_list_path,
        properties=properties,
        # The extra keyword arguments are forwarded as ONE keyword literally
        # named 'kwargs' (not unpacked); this is exactly what expanding
        # dict(locals()) produces.
        kwargs=kwargs,
    )
    return building_block.launch()

188 

189 

# Give the functional wrapper the same documentation as the class, so the
# arguments and properties are described in a single place.
canal_unzip.__doc__ = CanalUnzip.__doc__
# Command line entry point. get_main is not defined in this file; presumably
# it builds the CLI around canal_unzip using the given description
# (verify against BiobbObject).
main = CanalUnzip.get_main(canal_unzip, "Tool for extracting biobb_canal output files.")

# Run the command line interface only when the module is executed as a script.
if __name__ == '__main__':
    main()