Coverage for biobb_flexserv / pcasuite / pcz_unzip.py: 90%

42 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-05 13:10 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZunzip class and the command line interface.""" 

4from typing import Optional 

5import shutil 

6from pathlib import PurePath 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.generic.biobb_object import BiobbObject 

9from biobb_common.tools.file_utils import launchlogger 

10 

11 

class PCZunzip(BiobbObject):
    """
    | biobb_flexserv PCZunzip
    | Wrapper of the pcaunzip tool from the PCAsuite FlexServ module.
    | Uncompress Molecular Dynamics (MD) trajectories compressed using Principal Component Analysis (PCA) algorithms.

    Args:
        input_pcz_path (str): Input compressed trajectory. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_crd_path (str): Output uncompressed trajectory. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/traj.crd>`_. Accepted formats: crd (edam:format_3878), mdcrd (edam:format_3878), inpcrd (edam:format_3878), pdb (edam:format_1476).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pcaunzip") pcaunzip binary path to be used.
            * **verbose** (*bool*) - (False) Make output verbose
            * **pdb** (*bool*) - (False) Use PDB format for output trajectory
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_unzip import pcz_unzip
            prop = {
                'pdb': False
            }
            pcz_unzip( input_pcz_path='/path/to/pcazip_input.pcz',
                        output_crd_path='/path/to/pcazip_traj.crd',
                        properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_crd_path: str, properties: Optional[dict] = None, **kwargs) -> None:

        # A missing/None properties argument is treated as an empty dictionary
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the local names visible at this point (constructor arguments);
        # it must be taken before any further local variable is defined.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_crd_path': output_crd_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pcaunzip')
        self.verbose = properties.get('verbose', False)
        self.pdb = properties.get('pdb', False)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcaunzip module.

        Returns:
            int: ``0`` when ``check_restart()`` is truthy (nothing is executed),
            otherwise the value of ``self.return_code`` after the run
            (presumably set by ``run_biobb()`` -- confirm in BiobbObject).

        Raises:
            OSError: propagated from ``shutil.copy2`` when the input file cannot be
                copied into the sandbox or the expected output file is not found
                in it after the execution.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        input_pcz_path = self.io_dict["in"]["input_pcz_path"]
        output_crd_path = self.io_dict["out"]["output_crd_path"]
        output_crd_name = PurePath(output_crd_path).name

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        # Creating temporary folder
        tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % tmp_folder, self.out_log)

        try:
            shutil.copy2(input_pcz_path, tmp_folder)

            # Command line
            # pcaunzip -i infile [-o outfile] [--pdb] [--verbose] [--help]
            # Only file names (not full paths) are passed to the tool, which is
            # executed from inside the temporary folder.
            # NOTE(review): the 'cd <dir> ;' tokens presumably rely on run_biobb()
            # executing the joined command through a shell -- confirm.
            self.cmd = ['cd', tmp_folder, ';',
                        self.binary_path,
                        "-i", PurePath(input_pcz_path).name,
                        "-o", output_crd_name
                        ]

            if self.verbose:
                self.cmd.append('-v')

            if self.pdb:
                self.cmd.append('--pdb')

            # Run Biobb block
            self.run_biobb()

            # Copy outputs from temporary folder to output path
            shutil.copy2(PurePath(tmp_folder).joinpath(output_crd_name), PurePath(output_crd_path))
        finally:
            # Remove temporary folder(s). Done in a finally clause so the sandbox is
            # registered and handed to the cleanup routine even when one of the
            # copies or the tool execution raises; the exception still propagates.
            self.tmp_files.append(tmp_folder)
            self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

142 

143 

def pcz_unzip(input_pcz_path: str,
              output_crd_path: str,
              properties: Optional[dict] = None, **kwargs) -> int:
    """Instantiate the :class:`PCZunzip <flexserv.pcasuite.PCZunzip>` building block
    and run its :meth:`launch() <flexserv.pcasuite.PCZunzip.launch>` method.

    Returns:
        int: the value returned by ``PCZunzip.launch()``.
    """
    # The extra keyword arguments are deliberately forwarded as ONE keyword
    # named ``kwargs`` (not unpacked): every local name of this function is
    # passed to the constructor under its own name.
    block = PCZunzip(
        input_pcz_path=input_pcz_path,
        output_crd_path=output_crd_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

150 

151 

# The functional wrapper exposes the same documentation as the class.
pcz_unzip.__doc__ = PCZunzip.__doc__

# Command line entry point built by the class from the wrapper and a description.
main = PCZunzip.get_main(
    pcz_unzip,
    "Uncompress Molecular Dynamics (MD) compressed trajectories using Principal Component Analysis (PCA) algorithms.",
)

if __name__ == "__main__":
    main()