Coverage for biobb_flexserv/pcasuite/pcz_unzip.py: 74%

53 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-19 15:08 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZunzip class and the command line interface.""" 

4import argparse 

5from typing import Optional 

6import shutil 

7from pathlib import PurePath 

8from biobb_common.tools import file_utils as fu 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.configuration import settings 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class PCZunzip(BiobbObject):
    """
    | biobb_flexserv PCZunzip
    | Wrapper of the pcaunzip tool from the PCAsuite FlexServ module.
    | Uncompress Molecular Dynamics (MD) trajectories compressed using Principal Component Analysis (PCA) algorithms.

    Args:
        input_pcz_path (str): Input compressed trajectory. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_crd_path (str): Output uncompressed trajectory. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/traj.crd>`_. Accepted formats: crd (edam:format_3878), mdcrd (edam:format_3878), inpcrd (edam:format_3878), pdb (edam:format_1476).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pcaunzip") pcaunzip binary path to be used.
            * **verbose** (*bool*) - (False) Make output verbose
            * **pdb** (*bool*) - (False) Use PDB format for output trajectory
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_unzip import pcz_unzip
            prop = {
                'pdb': False
            }
            pcz_unzip( input_pcz_path='/path/to/pcazip_input.pcz',
                        output_crd_path='/path/to/pcazip_traj.crd',
                        properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_crd_path: str, properties: Optional[dict] = None, **kwargs) -> None:
        # Stores the input/output paths and the tool properties, then runs the
        # inherited property and argument checks.
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. Taken before any other local
        # is defined so that it only contains the call parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_crd_path': output_crd_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pcaunzip')
        self.verbose = properties.get('verbose', False)
        self.pdb = properties.get('pdb', False)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcaunzip module.

        Returns 0 without running anything when ``check_restart()`` is true,
        otherwise ``self.return_code`` after the command has been executed.
        Exceptions raised while copying files or running the command are
        propagated, but the temporary folder is always handed to the cleanup.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).
        input_pcz_path = self.io_dict["in"]["input_pcz_path"]
        output_crd_path = self.io_dict["out"]["output_crd_path"]
        # Only bare file names are passed to the binary, which runs inside the sandbox.
        input_name = PurePath(input_pcz_path).name
        output_name = PurePath(output_crd_path).name

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        try:
            shutil.copy2(input_pcz_path, self.tmp_folder)

            # Command line
            # pcaunzip -i infile [-o outfile] [--pdb] [--verbose] [--help]
            self.cmd = ['cd', self.tmp_folder, ';',
                        self.binary_path,
                        "-i", input_name,
                        "-o", output_name
                        ]

            if self.verbose:
                self.cmd.append('-v')

            if self.pdb:
                self.cmd.append('--pdb')

            # Run Biobb block
            self.run_biobb()

            # Copy outputs from temporary folder to output path
            shutil.copy2(PurePath(self.tmp_folder).joinpath(output_name), PurePath(output_crd_path))
        finally:
            # Register and remove the temporary folder even when a copy or the
            # execution fails; previously a failure here left the sandbox behind.
            self.tmp_files.extend([
                self.tmp_folder
            ])
            self.remove_tmp_files()

        # Report missing outputs without raising (raise_exception=False).
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

147 

148 

def pcz_unzip(input_pcz_path: str,
              output_crd_path: str,
              properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`PCZunzip <flexserv.pcasuite.PCZunzip>`flexserv.pcasuite.PCZunzip class and
    execute :meth:`launch() <flexserv.pcasuite.PCZunzip.launch>` method"""

    # Extra keyword arguments are forwarded to the constructor, which accepts
    # **kwargs, instead of being silently discarded by this wrapper.
    return PCZunzip(input_pcz_path=input_pcz_path,
                    output_crd_path=output_crd_path,
                    properties=properties, **kwargs).launch()


# Expose the full class documentation on the function wrapper. This has to be
# a module-level statement: placed inside the function after the ``return`` it
# is unreachable and never runs.
pcz_unzip.__doc__ = PCZunzip.__doc__

160 

161 

def main():
    """Command line entry point.

    Parses ``--config``, ``--input_pcz_path`` and ``--output_crd_path``,
    reads the properties from the configuration and calls :func:`pcz_unzip`.
    """

    def wide_help_formatter(prog):
        # Very large width so that argparse does not wrap the help lines.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description='Uncompress Molecular Dynamics (MD) compressed trajectories using Principal Component Analysis (PCA) algorithms.',
        formatter_class=wide_help_formatter,
    )
    cli.add_argument('--config', required=False, help='Configuration file')

    # Specific args
    io_group = cli.add_argument_group('required arguments')
    io_group.add_argument(
        '--input_pcz_path',
        required=True,
        help='Input compressed trajectory file. Accepted formats: pcz.',
    )
    io_group.add_argument(
        '--output_crd_path',
        required=True,
        help='Output trajectory file. Accepted formats: crd, mdcrd, inpcrd, pdb.',
    )

    args = cli.parse_args()
    # Fall back to the literal "{}" when no configuration was given.
    if not args.config:
        args.config = "{}"
    conf_reader = settings.ConfReader(config=args.config)
    properties = conf_reader.get_prop_dic()

    # Specific call
    pcz_unzip(
        input_pcz_path=args.input_pcz_path,
        output_crd_path=args.output_crd_path,
        properties=properties,
    )


if __name__ == '__main__':
    main()