Coverage for biobb_flexserv/pcasuite/pcz_zip.py: 73%

63 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-19 15:08 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCAzip class and the command line interface.""" 

4import argparse 

5from typing import Optional 

6import shutil 

7from pathlib import PurePath 

8from biobb_common.tools import file_utils as fu 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.configuration import settings 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class PCZzip(BiobbObject):
    """
    | biobb_flexserv PCZzip
    | Wrapper of the pcazip tool from the PCAsuite FlexServ module.
    | Compress Molecular Dynamics (MD) trajectories using Principal Component Analysis (PCA) algorithms.

    Args:
        input_pdb_path (str): Input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/structure.ca.pdb>`_. Accepted formats: pdb (edam:format_1476).
        input_crd_path (str): Input Trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/traj.crd>`_. Accepted formats: crd (edam:format_3878), mdcrd (edam:format_3878), inpcrd (edam:format_3878).
        output_pcz_path (str): Output compressed trajectory. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pcazip") pcazip binary path to be used.
            * **neigenv** (*int*) - (0) Number of generated eigenvectors
            * **variance** (*int*) - (90) Percentage of variance captured by the final set of eigenvectors
            * **verbose** (*bool*) - (False) Make output verbose
            * **gauss_rmsd** (*bool*) - (False) Use a gaussian RMSd for fitting
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_zip import pcz_zip
            prop = {
                'variance': 90
            }
            pcz_zip( input_pdb_path='/path/to/pcazip_input.pdb',
                    input_crd_path='/path/to/pcazip_input.crd',
                    output_pcz_path='/path/to/pcazip_traj.pcz',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pdb_path: str, input_crd_path: str,
                 output_pcz_path: str, properties: Optional[dict] = None, **kwargs) -> None:
        """Register the input/output paths and read the tool properties.

        A missing ``properties`` argument is treated as an empty dict.
        """
        properties = properties or {}

        # Parent constructor first; the snapshot of the call arguments must be
        # taken right after it, before any further local name is introduced.
        super().__init__(properties)
        self.locals_var_dict = locals().copy()

        # Input/output file registry
        self.io_dict = {
            'in': {
                'input_pdb_path': input_pdb_path,
                'input_crd_path': input_crd_path,
            },
            'out': {
                'output_pcz_path': output_pcz_path,
            },
        }

        # Tool-specific properties, each with its fallback value
        self.properties = properties
        lookup = properties.get
        self.binary_path = lookup('binary_path', 'pcazip')
        self.neigenv = lookup('neigenv', 0)
        # NOTE(review): the class docstring advertises 90 as the default, but
        # no default is applied here; when 'variance' is absent the '-q' flag
        # is simply not passed to the binary.
        self.variance = lookup('variance', None)
        self.verbose = lookup('verbose', False)
        self.gauss_rmsd = lookup('gauss_rmsd', False)

        # Validate properties and arguments
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self):
        """Run pcazip inside a temporary working folder and collect its output.

        The input files are copied into a freshly created folder, the binary
        is invoked from within that folder using bare file names, and the
        resulting pcz file is copied to ``output_pcz_path``.

        Returns:
            0 when ``check_restart()`` reports that execution must be skipped,
            otherwise ``self.return_code``.
        """
        if self.check_restart():
            return 0

        # A sandbox is created by hand to avoid input-parameter buffer overflows:
        # Fortran or C compiled programs crash when a file path argument is longer
        # than the string declared to hold it. Working inside a temporary folder
        # with bare file names keeps the arguments short.
        # The problem was found in Galaxy executions launching Singularity
        # containers (May 2023).
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        in_files = self.io_dict["in"]
        pcz_target = self.io_dict["out"]["output_pcz_path"]

        for in_key in ("input_pdb_path", "input_crd_path"):
            shutil.copy2(in_files[in_key], self.tmp_folder)

        pcz_name = PurePath(pcz_target).name

        # Command line
        # pcazip -i infile -o outfile -n natoms
        #        [-v] [--mask maskfile] [-e nev] [-q qual] [--pdb pdbfile]
        command = [
            'cd', self.tmp_folder, ';',
            self.binary_path,
            "-p", PurePath(in_files["input_pdb_path"]).name,
            "-i", PurePath(in_files["input_crd_path"]).name,
            "-o", pcz_name,
        ]

        # Optional flags, appended only when the matching property is truthy
        if self.verbose:
            command.append('-v')
        if self.gauss_rmsd:
            command.append('-g')
        if self.neigenv:
            command += ['-e', str(self.neigenv)]
        if self.variance:
            command += ['-q', str(self.variance)]

        self.cmd = command

        # Run Biobb block
        self.run_biobb()

        # Bring the result out of the temporary folder to the requested path
        produced = PurePath(self.tmp_folder) / pcz_name
        shutil.copy2(produced, PurePath(pcz_target))

        # Schedule the temporary folder for removal and clean up
        self.tmp_files.append(self.tmp_folder)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

168 

169 

def pcz_zip(input_pdb_path: str, input_crd_path: str,
            output_pcz_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Create the :class:`PCZzip <flexserv.pcasuite.PCZzip>` class and
    execute the :meth:`launch() <flexserv.pcasuite.PCZzip.launch>` method."""
    # NOTE: **kwargs is accepted by the signature but is not forwarded to PCZzip.
    return PCZzip(input_pdb_path=input_pdb_path,
                  input_crd_path=input_crd_path,
                  output_pcz_path=output_pcz_path,
                  properties=properties).launch()


# This assignment has to live at module level. It used to sit inside the
# function body after the ``return`` statement, where it could never run, so
# the wrapper function never exposed the PCZzip class documentation.
pcz_zip.__doc__ = PCZzip.__doc__

182 

183 

def main():
    """Command-line entry point: parse the arguments and call ``pcz_zip``."""

    def _wide_formatter(prog):
        # Very large width so help strings are not wrapped
        return argparse.RawTextHelpFormatter(prog, width=99999)

    parser = argparse.ArgumentParser(
        description='Compress Molecular Dynamics (MD) trajectories using Principal Component Analysis (PCA) algorithms.',
        formatter_class=_wide_formatter,
    )
    parser.add_argument('--config', required=False, help='Configuration file')

    # Tool-specific, mandatory file arguments
    io_group = parser.add_argument_group('required arguments')
    required_flags = (
        ('--input_pdb_path', 'Input PDB file. Accepted formats: pdb.'),
        ('--input_crd_path', 'Input trajectory file. Accepted formats: crd, mdcrd, inpcrd.'),
        ('--output_pcz_path', 'Output compressed trajectory file. Accepted formats: pcz.'),
    )
    for flag, help_text in required_flags:
        io_group.add_argument(flag, required=True, help=help_text)

    cli = parser.parse_args()

    # With no --config given, an empty JSON object is handed to the reader
    cli.config = cli.config or "{}"
    tool_properties = settings.ConfReader(config=cli.config).get_prop_dic()

    # Run the building block
    pcz_zip(
        input_pdb_path=cli.input_pdb_path,
        input_crd_path=cli.input_crd_path,
        output_pcz_path=cli.output_pcz_path,
        properties=tool_properties,
    )


if __name__ == '__main__':
    main()