Coverage for biobb_flexserv / pcasuite / pcz_zip.py: 88%

51 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-05 13:10 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCAzip class and the command line interface.""" 

4from typing import Optional 

5import shutil 

6from pathlib import PurePath 

7from biobb_common.tools import file_utils as fu 

8from biobb_common.generic.biobb_object import BiobbObject 

9from biobb_common.tools.file_utils import launchlogger 

10 

11 

class PCZzip(BiobbObject):
    """
    | biobb_flexserv PCZzip
    | Wrapper of the pcazip tool from the PCAsuite FlexServ module.
    | Compress Molecular Dynamics (MD) trajectories using Principal Component Analysis (PCA) algorithms.

    Args:
        input_pdb_path (str): Input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/structure.ca.pdb>`_. Accepted formats: pdb (edam:format_1476).
        input_crd_path (str): Input Trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/traj.crd>`_. Accepted formats: crd (edam:format_3878), mdcrd (edam:format_3878), inpcrd (edam:format_3878).
        output_pcz_path (str): Output compressed trajectory. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pcazip") pcazip binary path to be used.
            * **neigenv** (*int*) - (0) Number of generated eigenvectors
            * **variance** (*int*) - (90) Percentage of variance captured by the final set of eigenvectors
            * **verbose** (*bool*) - (False) Make output verbose
            * **gauss_rmsd** (*bool*) - (False) Use a gaussian RMSd for fitting
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_zip import pcz_zip
            prop = {
                'variance': 90
            }
            pcz_zip( input_pdb_path='/path/to/pcazip_input.pdb',
                     input_crd_path='/path/to/pcazip_input.crd',
                     output_pcz_path='/path/to/pcazip_traj.pcz',
                     properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pdb_path: str, input_crd_path: str,
                 output_pcz_path: str, properties: Optional[dict] = None, **kwargs) -> None:
        """Store the input/output paths and the tool properties, then validate them.

        Args:
            input_pdb_path: Path of the input PDB file.
            input_crd_path: Path of the input trajectory file.
            output_pcz_path: Path where the compressed trajectory is written.
            properties: Optional dictionary of tool parameters; ``None`` is
                treated as an empty dictionary.
            **kwargs: Accepted for interface compatibility; only captured
                through ``locals()`` below.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # other local name is bound so that it only holds the parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pdb_path': input_pdb_path,
                   'input_crd_path': input_crd_path},
            'out': {'output_pcz_path': output_pcz_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pcazip')
        self.neigenv = properties.get('neigenv', 0)
        # No wrapper-side default for 'variance': when it is unset, launch()
        # omits the '-q' flag altogether instead of passing 90 explicitly.
        self.variance = properties.get('variance')
        self.verbose = properties.get('verbose', False)
        self.gauss_rmsd = properties.get('gauss_rmsd', False)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcazip module.

        The inputs are copied into a freshly created temporary folder, pcazip
        is run from inside that folder using bare file names, and the
        resulting file is copied back to ``output_pcz_path``.

        Returns:
            ``0`` when ``check_restart()`` reports that execution can be
            skipped, otherwise ``self.return_code`` as left by ``run_biobb()``.

        Raises:
            OSError: Propagated from ``shutil.copy2`` when an input cannot be
                staged or the expected output is missing from the temporary
                folder. The temporary folder is still cleaned up in that case.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # The standard stage_files()/copy_to_host() staging is intentionally
        # not used here; a sandbox is created manually instead.
        #
        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        input_pdb_path = self.io_dict["in"]["input_pdb_path"]
        input_crd_path = self.io_dict["in"]["input_crd_path"]
        output_pcz_path = self.io_dict["out"]["output_pcz_path"]
        output_pcz_name = PurePath(output_pcz_path).name

        # Creating temporary folder
        tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % tmp_folder, self.out_log)

        try:
            shutil.copy2(input_pdb_path, tmp_folder)
            shutil.copy2(input_crd_path, tmp_folder)

            # Command line
            # pcazip -i infile -o outfile -n natoms
            # [-v] [--mask maskfile] [-e nev] [-q qual] [--pdb pdbfile]
            #
            # Only bare file names are passed: the command first changes into
            # the temporary folder, which keeps the arguments short.
            self.cmd = ['cd', tmp_folder, ';',
                        self.binary_path,
                        "-p", PurePath(input_pdb_path).name,
                        "-i", PurePath(input_crd_path).name,
                        "-o", output_pcz_name
                        ]

            if self.verbose:
                self.cmd.append('-v')

            if self.gauss_rmsd:
                self.cmd.append('-g')

            # Falsy values (0 / None) mean "not requested": the flag is omitted.
            if self.neigenv:
                self.cmd.append('-e')
                self.cmd.append(str(self.neigenv))

            if self.variance:
                self.cmd.append('-q')
                self.cmd.append(str(self.variance))

            # Run Biobb block
            self.run_biobb()

            # Copy outputs from temporary folder to output path
            shutil.copy2(PurePath(tmp_folder).joinpath(output_pcz_name), PurePath(output_pcz_path))
        finally:
            # Remove temporary folder(s). Done in a finally clause so that the
            # sandbox is not leaked when staging, execution or the copy-back
            # of the output raises.
            self.tmp_files.append(tmp_folder)
            self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

163 

164 

def pcz_zip(input_pdb_path: str, input_crd_path: str,
            output_pcz_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Build a ``PCZzip`` object from the given arguments and run its ``launch()`` method.

    Args:
        input_pdb_path: Path of the input PDB file.
        input_crd_path: Path of the input trajectory file.
        output_pcz_path: Path of the compressed trajectory to be written.
        properties: Optional dictionary with the tool parameters.
        **kwargs: Extra keyword arguments; they are handed to ``PCZzip``
            bundled together under a single ``kwargs`` keyword.

    Returns:
        Whatever ``PCZzip.launch()`` returns.
    """
    building_block = PCZzip(
        input_pdb_path=input_pdb_path,
        input_crd_path=input_crd_path,
        output_pcz_path=output_pcz_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()

171 

172 

# The functional wrapper exposes the same documentation as the class.
pcz_zip.__doc__ = PCZzip.__doc__

# Command line entry point built from the functional wrapper.
main = PCZzip.get_main(
    pcz_zip,
    "Compress Molecular Dynamics (MD) trajectories using Principal Component Analysis (PCA) algorithms.",
)

if __name__ == '__main__':
    main()