Coverage for biobb_flexserv/pcasuite/pcz_zip.py: 85%

48 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-05-28 11:28 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZzip class and the command line interface."""

4from typing import Optional 

5from pathlib import PurePath 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.tools.file_utils import launchlogger 

8 

9 

class PCZzip(BiobbObject):
    """
    | biobb_flexserv PCZzip
    | Wrapper of the pcazip tool from the PCAsuite FlexServ module.
    | Compress Molecular Dynamics (MD) trajectories using Principal Component Analysis (PCA) algorithms.

    Args:
        input_pdb_path (str): Input PDB file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/structure.ca.pdb>`_. Accepted formats: pdb (edam:format_1476).
        input_crd_path (str): Input Trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/traj.crd>`_. Accepted formats: crd (edam:format_3878), mdcrd (edam:format_3878), inpcrd (edam:format_3878).
        output_pcz_path (str): Output compressed trajectory. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pcazip") pcazip binary path to be used.
            * **neigenv** (*int*) - (0) Number of generated eigenvectors
            * **variance** (*int*) - (90) Percentage of variance captured by the final set of eigenvectors
            * **verbose** (*bool*) - (False) Make output verbose
            * **gauss_rmsd** (*bool*) - (False) Use a gaussian RMSd for fitting
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('afandiadib/ambertools:serial') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_zip import pcz_zip
            prop = {
                'variance': 90
            }
            pcz_zip( input_pdb_path='/path/to/pcazip_input.pdb',
                    input_crd_path='/path/to/pcazip_input.crd',
                    output_pcz_path='/path/to/pcazip_traj.pcz',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pdb_path: str, input_crd_path: str,
                 output_pcz_path: str, properties: Optional[dict] = None, **kwargs) -> None:
        """Record the input/output paths and read the pcazip options from ``properties``."""
        if not properties:
            properties = {}

        # Parent constructor first, then snapshot the constructor arguments.
        # The snapshot must be taken before any further local name is bound
        # so that it holds the call arguments only.
        super().__init__(properties)
        self.locals_var_dict = dict(locals())

        # Files consumed and produced by the block, keyed by argument name.
        input_files = {'input_pdb_path': input_pdb_path,
                       'input_crd_path': input_crd_path}
        output_files = {'output_pcz_path': output_pcz_path}
        self.io_dict = {'in': input_files, 'out': output_files}

        # Options specific to this building block.
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pcazip')
        self.neigenv = properties.get('neigenv', 0)
        # No default is applied to 'variance': when the caller does not
        # provide it the attribute is None and launch() omits the -q option.
        self.variance = properties.get('variance', None)
        self.verbose = properties.get('verbose', False)
        self.gauss_rmsd = properties.get('gauss_rmsd', False)

        # Validate the properties and the file arguments.
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcazip module.

        Returns 0 straight away when ``check_restart()`` is true; otherwise
        builds the pcazip command line, runs it and returns
        ``self.return_code``.
        """
        # Biobb setup: honour the restart check, then stage the files.
        if self.check_restart():
            return 0
        self.stage_files()

        # Directory the command changes into before calling pcazip.
        if self.container_path:
            working_dir = self.container_volume_path or "/data"
        else:
            working_dir = self.stage_io_dict.get("unique_dir", "")

        # Files are referenced by bare name because the command first
        # changes into the working directory.
        staged_inputs = self.stage_io_dict["in"]
        staged_outputs = self.stage_io_dict["out"]
        pdb_name = PurePath(staged_inputs["input_pdb_path"]).name
        crd_name = PurePath(staged_inputs["input_crd_path"]).name
        pcz_name = PurePath(staged_outputs["output_pcz_path"]).name

        # pcazip usage:
        #   pcazip -i infile -o outfile -n natoms
        #          [-v] [--mask maskfile] [-e nev] [-q qual] [--pdb pdbfile]
        self.cmd = ['cd', working_dir, ';', self.binary_path,
                    "-p", pdb_name,
                    "-i", crd_name,
                    "-o", pcz_name]

        # Boolean switches.
        if self.verbose:
            self.cmd.append('-v')
        if self.gauss_rmsd:
            self.cmd.append('-g')

        # Valued options, passed only when set to a truthy value.
        if self.neigenv:
            self.cmd.extend(['-e', str(self.neigenv)])
        if self.variance:
            self.cmd.extend(['-q', str(self.variance)])

        # Execute the command, bring the results back to the host and
        # clean up the temporary folder(s).
        self.run_biobb()
        self.copy_to_host()
        self.remove_tmp_files()

        # Check the output files without raising an exception.
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

144 

145 

def pcz_zip(input_pdb_path: str, input_crd_path: str,
            output_pcz_path: str,
            properties: Optional[dict] = None, **kwargs) -> int:
    """Build a :class:`PCZzip` block from the given arguments, run its
    ``launch()`` method and return the result."""
    # Spelled-out equivalent of expanding this function's locals(): the extra
    # keyword arguments travel as one mapping under the keyword name 'kwargs'.
    block = PCZzip(input_pdb_path=input_pdb_path,
                   input_crd_path=input_crd_path,
                   output_pcz_path=output_pcz_path,
                   properties=properties,
                   kwargs=kwargs)
    return block.launch()

152 

153 

# The functional wrapper exposes the same documentation as the class.
pcz_zip.__doc__ = PCZzip.__doc__

# Command line entry point obtained from the class for the wrapper function.
main = PCZzip.get_main(
    pcz_zip,
    "Compress Molecular Dynamics (MD) trajectories using Principal Component Analysis (PCA) algorithms.",
)

if __name__ == '__main__':
    main()