Coverage for biobb_flexserv/pcasuite/pcz_info.py: 95%

63 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-05-28 11:28 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZinfo class and the command line interface.""" 

4from typing import Optional 

5import json 

6from pathlib import Path, PurePath 

7from biobb_common.generic.biobb_object import BiobbObject 

8from biobb_common.tools.file_utils import launchlogger 

9 

10 

class PCZinfo(BiobbObject):
    """
    | biobb_flexserv PCZinfo
    | Extract PCA info (variance, Dimensionality) from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output json file with PCA info such as number of components, variance and dimensionality. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcz_info.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('afandiadib/ambertools:serial') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_info import pcz_info

            pcz_info( input_pcz_path='/path/to/pcazip_input.pcz',
                      output_json_path='/path/to/pcz_info.json')

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_json_path: str, properties: Optional[dict] = None, **kwargs) -> None:

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any other local
        # is bound so its content stays exactly the call arguments.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _parse_info(lines) -> dict:
        """Turn ``pczdump --info`` output into a dictionary.

        Each ``Label : value`` line becomes one entry; spaces inside the label
        are replaced by underscores. Example input::

            Title             : MC generated trajectory
            Atoms             : 85
            Total variance    : 1137.20
            Explained variance: 1043.32

        Args:
            lines: Iterable of text lines (e.g. an open text file).

        Returns:
            dict: Mapping of normalised label to stripped value (both str).
        """
        info_dict = {}
        for line in lines:
            # Split on the FIRST colon only, so values that contain a colon
            # themselves (e.g. a trajectory title) are kept whole.
            label, separator, value = line.partition(':')
            if not separator:
                # Blank or free-text line: there is no label/value pair to store.
                continue
            info_dict[label.strip().replace(' ', '_')] = value.strip()
        return info_dict

    @staticmethod
    def _add_eigenvalue_stats(info_dict: dict, lines) -> None:
        """Add the eigenvalues and their cumulative variance percentages.

        Reads one eigenvalue per line (the ``pczdump --evals`` output) and
        stores three lists in ``info_dict``: ``Eigen_Values``, plus the running
        percentage of each eigenvalue against ``Total_variance`` and against
        ``Explained_variance``.

        Args:
            info_dict (dict): Dictionary to update in place. It must hold the
                ``Total_variance`` and ``Explained_variance`` entries whenever
                at least one eigenvalue is present.
            lines: Iterable of text lines (e.g. an open text file).

        Raises:
            KeyError: A variance entry is missing and eigenvalues were found.
            ValueError: A non-blank line or a variance entry is not a number.
            ZeroDivisionError: A variance entry is zero.
        """
        # Blank lines (such as a trailing newline) carry no eigenvalue.
        stripped_lines = (line.strip() for line in lines)
        eigenvalues = [float(text) for text in stripped_lines if text]

        info_dict['Eigen_Values'] = eigenvalues
        info_dict['Eigen_Values_dimensionality_vs_total'] = []
        info_dict['Eigen_Values_dimensionality_vs_explained'] = []
        if not eigenvalues:
            # Nothing to accumulate; the variance entries are not needed.
            return

        # Loop invariants: convert the reference variances only once.
        total_variance = float(info_dict['Total_variance'])
        explained_variance = float(info_dict['Explained_variance'])

        accum_tot = 0
        accum_exp = 0
        for eigenvalue in eigenvalues:
            accum_tot = accum_tot + (eigenvalue / total_variance) * 100
            accum_exp = accum_exp + (eigenvalue / explained_variance) * 100
            info_dict['Eigen_Values_dimensionality_vs_total'].append(accum_tot)
            info_dict['Eigen_Values_dimensionality_vs_explained'].append(accum_exp)

    @launchlogger
    def launch(self) -> int:
        """Launches the execution of the FlexServ pcz_info module.

        Runs the pczdump binary twice on the staged input (``--info`` and
        ``--evals``), merges both outputs into one dictionary and writes it as
        JSON to the staged output path.

        Returns:
            int: 0 when the restart check short-circuits the run, otherwise
            the value of ``self.return_code`` after the run.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        unique_dir_name = self.stage_io_dict.get("unique_dir", "")
        if self.container_path:
            working_dir = self.container_volume_path or "/data"
        else:
            working_dir = unique_dir_name
        unique_dir = Path(unique_dir_name)

        # Temporary outputs: the command uses bare file names because it first
        # changes into the working directory; the parsing below reads the same
        # files through the unique directory.
        temp_out_1 = "output1.dat"
        temp_out_2 = "output2.dat"
        temp_out_1_path = unique_dir.joinpath(temp_out_1)
        temp_out_2_path = unique_dir.joinpath(temp_out_2)
        staged_output_json_path = unique_dir.joinpath(
            Path(self.stage_io_dict["out"]["output_json_path"]).name)

        # Command line, equivalent to:
        #   cd <working_dir> ; pczdump -i in.pcz -o output1.dat --info ;
        #   pczdump -i in.pcz -o output2.dat --evals
        input_pcz_name = PurePath(self.stage_io_dict["in"]["input_pcz_path"]).name
        self.cmd = ['cd', working_dir, ';',
                    self.binary_path,
                    "-i", input_pcz_name,
                    "-o", temp_out_1,
                    "--info", ';',
                    self.binary_path,
                    "-i", input_pcz_name,
                    "-o", temp_out_2,
                    "--evals"
                    ]

        # Run Biobb block
        self.run_biobb()

        # Parse output info
        with open(temp_out_1_path, 'r') as info_file:
            info_dict = self._parse_info(info_file)

        # Parse output evals
        with open(temp_out_2_path, 'r') as evals_file:
            self._add_eigenvalue_stats(info_dict, evals_file)

        with open(staged_output_json_path, 'w') as out_file:
            out_file.write(json.dumps(info_dict, indent=4))

        # Copy files to host
        self.copy_to_host()

        # Remove temporary folder(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

175 

176 

def pcz_info(input_pcz_path: str, output_json_path: str,
             properties: Optional[dict] = None, **kwargs) -> int:
    """Functional shortcut: build a :class:`PCZinfo` block and run its
    ``launch()`` method, returning whatever ``launch()`` returns."""
    # Capture the call arguments first, before any other local name exists,
    # so the constructor receives exactly what this function was given.
    call_arguments = dict(locals())
    building_block = PCZinfo(**call_arguments)
    return building_block.launch()

182 

183 

# The functional wrapper shares the class documentation.
pcz_info.__doc__ = PCZinfo.__doc__

# Entry point obtained from the class' get_main helper
# (presumably the command line interface; confirm in biobb_common).
main = PCZinfo.get_main(pcz_info, "Extract PCA info from a compressed PCZ file.")

if __name__ == "__main__":
    main()