Coverage for biobb_flexserv/pcasuite/pcz_evecs.py: 95%

63 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-05-28 11:28 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZevecs class and the command line interface.""" 

4from typing import Optional 

5import json 

6import math 

7from pathlib import Path, PurePath 

8from biobb_common.generic.biobb_object import BiobbObject 

9from biobb_common.tools.file_utils import launchlogger 

10 

11 

class PCZevecs(BiobbObject):
    """
    | biobb_flexserv PCZevecs
    | Extract PCA Eigen Vectors from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output json file with PCA Eigen Vectors. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcz_evecs.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) from which to extract eigen vectors.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('afandiadib/ambertools:serial') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_evecs import pcz_evecs

            prop = {
                'eigenvector': 1
            }

            pcz_evecs( input_pcz_path='/path/to/pcazip_input.pcz',
                    output_json_path='/path/to/pcz_evecs.json',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_json_path: str, properties: Optional[dict] = None, **kwargs) -> None:

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor's local names (arguments included);
        # taken here, before any further local variable is bound.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 1)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _read_evecs(evecs_path):
        """Return every whitespace-separated token of *evecs_path* as a flat list of strings.

        The tokens are kept as text (not converted to numbers) because they
        are written verbatim into the ``evecs`` entry of the output JSON.

        Expected file layout (values separated by blanks, several per line)::

            0.180 -0.069 0.168 0.204 -0.054 0.235 0.145 -0.001 0.260 0.183
            -0.041 0.231 0.174 -0.077 0.144 0.097 -0.022 0.143 0.069 0.008
        """
        with open(evecs_path, 'r') as evecs_file:
            # str.split() with no separator collapses any run of whitespace
            # (blanks or tabs) and never yields empty tokens.
            return [token for line in evecs_file for token in line.split()]

    @staticmethod
    def _compute_projections(evecs):
        """Return the Euclidean norm of each consecutive triplet of *evecs*.

        Args:
            evecs: sequence of numeric strings (or numbers) in file order.

        Returns:
            list of floats, one per complete group of three values, each
            rounded to 4 decimal places. Values left over after the last
            complete triplet do not produce an entry.

        Raises:
            ValueError: if any value cannot be converted to float.
        """
        # Convert once, up front, so that every token is validated even when
        # it belongs to an incomplete trailing group.
        values = [float(token) for token in evecs]

        projections = []
        for triplet_index in range(len(values) // 3):
            start = 3 * triplet_index
            squared_norm = 0
            for value in values[start:start + 3]:
                squared_norm = squared_norm + value * value
            norm = math.sqrt(squared_norm)
            # Round through string formatting to keep 4 decimals in the JSON.
            projections.append(float("{:.4f}".format(norm)))
        return projections

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcz_evecs module.

        Runs the configured binary on the staged PCZ file, reads the values it
        writes, derives one projection per triplet of values and stores both
        lists in the output JSON file.

        Returns:
            0 when ``check_restart()`` reports that execution must be skipped,
            otherwise ``self.return_code``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Directory the command changes into before calling the binary.
        if self.container_path:
            working_dir = self.container_volume_path or "/data"
        else:
            working_dir = self.stage_io_dict.get("unique_dir", "")

        unique_dir = Path(self.stage_io_dict.get("unique_dir", ""))

        # Temporary output: the binary is given a bare file name, which is
        # then read back from the staging directory.
        temp_out = "output.dat"
        temp_out_path = unique_dir.joinpath(temp_out)
        staged_output_json_path = unique_dir.joinpath(Path(self.stage_io_dict["out"]["output_json_path"]).name)

        # Command line
        # pczdump -i structure.ca.std.pcz --evecs -o pcz.evecs
        # NOTE(review): the 'cd <dir> ;' prefix presumably relies on
        # run_biobb() executing the tokens through a shell - confirm.
        self.cmd = ['cd', working_dir, ';',
                    self.binary_path,
                    '-i', PurePath(self.stage_io_dict["in"]["input_pcz_path"]).name,
                    '-o', temp_out,
                    "--evec={}".format(self.eigenvector)
                    ]

        # Run Biobb block
        self.run_biobb()

        # Parse output evecs and compute the projections
        evecs = self._read_evecs(temp_out_path)
        info_dict = {
            'evecs': evecs,
            'projs': self._compute_projections(evecs)
        }

        with open(staged_output_json_path, 'w') as out_file:
            out_file.write(json.dumps(info_dict, indent=4))

        # Copy files to host
        self.copy_to_host()

        # Remove temporary folder(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

162 

163 

def pcz_evecs(input_pcz_path: str, output_json_path: str,
              properties: Optional[dict] = None, **kwargs) -> int:
    """Build a :class:`PCZevecs` object from the given arguments and run its
    :meth:`launch` method, returning whatever ``launch()`` returns.

    The extra keyword arguments are handed to the constructor as one keyword
    named ``kwargs`` holding the whole mapping; they are not expanded."""
    constructor_args = {
        'input_pcz_path': input_pcz_path,
        'output_json_path': output_json_path,
        'properties': properties,
        'kwargs': kwargs,
    }
    block = PCZevecs(**constructor_args)
    return block.launch()

169 

170 

# The functional wrapper shares the documentation of the class.
pcz_evecs.__doc__ = PCZevecs.__doc__

# Entry point callable obtained from the class-level factory; it receives the
# functional wrapper together with a one-line description of the tool.
main = PCZevecs.get_main(
    pcz_evecs,
    "Extract PCA Eigen Vectors from a compressed PCZ file.",
)

# Run the entry point only when the module is executed as a script.
if __name__ == "__main__":
    main()