Coverage for biobb_flexserv / pcasuite / pcz_evecs.py: 97%

63 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-05 13:10 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZevecs class and the command line interface.""" 

4from typing import Optional 

5import shutil 

6import json 

7import math 

8from pathlib import PurePath 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class PCZevecs(BiobbObject):
    """
    | biobb_flexserv PCZevecs
    | Extract PCA Eigen Vectors from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output json file with PCA Eigen Vectors. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcz_evecs.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) from which to extract eigen vectors.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_evecs import pcz_evecs

            prop = {
                'eigenvector': 1
            }

            pcz_evecs( input_pcz_path='/path/to/pcazip_input.pcz',
                    output_json_path='/path/to/pcz_evecs.json',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_json_path: str, properties: Optional[dict] = None, **kwargs) -> None:
        """Store the input/output paths and the tool properties.

        Args:
            input_pcz_path: Path of the compressed PCZ trajectory to read.
            output_json_path: Path of the JSON file to be written.
            properties: Tool parameters; an empty dict is used when omitted.
            **kwargs: Accepted for call compatibility; only recorded through
                the ``locals()`` snapshot below.
        """

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # other local name is created in this scope.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 1)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _read_evecs(evecs_path):
        """Return every whitespace-separated token of the pczdump output file.

        Tokens are kept as strings, exactly as written by the tool, e.g.::

            0.180 -0.069 0.168 0.204 -0.054 0.235 0.145 -0.001 0.260 0.183
            -0.041 0.231 0.174 -0.077 0.144 0.097 -0.022 0.143 0.069 0.008
        """
        with open(evecs_path, 'r') as evecs_file:
            # str.split() without arguments already discards empty tokens and
            # the trailing newline.
            return [token for line in evecs_file for token in line.split()]

    @staticmethod
    def _compute_projections(evecs):
        """Return the Euclidean norm of each consecutive triple of components.

        Every norm is rounded to 4 decimals. A trailing group with fewer than
        three components is ignored.
        """
        projs = []
        for start in range(0, len(evecs) - 2, 3):
            x, y, z = (float(value) for value in evecs[start:start + 3])
            norm = math.sqrt(x * x + y * y + z * z)
            projs.append(float("{:.4f}".format(norm)))
        return projs

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcz_evecs module.

        Returns:
            0 when ``check_restart()`` reports that execution must be skipped,
            otherwise ``self.return_code`` as left by ``run_biobb()``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).
        # For that reason stage_files() / copy_to_host() are intentionally not used here.

        # Creating temporary folder
        tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % tmp_folder, self.out_log)

        try:
            shutil.copy2(self.io_dict["in"]["input_pcz_path"], tmp_folder)

            # Temporary outputs, relative to the sandbox
            temp_out = "output.dat"
            temp_json = "output.json"

            # Command line, run from inside the sandbox so only short,
            # relative file names reach the binary:
            # pczdump -i structure.ca.std.pcz --evecs -o pcz.evecs
            self.cmd = ['cd', tmp_folder, ';',
                        self.binary_path,
                        '-i', PurePath(self.io_dict["in"]["input_pcz_path"]).name,
                        '-o', temp_out,
                        "--evec={}".format(self.eigenvector)
                        ]

            # Run Biobb block
            self.run_biobb()

            # Parse output evecs and compute the projections
            evecs = self._read_evecs(PurePath(tmp_folder).joinpath(temp_out))
            info_dict = {
                'evecs': evecs,
                'projs': self._compute_projections(evecs)
            }

            with open(PurePath(tmp_folder).joinpath(temp_json), 'w') as out_file:
                out_file.write(json.dumps(info_dict, indent=4))

            # Copy outputs from temporary folder to output path
            shutil.copy2(PurePath(tmp_folder).joinpath(temp_json), PurePath(self.io_dict["out"]["output_json_path"]))
        finally:
            # Remove temporary folder(s), also when any step above failed, so
            # a crashed run does not leave the sandbox behind.
            self.tmp_files.append(tmp_folder)
            self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

176 

177 

def pcz_evecs(input_pcz_path: str, output_json_path: str,
              properties: Optional[dict] = None, **kwargs) -> int:
    """Build a :class:`PCZevecs` building block from the given arguments and
    return the result of its ``launch()`` method."""
    # Extra keyword arguments are handed over as one keyword named ``kwargs``.
    block = PCZevecs(input_pcz_path=input_pcz_path,
                     output_json_path=output_json_path,
                     properties=properties,
                     kwargs=kwargs)
    return block.launch()

183 

184 

# The functional wrapper shares the documentation of the class it wraps.
pcz_evecs.__doc__ = PCZevecs.__doc__

# Command line entry point, produced by the get_main() helper that PCZevecs
# inherits from its base class.
main = PCZevecs.get_main(
    pcz_evecs,
    "Extract PCA Eigen Vectors from a compressed PCZ file.",
)


if __name__ == "__main__":
    main()