Coverage for biobb_flexserv/pcasuite/pcz_evecs.py: 84%

74 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-19 15:08 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZevecs class and the command line interface.""" 

4import argparse 

5from typing import Optional 

6import shutil 

7import json 

8import math 

9from pathlib import PurePath 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.generic.biobb_object import BiobbObject 

12from biobb_common.configuration import settings 

13from biobb_common.tools.file_utils import launchlogger 

14 

15 

class PCZevecs(BiobbObject):
    """
    | biobb_flexserv PCZevecs
    | Extract PCA Eigen Vectors from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output json file with PCA Eigen Vectors. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcz_evecs.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) from which to extract eigen vectors.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_evecs import pcz_evecs

            prop = {
                'eigenvector': 1
            }

            pcz_evecs( input_pcz_path='/path/to/pcazip_input.pcz',
                    output_json_path='/path/to/pcz_evecs.json',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_json_path: str, properties: Optional[dict] = None, **kwargs) -> None:

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; taken before any other local
        # is created so that it only contains the call parameters.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 1)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _read_evecs(evecs_path) -> list:
        """Return every whitespace-separated token of the pczdump output file.

        The file is expected to hold rows of numbers such as::

            0.180 -0.069 0.168 0.204 -0.054 0.235 0.145 -0.001 0.260 0.183

        Tokens are returned as strings, in file order, exactly as written by
        the tool (they are not converted to float here).
        """
        evecs = []
        with open(evecs_path, 'r') as file:
            for line in file:
                # split() without arguments drops empty tokens and copes with
                # any run of blanks (spaces or tabs) between the numbers.
                evecs.extend(line.split())
        return evecs

    @staticmethod
    def _compute_projections(evecs) -> list:
        """Return the Euclidean norm of each consecutive group of three values.

        Each group of three components is presumably the (x, y, z) part of the
        eigenvector for one atom -- TODO confirm against the pczdump format.
        Norms are rounded to 4 decimals. Trailing values that do not complete
        a group of three are ignored.
        """
        projs = []
        complete = len(evecs) - len(evecs) % 3
        for start in range(0, complete, 3):
            first, second, third = (float(num) for num in evecs[start:start + 3])
            norm = math.sqrt(first * first + second * second + third * third)
            projs.append(float("{:.4f}".format(norm)))
        return projs

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcz_evecs module."""

        # Setup Biobb
        if self.check_restart():
            return 0

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        try:
            shutil.copy2(self.io_dict["in"]["input_pcz_path"], self.tmp_folder)

            # Temporary outputs, written with short names inside the sandbox
            temp_out = "output.dat"
            temp_json = "output.json"

            # Command line
            # pczdump -i structure.ca.std.pcz --evecs -o pcz.evecs
            self.cmd = ['cd', self.tmp_folder, ';',
                        self.binary_path,
                        '-i', PurePath(self.io_dict["in"]["input_pcz_path"]).name,
                        '-o', temp_out,
                        "--evec={}".format(self.eigenvector)
                        ]

            # Run Biobb block
            self.run_biobb()

            # Parse output evecs and compute the projections
            evecs = self._read_evecs(PurePath(self.tmp_folder).joinpath(temp_out))
            info_dict = {
                'evecs': evecs,
                'projs': self._compute_projections(evecs)
            }

            json_path = PurePath(self.tmp_folder).joinpath(temp_json)
            with open(json_path, 'w') as out_file:
                out_file.write(json.dumps(info_dict, indent=4))

            # Copy outputs from temporary folder to output path
            shutil.copy2(json_path, PurePath(self.io_dict["out"]["output_json_path"]))
        finally:
            # Remove temporary folder(s) even when the tool or the parsing
            # fails, so a failed run does not leave the sandbox behind.
            self.tmp_files.extend([
                self.tmp_folder
            ])
            self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

181 

182 

def pcz_evecs(input_pcz_path: str, output_json_path: str,
              properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`PCZevecs <flexserv.pcasuite.pcz_evecs>`flexserv.pcasuite.PCZevecs class and
    execute :meth:`launch() <flexserv.pcasuite.pcz_evecs.launch>` method"""

    return PCZevecs(input_pcz_path=input_pcz_path,
                    output_json_path=output_json_path,
                    properties=properties).launch()


# Expose the class documentation (arguments, properties, examples) on the
# function wrapper. This assignment must live at module level: placed inside
# the function after the ``return`` it would be unreachable and never run.
pcz_evecs.__doc__ = PCZevecs.__doc__

193 

194 

def main():
    """Command line entry point: parse the arguments and run the pcz_evecs building block."""

    def wide_help_formatter(prog):
        # Very large width so that argparse never wraps the help strings
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description='Extract PCA Eigen Vectors from a compressed PCZ file.',
        formatter_class=wide_help_formatter)
    cli.add_argument('--config', required=False, help='Configuration file')

    # Arguments specific to this building block
    mandatory = cli.add_argument_group('required arguments')
    mandatory.add_argument('--input_pcz_path', required=True,
                           help='Input compressed trajectory file. Accepted formats: pcz.')
    mandatory.add_argument('--output_json_path', required=True,
                           help='Output json file with PCA evecs. Accepted formats: json.')

    parsed = cli.parse_args()

    # Fall back to an empty JSON configuration when --config is not given
    config = parsed.config or "{}"
    bb_properties = settings.ConfReader(config=config).get_prop_dic()

    # Launch the building block with the parsed paths and properties
    pcz_evecs(input_pcz_path=parsed.input_pcz_path,
              output_json_path=parsed.output_json_path,
              properties=bb_properties)

212 

213 

# Run the command line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()