Coverage for biobb_flexserv/pcasuite/pcz_collectivity.py: 80%

59 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-19 15:08 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZcollectivity class and the command line interface.""" 

4import argparse 

5from typing import Optional 

6import shutil 

7from pathlib import PurePath 

8from biobb_common.tools import file_utils as fu 

9import json 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.configuration import settings 

12from biobb_common.tools.file_utils import launchlogger 

13 

14 

class PCZcollectivity(BiobbObject):
    """
    | biobb_flexserv PCZcollectivity
    | Extract PCA collectivity (numerical measure of how many atoms are affected by a given mode) from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output json file with PCA Collectivity indexes per mode. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/pcz_collectivity.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **eigenvector** (*int*) - (0) PCA mode (eigenvector) from which to extract collectivity.
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_collectivity import pcz_collectivity

            prop = {
                'eigenvector': 1
            }

            pcz_collectivity(input_pcz_path='/path/to/pcazip_input.pcz',
                             output_json_path='/path/to/pcz_collectivity.json',
                             properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str,
                 output_json_path: str, properties: Optional[dict] = None, **kwargs) -> None:
        """Record the input/output paths and tool properties, then validate them.

        Args:
            input_pcz_path (str): Path to the input PCZ file.
            output_json_path (str): Path where the resulting JSON is written.
            properties (dict): Tool parameters; ``None`` is treated as ``{}``.
            **kwargs: Accepted for interface compatibility; not read here
                (they are only captured in ``locals_var_dict``).
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 0)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Launches the execution of the FlexServ pcz_collectivity module.

        Runs pczdump inside a freshly created temporary folder, parses the
        collectivity values it writes (one float per line) and stores them
        under the ``collectivity`` key of the output JSON file.

        Returns:
            int: ``0`` when ``check_restart()`` short-circuits the run,
            otherwise ``self.return_code`` (presumably set by ``run_biobb()``
            - confirm against BiobbObject).

        Raises:
            ValueError: If a non-blank line of the pczdump output is not a
                valid float.
        """
        # Setup Biobb
        if self.check_restart():
            return 0

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        shutil.copy2(self.io_dict["in"]["input_pcz_path"], self.tmp_folder)
        sandbox = PurePath(self.tmp_folder)

        # File names are relative to the sandbox, keeping the paths handed to pczdump short
        temp_out = "output.dat"
        temp_json = "output.json"

        # Command line
        # pczdump -i structure.ca.std.pcz --collectivity -o pcz.collectivity
        self.cmd = ['cd', self.tmp_folder, ';',
                    self.binary_path,
                    '-i', PurePath(self.io_dict["in"]["input_pcz_path"]).name,
                    '-o', temp_out,
                    "--collectivity={}".format(self.eigenvector)
                    ]

        # Run Biobb block
        self.run_biobb()

        # Parse output collectivity, one value per line:
        # 0.132891
        # 0.165089
        # 0.147202
        # Blank lines (e.g. a trailing empty line) are skipped instead of
        # aborting the whole run with a ValueError.
        with open(sandbox.joinpath(temp_out), 'r') as dat_file:
            collectivity = [float(line) for line in dat_file if line.strip()]
        info_dict = {'collectivity': collectivity}

        with open(sandbox.joinpath(temp_json), 'w') as out_file:
            json.dump(info_dict, out_file, indent=4)

        # Copy outputs from temporary folder to output path
        shutil.copy2(sandbox.joinpath(temp_json), PurePath(self.io_dict["out"]["output_json_path"]))

        # remove temporary folder(s)
        self.tmp_files.append(self.tmp_folder)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

163 

164 

def pcz_collectivity(input_pcz_path: str, output_json_path: str,
                     properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`PCZcollectivity <flexserv.pcasuite.pcz_collectivity>` class and
    execute :meth:`launch() <flexserv.pcasuite.pcz_collectivity.launch>` method.

    Args:
        input_pcz_path (str): Input compressed PCZ trajectory file.
        output_json_path (str): Output JSON file path.
        properties (dict): Tool parameters passed to the class constructor.
        **kwargs: Extra keyword arguments, forwarded to the class constructor.

    Returns:
        int: The value returned by ``PCZcollectivity.launch()``.
    """
    return PCZcollectivity(input_pcz_path=input_pcz_path,
                           output_json_path=output_json_path,
                           properties=properties, **kwargs).launch()


# Expose the full building-block documentation on the function wrapper.
# This must live at module level: placed after the ``return`` inside the
# function it was unreachable and never executed.
pcz_collectivity.__doc__ = PCZcollectivity.__doc__

175 

176 

def main():
    """Command line entry point: parse the arguments and run the building block.

    Reads the optional ``--config`` value (an empty JSON object string is used
    when it is missing), turns it into a properties dictionary through
    ``settings.ConfReader`` and calls :func:`pcz_collectivity` with the two
    required file paths.
    """
    description = 'Extract PCA collectivity (numerical measure of how many atoms are affected by a given mode) from a compressed PCZ file.'
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    parser.add_argument('--config', required=False, help='Configuration file')

    # Arguments specific to this building block
    required_group = parser.add_argument_group('required arguments')
    required_group.add_argument(
        '--input_pcz_path',
        required=True,
        help='Input compressed trajectory file. Accepted formats: pcz.',
    )
    required_group.add_argument(
        '--output_json_path',
        required=True,
        help='Output json file with PCA collectivity. Accepted formats: json.',
    )

    cli_args = parser.parse_args()
    config = cli_args.config or "{}"
    tool_properties = settings.ConfReader(config=config).get_prop_dic()

    # Run the building block with the parsed paths and properties
    pcz_collectivity(
        input_pcz_path=cli_args.input_pcz_path,
        output_json_path=cli_args.output_json_path,
        properties=tool_properties,
    )

194 

195 

# Run the command line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()