Coverage for biobb_flexserv / pcasuite / pcz_hinges.py: 60%

119 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-05 13:10 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZhinges class and the command line interface.""" 

4from typing import Optional 

5import shutil 

6import json 

7import re 

8from pathlib import PurePath 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class PCZhinges(BiobbObject):
    """
    | biobb_flexserv PCZhinges
    | Compute possible hinge regions (residues around which large protein movements are organized) of a molecule from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output hinge regions x PCA mode file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/hinges.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) used to compute the hinge regions.
            * **method** (*str*) - ("Bfactor_slope") Method to compute the hinge regions (Options: Bfactor_slope, Force_constant, Dynamic_domain)
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_hinges import pcz_hinges
            prop = {
                'eigenvector': 1,
                'method': 'Dynamic_domain'
            }
            pcz_hinges( input_pcz_path='/path/to/pcazip_input.pcz',
                        output_json_path='/path/to/hinges.json',
                        properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str, output_json_path: str,
                 properties: Optional[dict] = None, **kwargs) -> None:

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor's local variables (arguments included)
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 1)
        self.method = properties.get('method', "Bfactor_slope")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _extract_report_section(output_file, header: str) -> list:
        """Return the lines of the report section that starts at ``header``.

        A section starts on the line containing ``header`` (that line is kept)
        and ends right before the next line containing ``"####"``.
        The collected text is split on ``"\\n"``, so the result usually ends
        with an empty string.
        """
        collecting = False
        collected = []
        with open(output_file, 'r') as report:
            for line in report:
                if header in line:
                    collecting = True
                elif "####" in line:
                    collecting = False
                if collecting:
                    collected.append(line)
        return "".join(collected).split("\n")

    @staticmethod
    def _parse_force_constant(lines: list) -> dict:
        """Parse the 'Force constant' section: one value per line plus the peak residue."""
        parsed: dict = {"values_per_residue": []}
        for line in lines:
            stripped = line.strip()
            # Skip blank / whitespace-only lines and any annotated ('#') line
            if stripped and "#" not in stripped:
                parsed["values_per_residue"].append(float(stripped))
            if "possible hinge" in line:  # Peak constant (possible hinge): residue 64 (16.740)
                # Token position is kept exactly as in the original parser
                parsed["hinge_residues"] = int(line.split(' ')[6])
        return parsed

    @staticmethod
    def _parse_bfactor_slope(lines: list) -> dict:
        """Parse the 'Distance variation method' section: per-window hinges and the consensus."""
        parsed: dict = {"hinge_residues": []}
        for line in lines:
            if "Window" in line:  # Window 28: residue 54 seems a downhill hinge point
                parsed["hinge_residues"].append(int(re.split(r'\s+', line)[3]))
            if "Consensus" in line:  # Consensus Downhill hinge point : 23.7 ( 64.965)
                parsed["consensus_hinge"] = float(line.split(':')[1].split('(')[0])
        return parsed

    @staticmethod
    def _parse_dynamic_domain(lines: list) -> dict:
        """Parse the 'Lavery method' section: thresholds, clusters and hinge residues."""
        parsed: dict = {"clusters": []}
        clusters_seen = 0
        for line in lines:
            if "threshold" not in line and "nClusters" in line:  # nClusters: 2
                parsed["nClusters"] = int(line.split(':')[1])
            if "Threshold" in line:  # *** Threshold defined: 0.300000
                parsed["threshold"] = float(line.split(':')[1])
            if "Min. drij" in line:  # *** Min. drij: 0.000322
                parsed["minValue"] = float(line.split(':')[1])
            if "Max. drij" in line:  # *** Max. drij: 6.385425
                parsed["maxValue"] = float(line.split(':')[1])
            if "threshold" in line:  # nClusters: 2 threshold: 3.192873
                parsed["final_threshold"] = float(line.split(':')[2])
            if "Cluster" in line and "elements" in line:  # Cluster 0 (74 elements)
                tokens = line.split()
                parsed["clusters"].append({
                    "clusterNum": int(tokens[1]),
                    "clusterElems": int(tokens[2].replace('(', ''))
                })
                clusters_seen += 1
            if clusters_seen and "[" in line:
                # Residue list of the last cluster seen, e.g. " [1, 2, 3, ]".
                # The integers are extracted explicitly instead of calling
                # eval() on text produced by an external program.
                parsed["clusters"][clusters_seen - 1]["residues"] = [
                    int(token) for token in re.findall(r'-?\d+', line)
                ]
            # Interacting regions: 13 14 30 31 69 70 84 85 112 113 114 115 116 166 167 199 200
            if "Interacting regions" in line:
                parsed["interacting_regions"] = list(map(int, line.split(':')[1].split()))
            # Hinge residues: 13 14 30 31 69 70 84 85 112 113 114 115 116 166 167 199 200
            if "Hinge residues" in line:
                parsed["hinge_residues"] = list(map(int, line.split(':')[1].split()))
        return parsed

    def parse_output(self, output_file) -> dict:
        """Parses FlexServ hinges methods output file report.

        Only the report section belonging to ``self.method`` is read.

        Args:
            output_file: Path of the report file to parse.

        Returns:
            dict: Always contains ``"method"``; the remaining keys depend on
            the method (``values_per_residue``/``hinge_residues`` for
            Force_constant, ``hinge_residues``/``consensus_hinge`` for
            Bfactor_slope, and cluster/threshold information for
            Dynamic_domain). For an unrecognised method a message is printed
            and only ``"method"`` is returned.
        """
        section_headers = {
            "Bfactor_slope": "#### Distance variation method",
            "Force_constant": "#### Force constant",
            "Dynamic_domain": "#### Lavery method",
        }
        section_parsers = {
            "Bfactor_slope": self._parse_bfactor_slope,
            "Force_constant": self._parse_force_constant,
            "Dynamic_domain": self._parse_dynamic_domain,
        }

        dict_out = {"method": self.method}

        if self.method not in section_headers:
            print("Method not recognised ({}), please check it and try again. ".format(self.method))
            # Nothing can be extracted without a section marker
            return dict_out

        section_lines = self._extract_report_section(output_file, section_headers[self.method])
        dict_out.update(section_parsers[self.method](section_lines))

        return dict_out

    @launchlogger
    def launch(self) -> int:
        """Launches the execution of the FlexServ pcz_hinges module.

        Returns 0 without running anything when ``check_restart()`` is true,
        otherwise ``self.return_code`` after the run.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        # Creating temporary folder
        tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % tmp_folder, self.out_log)

        shutil.copy2(self.io_dict["in"]["input_pcz_path"], tmp_folder)

        # Temporary file names, all relative to the temporary folder
        temp_out = "output.dat"
        temp_log = "output.log"
        temp_json = "output.json"

        # Command line, executed from inside the temporary folder so that
        # only short, relative file names are passed to the binary
        self.cmd = ['cd', tmp_folder, ';',
                    self.binary_path,
                    '-i', PurePath(self.io_dict["in"]["input_pcz_path"]).name,
                    '-o', temp_out,
                    "-t", "0.3",
                    "--hinge={}".format(self.eigenvector),
                    ">&", temp_log
                    ]

        # Run Biobb block
        self.run_biobb()

        # Parsing output file and extracting results for the given method
        dict_out = self.parse_output(PurePath(tmp_folder).joinpath(temp_out))

        with open(PurePath(tmp_folder).joinpath(temp_json), 'w') as out_file:
            out_file.write(json.dumps(dict_out, indent=4))

        # Copy outputs from temporary folder to output path
        shutil.copy2(PurePath(tmp_folder).joinpath(temp_json), PurePath(self.io_dict["out"]["output_json_path"]))

        # Remove temporary folder(s)
        self.tmp_files.append(tmp_folder)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

240 

241 

def pcz_hinges(input_pcz_path: str, output_json_path: str,
               properties: Optional[dict] = None, **kwargs) -> int:
    """Build a :class:`PCZhinges` building block and run its ``launch()`` method.

    Args:
        input_pcz_path: Path of the input PCZ file.
        output_json_path: Path of the JSON file to be written.
        properties: Optional dictionary with the tool properties.
        **kwargs: Extra keyword arguments; they are forwarded to the
            constructor grouped under a single ``kwargs`` keyword.

    Returns:
        The value returned by ``PCZhinges.launch()``.
    """
    block = PCZhinges(
        input_pcz_path=input_pcz_path,
        output_json_path=output_json_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

247 

248 

# Expose the class documentation on the functional wrapper as well.
pcz_hinges.__doc__ = PCZhinges.__doc__

# Command line entry point built by get_main (not defined in this module;
# presumably inherited from BiobbObject) from the wrapper and a description.
main = PCZhinges.get_main(
    pcz_hinges,
    "Compute possible hinge regions (residues around which large protein movements are organized) of a molecule from a compressed PCZ file.",
)

if __name__ == "__main__":
    main()