Coverage for biobb_flexserv / pcasuite / pcz_hinges.py: 60%
119 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-05 13:10 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-05 13:10 +0000
1#!/usr/bin/env python3
3"""Module containing the PCZhinges class and the command line interface."""
4from typing import Optional
5import shutil
6import json
7import re
8from pathlib import PurePath
9from biobb_common.tools import file_utils as fu
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.tools.file_utils import launchlogger
class PCZhinges(BiobbObject):
    """
    | biobb_flexserv PCZhinges
    | Compute possible hinge regions (residues around which large protein movements are organized) of a molecule from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output hinge regions x PCA mode file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/hinges.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) from which to extract bfactor values per residue (0 means average over all modes).
            * **method** (*str*) - ("Bfactor_slope") Method to compute the hinge regions (Options: Bfactor_slope, Force_constant, Dynamic_domain)
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_hinges import pcz_hinges
            prop = {
                'eigenvector': 1,
                'method': 'Dynamic_domain'
            }
            pcz_hinges( input_pcz_path='/path/to/pcazip_input.pcz',
                        output_json_path='/path/to/hinges.json',
                        properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    # Marker line that opens the section of each supported method in the
    # hinges report parsed by parse_output().
    _SECTION_HEADERS = {
        "Bfactor_slope": "#### Distance variation method",
        "Force_constant": "#### Force constant",
        "Dynamic_domain": "#### Lavery method",
    }

    def __init__(self, input_pcz_path: str, output_json_path: str,
                 properties: Optional[dict] = None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; no extra local may be
        # created before this line or it would leak into the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 1)
        self.method = properties.get('method', "Bfactor_slope")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _parse_force_constant(lines):
        """Extract per-residue values and the peak residue from the 'Force constant' section."""
        parsed = {"values_per_residue": []}
        for line in lines:
            value = line.strip()
            # Blank / whitespace-only lines carry no value; '#' marks header and summary lines.
            if value and "#" not in line:
                parsed["values_per_residue"].append(float(value))
            if "possible hinge" in line:  # Peak constant (possible hinge): residue 64 (16.740)
                # Stored as a single int (not a list), as in the reference output.
                parsed["hinge_residues"] = int(line.split(' ')[6])
        return parsed

    @staticmethod
    def _parse_bfactor_slope(lines):
        """Extract window hinge residues and the consensus point from the 'Distance variation method' section."""
        parsed = {"hinge_residues": []}
        for line in lines:
            if "Window" in line:  # Window 28: residue 54 seems a downhill hinge point
                parsed["hinge_residues"].append(int(re.split(r'\s+', line)[3]))
            if "Consensus" in line:  # Consensus Downhill hinge point : 23.7 ( 64.965)
                parsed["consensus_hinge"] = float(line.split(':')[1].split('(')[0])
        return parsed

    @staticmethod
    def _parse_dynamic_domain(lines):
        """Extract thresholds, clusters and hinge residues from the 'Lavery method' section."""
        parsed = {"clusters": []}
        clusters = parsed["clusters"]
        for line in lines:
            if "threshold" not in line and "nClusters" in line:  # nClusters: 2
                parsed["nClusters"] = int(line.split(':')[1])
            if "Threshold" in line:  # *** Threshold defined: 0.300000
                parsed["threshold"] = float(line.split(':')[1])
            if "Min. drij" in line:  # *** Min. drij: 0.000322
                parsed["minValue"] = float(line.split(':')[1])
            if "Max. drij" in line:  # *** Max. drij: 6.385425
                parsed["maxValue"] = float(line.split(':')[1])
            if "threshold" in line:  # nClusters: 2 threshold: 3.192873
                parsed["final_threshold"] = float(line.split(':')[2])
            if "Cluster" in line and "elements" in line:  # Cluster 0 (74 elements)
                tokens = line.split()
                clusters.append({
                    "clusterNum": int(tokens[1]),
                    "clusterElems": int(tokens[2].replace('(', ''))
                })
            if clusters and "[" in line:
                # The residue list of the most recently declared cluster.
                # Parsed as plain integers: the text comes from an external
                # binary and must never be handed to eval().
                bracketed = line[line.index('['):]
                clusters[-1]["residues"] = [int(num) for num in re.findall(r'-?\d+', bracketed)]
            # Interacting regions: 13 14 30 31 69 70 84 85 112 113 114 115 116 166 167 199 200
            if "Interacting regions" in line:
                parsed["interacting_regions"] = [int(num) for num in line.split(':')[1].split()]
            # Hinge residues: 13 14 30 31 69 70 84 85 112 113 114 115 116 166 167 199 200
            if "Hinge residues" in line:
                parsed["hinge_residues"] = [int(num) for num in line.split(':')[1].split()]
        return parsed

    def parse_output(self, output_file) -> dict:
        """Parses FlexServ hinges methods output file report.

        Only the section that belongs to ``self.method`` is read; a section
        starts at its marker line and ends at the next line containing
        ``####``.

        Args:
            output_file: Path of the report file to read.

        Returns:
            dict: Always contains ``"method"``; the remaining keys depend on
            the method. When the method is not recognised, a message is
            printed and only ``{"method": ...}`` is returned.
        """
        header = self._SECTION_HEADERS.get(self.method)
        if header is None:
            print("Method not recognised ({}), please check it and try again. ".format(self.method))
            # Nothing can be extracted without a known section marker.
            return {"method": self.method}

        inside = False
        section = []
        with open(output_file, 'r') as report:
            for line in report:
                if header in line:
                    inside = True
                elif "####" in line:
                    inside = False
                if inside:
                    section.append(line)
        lines = "".join(section).split("\n")

        dict_out = {"method": self.method}
        if self.method == "Force_constant":
            dict_out.update(self._parse_force_constant(lines))
        elif self.method == "Bfactor_slope":
            dict_out.update(self._parse_bfactor_slope(lines))
        else:  # Dynamic_domain
            dict_out.update(self._parse_dynamic_domain(lines))

        return dict_out

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcz_hinges module."""

        # Setup Biobb
        if self.check_restart():
            return 0

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        # Creating temporary folder
        tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % tmp_folder, self.out_log)

        shutil.copy2(self.io_dict["in"]["input_pcz_path"], tmp_folder)

        # Temporary output (bare file names, resolved inside the sandbox)
        temp_out = "output.dat"
        temp_log = "output.log"
        temp_json = "output.json"

        # Command line, run from inside the sandbox so only short relative
        # file names reach the binary:
        # cd <tmp_folder> ; pczdump -i <input>.pcz -o output.dat -t 0.3 --hinge=<eigenvector> >& output.log
        self.cmd = ['cd', tmp_folder, ';',
                    self.binary_path,
                    '-i', PurePath(self.io_dict["in"]["input_pcz_path"]).name,
                    '-o', temp_out,
                    "-t", "0.3",
                    "--hinge={}".format(self.eigenvector),
                    ">&", temp_log
                    ]

        # Run Biobb block
        self.run_biobb()

        # Parsing output file and extracting results for the given method
        dict_out = self.parse_output(PurePath(tmp_folder).joinpath(temp_out))

        sandbox_json = PurePath(tmp_folder).joinpath(temp_json)
        with open(sandbox_json, 'w') as out_file:
            json.dump(dict_out, out_file, indent=4)

        # Copy outputs from temporary folder to output path
        shutil.copy2(sandbox_json, PurePath(self.io_dict["out"]["output_json_path"]))

        # Remove temporary folder(s)
        self.tmp_files.append(tmp_folder)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def pcz_hinges(input_pcz_path: str, output_json_path: str,
               properties: Optional[dict] = None, **kwargs) -> int:
    """Build a :class:`PCZhinges` object from the given arguments and run its
    ``launch()`` method, returning whatever ``launch()`` returns."""
    # The extra keyword arguments are handed over as one keyword literally
    # named ``kwargs`` (not unpacked), exactly as a locals()-based call does.
    block = PCZhinges(input_pcz_path=input_pcz_path,
                      output_json_path=output_json_path,
                      properties=properties,
                      kwargs=kwargs)
    return block.launch()
# The functional wrapper exposes the same documentation as the class.
pcz_hinges.__doc__ = PCZhinges.__doc__

# Command line entry point built by the class-level get_main() helper from
# the wrapper function and the tool description.
main = PCZhinges.get_main(
    pcz_hinges,
    "Compute possible hinge regions (residues around which large protein "
    "movements are organized) of a molecule from a compressed PCZ file."
)

if __name__ == '__main__':
    main()