Coverage for biobb_flexserv/pcasuite/pcz_hinges.py: 59%
119 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-05-28 11:28 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-05-28 11:28 +0000
1#!/usr/bin/env python3
3"""Module containing the PCZhinges class and the command line interface."""
4from typing import Optional
5import json
6import re
7from pathlib import Path, PurePath
8from biobb_common.generic.biobb_object import BiobbObject
9from biobb_common.tools.file_utils import launchlogger
class PCZhinges(BiobbObject):
    """
    | biobb_flexserv PCZhinges
    | Compute possible hinge regions (residues around which large protein movements are organized) of a molecule from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_json_path (str): Output hinge regions x PCA mode file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/hinges.json>`_. Accepted formats: json (edam:format_3464).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) from which to extract bfactor values per residue (0 means average over all modes).
            * **method** (*str*) - ("Bfactor_slope") Method to compute the hinge regions (Options: Bfactor_slope, Force_constant, Dynamic_domain)
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('afandiadib/ambertools:serial') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_hinges import pcz_hinges
            prop = {
                'eigenvector': 1,
                'method': 'Dynamic_domain'
            }
            pcz_hinges(input_pcz_path='/path/to/pcazip_input.pcz',
                       output_json_path='/path/to/hinges.json',
                       properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str, output_json_path: str,
                 properties: Optional[dict] = None, **kwargs) -> None:

        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments. It must be taken before any
        # other local name is created, otherwise that name would leak into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_json_path': output_json_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        self.eigenvector = properties.get('eigenvector', 1)
        self.method = properties.get('method', "Bfactor_slope")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def parse_output(self, output_file) -> dict:
        """Parse the FlexServ hinges report and extract the results of ``self.method``.

        Args:
            output_file: Path of the text report written by pczdump.

        Returns:
            dict: Always contains the key ``"method"``. The remaining keys depend
            on the method: ``values_per_residue`` / ``hinge_residues``
            (Force_constant), ``hinge_residues`` / ``consensus_hinge``
            (Bfactor_slope), or ``clusters``, ``nClusters``, ``threshold``,
            ``minValue``, ``maxValue``, ``final_threshold``,
            ``interacting_regions`` and ``hinge_residues`` (Dynamic_domain).
            Keys other than the initial empty lists are only present when the
            corresponding line is found in the report.
        """
        # Header line that opens each method's section in the report.
        section_headers = {
            "Bfactor_slope": "#### Distance variation method",
            "Force_constant": "#### Force constant",
            "Dynamic_domain": "#### Lavery method",
        }

        dict_out = {"method": self.method}

        header = section_headers.get(self.method)
        if header is None:
            print("Method not recognised ({}), please check it and try again. ".format(self.method))
            # No section can be selected for an unknown method, so there is
            # nothing to extract beyond the method name.
            return dict_out

        section = self._read_section(output_file, header)

        if self.method == "Force_constant":
            self._parse_force_constant(section, dict_out)
        elif self.method == "Bfactor_slope":
            self._parse_bfactor_slope(section, dict_out)
        else:
            self._parse_dynamic_domain(section, dict_out)

        return dict_out

    @staticmethod
    def _read_section(output_file, header):
        """Return the report lines from the one containing *header* up to the next ``####`` line."""
        section = []
        inside = False
        with open(output_file, 'r') as report:
            for line in report:
                if header in line:
                    inside = True
                elif "####" in line:
                    # Any other section header closes the section being collected.
                    inside = False
                if inside:
                    section.append(line.rstrip("\n"))
        return section

    @staticmethod
    def _parse_force_constant(section, dict_out):
        """Fill *dict_out* with the per-residue values and the peak residue of the Force constant section."""
        dict_out["values_per_residue"] = []
        for line in section:
            value = line.strip()
            # Skip blank (including whitespace-only) lines and '#' annotated lines.
            if value and "#" not in value:
                dict_out["values_per_residue"].append(float(value))
            if "possible hinge" in line:  # Peak constant (possible hinge): residue 64 (16.740)
                dict_out["hinge_residues"] = int(line.split(' ')[6])

    @staticmethod
    def _parse_bfactor_slope(section, dict_out):
        """Fill *dict_out* with the window hinge residues and the consensus hinge of the Distance variation section."""
        dict_out["hinge_residues"] = []
        for line in section:
            if "Window" in line:  # Window 28: residue 54 seems a downhill hinge point
                dict_out["hinge_residues"].append(int(re.split(r'\s+', line)[3]))
            if "Consensus" in line:  # Consensus Downhill hinge point : 23.7 ( 64.965)
                dict_out["consensus_hinge"] = float(line.split(':')[1].split('(')[0])

    @staticmethod
    def _parse_dynamic_domain(section, dict_out):
        """Fill *dict_out* with the clustering results of the Lavery method section."""
        clusters_seen = 0
        dict_out["clusters"] = []
        for line in section:
            if "threshold" not in line and "nClusters" in line:  # nClusters: 2
                dict_out["nClusters"] = int(line.split(':')[1])
            if "Threshold" in line:  # *** Threshold defined: 0.300000
                dict_out["threshold"] = float(line.split(':')[1])
            if "Min. drij" in line:  # *** Min. drij: 0.000322
                dict_out["minValue"] = float(line.split(':')[1])
            if "Max. drij" in line:  # *** Max. drij: 6.385425
                dict_out["maxValue"] = float(line.split(':')[1])
            if "threshold" in line:  # nClusters: 2 threshold: 3.192873
                dict_out["final_threshold"] = float(line.split(':')[2])
            if "Cluster" in line and "elements" in line:  # Cluster 0 (74 elements)
                fields = line.split()
                dict_out["clusters"].append({
                    "clusterNum": int(fields[1]),
                    "clusterElems": int(fields[2].replace('(', '')),
                })
                clusters_seen += 1
            if clusters_seen and "[" in line:
                # The bracketed list of residue numbers belongs to the most
                # recently declared cluster. The integers are extracted
                # directly instead of calling eval() on text read from a file.
                dict_out["clusters"][clusters_seen - 1]["residues"] = [
                    int(number) for number in re.findall(r'-?\d+', line)
                ]
            # Interacting regions: 13 14 30 31 69 70 84 85 112 113 114 115 116 166 167 199 200
            if "Interacting regions" in line:
                dict_out["interacting_regions"] = [int(n) for n in line.split(':')[1].split()]
            # Hinge residues: 13 14 30 31 69 70 84 85 112 113 114 115 116 166 167 199 200
            if "Hinge residues" in line:
                dict_out["hinge_residues"] = [int(n) for n in line.split(':')[1].split()]

    @launchlogger
    def launch(self):
        """Launches the execution of the FlexServ pcz_hinges module."""

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        if self.container_path:
            working_dir = self.container_volume_path or "/data"
        else:
            working_dir = self.stage_io_dict.get("unique_dir", "")

        unique_dir = Path(self.stage_io_dict.get("unique_dir", ""))

        # Temporary output: file names are relative because the command
        # changes into working_dir before running the binary.
        temp_out = "output.dat"
        temp_log = "output.log"
        temp_out_path = unique_dir.joinpath(temp_out)
        staged_output_json_path = unique_dir.joinpath(Path(self.stage_io_dict["out"]["output_json_path"]).name)

        # Command line, e.g.:
        # cd <working_dir> ; pczdump -i structure.ca.std.pcz -o output.dat -t 0.3 --hinge=1 >& output.log
        self.cmd = ['cd', working_dir, ';',
                    self.binary_path,
                    '-i', PurePath(self.stage_io_dict["in"]["input_pcz_path"]).name,
                    '-o', temp_out,
                    "-t", "0.3",
                    "--hinge={}".format(self.eigenvector),
                    ">&", temp_log
                    ]

        # Run Biobb block
        self.run_biobb()

        # Parsing output file and extracting results for the given method
        dict_out = self.parse_output(temp_out_path)

        with open(staged_output_json_path, 'w') as out_file:
            json.dump(dict_out, out_file, indent=4)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary folder(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def pcz_hinges(input_pcz_path: str, output_json_path: str,
               properties: Optional[dict] = None, **kwargs) -> int:
    """Instantiate :class:`PCZhinges` with the given arguments and return the
    result of its ``launch()`` method."""
    # The collected extra keyword arguments are forwarded as one dict under
    # the keyword name "kwargs", not unpacked.
    block = PCZhinges(input_pcz_path=input_pcz_path,
                      output_json_path=output_json_path,
                      properties=properties,
                      kwargs=kwargs)
    return block.launch()
# The functional wrapper exposes the same documentation as the class.
pcz_hinges.__doc__ = PCZhinges.__doc__

# Callable run when this module is executed as a script (see guard below).
main = PCZhinges.get_main(
    pcz_hinges,
    "Compute possible hinge regions (residues around which large protein movements are organized) of a molecule from a compressed PCZ file.",
)

if __name__ == '__main__':
    main()