Coverage for biobb_dna / interbp_correlations / interhpcorr.py: 91%
116 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 18:49 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-15 18:49 +0000
1#!/usr/bin/env python3
3"""Module containing the InterHelParCorrelation class and the command line interface."""
4from typing import Optional
6import pandas as pd
7import numpy as np
8import matplotlib.pyplot as plt
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.tools.file_utils import launchlogger
12from biobb_dna.utils.loader import load_data
class InterHelParCorrelation(BiobbObject):
    """
    | biobb_dna InterHelParCorrelation
    | Calculate correlation between helical parameters for a single inter-base pair.
    | Calculate correlation between helical parameters for a single inter-base pair.

    Args:
        input_filename_shift (str): Path to .csv file with data for helical parameter 'shift'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_shift_AA.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_slide (str): Path to .csv file with data for helical parameter 'slide'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_slide_AA.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_rise (str): Path to .csv file with data for helical parameter 'rise'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_rise_AA.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_tilt (str): Path to .csv file with data for helical parameter 'tilt'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_tilt_AA.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_roll (str): Path to .csv file with data for helical parameter 'roll'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_roll_AA.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_twist (str): Path to .csv file with data for helical parameter 'twist'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_twist_AA.csv>`_. Accepted formats: csv (edam:format_3752).
        output_csv_path (str): Path to .csv file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/inter_hpcorr_ref.csv>`_. Accepted formats: csv (edam:format_3752).
        output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/inter_hpcorr_ref.jpg>`_. Accepted formats: jpg (edam:format_3579).
        properties (dict):
            * **basepair** (*str*) - (None) Name of basepair analyzed.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.interbp_correlations.interhpcorr import interhpcorr

            prop = {
                'basepair': 'AA',
            }
            interhpcorr(
                input_filename_shift='path/to/shift.csv',
                input_filename_slide='path/to/slide.csv',
                input_filename_rise='path/to/rise.csv',
                input_filename_tilt='path/to/tilt.csv',
                input_filename_roll='path/to/roll.csv',
                input_filename_twist='path/to/twist.csv',
                output_csv_path='path/to/output/file.csv',
                output_jpg_path='path/to/output/file.jpg',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
            self, input_filename_shift: str, input_filename_slide: str,
            input_filename_rise: str, input_filename_tilt: str,
            input_filename_roll: str, input_filename_twist: str,
            output_csv_path: str, output_jpg_path: str,
            properties: Optional[dict] = None, **kwargs) -> None:
        """Store the input/output paths and the ``basepair`` property.

        The six ``input_filename_*`` arguments and the two ``output_*``
        arguments are recorded in ``self.io_dict``; ``properties`` defaults
        to an empty dict when omitted.
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must stay the first
        # statement after super().__init__ so no extra locals leak into it.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_filename_shift': input_filename_shift,
                'input_filename_slide': input_filename_slide,
                'input_filename_rise': input_filename_rise,
                'input_filename_tilt': input_filename_tilt,
                'input_filename_roll': input_filename_roll,
                'input_filename_twist': input_filename_twist
            },
            'out': {
                'output_csv_path': output_csv_path,
                'output_jpg_path': output_jpg_path
            }
        }

        self.properties = properties
        self.basepair = properties.get("basepair", None)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`InterHelParCorrelation <interbp_correlations.interhpcorr.InterHelParCorrelation>` object."""

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Helical parameters in matrix order, each tagged with the kind of
        # correlation it takes part in: the three translations are handled
        # as linear values, the three rotations as circular (angular) ones.
        corrtypes = {
            "shift": "linear",
            "slide": "linear",
            "rise": "linear",
            "tilt": "circular",
            "roll": "circular",
            "twist": "circular",
        }
        coordinates = list(corrtypes)

        # read input (one table per helical parameter, in matrix order)
        series = {
            name: load_data(
                self.stage_io_dict["in"][f"input_filename_{name}"])
            for name in coordinates
        }

        # get basepair
        if self.basepair is None:
            self.basepair = series["shift"].columns[0]

        # make matrix: start from the identity (self-correlation is 1) and
        # fill every off-diagonal pair once, mirroring it across the diagonal
        corr_matrix = pd.DataFrame(
            np.eye(6, 6), index=coordinates, columns=coordinates)
        for position, first in enumerate(coordinates):
            for second in coordinates[position + 1:]:
                # Linear parameters precede circular ones in `coordinates`,
                # so in a mixed pair the linear table is always the caller
                # and the circular one the argument, as `circlineal` expects.
                method = self.get_corr_method(
                    corrtypes[first], corrtypes[second])
                value = series[first].corrwith(
                    series[second], method=method).values[0]  # type: ignore
                corr_matrix.loc[second, first] = value
                corr_matrix.loc[first, second] = value

        # save csv data
        corr_matrix.to_csv(self.stage_io_dict["out"]["output_csv_path"])

        # create heatmap
        fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True)
        axs.pcolor(corr_matrix)
        # Loop over data dimensions and create text annotations.
        for i, row_name in enumerate(coordinates):
            for j, column_name in enumerate(coordinates):
                axs.text(
                    j + .5,
                    i + .5,
                    f"{corr_matrix.loc[row_name, column_name]:.2f}",
                    ha="center",
                    va="center",
                    color="w")
        # Ticks sit at the centre of each cell.
        tick_positions = [i + 0.5 for i in range(len(corr_matrix))]
        axs.set_xticks(tick_positions)
        axs.set_xticklabels(corr_matrix.columns, rotation=90)
        axs.set_yticks(tick_positions)
        axs.set_yticklabels(corr_matrix.index)
        axs.set_title(
            "Helical Parameter Correlation "
            f"for Base Pair Step '{self.basepair}'")
        fig.tight_layout()
        fig.savefig(
            self.stage_io_dict['out']['output_jpg_path'],
            format="jpg")
        # Close this figure explicitly rather than "whatever is current".
        plt.close(fig)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return 0

    def get_corr_method(self, corrtype1, corrtype2):
        """Pick the correlation method for a pair of parameter kinds.

        Args:
            corrtype1: ``"linear"`` or ``"circular"``, kind of the first series.
            corrtype2: ``"linear"`` or ``"circular"``, kind of the second series.

        Returns:
            ``self.circlineal`` for a mixed linear/circular pair,
            ``self.circular`` for two circular series, and the string
            ``"pearson"`` for anything else. Note that ``circlineal`` treats
            its *second* argument as the angular one, so the caller has to
            order the series accordingly.
        """
        # A single if/elif chain: with two separate chains the
        # ("circular", "linear") result was overwritten by the final else.
        if corrtype1 == "circular" and corrtype2 == "linear":
            method = self.circlineal
        elif corrtype1 == "linear" and corrtype2 == "circular":
            method = self.circlineal
        elif corrtype1 == "circular" and corrtype2 == "circular":
            method = self.circular
        else:
            method = "pearson"
        return method

    @staticmethod
    def circular(x1, x2):
        """Correlation between two angular series.

        Both inputs are scaled by pi/180 (i.e. handled as degrees). The
        result is the normalised sum of products of the sines of each
        series' deviations from its arithmetic mean.
        """
        x1 = x1 * np.pi / 180
        x2 = x2 * np.pi / 180
        diff_1 = np.sin(x1 - x1.mean())
        diff_2 = np.sin(x2 - x2.mean())
        num = (diff_1 * diff_2).sum()
        den = np.sqrt((diff_1 ** 2).sum() * (diff_2 ** 2).sum())
        return num / den

    @staticmethod
    def circlineal(x1, x2):
        """Correlation between a linear series and an angular series.

        Args:
            x1: linear values (used as given).
            x2: angular values; scaled by pi/180 (i.e. handled as degrees).

        Returns:
            ``sqrt((rc^2 + rs^2 - 2*rc*rs*rcs) / (1 - rcs^2))`` where ``rc``,
            ``rs`` and ``rcs`` are the Pearson correlations of ``x1`` with
            ``cos(x2)``, ``x1`` with ``sin(x2)``, and ``sin(x2)`` with
            ``cos(x2)``. The magnitude is non-negative by construction, so
            the sign is taken from the plain Pearson correlation of ``x1``
            and ``x2``.
        """
        x2 = x2 * np.pi / 180
        rc = np.corrcoef(x1, np.cos(x2))[1, 0]
        rs = np.corrcoef(x1, np.sin(x2))[1, 0]
        rcs = np.corrcoef(np.sin(x2), np.cos(x2))[1, 0]
        num = (rc ** 2) + (rs ** 2) - 2 * rc * rs * rcs
        den = 1 - (rcs ** 2)
        correlation = np.sqrt(num / den)
        if np.corrcoef(x1, x2)[1, 0] < 0:
            correlation *= -1
        return correlation
def interhpcorr(
        input_filename_shift: str, input_filename_slide: str,
        input_filename_rise: str, input_filename_tilt: str,
        input_filename_roll: str, input_filename_twist: str,
        output_csv_path: str, output_jpg_path: str,
        properties: Optional[dict] = None, **kwargs) -> int:
    """Build an :class:`InterHelParCorrelation <interbp_correlations.interhpcorr.InterHelParCorrelation>` from the given
    arguments, run its :meth:`launch() <interbp_correlations.interhpcorr.InterHelParCorrelation.launch>` method and return the result."""
    building_block = InterHelParCorrelation(
        input_filename_shift=input_filename_shift,
        input_filename_slide=input_filename_slide,
        input_filename_rise=input_filename_rise,
        input_filename_tilt=input_filename_tilt,
        input_filename_roll=input_filename_roll,
        input_filename_twist=input_filename_twist,
        output_csv_path=output_csv_path,
        output_jpg_path=output_jpg_path,
        properties=properties,
        # The collected extras are forwarded as ONE keyword literally named
        # "kwargs" (not unpacked), matching how the arguments were passed on
        # before.
        kwargs=kwargs,
    )
    return building_block.launch()
# Give the function wrapper the same documentation as the class.
interhpcorr.__doc__ = InterHelParCorrelation.__doc__
# Entry point built by the class's `get_main` helper; the second argument is
# presumably the command-line description, so it states what this module
# actually does (the previous text described a different tool).
main = InterHelParCorrelation.get_main(
    interhpcorr,
    "Calculate correlation between helical parameters for a single inter-base pair.")

if __name__ == '__main__':
    main()