Coverage for biobb_dna/intrabp_correlations/intrahpcorr.py: 80%
133 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
1#!/usr/bin/env python3
3"""Module containing the IntraHelParCorrelation class and the command line interface."""
4import argparse
5from typing import Optional
7import pandas as pd
8import numpy as np
9import matplotlib.pyplot as plt
11from biobb_common.generic.biobb_object import BiobbObject
12from biobb_common.configuration import settings
13from biobb_common.tools.file_utils import launchlogger
14from biobb_dna.utils.loader import load_data
class IntraHelParCorrelation(BiobbObject):
    """
    | biobb_dna IntraHelParCorrelation
    | Calculate correlation between helical parameters for a single intra-base pair.
    | Calculate correlation between helical parameters for a single intra-base pair.

    Args:
        input_filename_shear (str): Path to .csv file with data for helical parameter 'shear'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_shear_A.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_stretch (str): Path to .csv file with data for helical parameter 'stretch'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stretch_A.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_stagger (str): Path to .csv file with data for helical parameter 'stagger'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stagger_A.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_buckle (str): Path to .csv file with data for helical parameter 'buckle'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_buckle_A.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_propel (str): Path to .csv file with data for helical parameter 'propeller'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_propel_A.csv>`_. Accepted formats: csv (edam:format_3752).
        input_filename_opening (str): Path to .csv file with data for helical parameter 'opening'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_opening_A.csv>`_. Accepted formats: csv (edam:format_3752).
        output_csv_path (str): Path to .csv file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.csv>`_. Accepted formats: csv (edam:format_3752).
        output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.jpg>`_. Accepted formats: jpg (edam:format_3579).
        properties (dict):
            * **base** (*str*) - (None) Name of base analyzed.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.intrabp_correlations.intrahpcorr import intrahpcorr

            prop = {
                'base': 'A',
            }
            intrahpcorr(
                input_filename_shear='path/to/shear.csv',
                input_filename_stretch='path/to/stretch.csv',
                input_filename_stagger='path/to/stagger.csv',
                input_filename_buckle='path/to/buckle.csv',
                input_filename_propel='path/to/propel.csv',
                input_filename_opening='path/to/opening.csv',
                output_csv_path='path/to/output/file.csv',
                output_jpg_path='path/to/output/file.jpg',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    # Helical parameters in matrix order, each tagged with how it is treated
    # when correlated: "linear" values go through Pearson / the linear side of
    # circlineal, "circular" values are angles in degrees.  Linear parameters
    # must stay ahead of circular ones: circlineal only converts its SECOND
    # argument to radians, so a mixed pair is always computed as
    # linear.corrwith(circular).
    _COORDINATES = (
        ("shear", "linear"),
        ("stretch", "linear"),
        ("stagger", "linear"),
        ("buckle", "circular"),
        ("propel", "circular"),
        ("opening", "circular"),
    )

    def __init__(
            self, input_filename_shear, input_filename_stretch,
            input_filename_stagger, input_filename_buckle,
            input_filename_propel, input_filename_opening,
            output_csv_path, output_jpg_path,
            properties=None, **kwargs) -> None:
        """Store the input/output paths and the ``base`` property.

        Args:
            input_filename_shear .. input_filename_opening: paths to the six
                per-parameter .csv series.
            output_csv_path: path of the correlation matrix .csv to write.
            output_jpg_path: path of the heatmap .jpg to write.
            properties: optional dict of properties (``base`` is read here).
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments, taken right after the parent
        # constructor exactly as the original code did.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_filename_shear': input_filename_shear,
                'input_filename_stretch': input_filename_stretch,
                'input_filename_stagger': input_filename_stagger,
                'input_filename_buckle': input_filename_buckle,
                'input_filename_propel': input_filename_propel,
                'input_filename_opening': input_filename_opening
            },
            'out': {
                'output_csv_path': output_csv_path,
                'output_jpg_path': output_jpg_path
            }
        }

        self.properties = properties
        self.base = properties.get("base", None)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` object."""

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Read the six input series, in matrix order.
        staged_inputs = self.stage_io_dict["in"]
        series = {
            name: load_data(staged_inputs[f"input_filename_{name}"])
            for name, _ in self._COORDINATES}

        # Default the base name to the first column of the shear series.
        if self.base is None:
            self.base = series["shear"].columns[0]

        corr_matrix = self._build_corr_matrix(series)

        # save csv data
        corr_matrix.to_csv(self.stage_io_dict["out"]["output_csv_path"])

        # create heatmap
        self._save_heatmap(
            corr_matrix, self.stage_io_dict['out']['output_jpg_path'])

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return 0

    def _build_corr_matrix(self, series):
        """Return the symmetric 6x6 correlation matrix of the helical parameters.

        Args:
            series (dict): parameter name -> DataFrame as returned by
                ``load_data``; only the first column's correlation is used.

        Returns:
            pandas.DataFrame indexed and labelled by parameter name, with ones
            on the diagonal.
        """
        coordinates = [name for name, _ in self._COORDINATES]
        kinds = dict(self._COORDINATES)
        corr_matrix = pd.DataFrame(
            np.eye(len(coordinates)), index=coordinates, columns=coordinates)

        # Visit every unordered pair once; `first` always precedes `second`
        # in matrix order, which keeps the linear series on the left-hand
        # side of mixed linear/circular pairs.
        for position, first in enumerate(coordinates):
            for second in coordinates[position + 1:]:
                method = self.get_corr_method(kinds[first], kinds[second])
                value = series[first].corrwith(
                    series[second], method=method).values[0]  # type: ignore
                corr_matrix.loc[second, first] = value
                corr_matrix.loc[first, second] = value  # symmetric value
        return corr_matrix

    def _save_heatmap(self, corr_matrix, output_path):
        """Render ``corr_matrix`` as an annotated heatmap and save it as jpg."""
        fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True)
        axs.pcolor(corr_matrix)

        # Write each coefficient at the centre of its cell.
        for i, row in enumerate(corr_matrix.index):
            for j, column in enumerate(corr_matrix.columns):
                axs.text(
                    j + .5,
                    i + .5,
                    f"{corr_matrix.loc[row, column]:.2f}",
                    ha="center",
                    va="center",
                    color="w")

        # Ticks sit at cell centres, hence the half-cell offset.
        tick_positions = [i + 0.5 for i in range(len(corr_matrix))]
        axs.set_xticks(tick_positions)
        axs.set_xticklabels(corr_matrix.columns, rotation=90)
        axs.set_yticks(tick_positions)
        axs.set_yticklabels(corr_matrix.index)
        axs.set_title(
            "Helical Parameter Correlation "
            f"for Base Pair Step '{self.base}'")
        fig.tight_layout()
        fig.savefig(output_path, format="jpg")
        # Close this specific figure rather than whichever one is current.
        plt.close(fig)

    def get_corr_method(self, corrtype1, corrtype2):
        """Return the ``corrwith`` method for a pair of variable kinds.

        Args:
            corrtype1: "linear" or "circular".
            corrtype2: "linear" or "circular".

        Returns:
            ``self.circlineal`` for a mixed pair (in either order),
            ``self.circular`` for two circular variables, and the string
            "pearson" for anything else.
        """
        mixed_pairs = (("circular", "linear"), ("linear", "circular"))
        if (corrtype1, corrtype2) in mixed_pairs:
            return self.circlineal
        if corrtype1 == "circular" and corrtype2 == "circular":
            return self.circular
        return "pearson"

    @staticmethod
    def circular(x1, x2):
        """Correlation coefficient between two angular series given in degrees.

        Both series are converted to radians; the coefficient is the sum of
        products of the sines of the deviations from each series' mean,
        normalised by the square root of the product of their summed squares.
        """
        x1 = x1 * np.pi / 180
        x2 = x2 * np.pi / 180
        diff_1 = np.sin(x1 - x1.mean())
        diff_2 = np.sin(x2 - x2.mean())
        num = (diff_1 * diff_2).sum()
        den = np.sqrt((diff_1 ** 2).sum() * (diff_2 ** 2).sum())
        return num / den

    @staticmethod
    def circlineal(x1, x2):
        """Correlation between a linear series and an angular series.

        Args:
            x1: linear variable (used as is).
            x2: angular variable in degrees (converted to radians here).

        Returns:
            sqrt((rc^2 + rs^2 - 2*rc*rs*rcs) / (1 - rcs^2)) with
            rc = corr(x1, cos x2), rs = corr(x1, sin x2) and
            rcs = corr(sin x2, cos x2); the result is negated when the plain
            Pearson coefficient between x1 and x2 is negative.
        """
        x2 = x2 * np.pi / 180
        rc = np.corrcoef(x1, np.cos(x2))[1, 0]
        rs = np.corrcoef(x1, np.sin(x2))[1, 0]
        rcs = np.corrcoef(np.sin(x2), np.cos(x2))[1, 0]
        num = (rc ** 2) + (rs ** 2) - 2 * rc * rs * rcs
        den = 1 - (rcs ** 2)
        correlation = np.sqrt(num / den)
        # The formula above is unsigned; borrow the sign from Pearson.
        if np.corrcoef(x1, x2)[1, 0] < 0:
            correlation *= -1
        return correlation
def intrahpcorr(
        input_filename_shear: str, input_filename_stretch: str,
        input_filename_stagger: str, input_filename_buckle: str,
        input_filename_propel: str, input_filename_opening: str,
        output_csv_path: str, output_jpg_path: str,
        properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` class and
    execute the :meth:`launch() <intrabp_correlations.intrahpcorr.IntraHelParCorrelation.launch>` method."""

    return IntraHelParCorrelation(
        input_filename_shear=input_filename_shear,
        input_filename_stretch=input_filename_stretch,
        input_filename_stagger=input_filename_stagger,
        input_filename_buckle=input_filename_buckle,
        input_filename_propel=input_filename_propel,
        input_filename_opening=input_filename_opening,
        output_csv_path=output_csv_path,
        output_jpg_path=output_jpg_path,
        properties=properties, **kwargs).launch()


# Give the wrapper the building block's full documentation.  This assignment
# used to sit inside the function body after the ``return`` statement, where
# it could never execute; it has to run at module level to take effect.
intrahpcorr.__doc__ = IntraHelParCorrelation.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses the six input series paths, the two output paths and an optional
    ``--config`` file from ``sys.argv``, turns the configuration into a
    properties dictionary and runs ``intrahpcorr`` with them.
    """
    parser = argparse.ArgumentParser(
        description='Calculate correlation between helical parameters for a single intra-base pair.',
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('--config', required=False, help='Configuration file')

    required_args = parser.add_argument_group('required arguments')
    # One required csv input per helical parameter.
    for parameter in ('shear', 'stretch', 'stagger', 'buckle', 'propel', 'opening'):
        required_args.add_argument(
            f'--input_filename_{parameter}', required=True,
            help='Path to csv file with inputs. Accepted formats: csv.')
    required_args.add_argument('--output_csv_path', required=True,
                               help='Path to output file. Accepted formats: csv.')
    required_args.add_argument('--output_jpg_path', required=True,
                               help='Path to output file. Accepted formats: jpg.')

    args = parser.parse_args()
    # Fall back to an empty JSON configuration when --config is not given.
    args.config = args.config or "{}"
    properties = settings.ConfReader(config=args.config).get_prop_dic()

    intrahpcorr(
        input_filename_shear=args.input_filename_shear,
        input_filename_stretch=args.input_filename_stretch,
        input_filename_stagger=args.input_filename_stagger,
        input_filename_buckle=args.input_filename_buckle,
        input_filename_propel=args.input_filename_propel,
        input_filename_opening=args.input_filename_opening,
        output_csv_path=args.output_csv_path,
        output_jpg_path=args.output_jpg_path,
        properties=properties)
344if __name__ == '__main__':
345 main()