Coverage for biobb_dna/intrabp_correlations/intrahpcorr.py: 81%
135 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 09:06 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 09:06 +0000
1#!/usr/bin/env python3
3"""Module containing the IntraHelParCorrelation class and the command line interface."""
4import argparse
6import pandas as pd
7import numpy as np
8import matplotlib.pyplot as plt
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.configuration import settings
12from biobb_common.tools import file_utils as fu
13from biobb_common.tools.file_utils import launchlogger
14from biobb_dna.utils.loader import load_data
17class IntraHelParCorrelation(BiobbObject):
18 """
19 | biobb_dna IntraHelParCorrelation
20 | Calculate correlation between helical parameters for a single intra-base pair.
22 Args:
23 input_filename_shear (str): Path to .csv file with data for helical parameter 'shear'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_shear_A.csv>`_. Accepted formats: csv (edam:format_3752).
24 input_filename_stretch (str): Path to .csv file with data for helical parameter 'stretch'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stretch_A.csv>`_. Accepted formats: csv (edam:format_3752).
25 input_filename_stagger (str): Path to .csv file with data for helical parameter 'stagger'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stagger_A.csv>`_. Accepted formats: csv (edam:format_3752).
26 input_filename_buckle (str): Path to .csv file with data for helical parameter 'buckle'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_buckle_A.csv>`_. Accepted formats: csv (edam:format_3752).
27 input_filename_propel (str): Path to .csv file with data for helical parameter 'propeller'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_propel_A.csv>`_. Accepted formats: csv (edam:format_3752).
28 input_filename_opening (str): Path to .csv file with data for helical parameter 'opening'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_opening_A.csv>`_. Accepted formats: csv (edam:format_3752).
29 output_csv_path (str): Path to directory where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.csv>`_. Accepted formats: csv (edam:format_3752).
30 output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.jpg>`_. Accepted formats: jpg (edam:format_3579).
31 properties (dict):
32 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
33 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
34 * **base** (*str*) - (None) Name of base analyzed.
36 Examples:
37 This is a use example of how to use the building block from Python::
39 from biobb_dna.intrabp_correlations.intrahpcorr import intrahpcorr
41 prop = {
42 'base': 'A',
43 }
44 intrahpcorr(
45 input_filename_shear='path/to/shear.csv',
46 input_filename_stretch='path/to/stretch.csv',
47 input_filename_stagger='path/to/stagger.csv',
48 input_filename_buckle='path/to/buckle.csv',
49 input_filename_propel='path/to/propel.csv',
50 input_filename_opening='path/to/opening.csv',
51 output_csv_path='path/to/output/file.csv',
52 output_jpg_path='path/to/output/file.jpg',
53 properties=prop)
54 Info:
55 * wrapped_software:
56 * name: In house
57 * license: Apache-2.0
58 * ontology:
59 * name: EDAM
60 * schema: http://edamontology.org/EDAM.owl
62 """
64 def __init__(
65 self, input_filename_shear, input_filename_stretch,
66 input_filename_stagger, input_filename_buckle,
67 input_filename_propel, input_filename_opening,
68 output_csv_path, output_jpg_path,
69 properties=None, **kwargs) -> None:
70 properties = properties or {}
72 # Call parent class constructor
73 super().__init__(properties)
74 self.locals_var_dict = locals().copy()
76 # Input/Output files
77 self.io_dict = {
78 'in': {
79 'input_filename_shear': input_filename_shear,
80 'input_filename_stretch': input_filename_stretch,
81 'input_filename_stagger': input_filename_stagger,
82 'input_filename_buckle': input_filename_buckle,
83 'input_filename_propel': input_filename_propel,
84 'input_filename_opening': input_filename_opening
85 },
86 'out': {
87 'output_csv_path': output_csv_path,
88 'output_jpg_path': output_jpg_path
89 }
90 }
92 self.properties = properties
93 self.base = properties.get("base", None)
95 # Check the properties
96 self.check_properties(properties)
97 self.check_arguments()
99 @launchlogger
100 def launch(self) -> int:
101 """Execute the :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` object."""
103 # Setup Biobb
104 if self.check_restart():
105 return 0
106 self.stage_files()
108 # Creating temporary folder
109 self.tmp_folder = fu.create_unique_dir(prefix="hpcorrelation_")
110 fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
112 # read input
113 shear = load_data(self.io_dict["in"]["input_filename_shear"])
114 stretch = load_data(self.io_dict["in"]["input_filename_stretch"])
115 stagger = load_data(self.io_dict["in"]["input_filename_stagger"])
116 buckle = load_data(self.io_dict["in"]["input_filename_buckle"])
117 propel = load_data(self.io_dict["in"]["input_filename_propel"])
118 opening = load_data(self.io_dict["in"]["input_filename_opening"])
120 # get base
121 if self.base is None:
122 self.base = shear.columns[0]
124 # make matrix
125 # coordinates = ["shear", "stretch", "stagger", "buckle", "propel", "opening"]
126 coordinates = [
127 "shear", "stretch", "stagger", "buckle", "propel", "opening"]
128 corr_matrix = pd.DataFrame(
129 np.eye(6, 6), index=coordinates, columns=coordinates)
131 # shear
132 corr_matrix["shear"]["stretch"] = shear.corrwith(
133 stretch, method="pearson")
134 corr_matrix["shear"]["stagger"] = shear.corrwith(
135 stagger, method="pearson")
136 corr_matrix["shear"]["buckle"] = shear.corrwith(
137 buckle, method=self.circlineal)
138 corr_matrix["shear"]["propel"] = shear.corrwith(
139 propel, method=self.circlineal)
140 corr_matrix["shear"]["opening"] = shear.corrwith(
141 opening, method=self.circlineal)
142 # symmetric values
143 corr_matrix["stretch"]["shear"] = corr_matrix["shear"]["stretch"]
144 corr_matrix["stagger"]["shear"] = corr_matrix["shear"]["stagger"]
145 corr_matrix["buckle"]["shear"] = corr_matrix["shear"]["buckle"]
146 corr_matrix["propel"]["shear"] = corr_matrix["shear"]["propel"]
147 corr_matrix["opening"]["shear"] = corr_matrix["shear"]["opening"]
149 # stretch
150 corr_matrix["stretch"]["stagger"] = stretch.corrwith(
151 stagger, method="pearson")
152 corr_matrix["stretch"]["buckle"] = stretch.corrwith(
153 buckle, method=self.circlineal)
154 corr_matrix["stretch"]["propel"] = stretch.corrwith(
155 propel, method=self.circlineal)
156 corr_matrix["stretch"]["opening"] = stretch.corrwith(
157 opening, method=self.circlineal)
158 # symmetric values
159 corr_matrix["stagger"]["stretch"] = corr_matrix["stretch"]["stagger"]
160 corr_matrix["buckle"]["stretch"] = corr_matrix["stretch"]["buckle"]
161 corr_matrix["propel"]["stretch"] = corr_matrix["stretch"]["propel"]
162 corr_matrix["opening"]["stretch"] = corr_matrix["stretch"]["opening"]
164 # stagger
165 corr_matrix["stagger"]["buckle"] = stagger.corrwith(
166 buckle, method=self.circlineal)
167 corr_matrix["stagger"]["propel"] = stagger.corrwith(
168 propel, method=self.circlineal)
169 corr_matrix["stagger"]["opening"] = stagger.corrwith(
170 opening, method=self.circlineal)
171 # symmetric values
172 corr_matrix["buckle"]["stagger"] = corr_matrix["stagger"]["buckle"]
173 corr_matrix["propel"]["stagger"] = corr_matrix["stagger"]["propel"]
174 corr_matrix["opening"]["stagger"] = corr_matrix["stagger"]["opening"]
176 # buckle
177 corr_matrix["buckle"]["propel"] = buckle.corrwith(
178 propel, method=self.circular)
179 corr_matrix["buckle"]["opening"] = buckle.corrwith(
180 opening, method=self.circular)
181 # symmetric values
182 corr_matrix["propel"]["buckle"] = corr_matrix["buckle"]["propel"]
183 corr_matrix["opening"]["buckle"] = corr_matrix["buckle"]["opening"]
185 # propel
186 corr_matrix["propel"]["opening"] = propel.corrwith(
187 opening, method=self.circular)
188 # symmetric values
189 corr_matrix["opening"]["propel"] = corr_matrix["propel"]["opening"]
191 # save csv data
192 corr_matrix.to_csv(self.io_dict["out"]["output_csv_path"])
194 # create heatmap
195 fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True)
196 axs.pcolor(corr_matrix)
197 # Loop over data dimensions and create text annotations.
198 for i in range(len(corr_matrix)):
199 for j in range(len(corr_matrix)):
200 axs.text(
201 j+.5,
202 i+.5,
203 f"{corr_matrix[coordinates[j]].loc[coordinates[i]]:.2f}",
204 ha="center",
205 va="center",
206 color="w")
207 axs.set_xticks([i + 0.5 for i in range(len(corr_matrix))])
208 axs.set_xticklabels(corr_matrix.columns, rotation=90)
209 axs.set_yticks([i+0.5 for i in range(len(corr_matrix))])
210 axs.set_yticklabels(corr_matrix.index)
211 axs.set_title(
212 "Helical Parameter Correlation "
213 f"for Base Pair Step \'{self.base}\'")
214 fig.tight_layout()
215 fig.savefig(
216 self.io_dict['out']['output_jpg_path'],
217 format="jpg")
218 plt.close()
220 # Remove temporary file(s)
221 self.tmp_files.extend([
222 self.stage_io_dict.get("unique_dir"),
223 self.tmp_folder
224 ])
225 self.remove_tmp_files()
227 self.check_arguments(output_files_created=True, raise_exception=False)
229 return 0
231 def get_corr_method(self, corrtype1, corrtype2):
232 if corrtype1 == "circular" and corrtype2 == "linear":
233 method = self.circlineal
234 if corrtype1 == "linear" and corrtype2 == "circular":
235 method = self.circlineal
236 elif corrtype1 == "circular" and corrtype2 == "circular":
237 method = self.circular
238 else:
239 method = "pearson"
240 return method
242 @staticmethod
243 def circular(x1, x2):
244 x1 = x1 * np.pi / 180
245 x2 = x2 * np.pi / 180
246 diff_1 = np.sin(x1 - x1.mean())
247 diff_2 = np.sin(x2 - x2.mean())
248 num = (diff_1 * diff_2).sum()
249 den = np.sqrt((diff_1 ** 2).sum() * (diff_2 ** 2).sum())
250 return num / den
252 @staticmethod
253 def circlineal(x1, x2):
254 x2 = x2 * np.pi / 180
255 rc = np.corrcoef(x1, np.cos(x2))[1, 0]
256 rs = np.corrcoef(x1, np.sin(x2))[1, 0]
257 rcs = np.corrcoef(np.sin(x2), np.cos(x2))[1, 0]
258 num = (rc ** 2) + (rs ** 2) - 2 * rc * rs * rcs
259 den = 1 - (rcs ** 2)
260 correlation = np.sqrt(num / den)
261 if np.corrcoef(x1, x2)[1, 0] < 0:
262 correlation *= -1
263 return correlation
266def intrahpcorr(
267 input_filename_shear: str, input_filename_stretch: str,
268 input_filename_stagger: str, input_filename_buckle: str,
269 input_filename_propel: str, input_filename_opening: str,
270 output_csv_path: str, output_jpg_path: str,
271 properties: dict = None, **kwargs) -> int:
272 """Create :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` class and
273 execute the :meth:`launch() <intrabp_correlations.intrahpcorr.IntraHelParCorrelation.launch>` method."""
275 return IntraHelParCorrelation(
276 input_filename_shear=input_filename_shear,
277 input_filename_stretch=input_filename_stretch,
278 input_filename_stagger=input_filename_stagger,
279 input_filename_buckle=input_filename_buckle,
280 input_filename_propel=input_filename_propel,
281 input_filename_opening=input_filename_opening,
282 output_csv_path=output_csv_path,
283 output_jpg_path=output_jpg_path,
284 properties=properties, **kwargs).launch()
287def main():
288 """Command line execution of this building block. Please check the command line documentation."""
289 parser = argparse.ArgumentParser(description='Load helical parameter file and save base data individually.',
290 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
291 parser.add_argument('--config', required=False, help='Configuration file')
293 required_args = parser.add_argument_group('required arguments')
294 required_args.add_argument('--input_filename_shear', required=True,
295 help='Path to csv file with inputs. Accepted formats: csv.')
296 required_args.add_argument('--input_filename_stretch', required=True,
297 help='Path to csv file with inputs. Accepted formats: csv.')
298 required_args.add_argument('--input_filename_stagger', required=True,
299 help='Path to csv file with inputs. Accepted formats: csv.')
300 required_args.add_argument('--input_filename_buckle', required=True,
301 help='Path to csv file with inputs. Accepted formats: csv.')
302 required_args.add_argument('--input_filename_propel', required=True,
303 help='Path to csv file with inputs. Accepted formats: csv.')
304 required_args.add_argument('--input_filename_opening', required=True,
305 help='Path to csv file with inputs. Accepted formats: csv.')
306 required_args.add_argument('--output_csv_path', required=True,
307 help='Path to output file. Accepted formats: csv.')
308 required_args.add_argument('--output_jpg_path', required=True,
309 help='Path to output file. Accepted formats: csv.')
311 args = parser.parse_args()
312 args.config = args.config or "{}"
313 properties = settings.ConfReader(config=args.config).get_prop_dic()
315 intrahpcorr(
316 input_filename_shear=args.input_filename_shear,
317 input_filename_stretch=args.input_filename_stretch,
318 input_filename_stagger=args.input_filename_stagger,
319 input_filename_buckle=args.input_filename_buckle,
320 input_filename_propel=args.input_filename_propel,
321 input_filename_opening=args.input_filename_opening,
322 output_csv_path=args.output_csv_path,
323 output_jpg_path=args.output_jpg_path,
324 properties=properties)
327if __name__ == '__main__':
328 main()