Coverage for biobb_dna/stiffness/average_stiffness.py: 74%
94 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 09:06 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 09:06 +0000
1#!/usr/bin/env python3
2"""Module containing the AverageStiffness class and the command line interface."""
4import shutil
5import argparse
6from pathlib import Path
8import matplotlib.pyplot as plt
9import pandas as pd
10import numpy as np
11from biobb_dna.utils import constants
12from biobb_dna.utils.loader import read_series
13from biobb_common.generic.biobb_object import BiobbObject
14from biobb_common.tools.file_utils import launchlogger
15from biobb_common.tools import file_utils as fu
16from biobb_common.configuration import settings
19class AverageStiffness(BiobbObject):
20 """
21 | biobb_dna AverageStiffness
22 | Calculate average stiffness constants for each base pair of a trajectory's series.
24 Args:
25 input_ser_path (str): Path to .ser file for helical parameter. File is expected to be a table, with the first column being an index and the rest the helical parameter values for each base/basepair. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/canal_output_roll.ser>`_. Accepted formats: ser (edam:format_2330).
26 output_csv_path (str): Path to .csv file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.csv>`_. Accepted formats: csv (edam:format_3752).
27 output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.jpg>`_. Accepted formats: jpg (edam:format_3579).
28 properties (dict):
29 * **KT** (*float*) - (0.592186827) Value of Boltzmann temperature factor.
30 * **sequence** (*str*) - (None) Nucleic acid sequence corresponding to the input .ser file. Length of sequence is expected to be the same as the total number of columns in the .ser file, minus the index column (even if later on a subset of columns is selected with the *usecols* option).
31 * **helpar_name** (*str*) - (None) helical parameter name.
32 * **seqpos** (*list*) - (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.
33 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
34 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
36 Examples:
37 This is a use example of how to use the building block from Python::
39 from biobb_dna.stiffness.average_stiffness import average_stiffness
41 prop = {
42 'helpar_name': 'twist',
43 'sequence': 'GCAT',
44 }
45 average_stiffness(
46 input_ser_path='/path/to/twist.ser',
47 output_csv_path='/path/to/table/output.csv',
48 output_jpg_path='/path/to/table/output.jpg',
49 properties=prop)
50 Info:
51 * wrapped_software:
52 * name: In house
53 * license: Apache-2.0
54 * ontology:
55 * name: EDAM
56 * schema: http://edamontology.org/EDAM.owl
58 """
60 def __init__(self, input_ser_path, output_csv_path, output_jpg_path,
61 properties=None, **kwargs) -> None:
62 properties = properties or {}
64 # Call parent class constructor
65 super().__init__(properties)
66 self.locals_var_dict = locals().copy()
68 # Input/Output files
69 self.io_dict = {
70 'in': {
71 'input_ser_path': input_ser_path,
72 },
73 'out': {
74 'output_csv_path': output_csv_path,
75 'output_jpg_path': output_jpg_path
76 }
77 }
79 self.properties = properties
80 self.sequence = properties.get("sequence")
81 self.KT = properties.get(
82 "KT", 0.592186827)
83 self.seqpos = properties.get("seqpos", None)
84 self.helpar_name = properties.get("helpar_name", None)
86 # Check the properties
87 self.check_properties(properties)
88 self.check_arguments()
90 @launchlogger
91 def launch(self) -> int:
92 """Execute the :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` object."""
94 # Setup Biobb
95 if self.check_restart():
96 return 0
97 self.stage_files()
99 # check sequence
100 if self.sequence is None or len(self.sequence) < 2:
101 raise ValueError("sequence is null or too short!")
103 # get helical parameter from filename if not specified
104 if self.helpar_name is None:
105 for hp in constants.helical_parameters:
106 if hp.lower() in Path(
107 self.io_dict['in']['input_ser_path']).name.lower():
108 self.helpar_name = hp
109 if self.helpar_name is None:
110 raise ValueError(
111 "Helical parameter name can't be inferred from file, "
112 "so it must be specified!")
113 else:
114 if self.helpar_name not in constants.helical_parameters:
115 raise ValueError(
116 "Helical parameter name is invalid! "
117 f"Options: {constants.helical_parameters}")
119 # get base length and unit from helical parameter name
120 if self.helpar_name.lower() in ["roll", "tilt", "twist"]:
121 self.hp_unit = "kcal/(mol*degree²)"
122 scale = 1
123 else:
124 self.hp_unit = "kcal/(mol*Ų)"
125 scale = 10.6
127 # check seqpos
128 if self.seqpos is not None:
129 if (max(self.seqpos) > len(self.sequence) - 2) or (min(self.seqpos) < 1):
130 raise ValueError(
131 f"seqpos values must be between 1 and {len(self.sequence) - 2}")
132 if not (isinstance(self.seqpos, list) and len(self.seqpos) > 1):
133 raise ValueError(
134 "seqpos must be a list of at least two integers")
136 # Creating temporary folder
137 self.tmp_folder = fu.create_unique_dir(prefix="avgstiffness_")
138 fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
140 # Copy input_file_path1 to temporary folder
141 shutil.copy(self.io_dict['in']['input_ser_path'], self.tmp_folder)
143 # read input .ser file
144 ser_data = read_series(
145 self.io_dict['in']['input_ser_path'],
146 usecols=self.seqpos)
147 if self.seqpos is None:
148 ser_data = ser_data[ser_data.columns[1:-1]]
149 # discard first and last base(pairs) from sequence
150 sequence = self.sequence[1:]
151 xlabels = [
152 f"{sequence[i:i+2]}"
153 for i in range(len(ser_data.columns))]
154 else:
155 sequence = self.sequence
156 xlabels = [
157 f"{sequence[i:i+2]}"
158 for i in self.seqpos]
160 # calculate average stiffness
161 cov = ser_data.cov()
162 stiff = np.linalg.inv(cov) * self.KT
163 avg_stiffness = np.diag(stiff) * scale
165 # save plot
166 fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True)
167 axs.plot(
168 range(len(xlabels)),
169 avg_stiffness,
170 "-o")
171 axs.set_xticks(range(len(xlabels)))
172 axs.set_xticklabels(xlabels)
173 axs.set_xlabel("Sequence Base Pair")
174 axs.set_ylabel(f"{self.helpar_name.capitalize()} ({self.hp_unit})")
175 axs.set_title(
176 "Base Pair Helical Parameter Stiffness: "
177 f"{self.helpar_name.capitalize()}")
178 fig.savefig(
179 self.io_dict['out']['output_jpg_path'],
180 format="jpg")
182 # save table
183 dataset = pd.DataFrame(
184 data=avg_stiffness,
185 index=xlabels,
186 columns=[f"{self.helpar_name}_stiffness"])
187 dataset.to_csv(self.io_dict['out']['output_csv_path'])
189 plt.close()
191 # Remove temporary file(s)
192 self.tmp_files.extend([
193 self.stage_io_dict.get("unique_dir"),
194 self.tmp_folder
195 ])
196 self.remove_tmp_files()
198 self.check_arguments(output_files_created=True, raise_exception=False)
200 return 0
203def average_stiffness(
204 input_ser_path: str, output_csv_path: str, output_jpg_path: str,
205 properties: dict = None, **kwargs) -> int:
206 """Create :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` class and
207 execute the :meth:`launch() <stiffness.average_stiffness.AverageStiffness.launch>` method."""
209 return AverageStiffness(
210 input_ser_path=input_ser_path,
211 output_csv_path=output_csv_path,
212 output_jpg_path=output_jpg_path,
213 properties=properties, **kwargs).launch()
216def main():
217 """Command line execution of this building block. Please check the command line documentation."""
218 parser = argparse.ArgumentParser(description='Calculate average stiffness constants for each base pair of a trajectory\'s series.',
219 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
220 parser.add_argument('--config', required=False, help='Configuration file')
222 required_args = parser.add_argument_group('required arguments')
223 required_args.add_argument('--input_ser_path', required=True,
224 help='Helical parameter input ser file path. Accepted formats: ser.')
225 required_args.add_argument('--output_csv_path', required=True,
226 help='Path to output csv file. Accepted formats: csv.')
227 required_args.add_argument('--output_jpg_path', required=True,
228 help='Path to output jpg file. Accepted formats: jpg.')
230 args = parser.parse_args()
231 args.config = args.config or "{}"
232 properties = settings.ConfReader(config=args.config).get_prop_dic()
234 average_stiffness(
235 input_ser_path=args.input_ser_path,
236 output_csv_path=args.output_csv_path,
237 output_jpg_path=args.output_jpg_path,
238 properties=properties)
241if __name__ == '__main__':
242 main()