Coverage for biobb_dna / stiffness / average_stiffness.py: 82%
80 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-22 13:05 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-22 13:05 +0000
1#!/usr/bin/env python3
2"""Module containing the AverageStiffness class and the command line interface."""
4from pathlib import Path
5from typing import Optional
7import matplotlib.pyplot as plt
8import numpy as np
9import pandas as pd
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.tools.file_utils import launchlogger
13from biobb_dna.utils import constants
14from biobb_dna.utils.common import _from_string_to_list
15from biobb_dna.utils.loader import read_series
18class AverageStiffness(BiobbObject):
19 """
20 | biobb_dna AverageStiffness
21 | Calculate average stiffness constants for each base pair of a trajectory's series.
22 | Calculate the average stiffness constants for each base pair of a trajectory's series. The input is a .ser file with the helical parameter values for each base/basepair. The output is a .csv file with the average stiffness constants for each base pair and a .jpg file with a plot of the average stiffness constants for each base pair.
24 Args:
25 input_ser_path (str): Path to .ser file for helical parameter. File is expected to be a table, with the first column being an index and the rest the helical parameter values for each base/basepair. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/canal_output_roll.ser>`_. Accepted formats: ser (edam:format_2330).
26 output_csv_path (str): Path to .csv file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.csv>`_. Accepted formats: csv (edam:format_3752).
27 output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.jpg>`_. Accepted formats: jpg (edam:format_3579).
28 properties (dict):
29 * **KT** (*float*) - (0.592186827) Value of Boltzmann temperature factor.
30 * **sequence** (*str*) - (None) Nucleic acid sequence corresponding to the input .ser file. Length of sequence is expected to be the same as the total number of columns in the .ser file, minus the index column (even if later on a subset of columns is selected with the *usecols* option).
31 * **helpar_name** (*str*) - (None) helical parameter name.
32 * **seqpos** (*list*) - (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.
33 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
34 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
35 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
37 Examples:
38 This is a use example of how to use the building block from Python::
40 from biobb_dna.stiffness.average_stiffness import average_stiffness
42 prop = {
43 'helpar_name': 'twist',
44 'sequence': 'GCAT',
45 }
46 average_stiffness(
47 input_ser_path='/path/to/twist.ser',
48 output_csv_path='/path/to/table/output.csv',
49 output_jpg_path='/path/to/table/output.jpg',
50 properties=prop)
51 Info:
52 * wrapped_software:
53 * name: In house
54 * license: Apache-2.0
55 * ontology:
56 * name: EDAM
57 * schema: http://edamontology.org/EDAM.owl
59 """
61 def __init__(
62 self,
63 input_ser_path,
64 output_csv_path,
65 output_jpg_path,
66 properties=None,
67 **kwargs,
68 ) -> None:
69 properties = properties or {}
71 # Call parent class constructor
72 super().__init__(properties)
73 self.locals_var_dict = locals().copy()
75 # Input/Output files
76 self.io_dict = {
77 "in": {
78 "input_ser_path": input_ser_path,
79 },
80 "out": {
81 "output_csv_path": output_csv_path,
82 "output_jpg_path": output_jpg_path,
83 },
84 }
86 self.properties = properties
87 self.sequence = properties.get("sequence")
88 self.KT = properties.get("KT", 0.592186827)
89 self.seqpos = [
90 int(elem) for elem in _from_string_to_list(properties.get("seqpos", None))
91 ]
92 self.helpar_name = properties.get("helpar_name", None)
94 # Check the properties
95 self.check_properties(properties)
96 self.check_arguments()
98 @launchlogger
99 def launch(self) -> int:
100 """Execute the :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` object."""
102 # Setup Biobb
103 if self.check_restart():
104 return 0
105 self.stage_files()
107 # check sequence
108 if self.sequence is None or len(self.sequence) < 2:
109 raise ValueError("sequence is null or too short!")
111 # get helical parameter from filename if not specified
112 if self.helpar_name is None:
113 for hp in constants.helical_parameters:
114 if (
115 hp.lower()
116 in Path(self.stage_io_dict["in"]["input_ser_path"]).name.lower()
117 ):
118 self.helpar_name = hp
119 if self.helpar_name is None:
120 raise ValueError(
121 "Helical parameter name can't be inferred from file, "
122 "so it must be specified!"
123 )
124 else:
125 if self.helpar_name not in constants.helical_parameters:
126 raise ValueError(
127 "Helical parameter name is invalid! "
128 f"Options: {constants.helical_parameters}"
129 )
131 # get base length and unit from helical parameter name
132 if self.helpar_name.lower() in ["roll", "tilt", "twist"]:
133 self.hp_unit = "kcal/(mol*degree²)"
134 scale = 1.0
135 else:
136 self.hp_unit = "kcal/(mol*Ų)"
137 scale = 10.6
139 # check seqpos
140 if self.seqpos:
141 if (max(self.seqpos) > len(self.sequence) - 2) or (min(self.seqpos) < 1):
142 raise ValueError(
143 f"seqpos values must be between 1 and {len(self.sequence) - 2}"
144 )
145 if not (isinstance(self.seqpos, list) and len(self.seqpos) > 1):
146 raise ValueError("seqpos must be a list of at least two integers")
147 else:
148 self.seqpos = None # type: ignore
150 # read input .ser file
151 ser_data = read_series(
152 self.stage_io_dict["in"]["input_ser_path"], usecols=self.seqpos
153 )
154 if not self.seqpos:
155 ser_data = ser_data[ser_data.columns[1:-1]]
156 # discard first and last base(pairs) from sequence
157 sequence = self.sequence[1:]
158 xlabels = [f"{sequence[i:i+2]}" for i in range(len(ser_data.columns))]
159 else:
160 sequence = self.sequence
161 xlabels = [f"{sequence[i:i+2]}" for i in self.seqpos]
163 # calculate average stiffness
164 cov = ser_data.cov()
165 stiff = np.linalg.inv(cov) * self.KT
166 avg_stiffness = np.diag(stiff) * scale
168 # save plot
169 fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True)
170 axs.plot(range(len(xlabels)), avg_stiffness, "-o")
171 axs.set_xticks(range(len(xlabels)))
172 axs.set_xticklabels(xlabels)
173 axs.set_xlabel("Sequence Base Pair")
174 axs.set_ylabel(f"{self.helpar_name.capitalize()} ({self.hp_unit})")
175 axs.set_title(
176 "Base Pair Helical Parameter Stiffness: " f"{self.helpar_name.capitalize()}"
177 )
178 fig.savefig(self.stage_io_dict["out"]["output_jpg_path"], format="jpg")
180 # save table
181 dataset = pd.DataFrame(
182 data=avg_stiffness, index=xlabels, columns=[f"{self.helpar_name}_stiffness"]
183 )
184 dataset.to_csv(self.stage_io_dict["out"]["output_csv_path"])
186 plt.close()
188 # Copy files to host
189 self.copy_to_host()
191 # Remove temporary file(s)
192 self.remove_tmp_files()
194 self.check_arguments(output_files_created=True, raise_exception=False)
196 return 0
199def average_stiffness(
200 input_ser_path: str,
201 output_csv_path: str,
202 output_jpg_path: str,
203 properties: Optional[dict] = None,
204 **kwargs,
205) -> int:
206 """Create :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` class and
207 execute the :meth:`launch() <stiffness.average_stiffness.AverageStiffness.launch>` method."""
208 return AverageStiffness(**dict(locals())).launch()
211average_stiffness.__doc__ = AverageStiffness.__doc__
212main = AverageStiffness.get_main(average_stiffness, "Calculate average stiffness constants for each base pair of a trajectory's series.")
214if __name__ == '__main__':
215 main()