Coverage for biobb_dna / stiffness / average_stiffness.py: 82%

80 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-22 13:05 +0000

1#!/usr/bin/env python3 

2"""Module containing the AverageStiffness class and the command line interface.""" 

3 

4from pathlib import Path 

5from typing import Optional 

6 

7import matplotlib.pyplot as plt 

8import numpy as np 

9import pandas as pd 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.tools.file_utils import launchlogger 

12 

13from biobb_dna.utils import constants 

14from biobb_dna.utils.common import _from_string_to_list 

15from biobb_dna.utils.loader import read_series 

16 

17 

18class AverageStiffness(BiobbObject): 

19 """ 

20 | biobb_dna AverageStiffness 

21 | Calculate average stiffness constants for each base pair of a trajectory's series. 

22 | Calculate the average stiffness constants for each base pair of a trajectory's series. The input is a .ser file with the helical parameter values for each base/basepair. The output is a .csv file with the average stiffness constants for each base pair and a .jpg file with a plot of the average stiffness constants for each base pair. 

23 

24 Args: 

25 input_ser_path (str): Path to .ser file for helical parameter. File is expected to be a table, with the first column being an index and the rest the helical parameter values for each base/basepair. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/canal_output_roll.ser>`_. Accepted formats: ser (edam:format_2330). 

26 output_csv_path (str): Path to .csv file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.csv>`_. Accepted formats: csv (edam:format_3752). 

27 output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.jpg>`_. Accepted formats: jpg (edam:format_3579). 

28 properties (dict): 

29 * **KT** (*float*) - (0.592186827) Value of Boltzmann temperature factor. 

30 * **sequence** (*str*) - (None) Nucleic acid sequence corresponding to the input .ser file. Length of sequence is expected to be the same as the total number of columns in the .ser file, minus the index column (even if later on a subset of columns is selected with the *usecols* option). 

31 * **helpar_name** (*str*) - (None) helical parameter name. 

32 * **seqpos** (*list*) - (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence. 

33 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

34 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

35 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

36 

37 Examples: 

38 This is a use example of how to use the building block from Python:: 

39 

40 from biobb_dna.stiffness.average_stiffness import average_stiffness 

41 

42 prop = { 

43 'helpar_name': 'twist', 

44 'sequence': 'GCAT', 

45 } 

46 average_stiffness( 

47 input_ser_path='/path/to/twist.ser', 

48 output_csv_path='/path/to/table/output.csv', 

49 output_jpg_path='/path/to/table/output.jpg', 

50 properties=prop) 

51 Info: 

52 * wrapped_software: 

53 * name: In house 

54 * license: Apache-2.0 

55 * ontology: 

56 * name: EDAM 

57 * schema: http://edamontology.org/EDAM.owl 

58 

59 """ 

60 

61 def __init__( 

62 self, 

63 input_ser_path, 

64 output_csv_path, 

65 output_jpg_path, 

66 properties=None, 

67 **kwargs, 

68 ) -> None: 

69 properties = properties or {} 

70 

71 # Call parent class constructor 

72 super().__init__(properties) 

73 self.locals_var_dict = locals().copy() 

74 

75 # Input/Output files 

76 self.io_dict = { 

77 "in": { 

78 "input_ser_path": input_ser_path, 

79 }, 

80 "out": { 

81 "output_csv_path": output_csv_path, 

82 "output_jpg_path": output_jpg_path, 

83 }, 

84 } 

85 

86 self.properties = properties 

87 self.sequence = properties.get("sequence") 

88 self.KT = properties.get("KT", 0.592186827) 

89 self.seqpos = [ 

90 int(elem) for elem in _from_string_to_list(properties.get("seqpos", None)) 

91 ] 

92 self.helpar_name = properties.get("helpar_name", None) 

93 

94 # Check the properties 

95 self.check_properties(properties) 

96 self.check_arguments() 

97 

98 @launchlogger 

99 def launch(self) -> int: 

100 """Execute the :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` object.""" 

101 

102 # Setup Biobb 

103 if self.check_restart(): 

104 return 0 

105 self.stage_files() 

106 

107 # check sequence 

108 if self.sequence is None or len(self.sequence) < 2: 

109 raise ValueError("sequence is null or too short!") 

110 

111 # get helical parameter from filename if not specified 

112 if self.helpar_name is None: 

113 for hp in constants.helical_parameters: 

114 if ( 

115 hp.lower() 

116 in Path(self.stage_io_dict["in"]["input_ser_path"]).name.lower() 

117 ): 

118 self.helpar_name = hp 

119 if self.helpar_name is None: 

120 raise ValueError( 

121 "Helical parameter name can't be inferred from file, " 

122 "so it must be specified!" 

123 ) 

124 else: 

125 if self.helpar_name not in constants.helical_parameters: 

126 raise ValueError( 

127 "Helical parameter name is invalid! " 

128 f"Options: {constants.helical_parameters}" 

129 ) 

130 

131 # get base length and unit from helical parameter name 

132 if self.helpar_name.lower() in ["roll", "tilt", "twist"]: 

133 self.hp_unit = "kcal/(mol*degree²)" 

134 scale = 1.0 

135 else: 

136 self.hp_unit = "kcal/(mol*Ų)" 

137 scale = 10.6 

138 

139 # check seqpos 

140 if self.seqpos: 

141 if (max(self.seqpos) > len(self.sequence) - 2) or (min(self.seqpos) < 1): 

142 raise ValueError( 

143 f"seqpos values must be between 1 and {len(self.sequence) - 2}" 

144 ) 

145 if not (isinstance(self.seqpos, list) and len(self.seqpos) > 1): 

146 raise ValueError("seqpos must be a list of at least two integers") 

147 else: 

148 self.seqpos = None # type: ignore 

149 

150 # read input .ser file 

151 ser_data = read_series( 

152 self.stage_io_dict["in"]["input_ser_path"], usecols=self.seqpos 

153 ) 

154 if not self.seqpos: 

155 ser_data = ser_data[ser_data.columns[1:-1]] 

156 # discard first and last base(pairs) from sequence 

157 sequence = self.sequence[1:] 

158 xlabels = [f"{sequence[i:i+2]}" for i in range(len(ser_data.columns))] 

159 else: 

160 sequence = self.sequence 

161 xlabels = [f"{sequence[i:i+2]}" for i in self.seqpos] 

162 

163 # calculate average stiffness 

164 cov = ser_data.cov() 

165 stiff = np.linalg.inv(cov) * self.KT 

166 avg_stiffness = np.diag(stiff) * scale 

167 

168 # save plot 

169 fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True) 

170 axs.plot(range(len(xlabels)), avg_stiffness, "-o") 

171 axs.set_xticks(range(len(xlabels))) 

172 axs.set_xticklabels(xlabels) 

173 axs.set_xlabel("Sequence Base Pair") 

174 axs.set_ylabel(f"{self.helpar_name.capitalize()} ({self.hp_unit})") 

175 axs.set_title( 

176 "Base Pair Helical Parameter Stiffness: " f"{self.helpar_name.capitalize()}" 

177 ) 

178 fig.savefig(self.stage_io_dict["out"]["output_jpg_path"], format="jpg") 

179 

180 # save table 

181 dataset = pd.DataFrame( 

182 data=avg_stiffness, index=xlabels, columns=[f"{self.helpar_name}_stiffness"] 

183 ) 

184 dataset.to_csv(self.stage_io_dict["out"]["output_csv_path"]) 

185 

186 plt.close() 

187 

188 # Copy files to host 

189 self.copy_to_host() 

190 

191 # Remove temporary file(s) 

192 self.remove_tmp_files() 

193 

194 self.check_arguments(output_files_created=True, raise_exception=False) 

195 

196 return 0 

197 

198 

199def average_stiffness( 

200 input_ser_path: str, 

201 output_csv_path: str, 

202 output_jpg_path: str, 

203 properties: Optional[dict] = None, 

204 **kwargs, 

205) -> int: 

206 """Create :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` class and 

207 execute the :meth:`launch() <stiffness.average_stiffness.AverageStiffness.launch>` method.""" 

208 return AverageStiffness(**dict(locals())).launch() 

209 

210 

211average_stiffness.__doc__ = AverageStiffness.__doc__ 

212main = AverageStiffness.get_main(average_stiffness, "Calculate average stiffness constants for each base pair of a trajectory's series.") 

213 

214if __name__ == '__main__': 

215 main()