Coverage for biobb_dna/stiffness/average_stiffness.py: 73%

92 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 10:36 +0000

1#!/usr/bin/env python3 

2"""Module containing the AverageStiffness class and the command line interface.""" 

3 

4import argparse 

5from pathlib import Path 

6from typing import Optional 

7 

8import matplotlib.pyplot as plt 

9import numpy as np 

10import pandas as pd 

11from biobb_common.configuration import settings 

12from biobb_common.generic.biobb_object import BiobbObject 

13from biobb_common.tools.file_utils import launchlogger 

14 

15from biobb_dna.utils import constants 

16from biobb_dna.utils.common import _from_string_to_list 

17from biobb_dna.utils.loader import read_series 

18 

19 

20class AverageStiffness(BiobbObject): 

21 """ 

22 | biobb_dna AverageStiffness 

23 | Calculate average stiffness constants for each base pair of a trajectory's series. 

24 | Calculate the average stiffness constants for each base pair of a trajectory's series. The input is a .ser file with the helical parameter values for each base/basepair. The output is a .csv file with the average stiffness constants for each base pair and a .jpg file with a plot of the average stiffness constants for each base pair. 

25 

26 Args: 

27 input_ser_path (str): Path to .ser file for helical parameter. File is expected to be a table, with the first column being an index and the rest the helical parameter values for each base/basepair. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/canal_output_roll.ser>`_. Accepted formats: ser (edam:format_2330). 

28 output_csv_path (str): Path to .csv file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.csv>`_. Accepted formats: csv (edam:format_3752). 

29 output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.jpg>`_. Accepted formats: jpg (edam:format_3579). 

30 properties (dict): 

31 * **KT** (*float*) - (0.592186827) Value of Boltzmann temperature factor. 

32 * **sequence** (*str*) - (None) Nucleic acid sequence corresponding to the input .ser file. Length of sequence is expected to be the same as the total number of columns in the .ser file, minus the index column (even if later on a subset of columns is selected with the *usecols* option). 

33 * **helpar_name** (*str*) - (None) helical parameter name. 

34 * **seqpos** (*list*) - (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence. 

35 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

36 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

37 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

38 

39 Examples: 

40 This is a use example of how to use the building block from Python:: 

41 

42 from biobb_dna.stiffness.average_stiffness import average_stiffness 

43 

44 prop = { 

45 'helpar_name': 'twist', 

46 'sequence': 'GCAT', 

47 } 

48 average_stiffness( 

49 input_ser_path='/path/to/twist.ser', 

50 output_csv_path='/path/to/table/output.csv', 

51 output_jpg_path='/path/to/table/output.jpg', 

52 properties=prop) 

53 Info: 

54 * wrapped_software: 

55 * name: In house 

56 * license: Apache-2.0 

57 * ontology: 

58 * name: EDAM 

59 * schema: http://edamontology.org/EDAM.owl 

60 

61 """ 

62 

63 def __init__( 

64 self, 

65 input_ser_path, 

66 output_csv_path, 

67 output_jpg_path, 

68 properties=None, 

69 **kwargs, 

70 ) -> None: 

71 properties = properties or {} 

72 

73 # Call parent class constructor 

74 super().__init__(properties) 

75 self.locals_var_dict = locals().copy() 

76 

77 # Input/Output files 

78 self.io_dict = { 

79 "in": { 

80 "input_ser_path": input_ser_path, 

81 }, 

82 "out": { 

83 "output_csv_path": output_csv_path, 

84 "output_jpg_path": output_jpg_path, 

85 }, 

86 } 

87 

88 self.properties = properties 

89 self.sequence = properties.get("sequence") 

90 self.KT = properties.get("KT", 0.592186827) 

91 self.seqpos = [ 

92 int(elem) for elem in _from_string_to_list(properties.get("seqpos", None)) 

93 ] 

94 self.helpar_name = properties.get("helpar_name", None) 

95 

96 # Check the properties 

97 self.check_properties(properties) 

98 self.check_arguments() 

99 

100 @launchlogger 

101 def launch(self) -> int: 

102 """Execute the :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` object.""" 

103 

104 # Setup Biobb 

105 if self.check_restart(): 

106 return 0 

107 self.stage_files() 

108 

109 # check sequence 

110 if self.sequence is None or len(self.sequence) < 2: 

111 raise ValueError("sequence is null or too short!") 

112 

113 # get helical parameter from filename if not specified 

114 if self.helpar_name is None: 

115 for hp in constants.helical_parameters: 

116 if ( 

117 hp.lower() 

118 in Path(self.stage_io_dict["in"]["input_ser_path"]).name.lower() 

119 ): 

120 self.helpar_name = hp 

121 if self.helpar_name is None: 

122 raise ValueError( 

123 "Helical parameter name can't be inferred from file, " 

124 "so it must be specified!" 

125 ) 

126 else: 

127 if self.helpar_name not in constants.helical_parameters: 

128 raise ValueError( 

129 "Helical parameter name is invalid! " 

130 f"Options: {constants.helical_parameters}" 

131 ) 

132 

133 # get base length and unit from helical parameter name 

134 if self.helpar_name.lower() in ["roll", "tilt", "twist"]: 

135 self.hp_unit = "kcal/(mol*degree²)" 

136 scale = 1.0 

137 else: 

138 self.hp_unit = "kcal/(mol*Ų)" 

139 scale = 10.6 

140 

141 # check seqpos 

142 if self.seqpos: 

143 if (max(self.seqpos) > len(self.sequence) - 2) or (min(self.seqpos) < 1): 

144 raise ValueError( 

145 f"seqpos values must be between 1 and {len(self.sequence) - 2}" 

146 ) 

147 if not (isinstance(self.seqpos, list) and len(self.seqpos) > 1): 

148 raise ValueError("seqpos must be a list of at least two integers") 

149 else: 

150 self.seqpos = None # type: ignore 

151 

152 # read input .ser file 

153 ser_data = read_series( 

154 self.stage_io_dict["in"]["input_ser_path"], usecols=self.seqpos 

155 ) 

156 if not self.seqpos: 

157 ser_data = ser_data[ser_data.columns[1:-1]] 

158 # discard first and last base(pairs) from sequence 

159 sequence = self.sequence[1:] 

160 xlabels = [f"{sequence[i:i+2]}" for i in range(len(ser_data.columns))] 

161 else: 

162 sequence = self.sequence 

163 xlabels = [f"{sequence[i:i+2]}" for i in self.seqpos] 

164 

165 # calculate average stiffness 

166 cov = ser_data.cov() 

167 stiff = np.linalg.inv(cov) * self.KT 

168 avg_stiffness = np.diag(stiff) * scale 

169 

170 # save plot 

171 fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True) 

172 axs.plot(range(len(xlabels)), avg_stiffness, "-o") 

173 axs.set_xticks(range(len(xlabels))) 

174 axs.set_xticklabels(xlabels) 

175 axs.set_xlabel("Sequence Base Pair") 

176 axs.set_ylabel(f"{self.helpar_name.capitalize()} ({self.hp_unit})") 

177 axs.set_title( 

178 "Base Pair Helical Parameter Stiffness: " f"{self.helpar_name.capitalize()}" 

179 ) 

180 fig.savefig(self.stage_io_dict["out"]["output_jpg_path"], format="jpg") 

181 

182 # save table 

183 dataset = pd.DataFrame( 

184 data=avg_stiffness, index=xlabels, columns=[f"{self.helpar_name}_stiffness"] 

185 ) 

186 dataset.to_csv(self.stage_io_dict["out"]["output_csv_path"]) 

187 

188 plt.close() 

189 

190 # Copy files to host 

191 self.copy_to_host() 

192 

193 # Remove temporary file(s) 

194 # self.tmp_files.extend([self.stage_io_dict.get("unique_dir", "")]) 

195 self.remove_tmp_files() 

196 

197 self.check_arguments(output_files_created=True, raise_exception=False) 

198 

199 return 0 

200 

201 

202def average_stiffness( 

203 input_ser_path: str, 

204 output_csv_path: str, 

205 output_jpg_path: str, 

206 properties: Optional[dict] = None, 

207 **kwargs, 

208) -> int: 

209 """Create :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` class and 

210 execute the :meth:`launch() <stiffness.average_stiffness.AverageStiffness.launch>` method.""" 

211 

212 return AverageStiffness( 

213 input_ser_path=input_ser_path, 

214 output_csv_path=output_csv_path, 

215 output_jpg_path=output_jpg_path, 

216 properties=properties, 

217 **kwargs, 

218 ).launch() 

219 

220 average_stiffness.__doc__ = AverageStiffness.__doc__ 

221 

222 

223def main(): 

224 """Command line execution of this building block. Please check the command line documentation.""" 

225 parser = argparse.ArgumentParser( 

226 description="Calculate average stiffness constants for each base pair of a trajectory's series.", 

227 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999), 

228 ) 

229 parser.add_argument("--config", required=False, help="Configuration file") 

230 

231 required_args = parser.add_argument_group("required arguments") 

232 required_args.add_argument( 

233 "--input_ser_path", 

234 required=True, 

235 help="Helical parameter input ser file path. Accepted formats: ser.", 

236 ) 

237 required_args.add_argument( 

238 "--output_csv_path", 

239 required=True, 

240 help="Path to output csv file. Accepted formats: csv.", 

241 ) 

242 required_args.add_argument( 

243 "--output_jpg_path", 

244 required=True, 

245 help="Path to output jpg file. Accepted formats: jpg.", 

246 ) 

247 

248 args = parser.parse_args() 

249 args.config = args.config or "{}" 

250 properties = settings.ConfReader(config=args.config).get_prop_dic() 

251 

252 average_stiffness( 

253 input_ser_path=args.input_ser_path, 

254 output_csv_path=args.output_csv_path, 

255 output_jpg_path=args.output_jpg_path, 

256 properties=properties, 

257 ) 

258 

259 

260if __name__ == "__main__": 

261 main()