Coverage for biobb_dna/stiffness/average_stiffness.py: 74%

94 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-07 09:06 +0000

1#!/usr/bin/env python3 

2"""Module containing the AverageStiffness class and the command line interface.""" 

3 

4import shutil 

5import argparse 

6from pathlib import Path 

7 

8import matplotlib.pyplot as plt 

9import pandas as pd 

10import numpy as np 

11from biobb_dna.utils import constants 

12from biobb_dna.utils.loader import read_series 

13from biobb_common.generic.biobb_object import BiobbObject 

14from biobb_common.tools.file_utils import launchlogger 

15from biobb_common.tools import file_utils as fu 

16from biobb_common.configuration import settings 

17 

18 

19class AverageStiffness(BiobbObject): 

20 """ 

21 | biobb_dna AverageStiffness 

22 | Calculate average stiffness constants for each base pair of a trajectory's series. 

23 

24 Args: 

25 input_ser_path (str): Path to .ser file for helical parameter. File is expected to be a table, with the first column being an index and the rest the helical parameter values for each base/basepair. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/canal_output_roll.ser>`_. Accepted formats: ser (edam:format_2330). 

26 output_csv_path (str): Path to .csv file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.csv>`_. Accepted formats: csv (edam:format_3752). 

27 output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/stiffness/stiffavg_roll.jpg>`_. Accepted formats: jpg (edam:format_3579). 

28 properties (dict): 

29 * **KT** (*float*) - (0.592186827) Value of Boltzmann temperature factor. 

30 * **sequence** (*str*) - (None) Nucleic acid sequence corresponding to the input .ser file. Length of sequence is expected to be the same as the total number of columns in the .ser file, minus the index column (even if later on a subset of columns is selected with the *usecols* option). 

31 * **helpar_name** (*str*) - (None) helical parameter name. 

32 * **seqpos** (*list*) - (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence. 

33 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

34 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

35 

36 Examples: 

37 This is a use example of how to use the building block from Python:: 

38 

39 from biobb_dna.stiffness.average_stiffness import average_stiffness 

40 

41 prop = { 

42 'helpar_name': 'twist', 

43 'sequence': 'GCAT', 

44 } 

45 average_stiffness( 

46 input_ser_path='/path/to/twist.ser', 

47 output_csv_path='/path/to/table/output.csv', 

48 output_jpg_path='/path/to/table/output.jpg', 

49 properties=prop) 

50 Info: 

51 * wrapped_software: 

52 * name: In house 

53 * license: Apache-2.0 

54 * ontology: 

55 * name: EDAM 

56 * schema: http://edamontology.org/EDAM.owl 

57 

58 """ 

59 

60 def __init__(self, input_ser_path, output_csv_path, output_jpg_path, 

61 properties=None, **kwargs) -> None: 

62 properties = properties or {} 

63 

64 # Call parent class constructor 

65 super().__init__(properties) 

66 self.locals_var_dict = locals().copy() 

67 

68 # Input/Output files 

69 self.io_dict = { 

70 'in': { 

71 'input_ser_path': input_ser_path, 

72 }, 

73 'out': { 

74 'output_csv_path': output_csv_path, 

75 'output_jpg_path': output_jpg_path 

76 } 

77 } 

78 

79 self.properties = properties 

80 self.sequence = properties.get("sequence") 

81 self.KT = properties.get( 

82 "KT", 0.592186827) 

83 self.seqpos = properties.get("seqpos", None) 

84 self.helpar_name = properties.get("helpar_name", None) 

85 

86 # Check the properties 

87 self.check_properties(properties) 

88 self.check_arguments() 

89 

90 @launchlogger 

91 def launch(self) -> int: 

92 """Execute the :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` object.""" 

93 

94 # Setup Biobb 

95 if self.check_restart(): 

96 return 0 

97 self.stage_files() 

98 

99 # check sequence 

100 if self.sequence is None or len(self.sequence) < 2: 

101 raise ValueError("sequence is null or too short!") 

102 

103 # get helical parameter from filename if not specified 

104 if self.helpar_name is None: 

105 for hp in constants.helical_parameters: 

106 if hp.lower() in Path( 

107 self.io_dict['in']['input_ser_path']).name.lower(): 

108 self.helpar_name = hp 

109 if self.helpar_name is None: 

110 raise ValueError( 

111 "Helical parameter name can't be inferred from file, " 

112 "so it must be specified!") 

113 else: 

114 if self.helpar_name not in constants.helical_parameters: 

115 raise ValueError( 

116 "Helical parameter name is invalid! " 

117 f"Options: {constants.helical_parameters}") 

118 

119 # get base length and unit from helical parameter name 

120 if self.helpar_name.lower() in ["roll", "tilt", "twist"]: 

121 self.hp_unit = "kcal/(mol*degree²)" 

122 scale = 1 

123 else: 

124 self.hp_unit = "kcal/(mol*Ų)" 

125 scale = 10.6 

126 

127 # check seqpos 

128 if self.seqpos is not None: 

129 if (max(self.seqpos) > len(self.sequence) - 2) or (min(self.seqpos) < 1): 

130 raise ValueError( 

131 f"seqpos values must be between 1 and {len(self.sequence) - 2}") 

132 if not (isinstance(self.seqpos, list) and len(self.seqpos) > 1): 

133 raise ValueError( 

134 "seqpos must be a list of at least two integers") 

135 

136 # Creating temporary folder 

137 self.tmp_folder = fu.create_unique_dir(prefix="avgstiffness_") 

138 fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log) 

139 

140 # Copy input_file_path1 to temporary folder 

141 shutil.copy(self.io_dict['in']['input_ser_path'], self.tmp_folder) 

142 

143 # read input .ser file 

144 ser_data = read_series( 

145 self.io_dict['in']['input_ser_path'], 

146 usecols=self.seqpos) 

147 if self.seqpos is None: 

148 ser_data = ser_data[ser_data.columns[1:-1]] 

149 # discard first and last base(pairs) from sequence 

150 sequence = self.sequence[1:] 

151 xlabels = [ 

152 f"{sequence[i:i+2]}" 

153 for i in range(len(ser_data.columns))] 

154 else: 

155 sequence = self.sequence 

156 xlabels = [ 

157 f"{sequence[i:i+2]}" 

158 for i in self.seqpos] 

159 

160 # calculate average stiffness 

161 cov = ser_data.cov() 

162 stiff = np.linalg.inv(cov) * self.KT 

163 avg_stiffness = np.diag(stiff) * scale 

164 

165 # save plot 

166 fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True) 

167 axs.plot( 

168 range(len(xlabels)), 

169 avg_stiffness, 

170 "-o") 

171 axs.set_xticks(range(len(xlabels))) 

172 axs.set_xticklabels(xlabels) 

173 axs.set_xlabel("Sequence Base Pair") 

174 axs.set_ylabel(f"{self.helpar_name.capitalize()} ({self.hp_unit})") 

175 axs.set_title( 

176 "Base Pair Helical Parameter Stiffness: " 

177 f"{self.helpar_name.capitalize()}") 

178 fig.savefig( 

179 self.io_dict['out']['output_jpg_path'], 

180 format="jpg") 

181 

182 # save table 

183 dataset = pd.DataFrame( 

184 data=avg_stiffness, 

185 index=xlabels, 

186 columns=[f"{self.helpar_name}_stiffness"]) 

187 dataset.to_csv(self.io_dict['out']['output_csv_path']) 

188 

189 plt.close() 

190 

191 # Remove temporary file(s) 

192 self.tmp_files.extend([ 

193 self.stage_io_dict.get("unique_dir"), 

194 self.tmp_folder 

195 ]) 

196 self.remove_tmp_files() 

197 

198 self.check_arguments(output_files_created=True, raise_exception=False) 

199 

200 return 0 

201 

202 

203def average_stiffness( 

204 input_ser_path: str, output_csv_path: str, output_jpg_path: str, 

205 properties: dict = None, **kwargs) -> int: 

206 """Create :class:`AverageStiffness <stiffness.average_stiffness.AverageStiffness>` class and 

207 execute the :meth:`launch() <stiffness.average_stiffness.AverageStiffness.launch>` method.""" 

208 

209 return AverageStiffness( 

210 input_ser_path=input_ser_path, 

211 output_csv_path=output_csv_path, 

212 output_jpg_path=output_jpg_path, 

213 properties=properties, **kwargs).launch() 

214 

215 

216def main(): 

217 """Command line execution of this building block. Please check the command line documentation.""" 

218 parser = argparse.ArgumentParser(description='Calculate average stiffness constants for each base pair of a trajectory\'s series.', 

219 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)) 

220 parser.add_argument('--config', required=False, help='Configuration file') 

221 

222 required_args = parser.add_argument_group('required arguments') 

223 required_args.add_argument('--input_ser_path', required=True, 

224 help='Helical parameter input ser file path. Accepted formats: ser.') 

225 required_args.add_argument('--output_csv_path', required=True, 

226 help='Path to output csv file. Accepted formats: csv.') 

227 required_args.add_argument('--output_jpg_path', required=True, 

228 help='Path to output jpg file. Accepted formats: jpg.') 

229 

230 args = parser.parse_args() 

231 args.config = args.config or "{}" 

232 properties = settings.ConfReader(config=args.config).get_prop_dic() 

233 

234 average_stiffness( 

235 input_ser_path=args.input_ser_path, 

236 output_csv_path=args.output_csv_path, 

237 output_jpg_path=args.output_jpg_path, 

238 properties=properties) 

239 

240 

241if __name__ == '__main__': 

242 main()