Coverage for biobb_dna/intrabp_correlations/intrahpcorr.py: 81%

135 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-07 09:06 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the IntraHelParCorrelation class and the command line interface.""" 

4import argparse 

5 

6import pandas as pd 

7import numpy as np 

8import matplotlib.pyplot as plt 

9 

10from biobb_common.generic.biobb_object import BiobbObject 

11from biobb_common.configuration import settings 

12from biobb_common.tools import file_utils as fu 

13from biobb_common.tools.file_utils import launchlogger 

14from biobb_dna.utils.loader import load_data 

15 

16 

17class IntraHelParCorrelation(BiobbObject): 

18 """ 

19 | biobb_dna IntraHelParCorrelation 

20 | Calculate correlation between helical parameters for a single intra-base pair. 

21 

22 Args: 

23 input_filename_shear (str): Path to .csv file with data for helical parameter 'shear'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_shear_A.csv>`_. Accepted formats: csv (edam:format_3752). 

24 input_filename_stretch (str): Path to .csv file with data for helical parameter 'stretch'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stretch_A.csv>`_. Accepted formats: csv (edam:format_3752). 

25 input_filename_stagger (str): Path to .csv file with data for helical parameter 'stagger'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stagger_A.csv>`_. Accepted formats: csv (edam:format_3752). 

26 input_filename_buckle (str): Path to .csv file with data for helical parameter 'buckle'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_buckle_A.csv>`_. Accepted formats: csv (edam:format_3752). 

27 input_filename_propel (str): Path to .csv file with data for helical parameter 'propeller'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_propel_A.csv>`_. Accepted formats: csv (edam:format_3752). 

28 input_filename_opening (str): Path to .csv file with data for helical parameter 'opening'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_opening_A.csv>`_. Accepted formats: csv (edam:format_3752). 

29 output_csv_path (str): Path to directory where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.csv>`_. Accepted formats: csv (edam:format_3752). 

30 output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.jpg>`_. Accepted formats: jpg (edam:format_3579). 

31 properties (dict): 

32 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

33 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

34 * **base** (*str*) - (None) Name of base analyzed. 

35 

36 Examples: 

37 This is a use example of how to use the building block from Python:: 

38 

39 from biobb_dna.intrabp_correlations.intrahpcorr import intrahpcorr 

40 

41 prop = { 

42 'base': 'A', 

43 } 

44 intrahpcorr( 

45 input_filename_shear='path/to/shear.csv', 

46 input_filename_stretch='path/to/stretch.csv', 

47 input_filename_stagger='path/to/stagger.csv', 

48 input_filename_buckle='path/to/buckle.csv', 

49 input_filename_propel='path/to/propel.csv', 

50 input_filename_opening='path/to/opening.csv', 

51 output_csv_path='path/to/output/file.csv', 

52 output_jpg_path='path/to/output/file.jpg', 

53 properties=prop) 

54 Info: 

55 * wrapped_software: 

56 * name: In house 

57 * license: Apache-2.0 

58 * ontology: 

59 * name: EDAM 

60 * schema: http://edamontology.org/EDAM.owl 

61 

62 """ 

63 

64 def __init__( 

65 self, input_filename_shear, input_filename_stretch, 

66 input_filename_stagger, input_filename_buckle, 

67 input_filename_propel, input_filename_opening, 

68 output_csv_path, output_jpg_path, 

69 properties=None, **kwargs) -> None: 

70 properties = properties or {} 

71 

72 # Call parent class constructor 

73 super().__init__(properties) 

74 self.locals_var_dict = locals().copy() 

75 

76 # Input/Output files 

77 self.io_dict = { 

78 'in': { 

79 'input_filename_shear': input_filename_shear, 

80 'input_filename_stretch': input_filename_stretch, 

81 'input_filename_stagger': input_filename_stagger, 

82 'input_filename_buckle': input_filename_buckle, 

83 'input_filename_propel': input_filename_propel, 

84 'input_filename_opening': input_filename_opening 

85 }, 

86 'out': { 

87 'output_csv_path': output_csv_path, 

88 'output_jpg_path': output_jpg_path 

89 } 

90 } 

91 

92 self.properties = properties 

93 self.base = properties.get("base", None) 

94 

95 # Check the properties 

96 self.check_properties(properties) 

97 self.check_arguments() 

98 

99 @launchlogger 

100 def launch(self) -> int: 

101 """Execute the :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` object.""" 

102 

103 # Setup Biobb 

104 if self.check_restart(): 

105 return 0 

106 self.stage_files() 

107 

108 # Creating temporary folder 

109 self.tmp_folder = fu.create_unique_dir(prefix="hpcorrelation_") 

110 fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log) 

111 

112 # read input 

113 shear = load_data(self.io_dict["in"]["input_filename_shear"]) 

114 stretch = load_data(self.io_dict["in"]["input_filename_stretch"]) 

115 stagger = load_data(self.io_dict["in"]["input_filename_stagger"]) 

116 buckle = load_data(self.io_dict["in"]["input_filename_buckle"]) 

117 propel = load_data(self.io_dict["in"]["input_filename_propel"]) 

118 opening = load_data(self.io_dict["in"]["input_filename_opening"]) 

119 

120 # get base 

121 if self.base is None: 

122 self.base = shear.columns[0] 

123 

124 # make matrix 

125 # coordinates = ["shear", "stretch", "stagger", "buckle", "propel", "opening"] 

126 coordinates = [ 

127 "shear", "stretch", "stagger", "buckle", "propel", "opening"] 

128 corr_matrix = pd.DataFrame( 

129 np.eye(6, 6), index=coordinates, columns=coordinates) 

130 

131 # shear 

132 corr_matrix["shear"]["stretch"] = shear.corrwith( 

133 stretch, method="pearson") 

134 corr_matrix["shear"]["stagger"] = shear.corrwith( 

135 stagger, method="pearson") 

136 corr_matrix["shear"]["buckle"] = shear.corrwith( 

137 buckle, method=self.circlineal) 

138 corr_matrix["shear"]["propel"] = shear.corrwith( 

139 propel, method=self.circlineal) 

140 corr_matrix["shear"]["opening"] = shear.corrwith( 

141 opening, method=self.circlineal) 

142 # symmetric values 

143 corr_matrix["stretch"]["shear"] = corr_matrix["shear"]["stretch"] 

144 corr_matrix["stagger"]["shear"] = corr_matrix["shear"]["stagger"] 

145 corr_matrix["buckle"]["shear"] = corr_matrix["shear"]["buckle"] 

146 corr_matrix["propel"]["shear"] = corr_matrix["shear"]["propel"] 

147 corr_matrix["opening"]["shear"] = corr_matrix["shear"]["opening"] 

148 

149 # stretch 

150 corr_matrix["stretch"]["stagger"] = stretch.corrwith( 

151 stagger, method="pearson") 

152 corr_matrix["stretch"]["buckle"] = stretch.corrwith( 

153 buckle, method=self.circlineal) 

154 corr_matrix["stretch"]["propel"] = stretch.corrwith( 

155 propel, method=self.circlineal) 

156 corr_matrix["stretch"]["opening"] = stretch.corrwith( 

157 opening, method=self.circlineal) 

158 # symmetric values 

159 corr_matrix["stagger"]["stretch"] = corr_matrix["stretch"]["stagger"] 

160 corr_matrix["buckle"]["stretch"] = corr_matrix["stretch"]["buckle"] 

161 corr_matrix["propel"]["stretch"] = corr_matrix["stretch"]["propel"] 

162 corr_matrix["opening"]["stretch"] = corr_matrix["stretch"]["opening"] 

163 

164 # stagger 

165 corr_matrix["stagger"]["buckle"] = stagger.corrwith( 

166 buckle, method=self.circlineal) 

167 corr_matrix["stagger"]["propel"] = stagger.corrwith( 

168 propel, method=self.circlineal) 

169 corr_matrix["stagger"]["opening"] = stagger.corrwith( 

170 opening, method=self.circlineal) 

171 # symmetric values 

172 corr_matrix["buckle"]["stagger"] = corr_matrix["stagger"]["buckle"] 

173 corr_matrix["propel"]["stagger"] = corr_matrix["stagger"]["propel"] 

174 corr_matrix["opening"]["stagger"] = corr_matrix["stagger"]["opening"] 

175 

176 # buckle 

177 corr_matrix["buckle"]["propel"] = buckle.corrwith( 

178 propel, method=self.circular) 

179 corr_matrix["buckle"]["opening"] = buckle.corrwith( 

180 opening, method=self.circular) 

181 # symmetric values 

182 corr_matrix["propel"]["buckle"] = corr_matrix["buckle"]["propel"] 

183 corr_matrix["opening"]["buckle"] = corr_matrix["buckle"]["opening"] 

184 

185 # propel 

186 corr_matrix["propel"]["opening"] = propel.corrwith( 

187 opening, method=self.circular) 

188 # symmetric values 

189 corr_matrix["opening"]["propel"] = corr_matrix["propel"]["opening"] 

190 

191 # save csv data 

192 corr_matrix.to_csv(self.io_dict["out"]["output_csv_path"]) 

193 

194 # create heatmap 

195 fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True) 

196 axs.pcolor(corr_matrix) 

197 # Loop over data dimensions and create text annotations. 

198 for i in range(len(corr_matrix)): 

199 for j in range(len(corr_matrix)): 

200 axs.text( 

201 j+.5, 

202 i+.5, 

203 f"{corr_matrix[coordinates[j]].loc[coordinates[i]]:.2f}", 

204 ha="center", 

205 va="center", 

206 color="w") 

207 axs.set_xticks([i + 0.5 for i in range(len(corr_matrix))]) 

208 axs.set_xticklabels(corr_matrix.columns, rotation=90) 

209 axs.set_yticks([i+0.5 for i in range(len(corr_matrix))]) 

210 axs.set_yticklabels(corr_matrix.index) 

211 axs.set_title( 

212 "Helical Parameter Correlation " 

213 f"for Base Pair Step \'{self.base}\'") 

214 fig.tight_layout() 

215 fig.savefig( 

216 self.io_dict['out']['output_jpg_path'], 

217 format="jpg") 

218 plt.close() 

219 

220 # Remove temporary file(s) 

221 self.tmp_files.extend([ 

222 self.stage_io_dict.get("unique_dir"), 

223 self.tmp_folder 

224 ]) 

225 self.remove_tmp_files() 

226 

227 self.check_arguments(output_files_created=True, raise_exception=False) 

228 

229 return 0 

230 

231 def get_corr_method(self, corrtype1, corrtype2): 

232 if corrtype1 == "circular" and corrtype2 == "linear": 

233 method = self.circlineal 

234 if corrtype1 == "linear" and corrtype2 == "circular": 

235 method = self.circlineal 

236 elif corrtype1 == "circular" and corrtype2 == "circular": 

237 method = self.circular 

238 else: 

239 method = "pearson" 

240 return method 

241 

242 @staticmethod 

243 def circular(x1, x2): 

244 x1 = x1 * np.pi / 180 

245 x2 = x2 * np.pi / 180 

246 diff_1 = np.sin(x1 - x1.mean()) 

247 diff_2 = np.sin(x2 - x2.mean()) 

248 num = (diff_1 * diff_2).sum() 

249 den = np.sqrt((diff_1 ** 2).sum() * (diff_2 ** 2).sum()) 

250 return num / den 

251 

252 @staticmethod 

253 def circlineal(x1, x2): 

254 x2 = x2 * np.pi / 180 

255 rc = np.corrcoef(x1, np.cos(x2))[1, 0] 

256 rs = np.corrcoef(x1, np.sin(x2))[1, 0] 

257 rcs = np.corrcoef(np.sin(x2), np.cos(x2))[1, 0] 

258 num = (rc ** 2) + (rs ** 2) - 2 * rc * rs * rcs 

259 den = 1 - (rcs ** 2) 

260 correlation = np.sqrt(num / den) 

261 if np.corrcoef(x1, x2)[1, 0] < 0: 

262 correlation *= -1 

263 return correlation 

264 

265 

266def intrahpcorr( 

267 input_filename_shear: str, input_filename_stretch: str, 

268 input_filename_stagger: str, input_filename_buckle: str, 

269 input_filename_propel: str, input_filename_opening: str, 

270 output_csv_path: str, output_jpg_path: str, 

271 properties: dict = None, **kwargs) -> int: 

272 """Create :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` class and 

273 execute the :meth:`launch() <intrabp_correlations.intrahpcorr.IntraHelParCorrelation.launch>` method.""" 

274 

275 return IntraHelParCorrelation( 

276 input_filename_shear=input_filename_shear, 

277 input_filename_stretch=input_filename_stretch, 

278 input_filename_stagger=input_filename_stagger, 

279 input_filename_buckle=input_filename_buckle, 

280 input_filename_propel=input_filename_propel, 

281 input_filename_opening=input_filename_opening, 

282 output_csv_path=output_csv_path, 

283 output_jpg_path=output_jpg_path, 

284 properties=properties, **kwargs).launch() 

285 

286 

287def main(): 

288 """Command line execution of this building block. Please check the command line documentation.""" 

289 parser = argparse.ArgumentParser(description='Load helical parameter file and save base data individually.', 

290 formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)) 

291 parser.add_argument('--config', required=False, help='Configuration file') 

292 

293 required_args = parser.add_argument_group('required arguments') 

294 required_args.add_argument('--input_filename_shear', required=True, 

295 help='Path to csv file with inputs. Accepted formats: csv.') 

296 required_args.add_argument('--input_filename_stretch', required=True, 

297 help='Path to csv file with inputs. Accepted formats: csv.') 

298 required_args.add_argument('--input_filename_stagger', required=True, 

299 help='Path to csv file with inputs. Accepted formats: csv.') 

300 required_args.add_argument('--input_filename_buckle', required=True, 

301 help='Path to csv file with inputs. Accepted formats: csv.') 

302 required_args.add_argument('--input_filename_propel', required=True, 

303 help='Path to csv file with inputs. Accepted formats: csv.') 

304 required_args.add_argument('--input_filename_opening', required=True, 

305 help='Path to csv file with inputs. Accepted formats: csv.') 

306 required_args.add_argument('--output_csv_path', required=True, 

307 help='Path to output file. Accepted formats: csv.') 

308 required_args.add_argument('--output_jpg_path', required=True, 

309 help='Path to output file. Accepted formats: csv.') 

310 

311 args = parser.parse_args() 

312 args.config = args.config or "{}" 

313 properties = settings.ConfReader(config=args.config).get_prop_dic() 

314 

315 intrahpcorr( 

316 input_filename_shear=args.input_filename_shear, 

317 input_filename_stretch=args.input_filename_stretch, 

318 input_filename_stagger=args.input_filename_stagger, 

319 input_filename_buckle=args.input_filename_buckle, 

320 input_filename_propel=args.input_filename_propel, 

321 input_filename_opening=args.input_filename_opening, 

322 output_csv_path=args.output_csv_path, 

323 output_jpg_path=args.output_jpg_path, 

324 properties=properties) 

325 

326 

327if __name__ == '__main__': 

328 main()