Coverage for biobb_dna/utils/loader.py: 62%
26 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
1#!/usr/bin/env python3
3"""Utility functions to load files."""
4import zipfile
5from pathlib import Path
7import pandas as pd
def read_series(input_serfile, usecols=None):
    """Read a whitespace-delimited .ser file into a DataFrame.

    The file is parsed without a header row and its first column becomes
    the index, so the remaining columns are labelled 1, 2, 3, ...

    Args:
        input_serfile: path or file-like object accepted by
            ``pandas.read_csv``.
        usecols: optional sequence of integers selecting columns. One
            ``0`` entry (the first found) is discarded, and every
            remaining value ``i`` selects the column labelled ``i + 1``.
            The sequence itself is never modified.

    Returns:
        pandas.DataFrame with all columns, or only the selected ones
        when ``usecols`` is given.
    """
    ser_data = pd.read_csv(
        input_serfile,
        header=None,
        sep='\\s+',
        index_col=0)  # type: ignore
    if usecols is None:
        return ser_data
    # Work on a private copy: popping from the caller's list would silently
    # change their selection for any later call, and would fail on tuples.
    selected = list(usecols)
    if 0 in selected:
        selected.remove(0)
    return ser_data[[i + 1 for i in selected]]
def load_data(data_filename, inner_file=None):
    """Read a .csv file directly or from inside a .zip archive.

    Args:
        data_filename: path to a ``.csv`` file or to a ``.zip`` archive
            containing one.
        inner_file: name of the member to read when ``data_filename`` is
            a ``.zip``. When omitted, the first member whose name ends
            with ``.csv`` is used.

    Returns:
        pandas.DataFrame parsed with the first column as index.

    Raises:
        IOError: if the extension is neither ``.zip`` nor ``.csv``, or if
            no inner file was given and the archive has no ``.csv`` member.
    """
    suffix = Path(data_filename).suffix
    if suffix == ".csv":
        return pd.read_csv(data_filename, index_col=0)
    if suffix != ".zip":
        raise IOError("input file extension must be .zip or .csv!")

    # Context managers make sure both the archive and the member handle
    # are closed, even if parsing fails.
    with zipfile.ZipFile(data_filename, "r") as zf:
        # use provided data filename or look for csv file
        if inner_file is None:
            print(
                "inner file name not provided, "
                "using first .csv file found inside .zip.")
            inner_file = next(
                (info for info in zf.infolist()
                 if info.filename.endswith(".csv")),
                None)
            if inner_file is None:
                # Fail with a clear message instead of an unbound variable.
                raise IOError(
                    f"no .csv file found inside {data_filename}")
        with zf.open(inner_file) as dataset:
            return pd.read_csv(dataset, index_col=0)