279 lines
9.7 KiB
Python
279 lines
9.7 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
import os
|
|
import matplotlib.pyplot as plt
|
|
import pickle
|
|
import logging
|
|
# Module-level logger, named after this module so its records can be filtered per module.
log = logging.getLogger(__name__)
|
|
|
|
from ..utility.file_io import get_next_filename, sanitize_filename
|
|
|
|
# Identifier of the storage format used for partial data files.
# NOTE(review): not referenced in the visible part of this module - confirm where it is consumed.
FLUSH_TYPE = "pickle-ndarray"

# Only files whose name starts with this prefix are read when loading a data directory.
FLUSH_PREFIX = "PART_"

# Name of the pickled metadata file inside a data directory.
# It carries the flush prefix as well, so the directory loader picks it up.
METADATA_FILENAME = f"{FLUSH_PREFIX}measurement_metadata.pkl"
|
|
|
|
|
|
class PrsData:
|
|
"""
|
|
Class managing data and metadata.
|
|
Can be initialized from data directly, or a file or directory path.
|
|
"""
|
|
def __init__(self, path:str|None=None, data:tuple|None=None, metadata:dict|None=None, verbose=False):
|
|
self.data = data
|
|
if type(metadata) == dict:
|
|
self.metadata = metadata
|
|
else:
|
|
self.metadata = {}
|
|
if data is None and path is None:
|
|
raise ValueError("Either path or data must be defined.")
|
|
if data is not None and path is not None:
|
|
raise ValueError("Either path or data must be defined, but not both.")
|
|
if path is not None: # load from file
|
|
if os.path.isdir(path):
|
|
self.data, md = PrsData.load_data_from_dir(path, verbose=verbose)
|
|
self.metadata |= md
|
|
elif os.path.isfile(path):
|
|
if path.endswith(".csv"):
|
|
self.data, md = PrsData.load_data_from_csv(path)
|
|
self.metadata |= md
|
|
elif path.endswith(".pkl"):
|
|
self.data, md = PrsData.load_data_from_pkl(path)
|
|
self.metadata |= md
|
|
else:
|
|
raise NotImplementedError(f"Only .csv and .pkl files are supported")
|
|
else:
|
|
raise FileNotFoundError(f"Path '{path}' is neither a file nor a directory.")
|
|
else:
|
|
self.data = data
|
|
|
|
# Convert data
|
|
def to_dataframe(self):
|
|
df = pd.DataFrame(self.data, columns=self.columns)
|
|
df.meta = str(self.metadata)
|
|
return df
|
|
|
|
def to_csv(self, sep=","):
|
|
# self.to_dataframe().to_csv(os.path.join(self.path, self.name + ".csv"), index=False, metadata=True)
|
|
return PrsData.get_csv(self.data, self.metadata, sep=sep)
|
|
|
|
|
|
def save_csv_at(self, filepath, sep=",", verbose=False):
|
|
if verbose: print(f"Writing csv to {filepath}")
|
|
log.info(f"Writing csv to {filepath}")
|
|
with open(filepath, "w") as file:
|
|
file.write(self.to_csv(sep=sep))
|
|
|
|
def save_csv(self, sep=",", verbose=False):
|
|
"""Save the csv inside the data directory"""
|
|
filepath = os.path.join(self.path, self.dirname + ".csv")
|
|
self.save_csv_at(filepath, sep, verbose)
|
|
|
|
# STATIC CONVERTER
|
|
@staticmethod
|
|
def get_csv(data, metadata, sep=","):
|
|
csv = ""
|
|
for k, v in metadata.items():
|
|
csv += f"# {k}: {v}\n"
|
|
csv += "".join(f"{colname}{sep}" for colname in PrsData.columns).strip(sep) + "\n"
|
|
for i in range(data.shape[0]):
|
|
csv += f"{i}{sep}{data[i,1]}{sep}{data[i,2]}{sep}{data[i,3]}\n"
|
|
return csv.strip("\n")
|
|
|
|
# STATIC LOADERS
|
|
@staticmethod
|
|
def load_data_from_csv(filepath:str, sep: str=",") -> tuple[np.ndarray, dict]:
|
|
"""
|
|
Loads data from a single csv file.
|
|
Lines with this format are interpreted as metadata:
|
|
# key: value
|
|
Lines with this format are interpreted as data:
|
|
index, timestamp [s], CPD [V], LED [%]
|
|
Parameters
|
|
----------
|
|
filepath
|
|
Path to the csv file.
|
|
sep
|
|
csv separator
|
|
Returns
|
|
-------
|
|
data
|
|
2D numpy array with shape (n, 4) where n is the number of data points.
|
|
metadata
|
|
Dictionary with metadata.
|
|
"""
|
|
metadata = {}
|
|
with open(filepath, "r") as f:
|
|
# this loop will read the metadata at the beginning and skip also the header row
|
|
for line in f:
|
|
if line.startswith("#"):
|
|
colon = line.find(":")
|
|
if colon == -1: # normal comment
|
|
continue
|
|
key = line[1:colon].strip()
|
|
value = line[colon+1:].strip()
|
|
metadata[key] = value
|
|
else:
|
|
break
|
|
# here, the generator has only data lines
|
|
data = np.loadtxt(f, delimiter=sep)
|
|
return data, metadata
|
|
|
|
@classmethod
|
|
def load_data_from_pkl(cls, filepath:str) -> tuple[np.ndarray, dict]:
|
|
"""
|
|
Loads data from a single csv file.
|
|
Lines with this format are interpreted as metadata:
|
|
# key: value
|
|
Lines with this format are interpreted as data:
|
|
index, timestamp [s], CPD [V], LED [%]
|
|
Parameters
|
|
----------
|
|
filepath
|
|
Path to the csv file.
|
|
Returns
|
|
-------
|
|
data
|
|
2D numpy array with shape (n, 4) where n is the number of data points.
|
|
metadata
|
|
Dictionary with metadata.
|
|
"""
|
|
data = None
|
|
metadata = {}
|
|
with open(filepath, "rb") as f:
|
|
obj = pickle.load(f)
|
|
if isinstance(obj, tuple):
|
|
if not len(obj) == 2:
|
|
raise ValueError(f"Pickle file is a tuple with length {len(obj)}, however it must be 2: (data, metadata)")
|
|
data = obj[0]
|
|
metadata = obj[1]
|
|
if not isinstance(data, np.ndarray):
|
|
raise ValueError(f"First object in tuple is not a numpy.ndarray")
|
|
elif isinstance(obj, np.ndarray):
|
|
data = obj
|
|
else:
|
|
raise ValueError(f"Pickled object must be either numpy.ndarray or (numpy.ndarray, dict), but is of type {type(obj)}")
|
|
# must be loaded by now
|
|
if not len(data.shape) == 2 and data.shape[1] == len(cls.columns):
|
|
raise ValueError(f"numpy.ndarray has invalid shape: {data.shape}, however the shape must be (N, {len(cls.columns)})")
|
|
if not isinstance(metadata, dict):
|
|
raise ValueError(f"Metadata is not a of type dict")
|
|
return data, metadata
|
|
|
|
@staticmethod
|
|
def load_data_from_dir(dirpath:str, verbose:bool=False) -> tuple[np.ndarray, dict]:
|
|
"""
|
|
Combines all data files with the FLUSH_PREFIX from a directory into a numpy array
|
|
|
|
Parameters
|
|
----------
|
|
dirpath : str
|
|
Path to the data directory
|
|
verbose : bool, optional
|
|
If True, print a message for every file that is opened. The default is False.
|
|
|
|
Raises
|
|
------
|
|
NotImplementedError
|
|
DESCRIPTION.
|
|
|
|
Returns
|
|
-------
|
|
data : ndarray
|
|
First index: Measurement
|
|
Second index: (index, timestamp [s], CPD [V], LED [%])
|
|
"""
|
|
files = os.listdir(dirpath)
|
|
files.sort()
|
|
data = np.empty((0, 4))
|
|
metadata = {}
|
|
for filename in files:
|
|
filepath = os.path.join(dirpath, filename)
|
|
if filename.startswith(FLUSH_PREFIX):
|
|
if filename.endswith(".csv"):
|
|
if verbose: print(f"Opening {filepath} as csv")
|
|
df = pd.read_csv(filepath)
|
|
arr = df.to_numpy()
|
|
data = np.concatenate((data, arr))
|
|
elif filename.endswith(".ndarray.pkl"):
|
|
with open(filepath, "rb") as file:
|
|
arr = pickle.load(file)
|
|
if len(arr.shape) != 2 or arr.shape[1] != 4:
|
|
print(f"Skipping file '{filepath}' with invalid array shape: {arr.shape}")
|
|
continue
|
|
data = np.concatenate((data, arr))
|
|
elif filename == METADATA_FILENAME: # Metadata filename must also start with FLUSH_PREFIX
|
|
with open(filepath, "rb") as file:
|
|
metadata = pickle.load(file)
|
|
else:
|
|
raise NotImplementedError(f"Unknown file extension for file '{filepath}'")
|
|
else:
|
|
log.info(f"Skipping unknown file: '{filepath}'")
|
|
return data, metadata
|
|
|
|
|
|
def plot_cpd_data(data: str | pd.DataFrame | np.ndarray, t: str = "seconds", title: str = "", CPD: bool = True, LED: bool = True):
    """
    Plot recorded data

    Parameters
    ----------
    data : str or pd.DataFrame or np.ndarray
        Path to the data directory, or
        DataFrame / numpy array with columns (idx, t [s], V [V], LED [%])
    t : str, optional
        Which timescale to use for the x axis:
        Must be one of "seconds", "minutes", "hours".
        Any other value is treated like "seconds".
        The default is "seconds".
    title : str, optional
        Title for the plot. The default is "".
    CPD : bool, optional
        Whether to plot the voltage (CPD) line. The default is True.
    LED : bool, optional
        Whether to plot the LED state line. The default is True.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure object.
    """
    if isinstance(data, str):
        _data, _ = PrsData.load_data_from_dir(data)
    elif isinstance(data, pd.DataFrame):
        # positional [:, i] indexing below needs a plain array
        _data = data.to_numpy()
    else:
        _data = np.asarray(data)

    # timescale -> (seconds per unit, axis label)
    time_scales = {
        "minutes": (60, "t [minutes]"),
        "hours": (3600, "t [hours]"),
    }
    seconds_per_unit, xlabel = time_scales.get(t, (1, "t [s]"))
    # Builds a new float array: the input is not modified and integer
    # timestamps are handled too (an in-place division would fail for them).
    xdata = np.asarray(_data[:, 1], dtype=float) / seconds_per_unit

    fig, ax = plt.subplots()
    ax.set_xlabel(xlabel)

    # The LED line gets its own y axis only if both lines are drawn
    ax_cpd = ax
    ax_led = ax.twinx() if (CPD and LED) else ax

    if CPD:
        ax_cpd.set_ylabel("CPD [V]")
        ax_cpd.plot(xdata, _data[:, 2], color="blue", label="CPD")
    if LED:
        ax_led.set_ylabel("LED [%]")
        ax_led.plot(xdata, _data[:, 3], color="orange", label="LED")
        # fixed percent scale with a small margin so that lines at 0 and 100 stay visible
        ax_led.set_ylim(-2, 102)
        ax_led.set_yticks([0, 20, 40, 60, 80, 100])

    if title:
        ax.set_title(title)
    fig.tight_layout()
    return fig
|
|
|
|
|
|
|
|
|
|
|