# Snapshot metadata (paste header): 2025-05-02 15:48:36 +02:00 — 281 lines, 9.7 KiB, Python

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
import logging
log = logging.getLogger(__name__)
from cpdctrl.utility.file_io import get_next_filename, sanitize_filename
FLUSH_TYPE = "pickle-ndarray"
FLUSH_PREFIX = "PART_"
METADATA_FILENAME = FLUSH_PREFIX + "measurement_metadata.pkl"
class CpdData:
"""
Class managing data and metadata.
Can be initialized from data directly, or a file or directory path.
"""
columns = ["idx", "t [s]", "V [V]", "LED [%]"]
def __init__(self, path:str|None=None, data:np.ndarray|None=None, metadata:dict|None=None, verbose=False):
self.data = data
if type(metadata) == dict:
self.metadata = metadata
else:
self.metadata = {}
if data is None and path is None:
raise ValueError("Either path or data must be defined.")
if data is not None and path is not None:
raise ValueError("Either path or data must be defined, but not both.")
if path is not None: # load from file
if os.path.isdir(path):
self.data, md = CpdData.load_data_from_dir(path, verbose=verbose)
self.metadata |= md
elif os.path.isfile(path):
if path.endswith(".csv"):
self.data, md = CpdData.load_data_from_csv(path)
self.metadata |= md
elif path.endswith(".pkl"):
self.data, md = CpdData.load_data_from_pkl(path)
self.metadata |= md
else:
raise NotImplementedError(f"Only .csv and .pkl files are supported")
else:
raise FileNotFoundError(f"Path '{path}' is neither a file nor a directory.")
else:
self.data = data
# Convert data
def to_dataframe(self):
df = pd.DataFrame(self.data, columns=CpdData.columns)
df.meta = str(self.metadata)
return df
def to_csv(self, sep=","):
# self.to_dataframe().to_csv(os.path.join(self.path, self.name + ".csv"), index=False, metadata=True)
return CpdData.get_csv(self.data, self.metadata, sep=sep)
def save_csv_at(self, filepath, sep=",", verbose=False):
if verbose: print(f"Writing csv to {filepath}")
log.info(f"Writing csv to {filepath}")
with open(filepath, "w") as file:
file.write(self.to_csv(sep=sep))
def save_csv(self, sep=",", verbose=False):
"""Save the csv inside the data directory"""
filepath = os.path.join(self.path, self.dirname + ".csv")
self.save_csv_at(filepath, sep, verbose)
# STATIC CONVERTER
@staticmethod
def get_csv(data, metadata, sep=","):
csv = ""
for k, v in metadata.items():
csv += f"# {k}: {v}\n"
csv += "".join(f"{colname}{sep}" for colname in CpdData.columns).strip(sep) + "\n"
for i in range(data.shape[0]):
csv += f"{i}{sep}{data[i,1]}{sep}{data[i,2]}{sep}{data[i,3]}\n"
return csv.strip("\n")
# STATIC LOADERS
@staticmethod
def load_data_from_csv(filepath:str, sep: str=",") -> tuple[np.ndarray, dict]:
"""
Loads data from a single csv file.
Lines with this format are interpreted as metadata:
# key: value
Lines with this format are interpreted as data:
index, timestamp [s], CPD [V], LED [%]
Parameters
----------
filepath
Path to the csv file.
sep
csv separator
Returns
-------
data
2D numpy array with shape (n, 4) where n is the number of data points.
metadata
Dictionary with metadata.
"""
metadata = {}
with open(filepath, "r") as f:
# this loop will read the metadata at the beginning and skip also the header row
for line in f:
if line.startswith("#"):
colon = line.find(":")
if colon == -1: # normal comment
continue
key = line[1:colon].strip()
value = line[colon+1:].strip()
metadata[key] = value
else:
break
# here, the generator has only data lines
data = np.loadtxt(f, delimiter=sep)
return data, metadata
@staticmethod
def load_data_from_pkl(filepath:str) -> tuple[np.ndarray, dict]:
"""
Loads data from a single csv file.
Lines with this format are interpreted as metadata:
# key: value
Lines with this format are interpreted as data:
index, timestamp [s], CPD [V], LED [%]
Parameters
----------
filepath
Path to the csv file.
sep
csv separator
Returns
-------
data
2D numpy array with shape (n, 4) where n is the number of data points.
metadata
Dictionary with metadata.
"""
data = None
metadata = {}
with open(filepath, "rb") as f:
obj = pickle.load(f)
if isinstance(obj, tuple):
if not len(obj) == 2:
raise ValueError(f"Pickle file is a tuple with length {len(obj)}, however it must be 2: (data, metadata)")
data = obj[0]
metadata = obj[1]
if not isinstance(data, np.ndarray):
raise ValueError(f"First object in tuple is not a numpy.ndarray")
elif isinstance(obj, np.ndarray):
data = obj
else:
raise ValueError(f"Pickled object must be either numpy.ndarray or (numpy.ndarray, dict), but is of type {type(obj)}")
# must be loaded by now
if not len(data.shape) == 2 and data.shape[1] == 4:
raise ValueError(f"numpy.ndarray has invalid shape: {data.shape}, however the shape must be (N, 4)")
if not isinstance(metadata, dict):
raise ValueError(f"Metadata is not a of type dict")
return data, metadata
@staticmethod
def load_data_from_dir(dirpath:str, verbose:bool=False) -> tuple[np.ndarray, dict]:
"""
Combines all data files with the FLUSH_PREFIX from a directory into a numpy array
Parameters
----------
dirpath : str
Path to the data directory
verbose : bool, optional
If True, print a message for every file that is opened. The default is False.
Raises
------
NotImplementedError
DESCRIPTION.
Returns
-------
data : ndarray
First index: Measurement
Second index: (index, timestamp [s], CPD [V], LED [%])
"""
files = os.listdir(dirpath)
files.sort()
data = np.empty((0, 4))
metadata = {}
for filename in files:
filepath = os.path.join(dirpath, filename)
if filename.startswith(FLUSH_PREFIX):
if filename.endswith(".csv"):
if verbose: print(f"Opening {filepath} as csv")
df = pd.read_csv(filepath)
arr = df.to_numpy()
data = np.concatenate((data, arr))
elif filename.endswith(".ndarray.pkl"):
with open(filepath, "rb") as file:
arr = pickle.load(file)
if len(arr.shape) != 2 or arr.shape[1] != 4:
print(f"Skipping file '{filepath}' with invalid array shape: {arr.shape}")
continue
data = np.concatenate((data, arr))
elif filename == METADATA_FILENAME: # Metadata filename must also start with FLUSH_PREFIX
with open(filepath, "rb") as file:
metadata = pickle.load(file)
else:
raise NotImplementedError(f"Unknown file extension for file '{filepath}'")
else:
log.info(f"Skipping unknown file: '{filepath}'")
return data, metadata
def plot_cpd_data(data: str | pd.DataFrame | np.ndarray, t: str = "seconds", title: str = "", CPD: bool = True, LED: bool = True):
    """
    Plot recorded data.

    Parameters
    ----------
    data : str or pd.DataFrame or np.ndarray
        Path to the data directory or
        numpy array with columns (idx, t [s], V [V], LED [%])
    t : str, optional
        Which timescale to use for the x axis:
        Must be one of "seconds", "minutes", "hours".
        The default is "seconds".
    title : str, optional
        Title for the plot. The default is "".
    CPD : bool, optional
        Whether to plot the voltage (CPD) line. The default is True.
    LED : bool, optional
        Whether to plot the LED state line. The default is True.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure object.
    """
    # Bugfix (annotation): the original `str or pd.DataFrame or np.ndarray`
    # evaluates to plain `str`; a PEP 604 union expresses the intent.
    if isinstance(data, str):
        _data, _ = CpdData.load_data_from_dir(data)
    else:
        # NOTE(review): a DataFrame input would fail at the `_data[:, 1]`
        # indexing below — presumably callers pass ndarrays; verify.
        _data = data
    fig, ax = plt.subplots()
    xdata = _data[:, 1].copy()
    xlabel = "t [s]"
    if t == "minutes":
        xdata /= 60
        xlabel = "t [minutes]"
    elif t == "hours":
        xdata /= 3600
        xlabel = "t [hours]"
    ax.set_xlabel(xlabel)
    # When both curves share the figure, the LED line gets its own y axis
    # so the percentage scale does not squash the CPD voltage scale.
    ax_cpd = ax
    ax_led = ax.twinx() if (CPD and LED) else ax
    if CPD:
        ax_cpd.set_ylabel("CPD [V]")
        ax_cpd.plot(xdata, _data[:, 2], color="blue", label="CPD")
    if LED:
        ax_led.set_ylabel("LED [%]")
        ax_led.plot(xdata, _data[:, 3], color="orange", label="LED")
        ax_led.set_ylim(-2, 102)
        ax_led.set_yticks([0, 20, 40, 60, 80, 100])
    if title:
        ax.set_title(title)
    fig.tight_layout()
    return fig