import logging
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..utility.file_io import get_next_filename, sanitize_filename

log = logging.getLogger(__name__)

# Format identifier for partial ("flushed") data files written during acquisition.
FLUSH_TYPE = "pickle-ndarray"
# Every partial data file in a measurement directory carries this prefix.
FLUSH_PREFIX = "PART_"
METADATA_FILENAME = FLUSH_PREFIX + "measurement_metadata.pkl"


class PrsData:
    """
    Class managing data and metadata.
    Can be initialized from data directly, or a file or directory path.
    """

    # Column layout of the data array: one row per measurement.
    # NOTE(review): `columns` was referenced throughout this file
    # (self.columns, PrsData.columns, cls.columns) but never defined anywhere,
    # which made to_dataframe/get_csv/load_data_from_pkl raise AttributeError.
    # Defined here from the documented row format
    # "index, timestamp [s], CPD [V], LED [%]" — confirm the header names.
    columns = ["idx", "t [s]", "CPD [V]", "LED [%]"]

    def __init__(self, path: str | None = None, data: np.ndarray | None = None,
                 metadata: dict | None = None, verbose: bool = False):
        """
        Parameters
        ----------
        path
            Path to a data directory, a .csv file or a .pkl file.
            Mutually exclusive with `data`.
        data
            2D array with shape (n, 4): (index, timestamp [s], CPD [V], LED [%]).
            Mutually exclusive with `path`.
        metadata
            Optional metadata dict. Metadata loaded from `path` is merged on
            top of it, so values from disk win on key collisions.
        verbose
            If True, print a message for every file opened while loading.

        Raises
        ------
        ValueError
            If neither or both of `path` and `data` are given.
        FileNotFoundError
            If `path` is neither a file nor a directory.
        NotImplementedError
            If `path` is a file that is neither .csv nor .pkl.
        """
        self.data = data
        self.metadata = metadata if isinstance(metadata, dict) else {}
        if data is None and path is None:
            raise ValueError("Either path or data must be defined.")
        if data is not None and path is not None:
            raise ValueError("Either path or data must be defined, but not both.")
        if path is not None:
            # load from directory or single file
            if os.path.isdir(path):
                self.data, md = PrsData.load_data_from_dir(path, verbose=verbose)
            elif os.path.isfile(path):
                if path.endswith(".csv"):
                    self.data, md = PrsData.load_data_from_csv(path)
                elif path.endswith(".pkl"):
                    self.data, md = PrsData.load_data_from_pkl(path)
                else:
                    raise NotImplementedError(f"Only .csv and .pkl files are supported")
            else:
                raise FileNotFoundError(f"Path '{path}' is neither a file nor a directory.")
            self.metadata |= md

    # CONVERTERS
    def to_dataframe(self) -> pd.DataFrame:
        """Return the data as a DataFrame with `PrsData.columns` as header."""
        df = pd.DataFrame(self.data, columns=self.columns)
        # NOTE: plain attribute, not pandas-native metadata; it is lost by
        # most DataFrame operations (copy, slicing, concat, ...).
        df.meta = str(self.metadata)
        return df

    def to_csv(self, sep: str = ",") -> str:
        """Return data and metadata as a single csv string."""
        return PrsData.get_csv(self.data, self.metadata, sep=sep)

    def save_csv_at(self, filepath: str, sep: str = ",", verbose: bool = False):
        """Write the csv representation of this object to `filepath`."""
        if verbose:
            print(f"Writing csv to {filepath}")
        log.info(f"Writing csv to {filepath}")
        with open(filepath, "w") as file:
            file.write(self.to_csv(sep=sep))

    def save_csv(self, sep: str = ",", verbose: bool = False):
        """Save the csv inside the data directory"""
        # NOTE(review): self.path and self.dirname are never assigned in this
        # class — confirm they are set externally before save_csv is called.
        filepath = os.path.join(self.path, self.dirname + ".csv")
        self.save_csv_at(filepath, sep, verbose)

    # STATIC CONVERTER
    @staticmethod
    def get_csv(data, metadata, sep: str = ",") -> str:
        """
        Build a csv string: one '# key: value' line per metadata entry,
        then a header row, then one line per data point.

        The first column is the re-enumerated row index `i`, not `data[i, 0]`.
        """
        # Collect lines and join once instead of quadratic `csv += ...`.
        lines = [f"# {k}: {v}" for k, v in metadata.items()]
        lines.append(sep.join(PrsData.columns))
        for i in range(data.shape[0]):
            lines.append(f"{i}{sep}{data[i,1]}{sep}{data[i,2]}{sep}{data[i,3]}")
        return "\n".join(lines)

    # STATIC LOADERS
    @staticmethod
    def load_data_from_csv(filepath: str, sep: str = ",") -> tuple[np.ndarray, dict]:
        """
        Loads data from a single csv file.

        Lines with this format are interpreted as metadata:
            # key: value
        Lines with this format are interpreted as data:
            index, timestamp [s], CPD [V], LED [%]

        Parameters
        ----------
        filepath
            Path to the csv file.
        sep
            csv separator

        Returns
        -------
        data
            2D numpy array with shape (n, 4) where n is the number of data points.
        metadata
            Dictionary with metadata.
        """
        metadata = {}
        with open(filepath, "r") as f:
            # Read leading '# key: value' metadata lines. The first
            # non-comment line is assumed to be the header row; the break
            # leaves it consumed, so only data lines remain for loadtxt.
            for line in f:
                if line.startswith("#"):
                    colon = line.find(":")
                    if colon == -1:  # normal comment, no key-value pair
                        continue
                    key = line[1:colon].strip()
                    value = line[colon+1:].strip()
                    metadata[key] = value
                else:
                    break
            # here, the file iterator yields only data lines
            data = np.loadtxt(f, delimiter=sep)
        return data, metadata

    @classmethod
    def load_data_from_pkl(cls, filepath: str) -> tuple[np.ndarray, dict]:
        """
        Loads data from a single pickle file.

        The pickled object must be either a numpy array or a 2-tuple
        (numpy array, metadata dict).

        Parameters
        ----------
        filepath
            Path to the pickle file.

        Returns
        -------
        data
            2D numpy array with shape (n, 4) where n is the number of data points.
        metadata
            Dictionary with metadata.

        Raises
        ------
        ValueError
            If the pickled object has the wrong type, the array has the
            wrong shape, or the metadata is not a dict.
        """
        data = None
        metadata = {}
        # SECURITY: pickle.load executes arbitrary code — only load trusted files.
        with open(filepath, "rb") as f:
            obj = pickle.load(f)
            if isinstance(obj, tuple):
                if not len(obj) == 2:
                    raise ValueError(f"Pickle file is a tuple with length {len(obj)}, however it must be 2: (data, metadata)")
                data, metadata = obj
                if not isinstance(data, np.ndarray):
                    raise ValueError(f"First object in tuple is not a numpy.ndarray")
            elif isinstance(obj, np.ndarray):
                data = obj
            else:
                raise ValueError(f"Pickled object must be either numpy.ndarray or (numpy.ndarray, dict), but is of type {type(obj)}")
        # must be loaded by now
        # BUGFIX: the original `not len(data.shape) == 2 and data.shape[1] == ...`
        # bound `not` too tightly: wrongly shaped 2D arrays slipped through and
        # 1D arrays raised IndexError instead of the intended ValueError.
        if data.ndim != 2 or data.shape[1] != len(cls.columns):
            raise ValueError(f"numpy.ndarray has invalid shape: {data.shape}, however the shape must be (N, {len(cls.columns)})")
        if not isinstance(metadata, dict):
            raise ValueError(f"Metadata is not of type dict")
        return data, metadata

    @staticmethod
    def load_data_from_dir(dirpath: str, verbose: bool = False) -> tuple[np.ndarray, dict]:
        """
        Combines all data files with the FLUSH_PREFIX from a directory into a numpy array

        Parameters
        ----------
        dirpath : str
            Path to the data directory
        verbose : bool, optional
            If True, print a message for every file that is opened. The default is False.

        Raises
        ------
        NotImplementedError
            If a file with the FLUSH_PREFIX has an unrecognized extension.

        Returns
        -------
        data : ndarray
            First index: Measurement
            Second index: (index, timestamp [s], CPD [V], LED [%])
        metadata : dict
            Contents of the METADATA_FILENAME pickle, or {} if absent.
        """
        files = sorted(os.listdir(dirpath))
        data = np.empty((0, 4))
        metadata = {}
        for filename in files:
            filepath = os.path.join(dirpath, filename)
            if filename.startswith(FLUSH_PREFIX):
                if filename.endswith(".csv"):
                    if verbose:
                        print(f"Opening {filepath} as csv")
                    df = pd.read_csv(filepath)
                    data = np.concatenate((data, df.to_numpy()))
                elif filename.endswith(".ndarray.pkl"):
                    if verbose:
                        print(f"Opening {filepath} as pickled ndarray")
                    with open(filepath, "rb") as file:
                        arr = pickle.load(file)
                    if arr.ndim != 2 or arr.shape[1] != 4:
                        print(f"Skipping file '{filepath}' with invalid array shape: {arr.shape}")
                        continue
                    data = np.concatenate((data, arr))
                elif filename == METADATA_FILENAME:
                    # Metadata filename must also start with FLUSH_PREFIX
                    with open(filepath, "rb") as file:
                        metadata = pickle.load(file)
                else:
                    raise NotImplementedError(f"Unknown file extension for file '{filepath}'")
            else:
                log.info(f"Skipping unknown file: '{filepath}'")
        return data, metadata


def plot_cpd_data(data: str | pd.DataFrame | np.ndarray, t: str = "seconds",
                  title: str = "", CPD: bool = True, LED: bool = True):
    """
    Plot recorded data

    Parameters
    ----------
    data : str or pd.DataFrame or np.ndarray
        Path to the data directory or array with columns
        (idx, t [s], V [V], LED [%])
    t : str, optional
        Which timescale to use for the x axis:
        Must be one of "seconds", "minutes", "hours". The default is "seconds".
    title : str, optional
        Title for the plot. The default is "".
    CPD : bool, optional
        Whether to plot the voltage (CPD) line. The default is True.
    LED : bool, optional
        Whether to plot the LED state line. The default is True.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure object.
    """
    # BUGFIX: the original annotation `str or pd.DataFrame or np.ndarray`
    # evaluated to plain `str`, and DataFrames crashed on `_data[:, 1]`.
    if isinstance(data, str):
        _data, _ = PrsData.load_data_from_dir(data)
    elif isinstance(data, pd.DataFrame):
        _data = data.to_numpy()
    else:
        _data = data
    fig, ax = plt.subplots()
    # copy so that the minutes/hours rescale does not mutate the caller's array
    xdata = _data[:, 1].copy()
    xlabel = "t [s]"
    if t == "minutes":
        xdata /= 60
        xlabel = "t [minutes]"
    elif t == "hours":
        xdata /= 3600
        xlabel = "t [hours]"
    ax.set_xlabel(xlabel)
    ax_cpd = ax
    ax_led = ax
    if CPD and LED:
        # second y-axis so CPD [V] and LED [%] can use independent scales
        ax_led = ax.twinx()
    if CPD:
        ax_cpd.set_ylabel("CPD [V]")
        ax_cpd.plot(xdata, _data[:, 2], color="blue", label="CPD")
    if LED:
        ax_led.set_ylabel("LED [%]")
        ax_led.plot(xdata, _data[:, 3], color="orange", label="LED")
        ax_led.set_ylim(-2, 102)
        ax_led.set_yticks([0, 20, 40, 60, 80, 100])
    if title:
        ax.set_title(title)
    fig.tight_layout()
    return fig