import pandas as pd
import numpy as np
import os
import datetime
import pickle
import logging
from abc import abstractmethod

log = logging.getLogger(__name__)

from ..utility.file_io import get_next_filename, sanitize_filename
from ..utility.prsdata import PrsData, FLUSH_TYPE, FLUSH_PREFIX, METADATA_FILENAME

"""
Goals:
- add data during the measurement and write it to disk in snippets
- load data after the measurement, either from raw data (directory) or from computed data (csv)
"""

class DataCollector:
    """
    Manage data collection during a measurement and partial (snippet) storage on disk.

    Data is accumulated in an in-memory container and periodically flushed to a
    dedicated directory; the full dataset can later be loaded back as a PrsData.
    """
    def __init__(self,
                 data_path: str,
                 data_name: str = "PRS",
                 metadata: dict[str, str] | None = None,
                 dirname: str | None = None,
                 add_number_if_dir_exists: bool = True,
                 data_container=list,
                 ):
        """
        Parameters
        ----------
        data_path : str
            Base directory under which the data directory is created.
            '~' is expanded and the path is made absolute.
        data_name : str, optional
            Name of the measurement, used in the directory and file names.
        metadata : dict[str, str], optional
            Metadata stored alongside the data. Defaults to an empty dict.
        dirname : str or None, optional
            Explicit directory name; if None, a timestamped name is generated.
        add_number_if_dir_exists : bool, optional
            If the target directory exists, append `-<i>` instead of raising.
        data_container : type, optional
            Container type used for the in-memory data (e.g. list or dict).

        Raises
        ------
        FileExistsError
            If the directory exists and `add_number_if_dir_exists` is False.
        """
        self.data_type = data_container
        self.data = data_container()
        self.full_data = None   # lazily-built PrsData; None means "not loaded / stale"
        self.name = data_name
        # avoid a shared mutable default argument
        self.metadata = {} if metadata is None else metadata
        self.path = os.path.abspath(os.path.expanduser(data_path))
        if dirname is None:
            self.dirname = sanitize_filename(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") + "_" + self.name)
        else:
            self.dirname = sanitize_filename(dirname)
        self.dirpath = os.path.join(self.path, self.dirname)

        if os.path.exists(self.dirpath):
            if not add_number_if_dir_exists:
                raise FileExistsError(f"Directory '{self.dirname}' already exists. Provide a different directory or pass `add_number_if_dir_exists=True` to ignore this")
            # find the first free numbered variant of the directory
            i = 1
            dirpath = f"{self.dirpath}-{i}"
            while os.path.exists(dirpath):
                i += 1
                dirpath = f"{self.dirpath}-{i}"
            print(f"Directory '{self.dirname}' already exists. Trying '{dirpath}' instead")
            self.dirpath = dirpath
        self.assert_directory_exists()
        self.flushed = False

    # OPERATION
    def clear(self):
        """Drop all in-memory data; files already flushed to disk are untouched."""
        # bug fix: reset to the configured container type (e.g. dict for
        # PrsDataCollector), not unconditionally to a list
        self.data = self.data_type()
        self.full_data = None

    def assert_directory_exists(self):
        """Create the data directory (including parents) if it does not exist."""
        if not os.path.isdir(self.dirpath):
            os.makedirs(self.dirpath)

    def get_data(self) -> PrsData:
        """
        Load the full data (lazily, cached) together with the metadata.

        Returns
        -------
        PrsData
            The full data object built from the data directory and metadata.
        """
        if self.full_data is None:
            self.full_data = PrsData(path=self.dirpath, metadata=self.metadata)
        return self.full_data

    def save_csv_in_dir(self, sep=",", verbose=False):
        """Save the full data as csv inside the directory with the temporary data."""
        self.get_data()
        filepath = os.path.join(self.dirpath, self.dirname + ".csv")
        self.full_data.save_csv_at(filepath, sep, verbose)

    def write_metadata(self):
        """
        Pickle the metadata dict to METADATA_FILENAME inside the data directory.

        Returns
        -------
        None.
        """
        filepath = os.path.join(self.dirpath, METADATA_FILENAME)
        log.debug(f"Writing metadata to {filepath}")
        with open(filepath, "wb") as file:
            pickle.dump(self.metadata, file)


class PrsDataCollector(DataCollector):
    """DataCollector specialization keyed by wavelength, using a dict container."""
    def __init__(self,
                 data_path: str,
                 data_name: str = "PRS",
                 metadata: dict[str, str] | None = None,
                 dirname: str | None = None,
                 add_number_if_dir_exists: bool = True,
                 ):
        super().__init__(data_path, data_name, metadata, dirname, add_number_if_dir_exists, dict)

    # NOTE: the former @abstractmethod decorators were removed — they were
    # no-ops (the class does not use ABCMeta) and both methods are concrete.
    def add_data(self, wavelength, raw):
        """Store raw data for a wavelength and invalidate the cached full data."""
        self.data[wavelength] = raw
        self.full_data = None  # no longer up to date

    def flush(self, verbose: bool = False):
        """
        Write the current data to a file and clear the internal data

        Parameters
        ----------
        verbose : bool, optional
            If True, print a message when flushing data.
            The default is False.

        Raises
        ------
        ValueError
            If the FLUSH_TYPE is invalid.

        Returns
        -------
        None.
        """
        # don't flush empty data
        if len(self.data) == 0:
            return
        self.assert_directory_exists()
        for key, key_data in self.data.items():
            if FLUSH_TYPE == "csv":
                filename = self._get_flush_filename(key) + ".csv"
                filepath = os.path.join(self.dirpath, filename)
                log.info(f"Flushing data to {filepath}")
                if verbose:
                    print(f"Flushing data to {filepath}")
                df = pd.DataFrame(key_data, columns=PrsData.columns)
                # bug fix: `to_csv` has no `metadata` keyword (it raised
                # TypeError) and an ad-hoc `df.meta` attribute is never
                # serialized by pandas; metadata is persisted separately
                # via write_metadata().
                df.to_csv(filepath, sep=",", index=False)
            elif FLUSH_TYPE == "pickle-ndarray":
                filename = self._get_flush_filename(key) + ".ndarray.pkl"
                filepath = os.path.join(self.dirpath, filename)
                log.info(f"Flushing data to {filepath}")
                if verbose:
                    print(f"Flushing data to {filepath}")
                with open(filepath, "wb") as file:
                    pickle.dump(np.array(key_data), file)
            else:
                raise ValueError(f"Invalid FLUSH_TYPE: '{FLUSH_TYPE}'")
        self.clear()

    # File IO
    def _get_flush_filename(self, key):
        """Build a sanitized per-key filename stem: '<name>_<key>'."""
        return sanitize_filename(self.name + "_" + str(key))