import pandas as pd import numpy as np import os import matplotlib.pyplot as plt import pickle import datetime import logging log = logging.getLogger(__name__) from ..utility.file_io import get_next_filename, sanitize_filename FLUSH_TYPE = "pickle-ndarray" PARTIAL_PREFIX = "PART_" METADATA_FILENAME = PARTIAL_PREFIX + "measurement_metadata.pkl" class PrsData: """ Class managing data and metadata. Can be initialized from data directly, or from a file or directory path. Keys: - dR: delta R, the change in reflectivity. (This is the signal amplitude "R" from the lock-in) - R: the baseline reflectivity. (DC signal measured using Aux In of the lock-in) - theta: phase - _raw: The raw measurement data (all individual samples) data is a dictionary with: - key: wavelength as int - value: dictionary with: - key: quantity as string: ["dR_raw", "R_raw", "theta_raw", "dR", ...] - value: quantity value, or array if "_raw" """ def __init__(self, data: dict | None=None, metadata: dict | None=None, load_data_path: str|None=None, write_data_path: str|None=None, write_data_name: str = "PRS", write_dirname: str | None = None, add_number_if_dir_exists=True, ): if type(metadata) == dict: self.metadata = metadata else: self.metadata = {} self.data = data if data is None and load_data_path is None: raise ValueError("Either path or data must be defined.") if data is not None and load_data_path is not None: raise ValueError("Either path or data must be defined, but not both.") if load_data_path is not None: # load from file if os.path.isdir(load_data_path): self.data, md = PrsData.load_data_from_dir(load_data_path) self.metadata |= md elif os.path.isfile(load_data_path): if load_data_path.endswith(".csv"): self.data, md = PrsData.load_data_from_csv(load_data_path) self.metadata |= md elif load_data_path.endswith(".pkl"): self.data, md = PrsData.load_data_from_pkl(load_data_path) self.metadata |= md else: raise NotImplementedError(f"Only .csv and .pkl files are supported") else: raise FileNotFoundError(f"Path '{load_data_path}' is neither a file nor a directory.") self.wavelengths = [] keys = list(self.data.keys()) for key in keys: # for some reason, the wavelengths can end up as string try: wl = int(key) self.wavelengths.append(wl) self.data[wl] = self.data[key] del self.data[key] except ValueError: pass self.wavelengths.sort() # INIT WRITE MODE self.dirname = None self.path = None self.name = write_data_name if write_data_path: self.path = os.path.abspath(os.path.expanduser(write_data_path)) if write_dirname is None: self.dirname = sanitize_filename(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") + "_" + self.name) else: self.dirname = sanitize_filename(write_dirname) self.dirpath = os.path.join(self.path, self.dirname) if os.path.exists(self.dirpath): if not add_number_if_dir_exists: raise Exception(f"Directory '{self.dirname}' already exists. Provide a different directory or pass `add_number_if_dir_exists=True` to ignore this") else: i = 1 dirpath = f"{self.dirpath}-{i}" while os.path.exists(dirpath): i += 1 dirpath = f"{self.dirpath}-{i}" print(f"Directory '{self.dirname}' already exists. Trying '{dirpath}' instead") self.dirpath = dirpath self._assert_directory_exists() def __setitem__(self, key, value): self.data[key] = value try: wl = int(key) self.wavelengths.append(wl) self.wavelengths.sort() except ValueError: pass def __getitem__(self, key): return self.data[key] def _check_has_wavelength(self, wl): if wl not in self.data: raise KeyError(f"No data for wavelength '{wl}'") def get_for_wl(self, wl, key) -> float: self._check_has_wavelength(wl) if key in self.data[wl]: return self.data[wl][key] elif key == "dR_R": return self.calculate_dR_R_for_wl(wl)[0] elif key == "sdR_R": return self.calculate_dR_R_for_wl(wl)[1] elif f"{key}_raw" in self.data[wl]: vals = self.data[wl][f"{key}_raw"] mean = np.mean(vals) self.data[wl][key] = mean return mean elif key.startswith("s") and f"{key[1:]}_raw" in self.data[wl]: vals = self.data[wl][f"{key[1:]}_raw"] err = np.std(vals) self.data[wl][key] = err return err raise KeyError(f"No '{key}' data for wavelength '{wl}'") def calculate_dR_R_for_wl(self, wl) -> tuple[float, float]: dR, sdR = self.get_for_wl(wl, "dR"), self.get_for_wl(wl, "sdR") R, sR = self.get_for_wl(wl, "R"), self.get_for_wl(wl, "sR") dR_R = dR / R sdR_R = np.sqrt((sdR / R)**2 + (dR * sR/R**2)**2) self.data[wl]["dR_R"] = dR_R self.data[wl]["sdR_R"] = sdR_R return dR_R, sdR_R key_names = { "wl": "Wavelength [nm]", "dR": "dR [V]", "R": "R [V]", "sR": "sigma(R) [V]", "sdR": "sigma(dR) [V]", "dR_R": "dR/R", "sdR_R": "sigma(dR/R)", "theta": "theta [°]", "stheta": "sigma(theta) [°]" } default_spectrum_columns=["wl", "dR", "sdR", "R", "sR", "dR_R", "sdR_R", "theta", "stheta"] labels = { "dR_R": r"$\Delta R/R$", "dR": r"$\Delta R$ [V]", "R": r"$R$ [V]", "theta": r"$\theta$ [°]", "sdR_R": r"$\sigma_{\Delta R/R}$", "sdR": r"$\sigma_{\Delta R}$ [V]", "sR": r"$\sigma_R$ [V]", "stheta": r"$\sigma_\theta$ [°]", } def get_spectrum_data(self, wavelengths=None, keys=None) -> np.ndarray: """ Return the spectral data for the specified keys and wavelengths. :param wavelengths: List of wavelengths, or None to use all wavelengths. :param keys: List of keys, or None to use dR, R, dR_R, Theta :return: numpy.ndarray where the first index is (wavelength=0, ...=1...) and the second is the wavelengths. """ if keys is None: keys = self.default_spectrum_columns if wavelengths is None: wavelengths = self.wavelengths data = np.empty((len(wavelengths), len(keys)), dtype=float) # this might be slow but it shouldnt be called often for j, wl in enumerate(wavelengths): for i, key in enumerate(keys): if key == "wl": data[j][i] = wl else: val = self.get_for_wl(wl, key) data[j][i] = val return data def to_csv(self, sep=","): # self.to_dataframe().to_csv(os.path.join(self.path, self.name + ".csv"), index=False, metadata=True) return PrsData.get_csv(self.get_spectrum_data(), self.metadata, sep=sep) def save_csv_at(self, filepath, sep=",", verbose=False): if verbose: print(f"Writing csv to {filepath}") log.info(f"Writing csv to {filepath}") with open(filepath, "w") as file: file.write(self.to_csv(sep=sep)) def save_csv(self, sep=",", verbose=False): """Save the csv inside the data directory""" filepath = os.path.join(self.path, self.dirname + ".csv") self.save_csv_at(filepath, sep, verbose) # FILE IO def _check_write_mode(self): if self.dirpath is None: raise RuntimeError(f"Can not write data because {__class__.__name__} is not in write mode.") def _assert_directory_exists(self): if not os.path.isdir(self.dirpath): os.makedirs(self.dirpath) def write_partial_file(self, key): self._check_write_mode() if key not in self.data: raise KeyError(f"Invalid key '{key}'") filename = sanitize_filename(PARTIAL_PREFIX + str(key)) + ".pkl" self._assert_directory_exists() filepath = os.path.join(self.dirpath, filename) log.info(f"Writing data '{key}' to {filepath}") with open(filepath, "wb") as file: pickle.dump(self.data[key], file) def write_full_file(self): filename = sanitize_filename("full-data") + ".pkl" self._assert_directory_exists() filepath = os.path.join(self.dirpath, filename) log.info(f"Writing data to {filepath}") with open(filepath, "wb") as file: pickle.dump((self.data, self.metadata), file) def write_metadata(self): f""" Write the metadata to the disk as '{METADATA_FILENAME}' """ filepath = os.path.join(self.dirpath, METADATA_FILENAME) log.debug(f"Writing metadata to {filepath}") with open(filepath, "wb") as file: pickle.dump(self.metadata, file) # STATIC CONVERTER @staticmethod def get_csv(data: np.ndarray, metadata: dict, columns: list, sep=","): csv = "" # metadata md_keys = list(metadata.keys()) md_keys.sort() for k in md_keys: v = metadata[k] if type(v) == dict: csv += f"# {k}:\n" for kk, vv in v.items(): csv += f"# {kk}:{vv}\n" if type(v) == list: csv += f"# {k}:\n" for vv in v: csv += f"# - {vv}\n" else: csv += f"# {k}: {v}\n" # data header csv += "".join(f"{colname}{sep}" for colname in columns).strip(sep) + "\n" # data for i in range(data.shape[0]): csv += f"{data[i, 0]}" for j in range(1, data.shape[1]): csv += f"{sep}{data[i,j]}" csv += "\n" return csv.strip("\n") # STATIC LOADERS # TODO @staticmethod def load_data_from_csv(filepath:str, sep: str=",") -> tuple[dict, dict]: """ Loads data from a single csv file. Lines with this format are interpreted as metadata: # key: value Lines with this format are interpreted as data: index, timestamp [s], CPD [V], LED [%] Parameters ---------- filepath Path to the csv file. sep csv separator Returns ------- data 2D numpy array with shape (n, 4) where n is the number of data points. metadata Dictionary with metadata. """ metadata = {} with open(filepath, "r") as f: # this loop will read the metadata at the beginning and skip also the header row for line in f: if line.startswith("#"): colon = line.find(":") if colon == -1: # normal comment continue key = line[1:colon].strip() value = line[colon+1:].strip() metadata[key] = value else: break # here, the generator has only data lines data = np.loadtxt(f, delimiter=sep) return data, metadata @classmethod def load_data_from_pkl(cls, filepath:str) -> tuple[dict, dict]: """ Loads data from a single pkl file. Parameters ---------- :param filepath Path to the file. :return data 2D numpy array with shape (n, 4) where n is the number of data points. metadata Dictionary with metadata. """ metadata = {} with open(filepath, "rb") as f: obj = pickle.load(f) if isinstance(obj, tuple): if not len(obj) == 2: raise ValueError(f"Pickle file is a tuple with length {len(obj)}, however it must be 2: (data, metadata)") data = obj[0] metadata = obj[1] if not isinstance(data, dict): raise ValueError(f"First object in tuple is not a dictionary but {type(data)}") elif isinstance(obj, dict): data = obj else: raise ValueError(f"Pickled object must be either dict=data or (dict=data, dict=metadata), but is of type {type(obj)}") # must be loaded by now if not isinstance(metadata, dict): raise ValueError(f"Metadata is not a of type dict") return data, metadata @staticmethod def load_data_from_dir(dirpath:str) -> tuple[dict, dict]: """ Combines all data files with the PARTIAL_PREFIX from a directory into a dictionary :param dirpath Path to the data directory :return data, metadata """ files = os.listdir(dirpath) files.sort() data = {} metadata = {} for filename in files: filepath = os.path.join(dirpath, filename) if filename.startswith(PARTIAL_PREFIX): log.debug(f"Loading {filename}") # must be first if filename == METADATA_FILENAME: # Metadata filename must also start with FLUSH_PREFIX with open(filepath, "rb") as file: metadata = pickle.load(file) elif filename.endswith(".csv"): raise NotADirectoryError(f"Partial .csv files are not supported: '{filename}'") elif filename.endswith(".pkl"): key = filename.strip(PARTIAL_PREFIX).strip(".pkl") with open(filepath, "rb") as file: val = pickle.load(file) data[key] = val else: raise NotImplementedError(f"Unknown file extension for file '{filepath}'") else: log.info(f"Skipping unknown file: '{filepath}'") return data, metadata def plot_raw_for_wl(self, wl, what=["dR", "R", "theta"]): self._check_has_wavelength(wl) fig, ax = plt.subplots(len(what)) # no sharex since theta might have less points ax[-1].set_xlabel("Index") fig.suptitle(f"Raw data for $\\lambda = {wl}$ nm") for i, qty in enumerate(what): ax[i].set_ylabel(PrsData.labels[qty]) ax[i].plot(self.data[wl][f"{qty}_raw"]) fig.tight_layout() return fig def plot_spectrum(data: str or pd.DataFrame or np.ndarray, title: str="", errors=False, what=["dR_R"]): """ Plot recorded data Parameters ---------- data : str or np.ndarray Path to the data directory or numpy array with columns PrsData.default_spectrum_columns :param title : str, optional Title for the plot. The default is "". :return fig Matplotlib figure object. """ if type(data) == str: prsdata = PrsData(data) _data = prsdata.get_spectrum_data() elif isinstance(data, PrsData): _data = data.get_spectrum_data() else: _data = data n_plots = len(what) if errors: n_plots *= 2 fig, ax = plt.subplots(n_plots, 1, sharex=True) ax[-1].set_xlabel("Wavelength [nm]") i = 0 colors = { "dR_R": "red", "dR": "green", "R": "blue", "theta": "magenta", } wl_idx = PrsData.default_spectrum_columns.index("wl") for key in what: key_and_err = [key, f"s{key}"] if errors else [key] for k in key_and_err: data_idx = PrsData.default_spectrum_columns.index(k) ax[i].plot(_data[:,wl_idx], _data[:,data_idx], color=colors[key]) ax[i].set_ylabel(PrsData.labels[k]) i += 1 if title: fig.suptitle(title) fig.tight_layout() return fig