164 lines
5.5 KiB
Python
164 lines
5.5 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
import os
|
|
import datetime
|
|
import pickle
|
|
import logging
|
|
from abc import abstractmethod
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
from ..utility.file_io import get_next_filename, sanitize_filename
|
|
from ..utility.prsdata import PrsData, FLUSH_TYPE, FLUSH_PREFIX, METADATA_FILENAME
|
|
|
|
"""
Goals:
- Add data during the measurement and write it to disk in snippets
- Load data after the measurement, either from the raw data (directory) or from the computed data (csv)
"""
|
|
|
|
class DataCollector:
|
|
"""
|
|
Class managing data collection and partial storage
|
|
"""
|
|
def __init__(self,
|
|
data_path: str,
|
|
data_name: str="PRS",
|
|
metadata: dict[str, str]={},
|
|
dirname: str|None=None,
|
|
add_number_if_dir_exists=True,
|
|
data_container=list,
|
|
):
|
|
self.data_type = data_container
|
|
self.data = data_container()
|
|
self.full_data = None # if loaded, this contains the final numpy array
|
|
self.name = data_name
|
|
self.metadata = metadata
|
|
self.path = os.path.abspath(os.path.expanduser(data_path))
|
|
if dirname is None:
|
|
self.dirname = sanitize_filename(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") + "_" + self.name)
|
|
else:
|
|
self.dirname = sanitize_filename(dirname)
|
|
self.dirpath = os.path.join(self.path, self.dirname)
|
|
|
|
if os.path.exists(self.dirpath):
|
|
if not add_number_if_dir_exists:
|
|
raise Exception(f"Directory '{self.dirname}' already exists. Provide a different directory or pass `add_number_if_dir_exists=True` to ignore this")
|
|
else:
|
|
i = 1
|
|
dirpath = f"{self.dirpath}-{i}"
|
|
while os.path.exists(dirpath):
|
|
i += 1
|
|
dirpath = f"{self.dirpath}-{i}"
|
|
print(f"Directory '{self.dirname}' already exists. Trying '{dirpath}' instead")
|
|
self.dirpath = dirpath
|
|
self.assert_directory_exists()
|
|
self.flushed = False
|
|
|
|
|
|
# OPERATION
|
|
def clear(self):
|
|
self.data = []
|
|
self.full_data = None
|
|
|
|
|
|
def assert_directory_exists(self):
|
|
if not os.path.isdir(self.dirpath):
|
|
os.makedirs(self.dirpath)
|
|
|
|
def get_data(self) -> PrsData:
|
|
"""
|
|
Load the full data and return it together with the metadata
|
|
Returns
|
|
-------
|
|
tuple[np.ndarray, dict]
|
|
The full data and the metadata
|
|
"""
|
|
if self.full_data is None:
|
|
self.full_data = PrsData(path=self.dirpath, metadata=self.metadata)
|
|
return self.full_data
|
|
|
|
def save_csv_in_dir(self, sep=",", verbose=False):
|
|
"""Save full data as csv inside the directory with temporary data"""
|
|
self.get_data()
|
|
filepath = os.path.join(self.dirpath, self.dirname + ".csv")
|
|
self.full_data.save_csv_at(filepath, sep, verbose)
|
|
|
|
def write_metadata(self):
|
|
f"""
|
|
Write the metadata to the disk as '{METADATA_FILENAME}'
|
|
|
|
Returns
|
|
-------
|
|
None.
|
|
"""
|
|
filepath = os.path.join(self.dirpath, METADATA_FILENAME)
|
|
log.debug(f"Writing metadata to {filepath}")
|
|
with open(filepath, "wb") as file:
|
|
pickle.dump(self.metadata, file)
|
|
|
|
class PrsDataCollector(DataCollector):
    """
    Data collector keeping one raw data set per wavelength.

    The in-memory container is a dict mapping a wavelength to the raw data
    added for it. `flush` writes every entry to its own file inside the
    data directory and then empties the container.
    """

    def __init__(self,
                 data_path: str,
                 data_name: str = "PRS",
                 metadata: dict[str, str] | None = None,
                 dirname: str | None = None,
                 add_number_if_dir_exists=True,
                 ):
        """
        Initialise the collector with a dict as in-memory container.

        Parameters
        ----------
        data_path : str
            Parent directory in which the data directory is created.
        data_name : str, optional
            Name of the data set; also the prefix of the flushed files.
        metadata : dict[str, str] or None, optional
            Metadata of the measurement. ``None`` (the default) creates a
            new empty dict for this instance.
        dirname : str or None, optional
            Name of the data directory, forwarded to the base class.
        add_number_if_dir_exists : bool, optional
            Forwarded to the base class.
        """
        # A `{}` default in the signature would be shared by all instances
        if metadata is None:
            metadata = {}
        super().__init__(data_path, data_name, metadata, dirname, add_number_if_dir_exists, dict)

    def clear(self):
        """Discard the in-memory data and the cached full data."""
        # The container must stay a dict: add_data assigns by wavelength key
        # and flush iterates over .items(), also after a previous flush.
        self.data = {}
        self.full_data = None

    # NOTE: add_data and flush are concrete implementations. They are not
    # marked with @abstractmethod, which has no effect on a class that does
    # not use ABCMeta and would wrongly suggest they must be overridden.
    def add_data(self, wavelength, raw):
        """
        Store the raw data of one wavelength in memory.

        Data previously stored under the same wavelength is replaced.

        Parameters
        ----------
        wavelength
            Key under which the data is stored.
        raw
            The raw data belonging to this wavelength.
        """
        self.data[wavelength] = raw
        self.full_data = None  # no longer up to date

    def flush(self, verbose: bool = False):
        """
        Write the current data to a file and clear the internal data

        One file is written per key of the internal data, in the format
        selected by FLUSH_TYPE.

        Parameters
        ----------
        verbose : bool, optional
            If True, print a message when flushing data. The default is False.

        Raises
        ------
        ValueError
            If the FLUSH_TYPE is invalid.

        Returns
        -------
        None.

        """
        # dont flush empty data
        if not self.data:
            return
        self.assert_directory_exists()

        # Validate the format once, before anything is written
        if FLUSH_TYPE == "csv":
            suffix = ".csv"
        elif FLUSH_TYPE == "pickle-ndarray":
            suffix = ".ndarray.pkl"
        else:
            raise ValueError(f"Invalid FLUSH_TYPE: '{FLUSH_TYPE}'")

        for key, key_data in self.data.items():
            filepath = os.path.join(self.dirpath, self._get_flush_filename(key) + suffix)
            log.info("Flushing data to %s", filepath)
            if verbose:
                print(f"Flushing data to {filepath}")
            if FLUSH_TYPE == "csv":
                # DataFrame.to_csv has no `metadata` keyword (passing it raises
                # TypeError), so only the data itself goes into the csv file.
                # The metadata can be stored separately with write_metadata().
                df = pd.DataFrame(key_data, columns=PrsData.columns)
                df.to_csv(filepath, sep=",", index=False)
            else:
                with open(filepath, "wb") as file:
                    pickle.dump(np.array(key_data), file)
        self.clear()

    # File IO
    def _get_flush_filename(self, key):
        """Return the sanitized file name (without extension) for the data of `key`."""
        return sanitize_filename(self.name + "_" + str(key))
|
|
|