diff --git a/teng_ml/data_preprocess.py b/teng_ml/data_preprocess.py index d15f11f..01e24c2 100644 --- a/teng_ml/data_preprocess.py +++ b/teng_ml/data_preprocess.py @@ -1,6 +1,8 @@ import pandas as pd import numpy as np import scipy.signal as signal +import matplotlib as mpl +mpl.use("TkAgg") # fixes focus issues for me import matplotlib.pyplot as plt from time import sleep from random import choice as r_choice @@ -26,7 +28,7 @@ file = "/home/matth/Uni/TENG/teng_2/data/2023-06-28_foam_black_1_188mm_06V001.cs class InteractiveDataSelector: - re_file = r'\d{4}-\d{2}-\d{2}_([a-zA-Z_]+)_([a-zA-Z0-9]+)_(\d+(?:\.\d+)?mm)_(\d+V)(\d+)\.csv' + re_file = r'\d{4}-\d{2}-\d{2}_([a-zA-Z0-9_]+)_([a-zA-Z0-9]+)_(\d+(?:\.\d+)?mm)_(\d+V)(\d+)\.csv' re_index_group_nr = 5 # group number of the index part of the filename """ Go through all .csv files in a directory, split the data and exclude sections with the mouse, then write the sections as single files into a new directory @@ -57,12 +59,16 @@ class InteractiveDataSelector: if not self._in_files: raise ValueError(f"No matching files in 'in_dir' = '{in_dir}'") + self._history: list[tuple[str, list]] = [] # (in_file, [out_files...]) + self._keep_index = keep_index self.split_at_exclude = split_at_exclude plt.ion() self._fig, self._ax = plt.subplots() + mpl.rcParams['keymap.save'].remove('s') # s is used for split + mpl.rcParams['keymap.quit'].remove('q') self._fig.canvas.mpl_connect("button_press_event", lambda ev: self._fig_on_button_press(ev)) self._fig.canvas.mpl_connect("key_press_event", lambda ev: self._fig_on_key_press(ev)) @@ -72,8 +78,20 @@ class InteractiveDataSelector: plt.pause(0.01) def _set_titles(self): - help_str = "[(e)xclude, (S)plit, (w)rite]" - self._fig.suptitle(f"{help_str}\ncurret mode: {self._mode}") + help_str = "[(e)xclude, (s)plit, (w)rite+next, (U)ndo last file, (Q)uit]" + self._fig.suptitle(f"{help_str}\nuse left click to select, right click to undo\ncurret mode: {self._mode}") + + def _undo_file(self): + if len(self._history) == 0: + print("Nothing to undo") + return + # delete written files + for outfile in self._history[-1][1]: + print(f"Deleting '{outfile}'") + os.remove(outfile) + self._in_files.insert(0, self._history[-1][0]) + self._history.pop() + self._next_file() def _next_file(self): # runtime stuff @@ -82,7 +100,7 @@ class InteractiveDataSelector: self._current_file = self._in_files.pop(0) self._current_dataframe = pd.read_csv(os.path.join(self._in_dir, self._current_file)) self._current_array = self._current_dataframe.to_numpy() - # self._current_array = np.loadtxt(os.path.join(self._in_dir, self._current_file), skiprows=1, delimiter=",") + self._current_array = np.loadtxt(os.path.join(self._in_dir, self._current_file), skiprows=1, delimiter=",") # plot stuff @@ -99,8 +117,6 @@ class InteractiveDataSelector: self._mode = "exclude" # split or exclude self._set_titles() - self._set_titles() - def _fig_on_button_press(self, event): """ left click: set split / exclude section (depends on mode) @@ -124,18 +140,30 @@ class InteractiveDataSelector: def _fig_on_key_press(self, event): """ - S: set split mode + s: set split mode e: set exclude mode w: write and got to next file + Q: quit all """ - if event.key == 'S': + if event.key == 's': self._mode = "split" elif event.key == 'e': self._mode = "exclude" elif event.key == 'w': self._save_as_new_files() + try: + self._next_file() + except IndexError: + print(f"All files processed.") + exit(0) + elif event.key == 'U': + self._undo_file() + elif event.key == 'Q': + print(f"Quitting before all files have been processed!") + exit(1) self._set_titles() + def _update_lines(self): # print(self._splits, self._excludes) ymin, ymax = self._ax.get_ylim() @@ -206,6 +234,7 @@ class InteractiveDataSelector: if len(new_frames[i]) == 0: new_frames.pop(i) + self._history.append((self._current_file, [])) for frame in new_frames: filename = self._get_next_filename() pathname = os.path.join(self._out_dir, filename) @@ -215,6 +244,7 @@ class InteractiveDataSelector: t_column_name = frame.columns[0] frame[t_column_name] -= frame.iloc[0][t_column_name] frame.to_csv(pathname, index=False) + self._history[-1][1].append(pathname) print(f"Saved range of length {len(frame.index):04} to {pathname}") @@ -223,9 +253,11 @@ if __name__ == "__main__": parser = argparse.ArgumentParser("data_preprocess") parser.add_argument("in_dir") parser.add_argument("out_dir") - parser.add_argument("--keep_index", action="store_true") + parser.add_argument("-i", "--keep_index", action="store_true") + parser.add_argument("-e", "--split_at_exclude", action="store_true") ns = parser.parse_args() - selector = InteractiveDataSelector(ns.in_dir, ns.out_dir, ns.keep_index) + selector = InteractiveDataSelector(ns.in_dir, ns.out_dir, keep_index=ns.keep_index, split_at_exclude=ns.split_at_exclude) selector.run() + exit(2) # selector should exit in _fig_on_key_press