Compare commits

..

3 Commits

Author SHA1 Message Date
matthias@arch
ddeec83e31 added file history 2023-08-02 13:12:39 +02:00
matthias@arch
9aa1ffd7e0 implemented file saving 2023-08-02 10:58:15 +02:00
matthias@arch
7ef99b5811 tested other colorscheme 2023-08-02 10:57:01 +02:00
2 changed files with 184 additions and 48 deletions

View File

@ -1,10 +1,14 @@
import pandas as pd
import numpy as np
import scipy.signal as signal
import matplotlib as mpl
mpl.use("TkAgg") # fixes focus issues for me
import matplotlib.pyplot as plt
from time import sleep
from random import choice as r_choice
from sys import exit
import os
import re
if __name__ == "__main__":
@ -18,36 +22,107 @@ if __name__ == "__main__":
from .util.transform import Normalize
from .util.data_loader import get_datafiles
from .util.file_io import get_next_digits
file = "/home/matth/Uni/TENG/teng_2/data/2023-06-28_foam_black_1_188mm_06V001.csv"
class InteractiveDataSelector:
"""
Helper class for "iterating" through selected peaks.
"""
def __init__(self, out_name, out_dir, fig, ax):
self._out_dir = out_dir
self._out_name = out_name
self._fig = fig
self._ax = ax
class InteractiveDataSelector:
    """
    Go through all .csv files in a directory, split the data and exclude sections with the mouse,
    then write the sections as single files into a new directory
    """
    # The docstring has to be the first statement of the class body, otherwise it is only a
    # discarded string expression and `InteractiveDataSelector.__doc__` stays None.

    # an input filename has to match this pattern completely to be processed
    re_file = r'\d{4}-\d{2}-\d{2}_([a-zA-Z0-9_]+)_([a-zA-Z0-9]+)_(\d+(?:\.\d+)?mm)_(\d+V)(\d+)\.csv'
    re_index_group_nr = 5  # group number of the index part of the filename
def __init__(self, in_dir, out_dir, keep_index=True, split_at_exclude=True):
    """
    Prepare the output directory, collect the input files and set up the figure.

    @param in_dir:
        Directory that is searched for input files. Only names that completely match `re_file` are kept.
    @param out_dir:
        Directory for the written sections. It is created if it does not exist and must be empty if it does.
    @param keep_index:
        If True: append the split number as triple digits to the existing filename (file001.csv -> file001001.csv, file001002.csv ...)
        Else: remove the indices from the filename before adding the split number (file001.csv -> file001.csv, file002.csv ...)
    @param split_at_exclude:
        If True: When excluding an area, split the data before and after the excluded zone
        Else: remove the excluded zone and join the previous and later part
    @raise ValueError: if `out_dir` exists and is not empty, or if no file in `in_dir` matches `re_file`
    """
    if os.path.isdir(out_dir):
        if os.listdir(out_dir):
            raise ValueError(f"'out_dir' = '{out_dir}' is not empty")
    else:
        os.makedirs(out_dir)
    self._out_dir = out_dir
    self._in_dir = in_dir

    # keep the matching files in sorted order, report everything else
    self._in_files = []
    for name in sorted(os.listdir(in_dir)):
        if re.fullmatch(self.re_file, name):
            self._in_files.append(name)
        else:
            print(f"Dropping non-matching file '{name}'")
    if not self._in_files:
        raise ValueError(f"No matching files in 'in_dir' = '{in_dir}'")

    self._history: list[tuple[str, list]] = []  # (in_file, [out_files...])
    self._keep_index = keep_index
    self.split_at_exclude = split_at_exclude

    plt.ion()
    self._fig, self._ax = plt.subplots()
    # 's' and 'q' are used as shortcuts of this tool, so the default matplotlib bindings are removed.
    # rcParams are global: the membership check keeps a second instance (or a customised keymap
    # that no longer contains the key) from failing with ValueError in list.remove().
    for rc_name, key in (("keymap.save", "s"), ("keymap.quit", "q")):
        if key in mpl.rcParams[rc_name]:
            mpl.rcParams[rc_name].remove(key)

    self._fig.canvas.mpl_connect("button_press_event", lambda ev: self._fig_on_button_press(ev))
    self._fig.canvas.mpl_connect("key_press_event", lambda ev: self._fig_on_key_press(ev))
self._splits_lines = None # vlines
self._excludes_lines = None
self._excludes_areas = [] # list of areas
self._splits: list[int] = []
self._excludes: list[int] = []
self._mode = None # split or exclude
self._set_mode("split")
def run(self):
    """
    Show the first file, then keep the GUI event loop alive for as long as the figure window exists.
    """
    self._next_file()
    while True:
        if not plt.fignum_exists(self._fig.number):
            break
        # short pause so that the figure can process mouse and keyboard events
        plt.pause(0.01)
def _set_titles(self):
help_str = "[(e)xclude, (s)plit, (w)rite+next, (U)ndo last file, (Q)uit]"
self._fig.suptitle(f"{help_str}\nuse left click to select, right click to undo\ncurret mode: {self._mode}")
def _undo_file(self):
if len(self._history) == 0:
print("Nothing to undo")
return
# delete written files
for outfile in self._history[-1][1]:
print(f"Deleting '{outfile}'")
os.remove(outfile)
self._in_files.insert(0, self._history[-1][0])
self._history.pop()
self._next_file()
def _next_file(self):
# runtime stuff
if len(self._in_files) == 0:
raise IndexError("No more files to process")
self._current_file = self._in_files.pop(0)
self._current_dataframe = pd.read_csv(os.path.join(self._in_dir, self._current_file))
self._current_array = self._current_dataframe.to_numpy()
self._current_array = np.loadtxt(os.path.join(self._in_dir, self._current_file), skiprows=1, delimiter=",")
# plot stuff
self._splits_lines = None # vlines
self._excludes_lines = None
self._excludes_areas = [] # list of areas
self._fig.clear()
self._ax = self._fig.subplots()
self._ax.plot(self._current_array[:,0], self._current_array[:,2])
self._ax.set_xlabel(self._current_file)
self._splits: list[int] = []
self._excludes: list[int] = []
self._mode = "exclude" # split or exclude
self._set_titles()
def _fig_on_button_press(self, event):
"""
left click: set split / exclude section (depends on mode)
right click: undo last action of selected mode
"""
if event.xdata is None: return
if event.xdata in self._excludes or event.xdata in self._splits: return
if event.button == 1: # left click, add position
if self._mode == "split":
@ -64,22 +139,33 @@ class InteractiveDataSelector:
self._update_lines()
def _fig_on_key_press(self, event):
if event.key == 'S':
self._set_mode("split")
elif event.key == 'e':
self._set_mode("exclude")
def _set_mode(self, mode):
help_str = "[(e)xclude - (S)plit]"
if mode == "split":
"""
s: set split mode
e: set exclude mode
w: write and go to next file
Q: quit all
"""
if event.key == 's':
self._mode = "split"
fig.suptitle(f"-> split mode {help_str}")
else:
elif event.key == 'e':
self._mode = "exclude"
fig.suptitle(f"-> exclude mode {help_str}")
elif event.key == 'w':
self._save_as_new_files()
try:
self._next_file()
except IndexError:
print(f"All files processed.")
exit(0)
elif event.key == 'U':
self._undo_file()
elif event.key == 'Q':
print(f"Quitting before all files have been processed!")
exit(1)
self._set_titles()
def _update_lines(self):
print(self._splits, self._excludes)
# print(self._splits, self._excludes)
ymin, ymax = self._ax.get_ylim()
if self._splits_lines is not None: self._splits_lines.remove()
@ -100,28 +186,78 @@ class InteractiveDataSelector:
self._ax.set_ylim(ymin, ymax) # reset, since margins are added to lines
self._fig.canvas.draw()
def _get_next_filename(self):
    """
    Build the name of the next output file for the current input file.

    keep_index is True:  the split number is appended to the complete name (file001.csv -> file001001.csv)
    keep_index is False: the index part of the name is removed first (file001.csv -> file001.csv, file002.csv ...)
    This follows the description of `keep_index` in __init__, the two cases were swapped before.

    @return: filename without directory
    @raise ValueError: if the index has to be removed but the current filename does not match `re_file`
    """
    if self._keep_index:
        basename = self._current_file[:-4]  # extension
    else:
        match = re.fullmatch(self.re_file, self._current_file)
        # the former `assert(type(match) is not None)` could never fail, check the match itself
        if match is None:
            raise ValueError(f"Filename '{self._current_file}' does not match the expected pattern")
        # cut at the start of the index group
        basename = self._current_file[:match.start(self.re_index_group_nr)]
    # presumably the next unused 3 digit number for this basename in out_dir - see util.file_io
    index = get_next_digits(basename, self._out_dir, digits=3)
    return f"{basename}{index}.csv"
def _save_as_new_files(self):
# convert timestamps to their closest index
excludes_idx = [np.abs(self._current_array[:,0] - t).argmin() for t in self._excludes]
splits_idx = [np.abs(self._current_array[:,0] - t).argmin() for t in self._splits]
if self.split_at_exclude:
# split before the start of the exclucded range
splits_idx += [ excludes_idx[i]-1 for i in range(0, len(excludes_idx), 2) ]
# split after the end of the exclucded range
splits_idx += [ excludes_idx[i]+1 for i in range(1, len(excludes_idx), 2) ]
splits_idx = list(set(splits_idx)) # remove duplicates
splits_idx.sort()
df = self._current_dataframe.copy()
# 1) remove excluded parts
for i in range(1, len(excludes_idx), 2):
df = df.drop(index=range(excludes_idx[i-1], excludes_idx[i]+1))
# 2) splits
new_frames = []
start_i = df.index[0]
for i in range(0, len(splits_idx)):
end_i = splits_idx[i]
# print(start_i, end_i)
# check if valid start and end index
if start_i in df.index and end_i in df.index:
new_frames.append(df.loc[start_i:end_i])
start_i = end_i + 1
# append rest
if start_i in df.index:
new_frames.append(df.loc[start_i:])
# 3) remove empty
for i in reversed(range(len(new_frames))):
if len(new_frames[i]) == 0:
new_frames.pop(i)
self._history.append((self._current_file, []))
for frame in new_frames:
filename = self._get_next_filename()
pathname = os.path.join(self._out_dir, filename)
# until now, frame is a copy of a slice
frame = frame.copy()
# transform timestamps so that first value is 0
t_column_name = frame.columns[0]
frame[t_column_name] -= frame.iloc[0][t_column_name]
frame.to_csv(pathname, index=False)
self._history[-1][1].append(pathname)
print(f"Saved range of length {len(frame.index):04} to {pathname}")
if __name__ == "__main__":
"""
Peak identification:
plot, let user choose first, second, last and lowest peak for identification
"""
df = pd.read_csv(file)
a = df.to_numpy()
import argparse
parser = argparse.ArgumentParser("data_preprocess")
parser.add_argument("in_dir")
parser.add_argument("out_dir")
parser.add_argument("-i", "--keep_index", action="store_true")
parser.add_argument("-e", "--split_at_exclude", action="store_true")
ns = parser.parse_args()
# a2 = interpolate_to_linear_time()
# print(a2)
# exit()
vdata = Normalize(0, 1)(a[:,2])
plt.ion()
fig, ax = plt.subplots()
ax.plot(vdata)
ax.grid(True)
selector = InteractiveDataSelector("bla", "test", fig, ax)
selector = InteractiveDataSelector(ns.in_dir, ns.out_dir, keep_index=ns.keep_index, split_at_exclude=ns.split_at_exclude)
selector.run()
exit(2) # selector should exit in _fig_on_key_press

View File

@ -1,5 +1,6 @@
from ..util.data_loader import LabelConverter
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import time
import torch
import numpy as np
@ -141,8 +142,7 @@ class EpochTracker:
label_names = self.labels.get_labels()
fig, ax = plt.subplots(layout="tight")
im = ax.imshow(normalized_predictions, cmap='Blues') # cmap='BuPu'
im = ax.imshow(normalized_predictions, cmap='Blues') # cmap='BuPu', , norm=colors.PowerNorm(1./2.)
ax.set_xticks(np.arange(N))
ax.set_yticks(np.arange(N))
ax.set_xticklabels(label_names)