Compare commits

..

No commits in common. "ddeec83e31d2ff5ac5daf85c31771a18e60d5bed" and "c4f90ff281246a5736c5f8501612271ede8d618a" have entirely different histories.

2 changed files with 41 additions and 177 deletions

View File

@ -1,14 +1,10 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import scipy.signal as signal import scipy.signal as signal
import matplotlib as mpl
mpl.use("TkAgg") # fixes focus issues for me
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from time import sleep from time import sleep
from random import choice as r_choice from random import choice as r_choice
from sys import exit from sys import exit
import os
import re
if __name__ == "__main__": if __name__ == "__main__":
@ -22,107 +18,36 @@ if __name__ == "__main__":
from .util.transform import Normalize from .util.transform import Normalize
from .util.data_loader import get_datafiles from .util.data_loader import get_datafiles
from .util.file_io import get_next_digits
file = "/home/matth/Uni/TENG/teng_2/data/2023-06-28_foam_black_1_188mm_06V001.csv" file = "/home/matth/Uni/TENG/teng_2/data/2023-06-28_foam_black_1_188mm_06V001.csv"
class InteractiveDataSelector: class InteractiveDataSelector:
re_file = r'\d{4}-\d{2}-\d{2}_([a-zA-Z0-9_]+)_([a-zA-Z0-9]+)_(\d+(?:\.\d+)?mm)_(\d+V)(\d+)\.csv'
re_index_group_nr = 5 # group number of the index part of the filename
""" """
Go through all .csv files in a directory, split the data and exclude sections with the mouse, then write the sections as single files into a new directory Helper class for "iterating" through selected peaks.
""" """
def __init__(self, in_dir, out_dir, keep_index=True, split_at_exclude=True): def __init__(self, out_name, out_dir, fig, ax):
"""
@param keep_index:
If True: append the split number as triple digits to the existing filename (file001.csv -> file001001.csv, file001002.csv ...)
Else: remove the indices from the filename before adding the split number (file001.csv -> file001.csv, file002.csv ...)
@param split_at_exclude:
If True: When excluding an area, split the data before and after the excluded zone
Else: remove the excluded zone and join the previous and later part
"""
if os.path.isdir(out_dir):
if os.listdir(out_dir):
raise ValueError(f"'out_dir' = '{out_dir}' is not empty")
else:
os.makedirs(out_dir)
self._out_dir = out_dir self._out_dir = out_dir
self._out_name = out_name
self._fig = fig
self._ax = ax
self._in_dir = in_dir
self._in_files = os.listdir(in_dir)
self._in_files.sort()
for i in reversed(range(len(self._in_files))):
if not re.fullmatch(InteractiveDataSelector.re_file, self._in_files[i]):
print(f"Dropping non-matching file '{self._in_files[i]}'")
self._in_files.pop(i)
if not self._in_files:
raise ValueError(f"No matching files in 'in_dir' = '{in_dir}'")
self._history: list[tuple[str, list]] = [] # (in_file, [out_files...])
self._keep_index = keep_index
self.split_at_exclude = split_at_exclude
plt.ion()
self._fig, self._ax = plt.subplots()
mpl.rcParams['keymap.save'].remove('s') # s is used for split
mpl.rcParams['keymap.quit'].remove('q')
self._fig.canvas.mpl_connect("button_press_event", lambda ev: self._fig_on_button_press(ev)) self._fig.canvas.mpl_connect("button_press_event", lambda ev: self._fig_on_button_press(ev))
self._fig.canvas.mpl_connect("key_press_event", lambda ev: self._fig_on_key_press(ev)) self._fig.canvas.mpl_connect("key_press_event", lambda ev: self._fig_on_key_press(ev))
def run(self):
self._next_file()
while plt.fignum_exists(self._fig.number):
plt.pause(0.01)
def _set_titles(self):
help_str = "[(e)xclude, (s)plit, (w)rite+next, (U)ndo last file, (Q)uit]"
self._fig.suptitle(f"{help_str}\nuse left click to select, right click to undo\ncurret mode: {self._mode}")
def _undo_file(self):
if len(self._history) == 0:
print("Nothing to undo")
return
# delete written files
for outfile in self._history[-1][1]:
print(f"Deleting '{outfile}'")
os.remove(outfile)
self._in_files.insert(0, self._history[-1][0])
self._history.pop()
self._next_file()
def _next_file(self):
# runtime stuff
if len(self._in_files) == 0:
raise IndexError("No more files to process")
self._current_file = self._in_files.pop(0)
self._current_dataframe = pd.read_csv(os.path.join(self._in_dir, self._current_file))
self._current_array = self._current_dataframe.to_numpy()
self._current_array = np.loadtxt(os.path.join(self._in_dir, self._current_file), skiprows=1, delimiter=",")
# plot stuff
self._splits_lines = None # vlines self._splits_lines = None # vlines
self._excludes_lines = None self._excludes_lines = None
self._excludes_areas = [] # list of areas self._excludes_areas = [] # list of areas
self._fig.clear()
self._ax = self._fig.subplots()
self._ax.plot(self._current_array[:,0], self._current_array[:,2])
self._ax.set_xlabel(self._current_file)
self._splits: list[int] = [] self._splits: list[int] = []
self._excludes: list[int] = [] self._excludes: list[int] = []
self._mode = "exclude" # split or exclude self._mode = None # split or exclude
self._set_titles() self._set_mode("split")
def run(self):
while plt.fignum_exists(self._fig.number):
plt.pause(0.01)
def _fig_on_button_press(self, event): def _fig_on_button_press(self, event):
"""
left click: set split / exclude section (depends on mode)
right click: undo last action of selected mode
"""
if event.xdata is None: return
if event.xdata in self._excludes or event.xdata in self._splits: return if event.xdata in self._excludes or event.xdata in self._splits: return
if event.button == 1: # left click, add position if event.button == 1: # left click, add position
if self._mode == "split": if self._mode == "split":
@ -139,33 +64,22 @@ class InteractiveDataSelector:
self._update_lines() self._update_lines()
def _fig_on_key_press(self, event): def _fig_on_key_press(self, event):
""" if event.key == 'S':
s: set split mode self._set_mode("split")
e: set exclude mode
w: write and got to next file
Q: quit all
"""
if event.key == 's':
self._mode = "split"
elif event.key == 'e': elif event.key == 'e':
self._mode = "exclude" self._set_mode("exclude")
elif event.key == 'w':
self._save_as_new_files()
try:
self._next_file()
except IndexError:
print(f"All files processed.")
exit(0)
elif event.key == 'U':
self._undo_file()
elif event.key == 'Q':
print(f"Quitting before all files have been processed!")
exit(1)
self._set_titles()
def _set_mode(self, mode):
help_str = "[(e)xclude - (S)plit]"
if mode == "split":
self._mode = "split"
fig.suptitle(f"-> split mode {help_str}")
else:
self._mode = "exclude"
fig.suptitle(f"-> exclude mode {help_str}")
def _update_lines(self): def _update_lines(self):
# print(self._splits, self._excludes) print(self._splits, self._excludes)
ymin, ymax = self._ax.get_ylim() ymin, ymax = self._ax.get_ylim()
if self._splits_lines is not None: self._splits_lines.remove() if self._splits_lines is not None: self._splits_lines.remove()
@ -186,78 +100,28 @@ class InteractiveDataSelector:
self._ax.set_ylim(ymin, ymax) # reset, since margins are added to lines self._ax.set_ylim(ymin, ymax) # reset, since margins are added to lines
self._fig.canvas.draw() self._fig.canvas.draw()
def _get_next_filename(self):
if self._keep_index:
# 5th group is index
match = re.fullmatch(InteractiveDataSelector.re_file, self._current_file)
assert(type(match) is not None)
basename = self._current_file[:match.start(InteractiveDataSelector.re_index_group_nr)]
else:
basename = self._current_file[:-4] # extension
index = get_next_digits(basename, self._out_dir, digits=3)
return f"{basename}{index}.csv"
def _save_as_new_files(self): def _save_as_new_files(self):
# convert timestamps to their closest index
excludes_idx = [np.abs(self._current_array[:,0] - t).argmin() for t in self._excludes]
splits_idx = [np.abs(self._current_array[:,0] - t).argmin() for t in self._splits]
if self.split_at_exclude:
# split before the start of the exclucded range
splits_idx += [ excludes_idx[i]-1 for i in range(0, len(excludes_idx), 2) ]
# split after the end of the exclucded range
splits_idx += [ excludes_idx[i]+1 for i in range(1, len(excludes_idx), 2) ]
splits_idx = list(set(splits_idx)) # remove duplicates
splits_idx.sort()
df = self._current_dataframe.copy()
# 1) remove excluded parts
for i in range(1, len(excludes_idx), 2):
df = df.drop(index=range(excludes_idx[i-1], excludes_idx[i]+1))
# 2) splits
new_frames = []
start_i = df.index[0]
for i in range(0, len(splits_idx)):
end_i = splits_idx[i]
# print(start_i, end_i)
# check if valid start and end index
if start_i in df.index and end_i in df.index:
new_frames.append(df.loc[start_i:end_i])
start_i = end_i + 1
# append rest
if start_i in df.index:
new_frames.append(df.loc[start_i:])
# 3) remove empty
for i in reversed(range(len(new_frames))):
if len(new_frames[i]) == 0:
new_frames.pop(i)
self._history.append((self._current_file, []))
for frame in new_frames:
filename = self._get_next_filename()
pathname = os.path.join(self._out_dir, filename)
# until now, frame is a copy of a slice
frame = frame.copy()
# transform timestamps so that first value is 0
t_column_name = frame.columns[0]
frame[t_column_name] -= frame.iloc[0][t_column_name]
frame.to_csv(pathname, index=False)
self._history[-1][1].append(pathname)
print(f"Saved range of length {len(frame.index):04} to {pathname}")
if __name__ == "__main__": if __name__ == "__main__":
import argparse """
parser = argparse.ArgumentParser("data_preprocess") Peak identification:
parser.add_argument("in_dir") plot, let user choose first, second, last and lowest peak for identification
parser.add_argument("out_dir") """
parser.add_argument("-i", "--keep_index", action="store_true") df = pd.read_csv(file)
parser.add_argument("-e", "--split_at_exclude", action="store_true") a = df.to_numpy()
ns = parser.parse_args()
selector = InteractiveDataSelector(ns.in_dir, ns.out_dir, keep_index=ns.keep_index, split_at_exclude=ns.split_at_exclude) # a2 = interpolate_to_linear_time()
# print(a2)
# exit()
vdata = Normalize(0, 1)(a[:,2])
plt.ion()
fig, ax = plt.subplots()
ax.plot(vdata)
ax.grid(True)
selector = InteractiveDataSelector("bla", "test", fig, ax)
selector.run() selector.run()
exit(2) # selector should exit in _fig_on_key_press

View File

@ -1,6 +1,5 @@
from ..util.data_loader import LabelConverter from ..util.data_loader import LabelConverter
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.colors as colors
import time import time
import torch import torch
import numpy as np import numpy as np
@ -142,7 +141,8 @@ class EpochTracker:
label_names = self.labels.get_labels() label_names = self.labels.get_labels()
fig, ax = plt.subplots(layout="tight") fig, ax = plt.subplots(layout="tight")
im = ax.imshow(normalized_predictions, cmap='Blues') # cmap='BuPu', , norm=colors.PowerNorm(1./2.)
im = ax.imshow(normalized_predictions, cmap='Blues') # cmap='BuPu'
ax.set_xticks(np.arange(N)) ax.set_xticks(np.arange(N))
ax.set_yticks(np.arange(N)) ax.set_yticks(np.arange(N))
ax.set_xticklabels(label_names) ax.set_xticklabels(label_names)