Compare commits

4 Commits: 9f9d3d7e06 ... 3d823a19c7

| Author | SHA1 | Date |
|---|---|---|
| | 3d823a19c7 | |
| | 570d6dbc25 | |
| | 77b266929d | |
| | f281b981ea | |

readme.md (14 changed lines)
readme.md

```diff
@@ -1,13 +1,17 @@
-# Machine learning for material recognition with a TENG
-
-(Bi)LSTM for name classification.
+# Machine learning for material recognition with a triboelectric nanogenerator (TENG)
+
+This project was written for my bachelor's thesis.
+
+It was written to classify TENG voltage output from pressing it against different materials.
 
 More information on the project is [on my website](https://quintern.xyz/en/teng.html).
 
 Contents:
 - Data preparation/plotting/loading utilities
 - (Bi)LSTM + fully connected + softmax model for classifying TENG output
 - Progress tracking utilities to easily find the best parameters
 
 ## Model training
 Adjust the parameters in `main.py` and run it.
 All models and the settings they were trained with are automatically serialized with pickle and stored in a subfolder
 of the `<model_dir>` that was set in `main.py`.
 
 ## Model evaluation
 Run `find_best_model.py <model_dir>` with the `<model_dir>` specified in `main.py` during training.
```
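The readme above says trained models and their settings are pickled into a subfolder of `<model_dir>`. As a quick orientation, a minimal sketch of loading such an artifact back; the path and file name are hypothetical, since the actual layout isn't visible in this diff:

```python
import pickle

# Hypothetical artifact path; the repo's real file layout is not shown here.
with open("models_gen_15/run_0/model.pkl", "rb") as f:
    model = pickle.load(f)  # deserialized model (and/or settings) object

print(model)
```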
```diff
@@ -42,9 +42,10 @@ def test_interpol():
 
 if __name__ == "__main__":
-    labels = LabelConverter(["foam_PDMS_white", "foam_PDMS_black", "foam_PDMS_TX100", "foam_PE", "antistatic_foil", "cardboard", "glass", "kapton", "bubble_wrap_PE", "fabric_PP", ])
-    # labels = LabelConverter(["foam_PDMS_white", "foam_PDMS_black", "foam_PDMS_TX100", "foam_PE", "kapton", "bubble_wrap_PE", "fabric_PP", ])
-    models_dir = "/home/matth/Uni/TENG/teng_2/models_gen_12"  # where to save models, settings and results
+    # labels = LabelConverter(["foam_PDMS_white", "foam_PDMS_black", "foam_PDMS_TX100", "foam_PE", "antistatic_foil", "cardboard", "glass", "kapton", "bubble_wrap_PE", "fabric_PP" ])
+    labels = LabelConverter(["foam_PDMS_white", "foam_PDMS_black", "foam_PDMS_TX100", "foam_PE", "antistatic_foil", "cardboard", "kapton", "bubble_wrap_PE", "fabric_PP" ])
+    # labels = LabelConverter(["foam_PDMS_white", "foam_PDMS_black", "foam_PDMS_TX100", "foam_PE", "kapton", "bubble_wrap_PE", "fabric_PP" ])
+    models_dir = "/home/matth/Uni/TENG/teng_2/models_gen_15"  # where to save models, settings and results
     if not path.isdir(models_dir):
         makedirs(models_dir)
     data_dir = "/home/matth/Uni/TENG/teng_2/sorted_data"
```
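`LabelConverter`'s implementation is not part of this diff; from its use here (built from a list of material names, later supporting `label not in labels`, and producing label vectors for the softmax output), a minimal stand-in could look like the following. The class body and the `get_label_vec` method name are assumptions, not the repo's code:

```python
import numpy as np

class LabelConverter:
    """Minimal stand-in inferred from usage in this diff (assumed, not the repo's code)."""
    def __init__(self, labels):
        self.labels = list(labels)

    def __contains__(self, label):  # enables `label not in labels`
        return label in self.labels

    def get_label_vec(self, label):  # hypothetical method name
        vec = np.zeros(len(self.labels), dtype=np.float32)
        vec[self.labels.index(label)] = 1.0  # one-hot target for the softmax output
        return vec

labels = LabelConverter(["foam_PE", "kapton", "cardboard"])
print("kapton" in labels)              # True
print(labels.get_label_vec("kapton"))  # [0. 1. 0.]
```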
```diff
@@ -53,18 +54,18 @@ if __name__ == "__main__":
     # gen_6 best options: no glass, cardboard and antistatic_foil, not bidirectional, lr=0.0007, no datasplitter, 2 layers n_hidden = 10
 
     # Test with
-    num_layers = [ 2, 3 ]
-    hidden_size = [ 21, 28 ]
-    bidirectional = [ False, True ]
+    num_layers = [ 4, 5 ]
+    hidden_size = [ 28, 36 ]
+    bidirectional = [ True ]
     t_const_int = ConstantInterval(0.01)  # TODO check if needed: data was taken at equal rate, but it isn't perfect -> maybe just ignore?
     t_norm = Normalize(-1, 1)
-    transforms = [[ t_norm ]]  #, [ t_norm, t_const_int ]]
+    transforms = [[]]  #, [ t_norm, t_const_int ]]
     batch_sizes = [ 4 ]
     splitters = [ DataSplitter(50, drop_if_smaller_than=30) ]  # smallest file has length 68, TODO: try with 0.5-1 second snippets
     num_epochs = [ 80 ]
     # (epoch, min_accuracy)
-    training_cancel_points = [(15, 20), (40, 25)]
-    # training_cancel_points = []
+    # training_cancel_points = [(15, 20), (40, 25)]
+    training_cancel_points = []
 
     args = [num_layers, hidden_size, bidirectional, [None], [None], [None], transforms, splitters, num_epochs, batch_sizes]
```
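`args` packs one candidate list per hyperparameter, which suggests the training loop runs every combination. A standalone sketch of that expansion with `itertools.product` (the unpacking below is illustrative, not the repo's exact loop):

```python
from itertools import product

# One list of candidate values per setting, as in the args list above
num_layers = [4, 5]
hidden_size = [28, 36]
bidirectional = [True]
batch_sizes = [4]

# Cartesian product: one training run per combination (2 * 2 * 1 * 1 = 4 here)
for nl, hs, bd, bs in product(num_layers, hidden_size, bidirectional, batch_sizes):
    print(f"num_layers={nl}, hidden_size={hs}, bidirectional={bd}, batch_size={bs}")
```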
```diff
@@ -81,7 +82,7 @@ if __name__ == "__main__":
         None,
         # lambda optimizer, st: torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9),
         # lambda optimizer, st: torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5),
-        lambda optimizer, st: torch.optim.lr_scheduler.StepLR(optimizer, step_size=st.num_epochs // 8, gamma=0.60, verbose=False),
+        # lambda optimizer, st: torch.optim.lr_scheduler.StepLR(optimizer, step_size=st.num_epochs // 8, gamma=0.60, verbose=False),
         # lambda optimizer, st: torch.optim.lr_scheduler.StepLR(optimizer, step_size=st.num_epochs // 10, gamma=0.75, verbose=False),
     ]
```
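The scheduler entries are factories called with the optimizer and a settings object `st`. For reference, the decay the now-commented `StepLR` option would produce, shown standalone; the linear layer and the fixed epoch count are stand-ins for illustration:

```python
import torch

model = torch.nn.Linear(1, 1)  # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
num_epochs = 80

# step_size = num_epochs // 8 = 10: multiply the lr by 0.60 every 10 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=num_epochs // 8, gamma=0.60)

for epoch in range(num_epochs):
    optimizer.step()  # training step would go here
    scheduler.step()
    if epoch % 10 == 9:
        print(epoch + 1, scheduler.get_last_lr())  # 0.0006, 0.00036, ...
```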
```diff
@@ -10,8 +10,13 @@ import threading
 
 from sklearn.model_selection import train_test_split
 
+from teng_ml.util.transform import Multiply
+
 # groups: date, name, n_object, voltage, distance, index
 # re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z_]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
+# for teng_1
+# re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z_]+)_()(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
+# for teng_2
 re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z0-9_]+)_(\d+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
 
 class LabelConverter:
```
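For reference, what the two filename patterns capture: the teng_1 pattern's empty group `()` yields `""` for `n_object`, which is why the parsing code below switches to `0 if n_object == "" else int(n_object)`. The sample filenames are made up:

```python
import re

# teng_1: no n_object in the filename; the empty group () keeps the group count aligned
re_teng1 = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z_]+)_()(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
# teng_2: n_object is a real digit group
re_teng2 = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z0-9_]+)_(\d+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"

print(re.fullmatch(re_teng1, "2023-05-12_kapton_8.0V_20mm003.csv").groups())
# -> ('2023-05-12', 'kapton', '', '8.0', '20', '003')
print(re.fullmatch(re_teng2, "2023-05-12_foam_PDMS_white_1_8.0V_20mm003.csv").groups())
# -> ('2023-05-12', 'foam_PDMS_white', '1', '8.0', '20', '003')
```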
```diff
@@ -51,7 +56,7 @@ class Datasample:
     def __init__(self, date: str, label: str, n_object: str, voltage: str, distance: str, index: str, label_vec, datapath: str, init_data=False):
         self.date = date
         self.label = label
-        self.n_object = int(n_object)
+        self.n_object = 0 if n_object == "" else int(n_object)
         self.voltage = float(voltage)
         self.distance = float(distance)
         self.index = int(index)
```
```diff
@@ -86,6 +91,19 @@ class Dataset:
         """
         self.transforms = transforms
         self.data = []  # (data, label)
+
+        # NORMALIZE ALL DATA WITH THE SAME FACTOR
+        # sup = 0
+        # inf = 0
+        # for sample in datasamples:
+        #     data = sample.get_data()
+        #     max_ = np.max(data[:,2])
+        #     min_ = np.min(data[:,2])
+        #     if max_ > sup: sup = max_
+        #     if min_ < inf: inf = min_
+        # multiplier = 1 / max(sup, abs(inf))
+        # self.transforms.append(Multiply(multiplier))
+
         for sample in datasamples:
             data = self.apply_transforms(sample.get_data())
             if split_function is None:
```
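The commented-out block above would scale every sample by one shared factor instead of normalizing per sample, preserving relative amplitudes between materials. A standalone numpy sketch of the same idea; the random arrays stand in for `sample.get_data()`, with the voltage in column 2:

```python
import numpy as np

rng = np.random.default_rng(0)
samples = [rng.normal(size=(100, 3)) for _ in range(5)]  # stand-ins for sample.get_data()

# Global extremes of the voltage column across all samples
sup = max(np.max(s[:, 2]) for s in samples)
inf = min(np.min(s[:, 2]) for s in samples)

# One shared factor maps the largest magnitude to 1; applied like Multiply(multiplier)
multiplier = 1 / max(sup, abs(inf))
scaled = [s * multiplier for s in samples]
print(max(abs(s[:, 2]).max() for s in scaled))  # 1.0
```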
```diff
@@ -128,7 +146,7 @@ def get_datafiles(datadir, labels: LabelConverter, exclude_n_object=None, filter
         label = match.groups()[1]
         if label not in labels: continue
 
-        sample_n_object = float(match.groups()[2])
+        sample_n_object = 0 if match.groups()[2] == "" else int(match.groups()[2])
         if exclude_n_object and exclude_n_object == sample_n_object: continue
         sample_voltage = float(match.groups()[3])
         if filter_voltage and filter_voltage != sample_voltage: continue
```
```diff
@@ -1,5 +1,6 @@
 import numpy as np
 from scipy.interpolate import interp1d
+from torch import mul
 
 class Normalize:
     """
```
```diff
@@ -41,6 +42,15 @@ class NormalizeAmplitude:
         return f"NormalizeAmplitude(high={self.high})"
 
 
+class Multiply:
+    def __init__(self, multiplier):
+        self.multiplier = multiplier
+    def __call__(self, data):
+        return data * self.multiplier
+    def __repr__(self):
+        return f"Multiply(multiplier={self.multiplier})"
+
+
 class ConstantInterval:
     """
     Interpolate the data to have a constant interval / sample rate,
```
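`ConstantInterval`'s docstring is cut off at the hunk boundary; given the `interp1d` import above, the resampling presumably works along these lines. A sketch under that assumption, not the repo's exact code:

```python
import numpy as np
from scipy.interpolate import interp1d

# Made-up, slightly irregular timestamps and voltages
t = np.array([0.0, 0.011, 0.019, 0.032, 0.040])
v = np.array([0.1, 0.5, 0.3, -0.2, 0.0])

# Resample onto a constant 0.01 s grid, as ConstantInterval(0.01) suggests
f = interp1d(t, v)
t_const = np.arange(t[0], t[-1], 0.01)
print(np.column_stack((t_const, f(t_const))))
```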