Compare commits

..

No commits in common. "33d1945de2b00eefce596c1fc221cd091d9ab719" and "ad2e3468f7c771254c323fc5fafbc9bf971b6837" have entirely different histories.

8 changed files with 115 additions and 218 deletions

View File

@ -18,9 +18,8 @@ import time
from os import makedirs, path
from .util.transform import ConstantInterval, Normalize
from .util.data_loader import load_datasets, LabelConverter, count_data
from .util.data_loader import load_datasets, LabelConverter
from .util.split import DataSplitter
from .util.pad import PadSequences
from .util.settings import MLSettings
from .rnn.rnn import RNN
from .rnn.training import train_validate_save, select_device
@ -42,30 +41,34 @@ def test_interpol():
if __name__ == "__main__":
# labels = LabelConverter(["foam_PDMS_white", "foam_PDMS_black", "foam_PDMS_TX100", "foam_PE", "antistatic_foil", "cardboard", "glass", "kapton", "bubble_wrap_PE", "fabric_PP", ])
labels = LabelConverter(["foam_PDMS_white", "foam_PDMS_black", "foam_PDMS_TX100", "foam_PE", "kapton", "bubble_wrap_PE", "fabric_PP", ])
models_dir = "/home/matth/Uni/TENG/teng_2/models_gen_8" # where to save models, settings and results
labels = LabelConverter(["white_foam", "black_foam", "rigid_foam", "cardboard", "glass", "Kapton", "bubble_wrap", "cloth_ffp2", ])
models_dir = "/home/matth/Uni/TENG/teng_2/models_gen_1" # where to save models, settings and results
if not path.isdir(models_dir):
makedirs(models_dir)
data_dir = "/home/matth/Uni/TENG/teng_2/sorted_data"
# gen_5 best options: datasplitter, not bidirectional, lr=0.001, no scheduler
# gen_6 best options: no glass, cardboard and antistatic_foil, not bidirectional, lr=0.0007, no datasplitter, 2 layers n_hidden = 10
# Test with
num_layers = [ 2 ]
hidden_size = [ 7, 11, 14 ]
bidirectional = [ False, True ]
t_const_int = ConstantInterval(0.01) # TODO check if needed: data was taken at equal rate, but it isnt perfect -> maybe just ignore?
num_layers = [ 3 ]
hidden_size = [ 8 ]
bidirectional = [ True ]
# t_const_int = ConstantInterval(0.01) TODO check if needed: data was taken at an equal rate, but it isn't perfect -> maybe just ignore?
t_norm = Normalize(-1, 1)
transforms = [[ ], [ t_norm ]] #, [ t_norm, t_const_int ]]
batch_sizes = [ 4 ]
splitters = [ DataSplitter(50, drop_if_smaller_than=30), DataSplitter(100, drop_if_smaller_than=30) ] # smallest file has length 68 TODO: try with 0.5-1second snippets
num_epochs = [ 5 ]
# (epoch, min_accuracy)
training_cancel_points = [(10, 10), (20, 20), (40, 30)]
# training_cancel_points = []
transforms = [[ t_const_int, t_norm ]]
batch_sizes = [ 64 ] # , 16]
splitters = [ DataSplitter(100) ] # TODO: try with 0.5-1second snippets
num_epochs = [ 60 ]
# num_layers=1,
# hidden_size=1,
# bidirectional=True,
# optimizer=None,
# scheduler=None,
# loss_func=None,
# transforms=[],
# splitter=None,
# num_epochs=10,
# batch_size=5,
args = [num_layers, hidden_size, bidirectional, [None], [None], [None], transforms, splitters, num_epochs, batch_sizes]
# create settings for every possible combination
@ -75,28 +78,23 @@ if __name__ == "__main__":
loss_func = nn.CrossEntropyLoss()
optimizers = [
lambda model: torch.optim.Adam(model.parameters(), lr=0.0005),
lambda model: torch.optim.Adam(model.parameters(), lr=0.0007),
# lambda model: torch.optim.Adam(model.parameters(), lr=0.008),
lambda model: torch.optim.Adam(model.parameters(), lr=0.03),
# lambda model: torch.optim.Adam(model.parameters(), lr=0.25),
# lambda model: torch.optim.Adam(model.parameters(), lr=0.50),
]
schedulers = [
None,
# lambda optimizer, st: torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9),
# lambda optimizer, st: torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5),
lambda optimizer, st: torch.optim.lr_scheduler.StepLR(optimizer, step_size=st.num_epochs // 8, gamma=0.50, verbose=False),
lambda optimizer, st: torch.optim.lr_scheduler.StepLR(optimizer, step_size=st.num_epochs // 10, gamma=0.40, verbose=False),
# lambda optimizer, st: torch.optim.lr_scheduler.StepLR(optimizer, step_size=st.num_epochs // 10, gamma=0.75, verbose=False),
]
device = select_device(force_device="cpu") # TODO cuda is not supported because something throws NotImplementedError with my gpu
n_total = len(settings) * len(optimizers) * len(schedulers)
print(f"Testing {n_total} possible configurations, device='{device}'")
print(f"Testing {n_total} possible configurations")
# scheduler2 =
def create_model(st, optimizer_f, scheduler_f):
    """
    Build a fresh RNN together with its optimizer and (optional) scheduler.

    The number of output classes is taken from `labels` of the enclosing scope.

    @param st: settings object; num_features, hidden_size, num_layers and bidirectional are read from it
    @param optimizer_f: callable that receives the model and returns the optimizer
    @param scheduler_f: callable that receives (optimizer, st) and returns the scheduler, or None for no scheduler
    @returns (model, optimizer, scheduler), scheduler is None if scheduler_f is None
    """
    network = RNN(
        input_size=st.num_features,
        hidden_size=st.hidden_size,
        num_layers=st.num_layers,
        num_classes=len(labels),
        bidirectional=st.bidirectional,
    )
    optim = optimizer_f(network)
    # the scheduler factory is optional, None means "train with a constant learning rate setup"
    sched = None if scheduler_f is None else scheduler_f(optim, st)
    return network, optim, sched
t_begin = time.time()
@ -105,21 +103,19 @@ if __name__ == "__main__":
for s in range(len(schedulers)):
for i in range(len(settings)):
st = settings[i]
train_set, test_set = load_datasets(data_dir, labels, exclude_n_object=None, voltage=None, transforms=st.transforms, split_function=st.splitter, train_to_test_ratio=0.7, random_state=80, num_workers=4)
# print(st.get_name())
train_set, test_set = load_datasets(data_dir, labels, voltage=8.2, transforms=st.transforms, split_function=st.splitter, train_to_test_ratio=0.7, random_state=42, num_workers=4)
generator = torch.manual_seed(42)
train_loader = DataLoader(train_set, batch_size=st.batch_size, shuffle=True, generator=generator, collate_fn=PadSequences())
test_loader = DataLoader(test_set, batch_size=None, shuffle=True, generator=generator)
# set batch_size to None and remove collate_fn for this to work
# count_data(train_loader, st.labels, print_summary="training data")
# count_data(test_loader, st.labels, print_summary="validation data")
model, optimizer, scheduler = create_model(st, optimizers[o], schedulers[s])
# train_loader = iter(DataLoader(train_set))
# test_loader = iter(DataLoader(test_set))
train_loader = DataLoader(train_set, batch_size=st.batch_size, shuffle=True, generator=generator)
test_loader = DataLoader(test_set, batch_size=st.batch_size, shuffle=True, generator=generator)
print(f"Testing {n}/{n_total}: (o={o}, s={s}, i={i})")
model, optimizer, scheduler = create_model(st, optimizers[o], schedulers[s])
device = select_device(force_device="cpu")
try:
train_validate_save(model, optimizer, scheduler, loss_func, train_loader, test_loader, st, models_dir, print_interval=1, print_continuous=True, training_cancel_points=training_cancel_points)
train_validate_save(model, optimizer, scheduler, loss_func, train_loader, test_loader, st, models_dir, print_interval=1)
except KeyboardInterrupt:
if input("Cancelled current training. Quit? (q/*): ") == "q":
t_end = time.time()

View File

@ -19,59 +19,52 @@ class RNN(nn.Module):
self.softmax = nn.Softmax(dim=1)
self.D = 2 if self.is_bidirectional == True else 1
def forward(self, x, unpadded_lengths=None):
"""
@param x:
Tensor (seq_length, features) for unbatched inputs
Tensor (batch_size, seq_length, features) for batch inputs
PackedSequence for padded batched inputs
@param unpadded_lengths: Tensor(batch_size) with lengths of the unpadded sequences, when using padding but without PackedSequence
@returns (batch_size, num_classes) with batch_size == 1 for unbatched inputs
"""
# if type(x) == torch.Tensor:
# device = x.device
# # h0: initial hidden states
# # c0: initial cell states
# if len(x.shape) == 2: # x: (seq_length, features)
# h0 = torch.zeros(self.D * self.num_layers, self.hidden_size).to(device)
# c0 = torch.zeros(self.D * self.num_layers, self.hidden_size).to(device)
# elif len(x.shape) == 3: # x: (batch, seq_length, features)
# batch_size = x.shape[0]
# h0 = torch.zeros(self.D * self.num_layers, batch_size, self.hidden_size).to(device)
# c0 = torch.zeros(self.D * self.num_layers, batch_size, self.hidden_size).to(device)
# else:
# raise ValueError(f"RNN.forward: invalid input shape: {x.shape}. Must be (batch, seq_length, features) or (seq_length, features)")
# elif type(x) == nn.utils.rnn.PackedSequence:
# device = x.data.device
# h0 = torch.zeros(self.D * self.num_layers, self.hidden_size).to(device)
# c0 = torch.zeros(self.D * self.num_layers, self.hidden_size).to(device)
# else:
# raise ValueError(f"RNN.forward: invalid input type: {type(x)}. Must be Tensor or PackedSequence")
def forward(self, x):
device = x.device
# h0: initial hidden states
# c0: initial cell states
if len(x.shape) == 2: # x: (seq_length, features)
h0 = torch.zeros(self.D * self.num_layers, self.hidden_size).to(device)
c0 = torch.zeros(self.D * self.num_layers, self.hidden_size).to(device)
elif len(x.shape) == 3: # x: (batch, seq_length, features)
batch_size = x.shape[0]
h0 = torch.zeros(self.D * self.num_layers, batch_size, self.hidden_size).to(device)
c0 = torch.zeros(self.D * self.num_layers, batch_size, self.hidden_size).to(device)
else:
raise ValueError(f"RNN.forward: invalid iput shape: {x.shape}. Must be (batch, seq_length, features) or (seq_length, features)")
# lstm: (batch_size, seq_length, features) -> (batch_size, hidden_size)
# or: packed_sequence -> packed_sequence
# out, (h_n, c_n) = self.lstm(x, (h0, c0))
out, (h_n, c_n) = self.lstm(x) # (h0, c0) defaults to zeros
out, (h_n, c_n) = self.lstm(x, (h0, c0))
print(f"forward: out.shape={out.shape} TODO verify comment")
# out: (N, L, D * hidden_size)
# h_n: (D * num_layers, hidden_size)
# c_n: (D * num_layers, hidden_size)
# print(f"out({out.shape})={out}")
# print(f"h_n({h_n.shape})={h_n}")
# print(f"c_n({c_n.shape})={c_n}")
# print(f"out({out.shape})=...")
# print(f"h_n({h_n.shape})=...")
# print(f"c_n({c_n.shape})=...")
# select the last state of lstm's neurons
if type(out) == nn.utils.rnn.PackedSequence:
# padding has to be considered
out, lengths = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
# the unpadded length of batch i is lengths[i], so that is the last non-zero state
out = torch.stack([out[i,lengths[i].item()-1,:] for i in range(len(lengths))])
elif unpadded_lengths is not None:
out = torch.stack([out[i,unpadded_lengths[i].item()-1,:] for i in range(len(unpadded_lengths))])
"""
# select only last layer [-1] -> last layer,
last_layer_state = h_n.view(self.num_layers, D, batch_size, self.hidden_size)[-1]
if D == 1:
# [1, batch_size, hidden_size] -> [batch_size, hidden_size]
X = last_layer_state.squeeze() # TODO what if batch_size == 1
elif D == 2:
h_1, h_2 = last_layer_state[0], last_layer_state[1] # states of both directions
# concatenate both states, X-size: (Batch, hidden_size * 2
X = torch.cat((h_1, h_2), dim=1)
else:
if out.shape[0] == 3: # batched
out = out[:,-1,:]
else: # unbatched
# softmax requires (batch_size, *)
out = torch.stack([out[-1,:]])
raise ValueError("D must be 1 or 2")
""" # all this is quivalent to line below
out = out[:,-1,:] # select last time step
# fc fully connected layer: (*, hidden_size) -> (*, num_classes)
out = self.fc(out)
# softmax: (batch_size, *) -> (batch_size, *)
# softmax: (*) -> (*)
out = self.softmax(out)
return out

View File

@ -7,7 +7,7 @@ from torch.utils.data import DataLoader
from ..util.settings import MLSettings
from ..tracker.epoch_tracker import EpochTracker
from ..util.file_io import get_next_digits
from ..util.string import class_str, optimizer_str
from ..util.string import class_str
from ..util import model_io as mio
@ -30,20 +30,29 @@ def select_device(force_device=None):
return device
def train(model, optimizer, scheduler, loss_func, train_loader: DataLoader, st: MLSettings, print_interval=1, print_continuous=False, training_cancel_points=[]) -> EpochTracker:
def train(model, optimizer, scheduler, loss_func, train_loader: DataLoader, st: MLSettings, print_interval=1) -> EpochTracker:
epoch_tracker = EpochTracker(st.labels)
epoch_tracker.begin()
for ep in range(st.num_epochs):
loss = -1
for i, (data, lengths, y) in enumerate(train_loader):
for i, (data, y) in enumerate(train_loader):
# print(data, y)
# data = batch, seq, features
# print(f"data({data.shape})={data}")
x = data[:,:,[2]].float() # select voltage data
# print(f"x({x.shape}, {x.dtype})=...")
# print(f"y({y.shape}, {y.dtype})=...")
# pack = torch.nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
# out = model(pack) # really slow
out = model(x, lengths)
# length = torch.tensor([x.shape[1] for _ in range(x.shape[0])], dtype=torch.int64)
# print(f"length({length.shape})={length}")
# batch_size = x.shape[0]
# print(f"batch_size={batch_size}")
# v = x.view(batch_size, -1, feature_count)
# data = rnn_utils.pack_padded_sequence(v.type(torch.FloatTensor), length, batch_first=True).to(device)[0]
# print(f"data({data.shape})={data}")
out = model(x)
# print(f"out({out.shape}={out})")
# print(f" y({y.shape}={y})")
with torch.no_grad():
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
correct = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
@ -63,18 +72,8 @@ def train(model, optimizer, scheduler, loss_func, train_loader: DataLoader, st:
# predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
epoch_tracker.end_epoch(loss, optimizer.param_groups[0]["lr"])
if (ep+1) % print_interval == 0:
if print_continuous: end='\r'
else: end='\n'
print(f"Training:", epoch_tracker.get_epoch_summary_str(), end=end)
# cancel training if model is not good enough
if len(training_cancel_points) > 0 and ep+1 == training_cancel_points[0][0]:
if epoch_tracker.accuracies[-1] < training_cancel_points[0][1]:
print(f"Training cancelled because the models accuracy={epoch_tracker.accuracies[-1]:.2f} < {training_cancel_points[0][1]} after {ep+1} epochs.")
break;
training_cancel_points.pop(0)
if scheduler is not None:
if ep+1 % print_interval == 0:
print(f"Training:", epoch_tracker.get_epoch_summary_str())
scheduler.step()
print("Training:", epoch_tracker.end())
return epoch_tracker
@ -86,27 +85,24 @@ def validate(model, test_loader: DataLoader, st: MLSettings) -> EpochTracker:
with torch.no_grad():
for i, (data, y) in enumerate(test_loader):
# print(ep, "Test")
x = data[:,[2]].float()
x = data[:,:,[2]].float()
out = model(x)
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
if y.shape[0] == 2: # batched
correct = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
else: # unbatched
correct = torch.argmax(y, dim=0, keepdim=True) # -> [ label_indices ]
epoch_tracker.add_prediction(correct, predicted)
print("Validation:", epoch_tracker.end())
return epoch_tracker
def train_validate_save(model, optimizer, scheduler, loss_func, train_loader: DataLoader, test_loader: DataLoader, st: MLSettings, models_dir, print_interval=1, print_continuous=False, show_plots=False, training_cancel_points=[]):
def train_validate_save(model, optimizer, scheduler, loss_func, train_loader: DataLoader, test_loader: DataLoader, st: MLSettings, models_dir, print_interval=1, show_plots=False):
# assumes model and data is already on correct device
# train_loader.to(device)
# test_loader.to(device)
# store optimizer, scheduler and loss_func in settings
st.optimizer = optimizer_str(optimizer)
st.optimizer = class_str(optimizer)
st.scheduler = class_str(scheduler)
st.loss_func = class_str(loss_func)
@ -115,15 +111,15 @@ def train_validate_save(model, optimizer, scheduler, loss_func, train_loader: Da
def add_tab(s):
return "\t" + str(s).replace("\n", "\n\t")
print(100 * '=')
print("model name:", model_name)
print("Model Name:", model_name)
print(f"model:\n", add_tab(model))
print(f"loss_func: {st.loss_func}")
print(f"optimizer: {st.optimizer}")
print(f"scheduler: {st.scheduler}")
# print(f"loss_func:\n", add_tab(class_str(loss_func)))
# print(f"optimizer:\n", add_tab(class_str(optimizer)))
# print(f"scheduler:\n", add_tab(class_str(scheduler)))
print(100 * '-')
training_tracker = train(model, optimizer, scheduler, loss_func, train_loader, st, print_interval=print_interval, print_continuous=print_continuous, training_cancel_points=training_cancel_points)
training_tracker = train(model, optimizer, scheduler, loss_func, train_loader, st, print_interval=print_interval)
# print("Training: Count per label:", training_tracker.get_count_per_label())
# print("Training: Predictions per label:", training_tracker.get_predictions_per_label())

View File

@ -1,4 +1,3 @@
from os import stat
from ..util.data_loader import LabelConverter
import matplotlib.pyplot as plt
import matplotlib.colors as colors

View File

@ -4,7 +4,6 @@ import re
import numpy as np
import pandas as pd
from scipy.sparse import data
import torch
import threading
@ -25,13 +24,7 @@ class LabelConverter:
vec[self.class_labels.index(label)] = 1.0
return vec
def get_label_index(self, one_hot: torch.Tensor):
"""return the index of the largest entry of the given one hot vector, i.e. the index of its label"""
return int(torch.argmax(one_hot).item())
def __getitem__(self, index):
if type(index) == torch.Tensor:
return self.class_labels[self.get_label_index(index)]
return self.class_labels[index]
def __contains__(self, value):
@ -91,11 +84,8 @@ class Dataset:
if split_function is None:
self.data.append((data, sample.label_vec))
else:
try:
for data_split in split_function(data):
self.data.append((data_split, sample.label_vec))
except ValueError as e:
raise ValueError(f"Exception occured during splitting of sample '{sample.datapath}': {e}")
def apply_transforms(self, data):
if type(self.transforms) == list:
@ -121,9 +111,7 @@ def get_datafiles(datadir, labels: LabelConverter, exclude_n_object=None, filter
files.sort()
for file in files:
match = re.fullmatch(re_filename, file)
if not match:
print(f"get_datafiles: dropping non matching file '{file}'")
continue
if not match: continue
label = match.groups()[1]
if label not in labels: continue
@ -137,16 +125,16 @@ def get_datafiles(datadir, labels: LabelConverter, exclude_n_object=None, filter
return datafiles
def load_datasets(datadir, labels: LabelConverter, transforms=None, split_function=None, exclude_n_object=None, voltage=None, train_to_test_ratio=0.7, random_state=None, num_workers=None):
def load_datasets(datadir, labels: LabelConverter, transforms=None, split_function=None, voltage=None, train_to_test_ratio=0.7, random_state=None, num_workers=None):
"""
load all data from datadir that are in the format: yyyy-mm-dd_label_x.xV_xxxmm.csv
"""
datasamples = []
if num_workers == None:
for file, match, label in get_datafiles(datadir, labels, exclude_n_object=exclude_n_object, filter_voltage=voltage):
for file, match, label in get_datafiles(datadir, labels, voltage):
datasamples.append(Datasample(*match.groups(), labels.get_one_hot(label), file))
else:
files = get_datafiles(datadir, labels, exclude_n_object=exclude_n_object, filter_voltage=voltage)
files = get_datafiles(datadir, labels, voltage)
def worker():
while True:
try:
@ -167,30 +155,3 @@ def load_datasets(datadir, labels: LabelConverter, transforms=None, split_functi
train_dataset = Dataset(train_samples, transforms=transforms, split_function=split_function)
test_dataset = Dataset(test_samples, transforms=transforms, split_function=split_function)
return train_dataset, test_dataset
def count_data(data_loader, label_converter: "LabelConverter", print_summary=False):
    """
    Count the sequences and datapoints per label that a data loader yields.

    @param data_loader: unbatched data loader, iterating it must yield (data, y) pairs
    @param label_converter: must support len(), indexing by label index and get_label_index(y)
    @param print_summary: if truthy, print a summary. If it is a string, it is used as the name of the dataset in the headline
    @returns (n_sequences, labels, len_data):
        n_sequences: total number of sequences
        labels: number of sequences per label index
        len_data: number of datapoints (data.shape[0]) per label index
    """
    n_labels = len(label_converter)
    n_sequences = 0
    labels = [0] * n_labels    # number of sequences per label
    len_data = [0] * n_labels  # number of datapoints per label
    for data, y in data_loader:
        label_i = label_converter.get_label_index(y)
        len_data[label_i] += data.shape[0]
        labels[label_i] += 1
        # count explicitly: the enumerate index would be off by one
        n_sequences += 1

    if print_summary:
        # build the headline step by step, a conditional expression appended
        # with '+' would swallow the "Dataset summary" prefix
        headline = "Dataset summary"
        if isinstance(print_summary, str):
            headline += f" for {print_summary}"
        print("=" * 50)
        print(headline + ":")
        print(f"Number of sequences: {n_sequences}")
        for i in range(n_labels):
            print(f"- {label_converter[i]:15}: {labels[i]:3} sequences, {len_data[i]:5} datapoints")
    return n_sequences, labels, len_data

View File

@ -1,14 +0,0 @@
import torch
import torch.nn.utils.rnn as rnn
import numpy as np
class PadSequences:
    """
    Collate function for a DataLoader: pads sequences of different lengths to a common length.
    """
    def __call__(self, batch):
        """
        @param batch: list of samples (data, label), the first dimension of data is the sequence length
        @returns (sequences_padded, lengths, labels), all ordered by descending sequence length:
            sequences_padded: Tensor (batch_size, longest_seq_length, ...)
            lengths: IntTensor (batch_size) with the unpadded length of every sequence
            labels: Tensor with one row per sample
        """
        # longest sequence first; the sort is stable, equally long samples keep their relative order
        by_length = sorted(batch, key=lambda item: -item[0].shape[0])
        tensors = []
        label_rows = []
        for item in by_length:
            tensors.append(torch.Tensor(item[0]))
            label_rows.append(item[1])
        label_tensor = torch.Tensor(np.array(label_rows))
        unpadded_lengths = torch.IntTensor(np.array([t.shape[0] for t in tensors]))
        padded = rnn.pad_sequence(tensors, batch_first=True)
        return padded, unpadded_lengths, label_tensor

View File

@ -5,12 +5,8 @@ class DataSplitter:
Split a numpy array into smaller arrays of size datapoints_per_split
If data.shape[0] % datapoints_per_split != 0, the remaining datapoints are dropped
"""
def __init__(self, datapoints_per_split, drop_if_smaller_than=-1):
"""
@param drop_if_smaller_than: drop the remaining datapoints if the sequence would be smaller than this value. -1 means drop_if_smaller_than=datapoints_per_split
"""
def __init__(self, datapoints_per_split):
self.split_size = datapoints_per_split
self.drop_threshhold = datapoints_per_split if drop_if_smaller_than == -1 else drop_if_smaller_than
def __call__(self, data: np.ndarray):
"""
@ -19,11 +15,6 @@ class DataSplitter:
ret_data = []
for i in range(self.split_size, data.shape[0], self.split_size):
ret_data.append(data[i-self.split_size:i, :])
rest_start = len(ret_data) * self.split_size
if len(data) - rest_start >= self.drop_threshhold:
ret_data.append(data[rest_start:,:])
if len(ret_data) == 0:
raise ValueError(f"data has only {data.shape[0]}, but datapoints_per_split is set to {self.split_size}")
return ret_data

View File

@ -13,50 +13,25 @@ def fill_and_center(s: str, fill_char="=", length=100):
else:
return s
def class_str(x):
    """
    Return the constructor of the class of x with arguments.

    Only constructor parameters that are stored as an instance attribute of the
    same name and whose value differs from the parameter's default are listed,
    e.g. "StepLR(step_size=6, gamma=0.5)". If nothing can be listed, only the
    class name is returned.

    @param x: any object
    @returns string of the form "ClassName(param=value, ...)" or "ClassName"
    """
    name = type(x).__name__
    try:
        parameters = inspect.signature(type(x)).parameters
    except (ValueError, TypeError):
        # builtin / extension types may not provide a signature
        return name

    params = []
    # objects using __slots__ have no __dict__, treat them as having no attributes
    for param_name, param_value in getattr(x, "__dict__", {}).items():
        if param_name not in parameters:
            continue
        default_value = parameters[param_name].default
        try:
            differs = bool(param_value != default_value)
        except (ValueError, TypeError, RuntimeError):
            # array-like values have no single truth value: list them
            # instead of silently dropping all remaining parameters
            differs = True
        if differs:
            params.append(f"{param_name}={param_value!r}")

    if params:
        return f"{name}({', '.join(params)})"
    return name
def optimizer_str(x):
    """
    Return the constructor of the optimizer x with arguments.

    An optimizer stores everything in its 'defaults' dict and is thus not
    compatible with class_str. Only entries of 'defaults' that are constructor
    parameters and differ from the parameter's default are listed.

    @param x: optimizer, an object without a 'defaults' dict yields only the class name
    @returns string of the form "ClassName(param=value, ...)" or "ClassName"
    """
    name = type(x).__name__
    try:
        parameters = inspect.signature(type(x)).parameters
    except (ValueError, TypeError):
        # builtin / extension types may not provide a signature
        return name

    # read the instance attribute only, tolerate objects that are not optimizers
    defaults = getattr(x, "__dict__", {}).get("defaults", {})
    if not isinstance(defaults, dict):
        return name

    params = []
    for param_name, param_value in defaults.items():
        if param_name not in parameters:
            continue
        default_value = parameters[param_name].default
        try:
            differs = bool(param_value != default_value)
        except (ValueError, TypeError, RuntimeError):
            # array-like values have no single truth value: list them
            # instead of silently dropping all remaining parameters
            differs = True
        if differs:
            params.append(f"{param_name}={param_value!r}")

    if params:
        return f"{name}({', '.join(params)})"
    return name
def cleanup_str(s):
"""
convert to string if necessary and