From 9660de248aa23ceb4ce5dcdf30673710cf3705bf Mon Sep 17 00:00:00 2001
From: "Matthias@Dell"
Date: Sun, 7 May 2023 21:40:39 +0200
Subject: [PATCH] added tracker & settings

---
 teng-ml/main.py               | 102 +++++++++++++++++++++-------------
 teng-ml/util/data_loader.py   |   7 ++-
 teng-ml/util/epoch_tracker.py |  83 +++++++++++++++++++++++++++
 teng-ml/util/settings.py      |  35 ++++++++++++
 4 files changed, 186 insertions(+), 41 deletions(-)
 create mode 100644 teng-ml/util/epoch_tracker.py
 create mode 100644 teng-ml/util/settings.py

diff --git a/teng-ml/main.py b/teng-ml/main.py
index fd93b3e..6a0f882 100644
--- a/teng-ml/main.py
+++ b/teng-ml/main.py
@@ -13,9 +13,14 @@ import torch
 import torch.nn as nn
 import torch.nn.utils.rnn as rnn_utils
 from torch.utils.data import DataLoader
+import json
+import time
+import pickle
 
 from .util.transform import ConstantInterval, Normalize
 from .util.data_loader import load_datasets, LabelConverter
+from .util.epoch_tracker import EpochTracker
+from .util.settings import MLSettings
 
 def test_interpol():
     file = "/home/matth/data/2023-04-27_glass_8.2V_179mm000.csv"
@@ -40,31 +45,31 @@ if __name__ == "__main__":
         if torch.backends.mps.is_available()
         else "cpu"
     )
-    print(f"Using device: {device}")
 
-    settings = {}
     labels = LabelConverter(["foam", "glass", "kapton", "foil", "cloth", "rigid_foam"])
     t_const_int = ConstantInterval(0.01)
     t_norm = Normalize(0, 1)
-
     transforms = [ t_const_int, t_norm ]
+    st = MLSettings(num_features=1,
+            num_layers=1,
+            hidden_size=1,
+            bidirectional=True,
+            transforms=transforms,
+            num_epochs=40,
+            batch_size=3,
+            labels=labels,
+        )
 
-    settings["transforms"] = str(transforms)
+    print(f"Using device: {device}")
 
-    train_set, test_set = load_datasets("/home/matth/Uni/TENG/data", labels, voltage=8.2, transforms=[t_const_int], train_to_test_ratio=0.7, random_state=42)
+
+    train_set, test_set = load_datasets("/home/matth/Uni/TENG/data", labels, voltage=8.2, transforms=st.transforms, train_to_test_ratio=0.7, random_state=42)
 
     # train_loader = iter(DataLoader(train_set))
     # test_loader = iter(DataLoader(test_set))
-    train_loader = DataLoader(train_set, batch_size=3, shuffle=True)
-    test_loader = DataLoader(test_set, batch_size=3, shuffle=True)
-# , dtype=torch.float32
-    # sample = next(train_loader)
-    # print(sample)
-
-    feature_count = 1
-    settings["feature_count"] = str(feature_count)
-
+    train_loader = DataLoader(train_set, batch_size=st.batch_size, shuffle=True)
+    test_loader = DataLoader(test_set, batch_size=st.batch_size, shuffle=True)
 
     class RNN(nn.Module):
         def __init__(self, input_size, hidden_size, num_layers, num_classes, bidirectional):
@@ -115,7 +120,7 @@ if __name__ == "__main__":
             # print(f"out({output.shape})={output}")
             return output
 
-    model=RNN(input_size=1, hidden_size=8, num_layers=3, num_classes=len(labels), bidirectional=True).to(device)
+    model = RNN(input_size=st.num_features, hidden_size=st.hidden_size, num_layers=st.num_layers, num_classes=len(labels), bidirectional=st.bidirectional).to(device)
     loss_func = torch.nn.CrossEntropyLoss()
     optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
     scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
@@ -125,14 +130,19 @@ if __name__ == "__main__":
     print(f"optimizer={optimizer}")
     print(f"scheduler={scheduler}")
 
-    num_epochs = 10
+
+
+    epoch_tracker = EpochTracker(labels)
+
+    print("train_loader")
+    for i, (data, y) in enumerate(train_loader):
+        print(y)
+        print(f"{i:3} - {torch.argmax(y, dim=1, keepdim=False)}")
+
     # training
-    for ep in range(num_epochs):
-        train_correct = 0
-        train_total = 0
-        val_correct = 0
-        val_total = 0
+    epoch_tracker.train_begin()
+    for ep in range(st.num_epochs):
         for i, (data, y) in enumerate(train_loader):
             # print(data, y)
             # data = batch, seq, features
@@ -160,13 +170,15 @@ if __name__ == "__main__":
 
             # predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
             predicted = torch.argmax(out, dim=1, keepdim=False)  # -> [ label_indices ]
-            actual = torch.argmax(y, dim=1, keepdim=False)  # -> [ label_indices ]
-            # print(f"predicted={predicted}, actual={actual}")
-            train_total += y.size(0)
-            train_correct += (predicted == actual).sum().item()
-
-        print(f"epoch={ep+1:3}: Training accuracy={100 * train_correct / train_total:.2f}%, loss={loss:3f}")
+            correct = torch.argmax(y, dim=1, keepdim=False)  # -> [ label_indices ]
+            # print(f"predicted={predicted}, correct={correct}")
+            # train_total += y.size(0)
+            # train_correct += (predicted == correct).sum().item()
+            epoch_tracker.train(correct, predicted)
+        epoch_tracker.next_epoch(loss)
+        print(epoch_tracker.get_last_epoch_summary_str())
         scheduler.step()
+    t_end = time.time()
 
     with torch.no_grad():
         for i, (data, y) in enumerate(test_loader):
@@ -176,20 +188,34 @@ if __name__ == "__main__":
             loss = loss_func(out, y)
 
             predicted = torch.argmax(out, dim=1, keepdim=False)  # -> [ label_indices ]
-            actual = torch.argmax(y, dim=1, keepdim=False)  # -> [ label_indices ]
-            # print(f"predicted={predicted}, actual={actual}")
-            val_total += y.size(0)
-            val_correct += (predicted == actual).sum().item()
+            correct = torch.argmax(y, dim=1, keepdim=False)  # -> [ label_indices ]
+            # print(f"predicted={predicted}, correct={correct}")
+            # val_total += y.size(0)
+            # val_correct += (predicted == correct).sum().item()
+
+            epoch_tracker.test(correct, predicted)
 
     # print(f"train_total={train_total}, val_total={val_total}")
-    if train_total == 0: train_total = -1
-    if val_total == 0: val_total = -1
+    # if train_total == 0: train_total = -1
+    # if val_total == 0: val_total = -1
 
-    print(f"epoch={ep+1:3}: Testing accuracy={100 * val_correct / val_total:.2f}")
-    print(f"End result: Training accuracy={100 * train_correct / train_total:.2f}%, Testing accuracy={100 * val_correct / val_total:.2f}")
-    settings["model"] = str(model)
+    # print(f"epoch={ep+1:3}: Testing accuracy={100 * val_correct / val_total:.2f}")
+    # print(f"End result: Training accuracy={100 * train_correct / train_total:.2f}%, Testing accuracy={100 * val_correct / val_total:.2f}, training took {t_end - t_begin:.2f} seconds")
 
-    with open("settings.txt", "w") as file:
-        file.write(str(settings))
+    epoch_tracker.get_test_statistics()
+    # epoch_tracker.()
+    # print(epoch_tracker.get_training_summary_str())
+    print(epoch_tracker.get_training_count_per_label())
+
+    model_name = st.get_name()
+    # save the settings, results and model
+    with open(model_name + "_settings.pkl", "wb") as file:
+        pickle.dump(st, file)
+
+    with open(model_name + "_results.pkl", "wb") as file:
+        pickle.dump(epoch_tracker, file)
+
+    with open(model_name + "_model.pkl", "wb") as file:
+        pickle.dump(model, file)
diff --git a/teng-ml/util/data_loader.py b/teng-ml/util/data_loader.py
index 5b4684e..22526ac 100644
--- a/teng-ml/util/data_loader.py
+++ b/teng-ml/util/data_loader.py
@@ -7,7 +7,7 @@ import pandas as pd
 from sklearn.model_selection import train_test_split
 
 # groups: date, name, voltage, distance, index
-re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
+re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z_]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+)\.csv"
r"(\d{4}-\d{2}-\d{2})_([a-zA-Z_]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv" class LabelConverter: def __init__(self, class_labels): @@ -46,7 +46,7 @@ class Datasample: self.data = None def __repr__(self): - size = self.data.size if self.data else "Unknown" + size = self.data.size if self.data is not None else "Unknown" return f"{self.label}-{self.index}: dimension={size}, recorded at {self.date} with U={self.voltage}V, d={self.distance}mm" def _load_data(self): @@ -71,13 +71,14 @@ class Dataset: def __getitem__(self, index): data, label = self.datasamples[index].get_data(), self.datasamples[index].label_vec + # print(f"loading dataset {self.datasamples[index]}") if type(self.transforms) == list: for t in self.transforms: data = t(data) elif self.transforms: data = self.transforms(data) # TODO - return data[:400], label + return data[:2000], label def __len__(self): return len(self.datasamples) diff --git a/teng-ml/util/epoch_tracker.py b/teng-ml/util/epoch_tracker.py new file mode 100644 index 0000000..cd8f5f9 --- /dev/null +++ b/teng-ml/util/epoch_tracker.py @@ -0,0 +1,83 @@ +from ..util.data_loader import LabelConverter +import time +import torch + +class EpochTracker: + """ + Track progress through epochs and generate statistics + """ + def __init__(self, labels: LabelConverter): + # Training + self.accuracy = [] + self.loss = [] + self.times = [] # timestamps for each epoch end + self.trainings = [] + self.training_indices = [[]] # epoch, batch_nr, (correct_indices, predicted_indices), ind:ex_nr + self._current_epoch = 0 + + self.labels = labels + + # Testing + self.tests = [] # (correct_indices, predicted_indices) + + def train_begin(self): + """for time tracking""" + self.times.append(time.time()) + + # TRAINING + def train(self, correct_indices: torch.Tensor, predicted_indices: torch.Tensor): + self.training_indices[self._current_epoch].append((correct_indices, predicted_indices)) + + def next_epoch(self, loss): + self.times.append(time.time()) + self.loss.append(loss) + correct_predictions = 0 + total_predictions = 0 + for predicted_indices, correct_indices in self.training_indices[self._current_epoch]: + correct_predictions += (predicted_indices == correct_indices).sum().item() + total_predictions += predicted_indices.size(0) + accuracy = 100 * correct_predictions / total_predictions + self.accuracy.append(accuracy) + self._current_epoch += 1 + self.training_indices.append([]) + + def get_last_epoch_summary_str(self): + """call after next_epoch()""" + return f"Epoch {self._current_epoch:3}: Accuracy={self.accuracy[-1]:.2f}, Loss={self.loss[-1]:.3f}, Training duration={self.times[-1] - self.times[0]:.2f}s" + def get_last_epoch_summary(self): + """ + @returns accuracy, loss, training time + """ + return self.accuracy[-1], self.loss[-1], self.times[-1] - self.times[0] + + def get_training_count_per_label(self): + count_per_label = [ 0 for _ in range(len(self.labels)) ] + for i in range(len(self.training_indices)): + for j in range(len(self.training_indices[i])): + for k in range(self.training_indices[i][j][0].size(0)): + # epoch, batch_nr, 0 = correct_indices, correct_index_nr + count_per_label[self.training_indices[i][j][0][k]] += 1 + return count_per_label + + def __len__(self): + return len(self.accuracy) + + def __getitem__(self, idx): + return (self.accuracy[idx], self.loss[idx]) + + # TESTING + def test(self, correct_indices: torch.Tensor, predicted_indices: torch.Tensor): + """ + @param correct_indices and predicted_indices: 1 dim Tensor + """ + for i in 
+            self.tests.append((correct_indices[i], predicted_indices[i]))
+
+
+    def get_test_statistics(self):
+        # confusion matrix: statistics[i][j] counts how often label j was predicted when label i was correct
+        statistics = [ [ 0 for _ in range(len(self.labels))] for _ in range(len(self.labels)) ]
+        for corr, pred in self.tests:
+            statistics[corr][pred] += 1
+        print(statistics)
+        return statistics
diff --git a/teng-ml/util/settings.py b/teng-ml/util/settings.py
new file mode 100644
index 0000000..9de0341
--- /dev/null
+++ b/teng-ml/util/settings.py
@@ -0,0 +1,35 @@
+from ..util.data_loader import LabelConverter
+
+class MLSettings:
+    """
+    Manage model and training settings for easy saving and loading
+    """
+    def __init__(self,
+            num_features=1,
+            num_layers=1,
+            hidden_size=1,
+            bidirectional=True,
+            transforms=[],
+            num_epochs=10,
+            batch_size=5,
+            labels=LabelConverter([]),
+        ):
+        self.num_features = num_features
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.num_epochs = num_epochs
+        self.bidirectional = bidirectional
+        self.transforms = transforms
+        self.batch_size = batch_size
+        self.labels = labels
+
+    def get_name(self):
+        """
+        F = num_features
+        L = num_layers
+        H = hidden_size
+        B = bidirectional
+        T = #transforms
+        E = #epochs
+        """
+        return f"F{self.num_features}L{self.num_layers}H{self.hidden_size}B{'1' if self.bidirectional else '0'}T{len(self.transforms)}E{self.num_epochs}"
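
Usage sketch (illustration, not part of the patch): reading back the three
pickle files that main.py writes at the end of a run. The name "F1L1H1B1T2E40"
assumes the MLSettings values used in main.py (1 feature, 1 layer, hidden size
1, bidirectional, 2 transforms, 40 epochs); unpickling also requires torch and
the teng-ml package to be importable, since pickle resolves MLSettings,
EpochTracker and the model class by module path.

    import pickle

    model_name = "F1L1H1B1T2E40"  # st.get_name() for the settings used in main.py

    # hyperparameters of the run (MLSettings)
    with open(model_name + "_settings.pkl", "rb") as file:
        st = pickle.load(file)

    # per-epoch accuracy/loss and the test results (EpochTracker)
    with open(model_name + "_results.pkl", "rb") as file:
        tracker = pickle.load(file)

    # the trained RNN itself
    with open(model_name + "_model.pkl", "rb") as file:
        model = pickle.load(file)

    print(st.get_name(), tracker.get_last_epoch_summary())
    # rows: correct label index, columns: predicted label index
    confusion = tracker.get_test_statistics()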