added tracker & settings
This commit is contained in:
parent
341cc39f24
commit
9660de248a
102
teng-ml/main.py
102
teng-ml/main.py
@ -13,9 +13,14 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.utils.rnn as rnn_utils
|
import torch.nn.utils.rnn as rnn_utils
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import pickle
|
||||||
|
|
||||||
from .util.transform import ConstantInterval, Normalize
|
from .util.transform import ConstantInterval, Normalize
|
||||||
from .util.data_loader import load_datasets, LabelConverter
|
from .util.data_loader import load_datasets, LabelConverter
|
||||||
|
from .util.epoch_tracker import EpochTracker
|
||||||
|
from .util.settings import MLSettings
|
||||||
|
|
||||||
def test_interpol():
|
def test_interpol():
|
||||||
file = "/home/matth/data/2023-04-27_glass_8.2V_179mm000.csv"
|
file = "/home/matth/data/2023-04-27_glass_8.2V_179mm000.csv"
|
||||||
@ -40,31 +45,31 @@ if __name__ == "__main__":
|
|||||||
if torch.backends.mps.is_available()
|
if torch.backends.mps.is_available()
|
||||||
else "cpu"
|
else "cpu"
|
||||||
)
|
)
|
||||||
print(f"Using device: {device}")
|
|
||||||
|
|
||||||
settings = {}
|
|
||||||
|
|
||||||
labels = LabelConverter(["foam", "glass", "kapton", "foil", "cloth", "rigid_foam"])
|
labels = LabelConverter(["foam", "glass", "kapton", "foil", "cloth", "rigid_foam"])
|
||||||
t_const_int = ConstantInterval(0.01)
|
t_const_int = ConstantInterval(0.01)
|
||||||
t_norm = Normalize(0, 1)
|
t_norm = Normalize(0, 1)
|
||||||
|
|
||||||
transforms = [ t_const_int, t_norm ]
|
transforms = [ t_const_int, t_norm ]
|
||||||
|
st = MLSettings(num_features=1,
|
||||||
|
num_layers=1,
|
||||||
|
hidden_size=1,
|
||||||
|
bidirectional=True,
|
||||||
|
transforms=transforms,
|
||||||
|
num_epochs=40,
|
||||||
|
batch_size=3,
|
||||||
|
labels=labels,
|
||||||
|
)
|
||||||
|
|
||||||
settings["transforms"] = str(transforms)
|
print(f"Using device: {device}")
|
||||||
|
|
||||||
train_set, test_set = load_datasets("/home/matth/Uni/TENG/data", labels, voltage=8.2, transforms=[t_const_int], train_to_test_ratio=0.7, random_state=42)
|
|
||||||
|
train_set, test_set = load_datasets("/home/matth/Uni/TENG/data", labels, voltage=8.2, transforms=st.transforms, train_to_test_ratio=0.7, random_state=42)
|
||||||
|
|
||||||
# train_loader = iter(DataLoader(train_set))
|
# train_loader = iter(DataLoader(train_set))
|
||||||
# test_loader = iter(DataLoader(test_set))
|
# test_loader = iter(DataLoader(test_set))
|
||||||
train_loader = DataLoader(train_set, batch_size=3, shuffle=True)
|
train_loader = DataLoader(train_set, batch_size=st.batch_size, shuffle=True)
|
||||||
test_loader = DataLoader(test_set, batch_size=3, shuffle=True)
|
test_loader = DataLoader(test_set, batch_size=st.batch_size, shuffle=True)
|
||||||
# , dtype=torch.float32
|
|
||||||
# sample = next(train_loader)
|
|
||||||
# print(sample)
|
|
||||||
|
|
||||||
feature_count = 1
|
|
||||||
settings["feature_count"] = str(feature_count)
|
|
||||||
|
|
||||||
|
|
||||||
class RNN(nn.Module):
|
class RNN(nn.Module):
|
||||||
def __init__(self, input_size, hidden_size, num_layers, num_classes, bidirectional):
|
def __init__(self, input_size, hidden_size, num_layers, num_classes, bidirectional):
|
||||||
@ -115,7 +120,7 @@ if __name__ == "__main__":
|
|||||||
# print(f"out({output.shape})={output}")
|
# print(f"out({output.shape})={output}")
|
||||||
return output
|
return output
|
||||||
|
|
||||||
model=RNN(input_size=1, hidden_size=8, num_layers=3, num_classes=len(labels), bidirectional=True).to(device)
|
model=RNN(input_size=st.num_features, hidden_size=st.hidden_size, num_layers=st.num_layers, num_classes=len(labels), bidirectional=st.bidirectional).to(device)
|
||||||
loss_func = torch.nn.CrossEntropyLoss()
|
loss_func = torch.nn.CrossEntropyLoss()
|
||||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
|
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
|
||||||
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
|
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
|
||||||
@ -125,14 +130,19 @@ if __name__ == "__main__":
|
|||||||
print(f"optimizer={optimizer}")
|
print(f"optimizer={optimizer}")
|
||||||
print(f"scheduler={scheduler}")
|
print(f"scheduler={scheduler}")
|
||||||
|
|
||||||
num_epochs = 10
|
|
||||||
|
|
||||||
|
epoch_tracker = EpochTracker(labels)
|
||||||
|
|
||||||
|
print(f"train_loader")
|
||||||
|
for i, (data, y) in enumerate(train_loader):
|
||||||
|
print(y)
|
||||||
|
print(f"{i:3} - {torch.argmax(y, dim=1, keepdim=False)}")
|
||||||
|
|
||||||
|
|
||||||
# training
|
# training
|
||||||
for ep in range(num_epochs):
|
epoch_tracker.train_begin()
|
||||||
train_correct = 0
|
for ep in range(st.num_epochs):
|
||||||
train_total = 0
|
|
||||||
val_correct = 0
|
|
||||||
val_total = 0
|
|
||||||
for i, (data, y) in enumerate(train_loader):
|
for i, (data, y) in enumerate(train_loader):
|
||||||
# print(data, y)
|
# print(data, y)
|
||||||
# data = batch, seq, features
|
# data = batch, seq, features
|
||||||
@ -160,13 +170,15 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
|
# predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
|
||||||
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
|
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
|
||||||
actual = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
|
correct = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
|
||||||
# print(f"predicted={predicted}, actual={actual}")
|
# print(f"predicted={predicted}, correct={correct}")
|
||||||
train_total += y.size(0)
|
# train_total += y.size(0)
|
||||||
train_correct += (predicted == actual).sum().item()
|
# train_correct += (predicted == correct).sum().item()
|
||||||
|
epoch_tracker.train(correct, predicted)
|
||||||
print(f"epoch={ep+1:3}: Training accuracy={100 * train_correct / train_total:.2f}%, loss={loss:3f}")
|
epoch_tracker.next_epoch(loss)
|
||||||
|
print(epoch_tracker.get_last_epoch_summary_str())
|
||||||
scheduler.step()
|
scheduler.step()
|
||||||
|
t_end = time.time()
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
for i, (data, y) in enumerate(test_loader):
|
for i, (data, y) in enumerate(test_loader):
|
||||||
@ -176,20 +188,34 @@ if __name__ == "__main__":
|
|||||||
loss = loss_func(out, y)
|
loss = loss_func(out, y)
|
||||||
|
|
||||||
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
|
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
|
||||||
actual = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
|
correct = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
|
||||||
# print(f"predicted={predicted}, actual={actual}")
|
# print(f"predicted={predicted}, correct={correct}")
|
||||||
val_total += y.size(0)
|
# val_total += y.size(0)
|
||||||
val_correct += (predicted == actual).sum().item()
|
# val_correct += (predicted == correct).sum().item()
|
||||||
|
|
||||||
|
epoch_tracker.test(correct, predicted)
|
||||||
|
|
||||||
# print(f"train_total={train_total}, val_total={val_total}")
|
# print(f"train_total={train_total}, val_total={val_total}")
|
||||||
if train_total == 0: train_total = -1
|
# if train_total == 0: train_total = -1
|
||||||
if val_total == 0: val_total = -1
|
# if val_total == 0: val_total = -1
|
||||||
|
|
||||||
print(f"epoch={ep+1:3}: Testing accuracy={100 * val_correct / val_total:.2f}")
|
# print(f"epoch={ep+1:3}: Testing accuracy={100 * val_correct / val_total:.2f}")
|
||||||
print(f"End result: Training accuracy={100 * train_correct / train_total:.2f}%, Testing accuracy={100 * val_correct / val_total:.2f}")
|
# print(f"End result: Training accuracy={100 * train_correct / train_total:.2f}%, Testing accuracy={100 * val_correct / val_total:.2f}, training took {t_end - t_begin:.2f} seconds")
|
||||||
settings["model"] = str(model)
|
|
||||||
|
|
||||||
with open("settings.txt", "w") as file:
|
epoch_tracker.get_test_statistics()
|
||||||
file.write(str(settings))
|
# epoch_tracker.()
|
||||||
|
|
||||||
|
# print(epoch_tracker.get_training_summary_str())
|
||||||
|
print(epoch_tracker.get_training_count_per_label())
|
||||||
|
|
||||||
|
model_name = st.get_name()
|
||||||
|
# save the settings, results and model
|
||||||
|
with open(model_name + "_settings.pkl", "wb") as file:
|
||||||
|
pickle.dump(st, file)
|
||||||
|
|
||||||
|
with open(model_name + "_results.pkl", "wb") as file:
|
||||||
|
pickle.dump(epoch_tracker, file)
|
||||||
|
|
||||||
|
with open(model_name + "_model.pkl", "wb") as file:
|
||||||
|
pickle.dump(model, file)
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ import pandas as pd
|
|||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
# groups: date, name, voltage, distance, index
|
# groups: date, name, voltage, distance, index
|
||||||
re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
|
re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z_]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
|
||||||
|
|
||||||
class LabelConverter:
|
class LabelConverter:
|
||||||
def __init__(self, class_labels):
|
def __init__(self, class_labels):
|
||||||
@ -46,7 +46,7 @@ class Datasample:
|
|||||||
self.data = None
|
self.data = None
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
size = self.data.size if self.data else "Unknown"
|
size = self.data.size if self.data is not None else "Unknown"
|
||||||
return f"{self.label}-{self.index}: dimension={size}, recorded at {self.date} with U={self.voltage}V, d={self.distance}mm"
|
return f"{self.label}-{self.index}: dimension={size}, recorded at {self.date} with U={self.voltage}V, d={self.distance}mm"
|
||||||
|
|
||||||
def _load_data(self):
|
def _load_data(self):
|
||||||
@ -71,13 +71,14 @@ class Dataset:
|
|||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
data, label = self.datasamples[index].get_data(), self.datasamples[index].label_vec
|
data, label = self.datasamples[index].get_data(), self.datasamples[index].label_vec
|
||||||
|
# print(f"loading dataset {self.datasamples[index]}")
|
||||||
if type(self.transforms) == list:
|
if type(self.transforms) == list:
|
||||||
for t in self.transforms:
|
for t in self.transforms:
|
||||||
data = t(data)
|
data = t(data)
|
||||||
elif self.transforms:
|
elif self.transforms:
|
||||||
data = self.transforms(data)
|
data = self.transforms(data)
|
||||||
# TODO
|
# TODO
|
||||||
return data[:400], label
|
return data[:2000], label
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.datasamples)
|
return len(self.datasamples)
|
||||||
|
83
teng-ml/util/epoch_tracker.py
Normal file
83
teng-ml/util/epoch_tracker.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
from ..util.data_loader import LabelConverter
|
||||||
|
import time
|
||||||
|
import torch
|
||||||
|
|
||||||
|
class EpochTracker:
    """
    Track progress through epochs and generate statistics.

    Typical use: call train_begin() once, then for every epoch call train()
    once per batch followed by next_epoch(loss). After training, call test()
    once per test batch and read the result with get_test_statistics().

    All recorded index tensors are detached and moved to the CPU and scalar
    losses are stored as Python floats, so that a pickled tracker does not
    hold on to the autograd graph or to device memory.
    """

    def __init__(self, labels: "LabelConverter"):
        """
        @param labels: label collection; only len(labels) is used by this class
        """
        # Training
        self.accuracy = []  # accuracy in percent, one entry per finished epoch
        self.loss = []  # loss passed to next_epoch(), one entry per finished epoch
        self.times = []  # timestamps: one from train_begin() and one for each epoch end
        self.trainings = []
        # epoch, batch_nr, (correct_indices, predicted_indices), index_nr
        self.training_indices = [[]]
        self._current_epoch = 0

        self.labels = labels

        # Testing
        self.tests = []  # (correct_index, predicted_index) as Python ints

    def train_begin(self):
        """for time tracking"""
        self.times.append(time.time())

    # TRAINING
    def train(self, correct_indices: torch.Tensor, predicted_indices: torch.Tensor):
        """
        Record the result of one training batch for the current epoch.

        @param correct_indices and predicted_indices: 1 dim Tensor
        """
        self.training_indices[self._current_epoch].append(
            (correct_indices.detach().cpu(), predicted_indices.detach().cpu())
        )

    def next_epoch(self, loss):
        """
        Finish the current epoch: store its end time, loss and accuracy.

        @param loss: loss of the epoch; a single-element Tensor is stored as float
        An epoch without any recorded predictions gets an accuracy of 0.0.
        """
        self.times.append(time.time())
        if isinstance(loss, torch.Tensor) and loss.numel() == 1:
            # a plain float does not keep the autograd graph alive
            loss = loss.item()
        self.loss.append(loss)

        correct_predictions = 0
        total_predictions = 0
        for correct_indices, predicted_indices in self.training_indices[self._current_epoch]:
            correct_predictions += (predicted_indices == correct_indices).sum().item()
            total_predictions += predicted_indices.size(0)

        if total_predictions > 0:
            accuracy = 100 * correct_predictions / total_predictions
        else:
            # no batch was recorded: avoid a ZeroDivisionError
            accuracy = 0.0
        self.accuracy.append(accuracy)

        self._current_epoch += 1
        self.training_indices.append([])

    def get_last_epoch_summary_str(self):
        """call after next_epoch()"""
        return f"Epoch {self._current_epoch:3}: Accuracy={self.accuracy[-1]:.2f}, Loss={self.loss[-1]:.3f}, Training duration={self.times[-1] - self.times[0]:.2f}s"

    def get_last_epoch_summary(self):
        """
        @returns accuracy, loss, training time
        """
        return self.accuracy[-1], self.loss[-1], self.times[-1] - self.times[0]

    def get_training_count_per_label(self):
        """
        Count how often each label index was the correct one during training.

        The counts are summed over all recorded epochs.
        @returns list with one count per label
        """
        count_per_label = [0] * len(self.labels)
        for epoch in self.training_indices:
            for correct_indices, _ in epoch:
                for label_index in correct_indices.tolist():
                    count_per_label[label_index] += 1
        return count_per_label

    def __len__(self):
        """Number of finished epochs."""
        return len(self.accuracy)

    def __getitem__(self, idx):
        """
        @returns (accuracy, loss) of the epoch idx
        """
        return (self.accuracy[idx], self.loss[idx])

    # TESTING
    def test(self, correct_indices: torch.Tensor, predicted_indices: torch.Tensor):
        """
        Record the result of one test batch.

        @param correct_indices and predicted_indices: 1 dim Tensor
        """
        self.tests.extend(zip(correct_indices.tolist(), predicted_indices.tolist()))

    def get_test_statistics(self):
        """
        Build and print the matrix of test results.

        @returns statistics, where statistics[i][j] is the number of times
                 label j was predicted when label i was correct
        """
        # label i, label_j was predicted when label_i was correct
        statistics = [[0] * len(self.labels) for _ in range(len(self.labels))]
        for corr, pred in self.tests:
            statistics[int(corr)][int(pred)] += 1
        print(statistics)
        return statistics
|
35
teng-ml/util/settings.py
Normal file
35
teng-ml/util/settings.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from ..util.data_loader import LabelConverter
|
||||||
|
|
||||||
|
class MLSettings:
    """
    Manage model and training settings for easy saving and loading
    """

    def __init__(self,
                 num_features=1,
                 num_layers=1,
                 hidden_size=1,
                 bidirectional=True,
                 transforms=None,
                 num_epochs=10,
                 batch_size=5,
                 labels=None,
                 ):
        """
        @param transforms: list of transforms; None (default) means a new empty list
        @param labels: label collection; None (default) means a new LabelConverter([])

        The defaults for transforms and labels are created per instance, so
        that instances never share one mutable default object.
        """
        self.num_features = num_features
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.num_epochs = num_epochs
        self.bidirectional = bidirectional
        self.transforms = [] if transforms is None else transforms
        self.batch_size = batch_size
        self.labels = LabelConverter([]) if labels is None else labels

    def get_name(self):
        """
        Build a short name from the settings, e.g. "F1L1H1B1T2".

        F = num_features
        L = num_layers
        H = hidden_size
        B = bidirectional (1 or 0)
        T = #transforms

        num_epochs, batch_size and labels are not part of the name.
        """
        return f"F{self.num_features}L{self.num_layers}H{self.hidden_size}B{'1' if self.bidirectional else '0'}T{len(self.transforms)}"
|
Loading…
Reference in New Issue
Block a user