Compare commits

...

2 Commits

Author SHA1 Message Date
Matthias@Dell
9660de248a added tracker & settings 2023-05-07 21:40:39 +02:00
Matthias@Dell
341cc39f24 model working 2023-05-05 18:26:44 +02:00
5 changed files with 240 additions and 59 deletions

View File

@ -13,9 +13,14 @@ import torch
import torch.nn as nn
import torch.nn.utils.rnn as rnn_utils
from torch.utils.data import DataLoader
import json
import time
import pickle
from .util.transform import ConstantInterval, Normalize
from .util.data_loader import load_datasets, LabelConverter
from .util.epoch_tracker import EpochTracker
from .util.settings import MLSettings
def test_interpol():
file = "/home/matth/data/2023-04-27_glass_8.2V_179mm000.csv"
@ -40,90 +45,111 @@ if __name__ == "__main__":
if torch.backends.mps.is_available()
else "cpu"
)
print(f"Using device: {device}")
labels = LabelConverter(["foam", "glass", "kapton", "foil", "cloth", "rigid_foam"])
t_const_int = ConstantInterval(0.01)
t_norm = Normalize(0, 1)
train_set, test_set = load_datasets("/home/matth/Uni/TENG/testdata", labels, voltage=8.2, transforms=[t_const_int], train_to_test_ratio=0.7, random_state=42)
transforms = [ t_const_int, t_norm ]
st = MLSettings(num_features=1,
num_layers=1,
hidden_size=1,
bidirectional=True,
transforms=transforms,
num_epochs=40,
batch_size=3,
labels=labels,
)
print(f"Using device: {device}")
train_set, test_set = load_datasets("/home/matth/Uni/TENG/data", labels, voltage=8.2, transforms=st.transforms, train_to_test_ratio=0.7, random_state=42)
# train_loader = iter(DataLoader(train_set))
# test_loader = iter(DataLoader(test_set))
train_loader = iter(DataLoader(train_set, batch_size=3, shuffle=True))
test_loader = iter(DataLoader(test_set, batch_size=3, shuffle=True))
# , dtype=torch.float32
sample = next(train_loader)
print(sample)
feature_count = 1
train_loader = DataLoader(train_set, batch_size=st.batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=st.batch_size, shuffle=True)
class RNN(nn.Module):
    """LSTM sequence classifier with a fully-connected output layer.

    The final hidden state of the last LSTM layer (both directions
    concatenated when bidirectional) is fed through a linear layer and a
    softmax over the class dimension.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output classes.
        bidirectional: whether the LSTM runs in both directions.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, bidirectional):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.is_bidirectional = bool(bidirectional)

        # x = (batch_size, sequence, feature)
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=self.is_bidirectional,
        )

        # A bidirectional LSTM yields one final state per direction, which
        # are concatenated before the fully-connected layer.
        num_directions = 2 if self.is_bidirectional else 1
        self.fc = nn.Linear(hidden_size * num_directions, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: tensor indexed as (batch, length, features).

        Returns:
            Tensor of shape (batch, num_classes) holding the softmax output.
        """
        num_directions = 2 if self.is_bidirectional else 1

        # Tensor.to() is not in-place: the result has to be re-bound. The
        # target device is taken from the model's own parameters so that the
        # module does not depend on a script-level `device` variable.
        x = x.to(next(self.parameters()).device)
        batch_size = x.shape[0]

        state_shape = (num_directions * self.num_layers, batch_size, self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        _, (h_n, _) = self.lstm(x, (h0, c0))

        # (num_layers, num_directions, batch, hidden_size) -> keep last layer
        final_state = h_n.view(self.num_layers, num_directions, batch_size, self.hidden_size)[-1]

        if num_directions == 1:
            # Index instead of squeeze(): squeeze() would also drop the batch
            # or hidden axis whenever one of them has size 1.
            X = final_state[0]
        else:
            # Concatenate forward & backward states, X-size: (batch, hidden_size * 2)
            X = torch.cat((final_state[0], final_state[1]), 1)

        output = self.fc(X)  # fully-connected layer
        # NOTE(review): torch.nn.CrossEntropyLoss applies log-softmax itself;
        # if this output is passed to it, consider returning the raw logits.
        output = self.softmax(output)
        return output
model=RNN(input_size=1, hidden_size=8, num_layers=3, num_classes=18, if_bidirectional=True).to(device)
model=RNN(input_size=st.num_features, hidden_size=st.hidden_size, num_layers=st.num_layers, num_classes=len(labels), bidirectional=st.bidirectional).to(device)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
print(model)
print(f"model:", model)
print(f"loss_func={loss_func}")
print(f"optimizer={optimizer}")
print(f"scheduler={scheduler}")
epoch_tracker = EpochTracker(labels)
print(f"train_loader")
for i, (data, y) in enumerate(train_loader):
print(y)
print(f"{i:3} - {torch.argmax(y, dim=1, keepdim=False)}")
# training
for ep in range(40):
train_correct = 0
train_total = 0
val_correct = 0
val_total = 0
for data, y in train_loader:
epoch_tracker.train_begin()
for ep in range(st.num_epochs):
for i, (data, y) in enumerate(train_loader):
# print(data, y)
# data = batch, seq, features
print(ep, "Train")
# print(f"data({data.shape})={data}")
x = data[:,:,[2]].float() # select voltage data
print(f"x({x.shape}, {x.dtype})=...")
print(f"y({y.shape}, {y.dtype})=...")
# print(f"x({x.shape}, {x.dtype})=...")
# print(f"y({y.shape}, {y.dtype})=...")
# length = torch.tensor([x.shape[1] for _ in range(x.shape[0])], dtype=torch.int64)
# print(f"length({length.shape})={length}")
# batch_size = x.shape[0]
@ -134,6 +160,7 @@ if __name__ == "__main__":
# print(data.batch_sizes[0])
# print(data)
out = model(x)
# print(f"out({out.shape}={out})")
loss = loss_func(out, y)
# print(loss)
@ -141,28 +168,54 @@ if __name__ == "__main__":
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients
predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
train_total += y.size(0)
train_correct += (predicted == y).sum().item()
# predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
correct = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
# print(f"predicted={predicted}, correct={correct}")
# train_total += y.size(0)
# train_correct += (predicted == correct).sum().item()
epoch_tracker.train(correct, predicted)
epoch_tracker.next_epoch(loss)
print(epoch_tracker.get_last_epoch_summary_str())
scheduler.step()
t_end = time.time()
for data, y in test_loader:
print(ep, "Test")
x = data[:,:,[2]]
print(f"x({x.shape})={x}")
# length = torch.tensor(x.shape[1], dtype=torch.int64)
# print(f"length={length}")
# batch_size = x.shape[0]
# print(f"batch_size={batch_size}")
# v = x.view(batch_size, -1, feature_count)
# data = rnn_utils.pack_padded_sequence(v.type(torch.FloatTensor), length, batch_first=True).to(device)
with torch.no_grad():
for i, (data, y) in enumerate(test_loader):
# print(ep, "Test")
x = data[:,:,[2]].float()
out = model(x)
loss = loss_func(out, y)
predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
val_total += y.size(0)
val_correct += (predicted == y).sum().item()
predicted = torch.argmax(out, dim=1, keepdim=False) # -> [ label_indices ]
correct = torch.argmax(y, dim=1, keepdim=False) # -> [ label_indices ]
# print(f"predicted={predicted}, correct={correct}")
# val_total += y.size(0)
# val_correct += (predicted == correct).sum().item()
print("epoch: ", ep + 1, 'Accuracy of the Train: %.2f %%' % (100 * train_correct / train_total), 'Accuracy of the Test: %.2f %%' % (100 * val_correct / val_total))
epoch_tracker.test(correct, predicted)
# print(f"train_total={train_total}, val_total={val_total}")
# if train_total == 0: train_total = -1
# if val_total == 0: val_total = -1
# print(f"epoch={ep+1:3}: Testing accuracy={100 * val_correct / val_total:.2f}")
# print(f"End result: Training accuracy={100 * train_correct / train_total:.2f}%, Testing accuracy={100 * val_correct / val_total:.2f}, training took {t_end - t_begin:.2f} seconds")
epoch_tracker.get_test_statistics()
# epoch_tracker.()
# print(epoch_tracker.get_training_summary_str())
print(epoch_tracker.get_training_count_per_label())
model_name = st.get_name()
# save the settings, results and model
with open(model_name + "_settings.pkl", "wb") as file:
pickle.dump(st, file)
with open(model_name + "_results.pkl", "wb") as file:
pickle.dump(epoch_tracker, file)
with open(model_name + "_model.pkl", "wb") as file:
pickle.dump(model, file)

View File

@ -7,7 +7,7 @@ import pandas as pd
from sklearn.model_selection import train_test_split
# groups: date, name, voltage, distance, index
re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
re_filename = r"(\d{4}-\d{2}-\d{2})_([a-zA-Z_]+)_(\d{1,2}(?:\.\d*)?)V_(\d+(?:\.\d*)?)mm(\d+).csv"
class LabelConverter:
def __init__(self, class_labels):
@ -26,6 +26,9 @@ class LabelConverter:
def __contains__(self, value):
return value in self.class_labels
def __len__(self):
return len(self.class_labels)
def get_labels(self):
return self.class_labels.copy()
@ -43,7 +46,7 @@ class Datasample:
self.data = None
def __repr__(self):
size = self.data.size if self.data else "Unknown"
size = self.data.size if self.data is not None else "Unknown"
return f"{self.label}-{self.index}: dimension={size}, recorded at {self.date} with U={self.voltage}V, d={self.distance}mm"
def _load_data(self):
@ -52,7 +55,7 @@ class Datasample:
def get_data(self):
"""[[timestamps, idata, vdata]]"""
if not self.data:
if self.data is None:
self._load_data()
return self.data
@ -68,13 +71,14 @@ class Dataset:
def __getitem__(self, index):
data, label = self.datasamples[index].get_data(), self.datasamples[index].label_vec
# print(f"loading dataset {self.datasamples[index]}")
if type(self.transforms) == list:
for t in self.transforms:
data = t(data)
elif self.transforms:
data = self.transforms(data)
# TODO
return data[:400], label
return data[:2000], label
def __len__(self):
return len(self.datasamples)

View File

@ -0,0 +1,83 @@
from ..util.data_loader import LabelConverter
import time
import torch
class EpochTracker:
    """
    Track progress through epochs and generate statistics
    """

    def __init__(self, labels: "LabelConverter"):
        # Training
        self.accuracy = []  # accuracy in percent, one entry per finished epoch
        self.loss = []  # loss as float, one entry per finished epoch
        self.times = []  # timestamps: train_begin() call, then each epoch end
        self.trainings = []
        # epoch, batch_nr, (correct_indices, predicted_indices), index_nr
        self.training_indices = [[]]
        self._current_epoch = 0
        self.labels = labels

        # Testing
        self.tests = []  # (correct_index, predicted_index) as plain ints

    def train_begin(self):
        """for time tracking"""
        self.times.append(time.time())

    # TRAINING
    def train(self, correct_indices: torch.Tensor, predicted_indices: torch.Tensor):
        """Record the correct and predicted label indices of one training batch."""
        self.training_indices[self._current_epoch].append((correct_indices, predicted_indices))

    def next_epoch(self, loss):
        """
        Close the current epoch: store its end time, loss and accuracy.
        @param loss: loss of the epoch, a number or a single-element Tensor
        """
        self.times.append(time.time())
        # Store a plain float: keeping the loss tensor would keep its autograd
        # graph alive and drag it into anything that pickles this tracker.
        self.loss.append(float(loss))

        correct_predictions = 0
        total_predictions = 0
        for correct_indices, predicted_indices in self.training_indices[self._current_epoch]:
            correct_predictions += (predicted_indices == correct_indices).sum().item()
            total_predictions += predicted_indices.size(0)
        # An epoch without any recorded batch counts as 0% instead of raising
        # ZeroDivisionError.
        accuracy = 100 * correct_predictions / total_predictions if total_predictions else 0.0
        self.accuracy.append(accuracy)

        self._current_epoch += 1
        self.training_indices.append([])

    def get_last_epoch_summary_str(self):
        """call after next_epoch()"""
        return f"Epoch {self._current_epoch:3}: Accuracy={self.accuracy[-1]:.2f}, Loss={self.loss[-1]:.3f}, Training duration={self.times[-1] - self.times[0]:.2f}s"

    def get_last_epoch_summary(self):
        """
        @returns accuracy, loss, training time
        """
        return self.accuracy[-1], self.loss[-1], self.times[-1] - self.times[0]

    def get_training_count_per_label(self):
        """
        @returns list with, for each label index, how often it was the correct
                 label of a training sample (summed over all epochs)
        """
        count_per_label = [0] * len(self.labels)
        for epoch_batches in self.training_indices:
            for correct_indices, _ in epoch_batches:
                for label_index in correct_indices.tolist():
                    count_per_label[label_index] += 1
        return count_per_label

    def __len__(self):
        return len(self.accuracy)

    def __getitem__(self, idx):
        return (self.accuracy[idx], self.loss[idx])

    # TESTING
    def test(self, correct_indices: torch.Tensor, predicted_indices: torch.Tensor):
        """
        @param correct_indices and predicted_indices: 1 dim Tensor
        """
        # tolist() yields plain ints, so no tensors are kept in self.tests
        self.tests.extend(zip(correct_indices.tolist(), predicted_indices.tolist()))

    def get_test_statistics(self):
        """
        Print and return the confusion counts of all recorded test samples.
        @returns statistics[i][j] = how often label j was predicted when
                 label i was correct
        """
        label_count = len(self.labels)
        statistics = [[0] * label_count for _ in range(label_count)]
        for corr, pred in self.tests:
            statistics[corr][pred] += 1
        print(statistics)
        return statistics

35
teng-ml/util/settings.py Normal file
View File

@ -0,0 +1,35 @@
from ..util.data_loader import LabelConverter
class MLSettings:
    """
    Manage model and training settings for easy saving and loading
    """

    def __init__(self,
                 num_features=1,
                 num_layers=1,
                 hidden_size=1,
                 bidirectional=True,
                 transforms=None,
                 num_epochs=10,
                 batch_size=5,
                 labels=None,
                 ):
        """
        @param transforms: list of transforms, defaults to a new empty list
        @param labels: LabelConverter, defaults to a new empty LabelConverter
        """
        self.num_features = num_features
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.num_epochs = num_epochs
        self.bidirectional = bidirectional
        # Defaults are created per instance: a default written in the
        # signature is evaluated once and shared by every MLSettings object.
        self.transforms = [] if transforms is None else transforms
        self.batch_size = batch_size
        self.labels = LabelConverter([]) if labels is None else labels

    def get_name(self):
        """
        Build a short name from the settings:
        F = num_features
        L = num_layers
        H = hidden_size
        B = bidirectional
        T = #transforms
        NOTE(review): num_epochs and batch_size are not part of the name, so
        runs differing only in those produce the same name.
        """
        return f"F{self.num_features}L{self.num_layers}H{self.hidden_size}B{'1' if self.bidirectional else '0'}T{len(self.transforms)}"

View File

@ -20,6 +20,9 @@ class Normalize:
a -= self.low
return a
def __repr__(self):
return f"Normalize(low={self.low}, high={self.high})"
class ConstantInterval:
"""
@ -49,3 +52,6 @@ class ConstantInterval:
# sug_interval = 0.5 * avg_interval
# print(f"Average interval: {avg_interval}, Suggestion: {sug_interval}")
def __repr__(self):
return f"ConstantInterval(interval={self.interval})"