diff --git a/teng-ml/main.py b/teng-ml/main.py
index 3857553..ae71f5f 100644
--- a/teng-ml/main.py
+++ b/teng-ml/main.py
@@ -11,10 +11,10 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 import torch
 import torch.nn as nn
+import torch.nn.utils.rnn as rnn_utils
 from torch.utils.data import DataLoader
-
-from .util.transform import ConstantInterval
+from .util.transform import ConstantInterval, Normalize
 from .util.data_loader import load_datasets, LabelConverter
 
 def test_interpol():
@@ -24,7 +24,7 @@ def test_interpol():
     array = df.to_numpy()
     print(ConstantInterval.get_average_interval(array[:,0]))
     transformer = ConstantInterval(0.05)
-    interp_array = transformer(array[:,0], array[:,2])
+    interp_array = transformer(array[:,[0,2]])
 
     fig1, ax1 = plt.subplots()
     ax1.plot(interp_array[:,0], interp_array[:,1], color="r", label="Interpolated")
@@ -42,15 +42,22 @@ if __name__ == "__main__":
     )
     print(f"Using device: {device}")
 
-    labels = LabelConverter(["foam", "glass", "kapton", "foil"])
-    train_set, test_set = load_datasets("/home/matth/data", labels, voltage=8.2)
+    labels = LabelConverter(["foam", "glass", "kapton", "foil", "cloth", "rigid_foam"])
+    t_const_int = ConstantInterval(0.01)
+    t_norm = Normalize(0, 1)
+    train_set, test_set = load_datasets("/home/matth/Uni/TENG/testdata", labels, voltage=8.2, transforms=[t_const_int], train_to_test_ratio=0.7, random_state=42)
 
     # train_loader = iter(DataLoader(train_set))
     # test_loader = iter(DataLoader(test_set))
-    # sample = next(train_loader)
-    # print(sample)
-    train_loader = iter(DataLoader(train_set))
-    test_loader = iter(DataLoader(test_set))
+    train_loader = iter(DataLoader(train_set, batch_size=3, shuffle=True))
+    test_loader = iter(DataLoader(test_set, batch_size=3, shuffle=True))
+
+    sample = next(train_loader)
+    print(sample)
+
+    feature_count = 1
+
+
     class RNN(nn.Module):
         def __init__(self, input_size, hidden_size, num_layers, num_classes, if_bidirectional):
             super(RNN, self).__init__()
@@ -58,6 +65,7 @@ if __name__ == "__main__":
             self.hidden_size = hidden_size
             self.if_bidirectional = if_bidirectional
             self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=if_bidirectional)
+            # x = (batch_size, sequence, feature)
 
             if if_bidirectional == True:
                 self.fc = nn.Linear(hidden_size * 2, num_classes)
@@ -66,14 +74,21 @@ if __name__ == "__main__":
 
         def forward(self, x):
+            print(f"forward pass")
             D = 2 if self.if_bidirectional == True else 1
-            Batch = x.batch_sizes[0]
-            h0 = torch.zeros(D * self.num_layers, Batch, self.hidden_size).to(device)
-            c0 = torch.zeros(D * self.num_layers, Batch, self.hidden_size).to(device)
+            print(f"x({x.shape})={x}")
+            batch_size = x.shape[1]
+            print(f"batch_size={batch_size}")
+
+            h0 = torch.zeros(D * self.num_layers, batch_size, self.hidden_size).to(device)
+            print(f"h0={h0}")
+            c0 = torch.zeros(D * self.num_layers, batch_size, self.hidden_size).to(device)
             x.to(device)
             _, (h_n, _) = self.lstm(x, (h0, c0))
-            final_state = h_n.view(self.num_layers, D, Batch, self.hidden_size)[-1] # num_layers, num_directions, batch, hidden_size
+            print(f"h_n={h_n}")
+            final_state = h_n.view(self.num_layers, D, batch_size, self.hidden_size)[-1] # num_layers, num_directions, batch, hidden_size
+            print(f"final_state={final_state}")
 
             if D == 1:
                 X = final_state.squeeze()
@@ -81,12 +96,14 @@ if __name__ == "__main__":
                 h_1, h_2 = final_state[0], final_state[1] # forward & backward pass
                 #X = h_1 + h_2 # Add both states
                 X = torch.cat((h_1, h_2), 1) # Concatenate both states, X-size: (Batch, hidden_size * 2)
-
+            else:
+                raise ValueError("D must be 1 or 2")
             output = self.fc(X) # fully-connected layer
+            print(f"out={output}")
             return output
 
-    model = RNN(input_size = 1, hidden_size = 8, num_layers = 3, num_classes = 18, if_bidirectional = True).to(device)
+    model = RNN(input_size=1, hidden_size=8, num_layers=3, num_classes=18, if_bidirectional=True).to(device)
     loss_func = torch.nn.CrossEntropyLoss()
     optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
     scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
@@ -99,34 +116,47 @@ if __name__ == "__main__":
         train_total = 0
         val_correct = 0
         val_total = 0
-        for (x, y), length in train_loader:
+        for data, y in train_loader:
+            # data = batch, seq, features
+            print(ep, "Train")
+            print(f"data({data.shape})={data}")
+            x = data[:,:,2] # select voltage data
+            print(f"x({x.shape})={x}")
+            length = torch.tensor([x.shape[1] for _ in range(x.shape[0])], dtype=torch.int64)
+            print(f"length({length.shape})={length}")
             batch_size = x.shape[0]
-            v = x.view(batch_size, -1, nFeatrue)
-            data = rnn_utils.pack_padded_sequence(v.type(torch.FloatTensor), length, batch_first=True).to(device)
+            print(f"batch_size={batch_size}")
+            v = x.view(batch_size, -1, feature_count)
+            data = rnn_utils.pack_padded_sequence(v.type(torch.FloatTensor), length, batch_first=True).to(device)[0]
             # print(data.batch_sizes[0])
             # print(data)
             out = model(data)
-            loss = loss_func(out, y)
+            loss = loss_func(out, y)
             # print(loss)
             optimizer.zero_grad() # clear gradients for next train
             loss.backward() # backpropagation, compute gradients
             optimizer.step() # apply gradients
-
+
             predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
             train_total += y.size(0)
             train_correct += (predicted == y).sum().item()
 
         scheduler.step()
-
-        for (x, y), length in test_loader:
+
+        for data, y in test_loader:
+            print(ep, "Test")
+            x = data[:,2]
+            print(f"x({x.shape})={x}")
+            length = torch.tensor(x.shape[0], dtype=torch.int64)
+            print(f"length={length}")
             batch_size = x.shape[0]
-            v = x.view(batch_size, -1, nFeatrue)
+            print(f"batch_size={batch_size}")
+            v = x.view(batch_size, -1, feature_count)
             data = rnn_utils.pack_padded_sequence(v.type(torch.FloatTensor), length, batch_first=True).to(device)
             out = model(data)
-            loss = loss_func(out, y)
-
+            loss = loss_func(out, y)
+
             predicted = torch.max(torch.nn.functional.softmax(out), 1)[1]
             val_total += y.size(0)
             val_correct += (predicted == y).sum().item()
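Note (not part of the patch): the new training loop packs equal-length sequences and then indexes the PackedSequence with `[0]`, which hands the model the raw `.data` tensor, so `forward()` has to guess the batch size from `x.shape[1]`. For reference, a minimal sketch of the conventional way to feed a PackedSequence to `nn.LSTM` (shapes and values are illustrative):

```python
import torch
import torch.nn as nn
import torch.nn.utils.rnn as rnn_utils

# Toy batch: 3 sequences, 400 samples each, 1 feature -> (batch, seq, feature)
v = torch.randn(3, 400, 1)
lengths = torch.tensor([400, 400, 400], dtype=torch.int64)
packed = rnn_utils.pack_padded_sequence(v, lengths, batch_first=True)

lstm = nn.LSTM(input_size=1, hidden_size=8, num_layers=3, batch_first=True, bidirectional=True)
_, (h_n, _) = lstm(packed)  # pass the PackedSequence itself, not its .data tensor
print(h_n.shape)            # torch.Size([6, 3, 8]) = (num_layers * num_directions, batch, hidden_size)
```

Passing the PackedSequence directly lets the LSTM recover the true batch size and per-sequence lengths, so no manual batch-size bookkeeping is needed inside `forward()`.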
diff --git a/teng-ml/rnn.py b/teng-ml/rnn.py
index 2733d4b..f8425c4 100644
--- a/teng-ml/rnn.py
+++ b/teng-ml/rnn.py
@@ -31,7 +31,7 @@ class RNN(nn.Module):
             X = final_state.squeeze()
         elif D == 2:
             h_1, h_2 = final_state[0], final_state[1] # forward & backward pass
-            #X = h_1 + h_2 # Add both states
+            # X = h_1 + h_2 # Add both states
             X = torch.cat((h_1, h_2), 1) # Concatenate both states, X-size: (Batch, hidden_size * 2)
 
         output = self.fc(X) # fully-connected layer
diff --git a/teng-ml/util/__pycache__/transform.cpython-310.pyc b/teng-ml/util/__pycache__/transform.cpython-310.pyc
index 9ccaebd..6a44ccc 100644
Binary files a/teng-ml/util/__pycache__/transform.cpython-310.pyc and b/teng-ml/util/__pycache__/transform.cpython-310.pyc differ
diff --git a/teng-ml/util/data_loader.py b/teng-ml/util/data_loader.py
index 226ef6f..8284752 100644
--- a/teng-ml/util/data_loader.py
+++ b/teng-ml/util/data_loader.py
@@ -60,18 +60,26 @@ class Dataset:
     """
     Store the whole dataset, compatible with torch.data.Dataloader
     """
-    def __init__(self, datasamples):
+    def __init__(self, datasamples, transforms=None):
         self.datasamples = datasamples
+        self.transforms = transforms
         # self.labels = [ d.label_vec for d in datasamples ]
         # self.data = [ d.get_data() for d in datasamples ]
 
     def __getitem__(self, index):
-        return self.datasamples[index].get_data(), self.datasamples[index].label_vec
+        data, label = self.datasamples[index].get_data(), self.datasamples[index].label_vec
+        if type(self.transforms) == list:
+            for t in self.transforms:
+                data = t(data)
+        elif self.transforms:
+            data = self.transforms(data)
+        # TODO
+        return data[:400], label
 
     def __len__(self):
         return len(self.datasamples)
 
-def load_datasets(datadir, labels: LabelConverter, voltage=None, train_to_test_ratio=0.7, random_state=None):
+def load_datasets(datadir, labels: LabelConverter, transforms=None, voltage=None, train_to_test_ratio=0.7, random_state=None):
     """
     load all data from datadir that are in the format: yyyy-mm-dd_label_x.xV_xxxmm.csv
     """
@@ -90,6 +98,6 @@ def load_datasets(datadir, labels: LabelConverter, voltage=None, train_to_test_r
             datasamples.append(Datasample(*match.groups(), labels.get_one_hot(label), datadir + "/" + file))
     train_samples, test_samples = train_test_split(datasamples, train_size=train_to_test_ratio, shuffle=True, random_state=random_state)
-    train_dataset = Dataset(train_samples)
-    test_dataset = Dataset(test_samples)
+    train_dataset = Dataset(train_samples, transforms=transforms)
+    test_dataset = Dataset(test_samples, transforms=transforms)
     return train_dataset, test_dataset
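Note (not part of the patch): `Dataset.__getitem__` now applies either a list of transforms (in order) or a single callable before truncating each sample to its first 400 rows. A hypothetical call site mirroring `main.py` (directory path and parameter values are illustrative; assumes the same imports as `main.py` are in scope):

```python
labels = LabelConverter(["foam", "glass", "kapton", "foil", "cloth", "rigid_foam"])
train_set, test_set = load_datasets("/path/to/testdata", labels,
                                    transforms=[ConstantInterval(0.01)],
                                    voltage=8.2, train_to_test_ratio=0.7, random_state=42)

data, label_vec = train_set[0]  # transforms applied in order, then data truncated to 400 rows
```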
diff --git a/teng-ml/util/transform.py b/teng-ml/util/transform.py
index d468c73..abbf99a 100644
--- a/teng-ml/util/transform.py
+++ b/teng-ml/util/transform.py
@@ -25,20 +25,24 @@ class ConstantInterval:
     """
     Interpolate the data to have a constant interval / sample rate, so that 1 index step is always equivalent to a certain time step
-    Expects: timestamps, idata, vdata
     """
     def __init__(self, interval):
         self.interval = interval
 
-    def __call__(self, timestamps, data):
-        interp = interp1d(timestamps, data)
-        new_stamps = np.arange(0, timestamps[-1], self.interval)
-        print(f"old=({timestamps.size}) {timestamps}, new=({new_stamps.size}){new_stamps}")
-        new_vals = interp(new_stamps)
-        return np.vstack((new_stamps, new_vals)).T
+    def __call__(self, a):
+        """
+        array: [timestamps, data1, data2...]
+        """
+        timestamps = a[:,0]
+        new_stamps = np.arange(timestamps[0], timestamps[-1], self.interval)
+        ret = new_stamps
+        for i in range(1, a.shape[1]): # interpolate each data column onto the new timestamp grid
+            interp = interp1d(timestamps, a[:,i])
+            new_vals = interp(new_stamps)
+            ret = np.vstack((ret, new_vals))
+        return ret.T
 
     @staticmethod
     def get_average_interval(timestamps):
         avg_interval = np.average([ timestamps[i] - timestamps[i-1] for i in range(1, len(timestamps))])
         return avg_interval
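Note (not part of the patch): the reworked `ConstantInterval.__call__` takes a single array whose first column holds timestamps and resamples every remaining column onto a constant-interval grid. A quick sanity check with synthetic values (assumes `ConstantInterval` from this file is in scope):

```python
import numpy as np

# Irregularly sampled recording: column 0 = timestamps, columns 1+ = data channels
a = np.array([
    [0.000, 1.0, 10.0],
    [0.013, 2.0, 20.0],
    [0.030, 3.0, 30.0],
    [0.041, 4.0, 40.0],
])

transformer = ConstantInterval(0.01)
out = transformer(a)
print(out[:, 0])   # [0.   0.01 0.02 0.03 0.04] -- constant 0.01 s spacing
print(out.shape)   # (5, 3): new timestamps plus both interpolated channels
```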