VoxelEngine/labirinth_ai/Models/BaseModel.py


import torch
from torch import nn
import numpy as np
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import os
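# Presumably caps how long the autograd engine waits for its worker threads at shutdown
# ("0" = do not wait), so the process exits promptly; assumption based on the variable name.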
os.environ["TORCH_AUTOGRAD_SHUTDOWN_WAIT_LIMIT"] = "0"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
# Define model
class BaseModel(nn.Module):
    evolutionary = False

    def __init__(self, view_dimension, action_num, channels):
        super(BaseModel, self).__init__()
        self.flatten = nn.Flatten()
        self.actions = []
        self.action_num = action_num
        self.viewD = view_dimension
        self.channels = channels

        for action in range(action_num):
            action_sequence = nn.Sequential(
                nn.Linear(channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2,
                          (2 * self.viewD + 1) * (2 * self.viewD + 1)),
                nn.ELU(),
                nn.Linear((2 * self.viewD + 1) * (2 * self.viewD + 1), (self.viewD + 1) * (self.viewD + 1)),
                nn.ELU(),
                nn.Linear((self.viewD + 1) * (self.viewD + 1), 2)
            )
            self.add_module('action_' + str(action), action_sequence)
            self.actions.append(action_sequence)

    def forward(self, x):
        x_flat = self.flatten(x)
        actions = []
        for action in range(self.action_num):
            actions.append(self.actions[action](x_flat))
        return torch.stack(actions, dim=1)
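
# Usage sketch (illustrative only, not executed anywhere): for view_dimension=5 and channels=4 the
# model expects a flat observation of size 4 * 11 * 11 + 2 = 486 and returns one
# (present, future) reward estimate per action, stacked along dim 1:
#
#   model = BaseModel(view_dimension=5, action_num=4, channels=4)
#   out = model(torch.zeros(1, 486))   # out.shape == (1, 4, 2)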


class BaseDataSet(Dataset):
    def __init__(self, states, targets):
        assert len(states) == len(targets), "Needs to have as many states as targets!"
        self.states = torch.tensor(np.array(states), dtype=torch.float32)
        self.targets = torch.tensor(np.array(targets), dtype=torch.float32)

    def __len__(self):
        return len(self.states)

    def __getitem__(self, idx):
        return self.states[idx], self.targets[idx]


def create_optimizer(model):
    return torch.optim.RMSprop(model.parameters(), lr=1e-3)


def create_loss_function(action):
    lambda_factor = 0.0
    split_factor = 1.0

    def custom_loss(prediction, target):
        return torch.mean(split_factor * torch.square(
            # discounted best estimate the old weights made for t+1
            lambda_factor * target[:, 0, 0] +
            # actual reward for t
            target[:, 1, 0] -
            # estimate for current weights
            (prediction[:, action, 0] + prediction[:, action, 1])) +
            # trying to learn present reward separate from future reward
            (1.0 - split_factor) * torch.square(target[:, 1, 0] - prediction[:, action, 0]), dim=0)
    return custom_loss
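
# Note on the target layout implied by the loss above: target[:, 0, 0] carries the old network's
# discounted estimate for t+1 and target[:, 1, 0] the reward actually observed at t; only these
# two entries are read. With the defaults (lambda_factor=0.0, split_factor=1.0) the loss reduces
# to a plain MSE between target[:, 1, 0] and prediction[:, action, 0] + prediction[:, action, 1].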


def from_numpy(x):
    return torch.tensor(np.array(x), dtype=torch.float32)


def train(states, targets, model, optimizer):
    # Each action head is trained separately with its own data set and loss function.
    for action in range(model.action_num):
        data_set = BaseDataSet(states[action], targets[action])
        dataloader = DataLoader(data_set, batch_size=256, shuffle=True)
        loss_fn = create_loss_function(action)

        model.train()
        epochs = 1
        with tqdm(range(epochs)) as progress_bar:
            for _ in progress_bar:
                losses = []
                for X, y in dataloader:
                    X, y = X.to(device), y.to(device)

                    # Compute prediction error
                    pred = model(X)
                    loss = loss_fn(pred, y)

                    # Backpropagation
                    optimizer.zero_grad()
                    loss.backward(retain_graph=True)
                    optimizer.step()

                    losses.append(loss.item())
                progress_bar.set_postfix(loss=np.average(losses))
        model.eval()
        del data_set
        del dataloader
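
# train() expects one list of states and one list of targets per action head (indexed by the
# action id), as illustrated in the __main__ block below; each head gets its own DataLoader
# and its own loss function.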


if __name__ == '__main__':
    sample = np.random.random((1, 486))

    model = BaseModel(5, 4, 4).to(device)
    print(model)

    test = model(torch.tensor(sample, dtype=torch.float32, device=device))
    # test = test.cpu().detach().numpy()
    print(test)

    state = np.random.random((486,))
    target = np.random.random((4, 2))

    states = [
        [state],
        [state],
        [state],
        [state],
    ]
    targets = [
        [target],
        [target],
        [target],
        [target],
    ]

    optimizer = create_optimizer(model)
    train(states, targets, model, optimizer)