move to pytorch
parent 0638d5e666
commit e718873caa
4 changed files with 135 additions and 22 deletions
@@ -17,9 +17,9 @@ class LabyrinthClient(Client):
             if self.world_provider.world.board[x, y] in [1, 2]:
                 r, g, b = 57, 92, 152
                 if 1.5 >= self.world_provider.world.hunter_grass[x, y] > 0.5:
-                    r, g, b = 25, 149, 156
-                if 3 >= self.world_provider.world.hunter_grass[x, y] > 1.5:
                     r, g, b = 112, 198, 169
+                if 3 >= self.world_provider.world.hunter_grass[x, y] > 1.5:
+                    r, g, b = 25, 149, 156
                 self.world_provider.world.set_color(x, y, 0, r / 255.0, g / 255.0, b / 255.0)
             if self.world_provider.world.board[x, y] == 3:
                 self.world_provider.world.set_color(x, y, 0, 139 / 255.0, 72 / 255.0, 82 / 255.0)
labirinth_ai/Models/BaseModel.py (new file, 125 lines)
@@ -0,0 +1,125 @@
+import torch
+from torch import nn
+import numpy as np
+import tqdm
+from torch.utils.data import Dataset, DataLoader
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using {device} device")
+
+
+# Define model
+class BaseModel(nn.Module):
+    def __init__(self, view_dimension, action_num, channels):
+        super(BaseModel, self).__init__()
+        self.flatten = nn.Flatten()
+        self.actions = []
+        self.action_num = action_num
+        self.viewD = view_dimension
+        self.channels = channels
+        for action in range(action_num):
+            # One independent three-layer head per action; add_module registers
+            # its parameters even though self.actions is a plain Python list.
+            action_sequence = nn.Sequential(
+                nn.Linear(channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2,
+                          (2 * self.viewD + 1) * (2 * self.viewD + 1)),
+                nn.ELU(),
+                nn.Linear((2 * self.viewD + 1) * (2 * self.viewD + 1), (self.viewD + 1) * (self.viewD + 1)),
+                nn.ELU(),
+                nn.Linear((self.viewD + 1) * (self.viewD + 1), 2)
+            )
+            self.add_module('action_' + str(action), action_sequence)
+            self.actions.append(action_sequence)
+
+    def forward(self, x):
+        x_flat = self.flatten(x)
+        actions = []
+        for action in range(self.action_num):
+            actions.append(self.actions[action](x_flat))
+        # Output shape: (batch, action_num, 2)
+        return torch.stack(actions, dim=1)
+
+
+class BaseDataSet(Dataset):
+    def __init__(self, states, targets):
+        assert len(states) == len(targets), "Needs to have as many states as targets!"
+        self.states = torch.tensor(states, dtype=torch.float32)
+        self.targets = torch.tensor(targets, dtype=torch.float32)
+
+    def __len__(self):
+        return len(self.states)
+
+    def __getitem__(self, idx):
+        return self.states[idx], self.targets[idx]
+
+
+def create_optimizer(model):
+    return torch.optim.RMSprop(model.parameters(), lr=1e-3)
+
+
+def create_loss_function(action):
+    def custom_loss(prediction, target):
+        # Two coupled squared-error terms per action head: output 0 is trained
+        # toward the observed reward (target[:, 1, 0]), and output 0 + output 1
+        # toward reward + 0.1 * the bootstrapped next value (target[:, 0, 0]).
+        return torch.mean(0.5 * torch.square(
+            0.1 * target[:, 0, 0] + target[:, 1, 0] - (
+                prediction[:, action, 0] + prediction[:, action, 1])) + 0.5 * torch.square(
+            target[:, 1, 0] - prediction[:, action, 0]), dim=0)
+
+    return custom_loss
+
+
+def from_numpy(x):
+    return torch.tensor(x, dtype=torch.float32)
+
+
+def train(states, targets, model, optimizer):
+    for action in range(model.action_num):
+        data_set = BaseDataSet(states[action], targets[action])
+        dataloader = DataLoader(data_set, batch_size=64, shuffle=True)
+        loss_fn = create_loss_function(action)
+
+        size = len(dataloader.dataset)  # sample count, not batch count, so the progress display is consistent
+        model.train()
+        for batch, (X, y) in enumerate(dataloader):
+            X, y = X.to(device), y.to(device)
+
+            # Compute prediction error
+            pred = model(X)
+            loss = loss_fn(pred, y)
+
+            # Backpropagation
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            if batch % 100 == 0:
+                loss, current = loss.item(), batch * len(X)
+                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
+        model.eval()
+
+
+if __name__ == '__main__':
+    # The first Linear layer expects channels * (2*viewD+1)**2 + 2 inputs
+    # (the flattened board view plus two extra scalars): 4 * 11 * 11 + 2 = 486.
+    sample = np.random.random((1, 4 * 11 * 11 + 2))
+
+    model = BaseModel(5, 4, 4).to(device)
+    print(model)
+
+    test = model(torch.tensor(sample, dtype=torch.float32).to(device))
+    # test = test.cpu().detach().numpy()
+    print(test)
+
+    state = np.random.random((4 * 11 * 11 + 2,))
+    target = np.random.random((2, 1))  # (2, 1) matches the NetLearner layout: [next value, reward]
+    states = [
+        [state],
+        [state],
+        [state],
+        [state],
+    ]
+    targets = [
+        [target],
+        [target],
+        [target],
+        [target],
+    ]
+
+    optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
+
+    train(states, targets, model, optimizer)
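
The loss couples each head's two outputs: output 0 is pulled toward the observed reward, and the sum of outputs 0 and 1 toward reward plus 0.1 times a bootstrapped next-state value (see the targets built in NetLearner below). A sketch of the same computation with the target slots named; q_next, r, v and a are hypothetical names, not part of the commit:

    import torch

    def loss_terms(prediction, target, action):
        q_next = target[:, 0, 0]      # bootstrapped value of the following state
        r = target[:, 1, 0]           # reward actually received
        v = prediction[:, action, 0]  # head output 0: reward estimate
        a = prediction[:, action, 1]  # head output 1: future-value estimate
        # same two squared-error terms as custom_loss above
        return torch.mean(0.5 * torch.square(0.1 * q_next + r - (v + a))
                          + 0.5 * torch.square(r - v), dim=0)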
labirinth_ai/Models/__init__.py (new file, 0 lines)
@@ -5,6 +5,7 @@ from tensorflow import keras

 from labirinth_ai.LabyrinthWorld import LabyrinthWorld
 from labirinth_ai.loss import loss2, loss3
+from labirinth_ai.Models.BaseModel import BaseModel, train, create_optimizer, device, from_numpy

 # import torch
 # dtype = torch.float
@@ -369,22 +370,9 @@ class NetLearner(Subject):
         self.x_in = []
         self.actions = []
         self.target = []
-        for i in range(4):
-            x_in = keras.Input(shape=(self.channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2))
-            self.x_in.append(x_in)
-            inVec = keras.layers.Flatten()(x_in)
-            actions = keras.layers.Dense(((2 * self.viewD + 1) * (2 * self.viewD + 1)), activation='elu',
-                                         kernel_regularizer=keras.regularizers.l2(0.001),
-                                         name=self.name + str(self.id) + 'Dense' + str(i) + 'l1')(inVec)
-            actions = keras.layers.Dense(((self.viewD + 1) * (self.viewD + 1)), activation='elu',
-                                         kernel_regularizer=keras.regularizers.l2(0.001))(actions)
-            self.target.append(keras.Input(shape=(2, 1)))
-            self.actions.append(keras.layers.Dense(2, activation='linear', use_bias=False, kernel_regularizer=keras.regularizers.l2(0.001))(actions))
-
-        self.model = keras.Model(inputs=self.x_in, outputs=self.actions)
-
-        self.model.compile(optimizer=tf.keras.optimizers.RMSprop(self.learningRate), loss=loss3,
-                           target_tensors=self.target)
-
+        self.model = BaseModel(self.viewD, 4, 4)
+        self.model.to(device)
+        self.optimizer = create_optimizer(self.model)
+
         if len(self.samples) < self.randomBuffer:
             self.random = True
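
The four per-action Keras graphs collapse into one BaseModel whose forward pass evaluates every head at once, so the optimizer now lives on the learner instead of inside model.compile. The calling contract this assumes, illustrative only (state stands for the flat createState output):

    # old: self.model.predict([...]) -> list of four (batch, 2) arrays
    # new: one flat input -> one stacked tensor
    out = self.model(from_numpy(state))   # shape (batch, 4, 2)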
@@ -508,7 +496,7 @@ class NetLearner(Subject):
         if state is None:
             state = self.createState(world)
         if vals is None:
-            vals = self.model.predict([state, state, state, state])
+            vals = self.model(from_numpy(state).to(device)).detach().cpu().numpy()
         vals = np.reshape(np.transpose(np.reshape(vals, (4, 2)), (1, 0)),
                           (1, 8))
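
The unchanged reshape after the call still works because the stacked (1, 4, 2) output flattens in the same order as the old list of four (1, 2) head outputs. A quick illustration:

    import numpy as np
    vals = np.arange(8.0).reshape(1, 4, 2)  # stand-in for the model output
    out = np.reshape(np.transpose(np.reshape(vals, (4, 2)), (1, 0)), (1, 8))
    # out lists the four heads' output 0 first, then their output 1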
@@ -623,9 +611,9 @@ class NetLearner(Subject):
                     target[:, 1, 0] = samples[:, 1, 3]  # reward t-2 got

                     nextState = np.concatenate(samples[:, 1, 0])  # states of t-1
-                    nextVals = self.model.predict([nextState, nextState, nextState, nextState])
+                    nextVals = self.model(from_numpy(nextState).to(device)).detach().cpu().numpy()

-                    nextVals2 = nextVals[i][:, 0] + nextVals[i][:, 1]
+                    nextVals2 = nextVals[:, i, 0] + nextVals[:, i, 1]
                     target[:, 0, 0] = nextVals2  # best q t-1
                 else:
                     target[:, 1, 0] = np.array(list(map(lambda elem: list(elem), list(np.array(samples[:, 1, 4])))))[:, i]  # reward t-2 got
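
The indexing change follows from the same layout shift: Keras returned a per-head list, so the head index came first (nextVals[i][:, 0]), while the torch module returns one array whose second axis is the action, hence nextVals[:, i, 0].

    # old: nextVals[i] was head i's (batch, 2) array
    # new: nextVals has shape (batch, 4, 2); axis 1 selects the head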
@@ -639,7 +627,7 @@ class NetLearner(Subject):
     def train(self):
         print(self.name)
         states, target = self.generateSamples()
-        self.model.fit(states, target, epochs=1)
+        train(states, target, self.model, self.optimizer)

         self.samples = self.samples[-self.historySizeMul*self.batchsize:]
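
The new train() replaces one keras fit epoch with one pass per action head, each over its own BaseDataSet and DataLoader. A sketch of the layout generateSamples has to supply, mirroring the __main__ smoke test in BaseModel.py and assuming the demo's viewD=5, channels=4 (np.zeros stands in for real samples):

    import numpy as np
    # states[i] / targets[i] feed head i's BaseDataSet
    states = [[np.zeros(4 * 11 * 11 + 2)] for _ in range(4)]  # one list of flat state arrays per action
    targets = [[np.zeros((2, 1))] for _ in range(4)]          # matching [next value, reward] targets
    train(states, targets, self.model, self.optimizer)        # one pass over each action's data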