VoxelEngine/labirinth_ai/Subject.py

983 lines
40 KiB
Python

import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from labirinth_ai.LabyrinthWorld import LabyrinthWorld
from labirinth_ai.Models.EvolutionModel import EvolutionModel
from labirinth_ai.loss import loss2, loss3
from labirinth_ai.Models.BaseModel import BaseModel, train, create_optimizer, device, from_numpy
# import torch
# dtype = torch.float
# device = torch.device("cpu")
class Subject:
    """Baseline labyrinth agent that steps to a random walkable neighbour.

    Class attributes:
        name: label of the agent type.
        col: numeric type tag of the agent.
        num: running counter used to hand out ``id`` values.
        random: flag marking the agent as acting randomly.
        r, g, b: presumably the RGB display colour -- TODO confirm.
    """

    name = 'random'
    col = 8
    num = 0
    random = True
    r = 255
    g = 255
    b = 255

    def __init__(self, x, y):
        """Create a living subject at ``(x, y)`` and give it the next id."""
        self.alive = True
        self.x = x
        self.y = y
        self.kills = 0
        self.lives = 1
        self.tick = 0
        self.id = self.num
        # The counter lives on Subject itself, so ids are unique across all
        # subclasses as well.
        Subject.num += 1

    def _walkable_directions(self, world):
        """Return the unit steps leading to in-bounds cells whose board value is non-zero.

        The result is ordered left, right, up, down; ``(0, 0)`` is the top
        left corner of the board.
        """
        x, y = self.x, self.y
        probes = (
            ((-1, 0), x - 1 >= 0),
            ((1, 0), x + 1 < world.board_shape[0]),
            ((0, -1), y - 1 >= 0),
            ((0, 1), y + 1 < world.board_shape[1]),
        )
        return [step for step, inside in probes
                if inside and world.board[x + step[0], y + step[1]] != 0]

    def update(self, world: LabyrinthWorld):
        """Take one random step, killing every living subject on the target cell.

        Does nothing when the subject is dead or has no walkable neighbour.
        The cell that is left gets its ``world.trailMix`` entry incremented.
        """
        directions = self._walkable_directions(world)
        if not directions or not self.alive:
            return

        # With a single option no random number is drawn, exactly as before.
        d = random.choice(directions) if len(directions) > 1 else directions[0]

        for sub in world.subjectDict[(self.x + d[0], self.y + d[1])]:
            if sub.alive:
                self.kills += 1
                sub.alive = False
                self.alive = True

        world.subjectDict[(self.x, self.y)].remove(self)
        world.trailMix[self.x, self.y] += 1
        self.x += d[0]
        self.y += d[1]
        world.subjectDict[(self.x, self.y)].append(self)

    def respawnUpdate(self, x, y, world: LabyrinthWorld):
        """Move the subject to ``(x, y)``, revive it and count one more life."""
        world.subjectDict[(self.x, self.y)].remove(self)
        self.x = x
        self.y = y
        world.subjectDict[(self.x, self.y)].append(self)
        self.alive = True
        self.lives += 1
class QLearner(Subject):
    """Tabular Q-learning agent.

    The table ``Q`` is a class attribute, so every instance reads and writes
    the same dictionary. It maps ``str(state)`` to a dict of
    ``direction -> value``.
    """

    name = 'QLearner'
    col = 14
    learningRate = 0.25
    discountFactor = 0.5
    random = False
    Q = {}

    def __init__(self, x, y):
        """Create the learner at ``(x, y)`` with an empty action memory."""
        super(QLearner, self).__init__(x, y)
        self.viewD = 3
        self.lastAction = None
        self.lastState = None
        self.lastReward = 0

    def respawnUpdate(self, x, y, world: LabyrinthWorld):
        """Respawn at ``(x, y)`` and charge a reward penalty of 20."""
        super(QLearner, self).respawnUpdate(x, y, world)
        self.lastReward -= 20

    def createState(self, world: LabyrinthWorld):
        """Build the integer grid of nearby subjects that keys the Q table.

        The grid has ``2 * viewD + 1`` cells per side and is centred on this
        agent. Every subject closer than ``viewD`` on both axes turns its
        cell value ``v`` into ``v * 100 + 1``.
        """
        side = 2 * self.viewD + 1
        # Builtin int replaces the np.int alias that NumPy 1.24 removed; the
        # resulting dtype is the same.
        state = np.zeros((side, side), int)
        for sub in world.subjects:
            if abs(sub.x - self.x) < self.viewD and abs(sub.y - self.y) < self.viewD:
                row = self.viewD + sub.x - self.x
                column = self.viewD + sub.y - self.y
                # NOTE(review): cells only ever hold 0, 1, 101, ... here, so
                # this comparison with 3 looks like a leftover -- confirm.
                if state[row, column] != 3:
                    state[row, column] = state[row, column] * 100 + 1
        return state

    def _open_directions(self, world):
        """Return the steps (left, right, up, down order) onto non-zero board cells."""
        x, y = self.x, self.y
        probes = (
            ((-1, 0), x - 1 >= 0),
            ((1, 0), x + 1 < world.board_shape[0]),
            ((0, -1), y - 1 >= 0),
            ((0, 1), y + 1 < world.board_shape[1]),
        )
        return [step for step, inside in probes
                if inside and world.board[x + step[0], y + step[1]] != 0]

    def update(self, world: LabyrinthWorld):
        """Pick the best-valued allowed move, update the table and execute it.

        Unknown ``(state, direction)`` pairs are initialised with a random
        integer in ``[0, 5]``. Killing a living subject on the target cell
        adds 10 to the reward used by the next update.
        """
        directions = self._open_directions(world)
        if not directions or not self.alive:
            return

        state = self.createState(world)
        values = self.Q.setdefault(str(state), {})
        for step in directions:
            if step not in values:
                values[step] = random.randint(0, 5)

        allowed = {step: value for step, value in values.items() if step in directions}
        action = max(allowed, key=allowed.get)

        if self.learningRate != 0:
            # NOTE(review): this blends the entry of the *current* state and
            # action with the reward earned by the previous move; lastState
            # and lastAction are stored below but never read in this class.
            # Confirm whether the previous entry was meant to be updated.
            values[action] = ((1 - self.learningRate) * values[action]
                              + self.learningRate * (self.lastReward + self.discountFactor * values[action]))

        self.lastAction = action
        self.lastState = state
        self.lastReward = 0

        if len(action) == 2:
            for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]:
                if sub.alive:
                    self.kills += 1
                    sub.alive = False
                    self.alive = True
                    self.lastReward += 10
            world.subjectDict[(self.x, self.y)].remove(self)
            self.x += action[0]
            self.y += action[1]
            world.subjectDict[(self.x, self.y)].append(self)
class DoubleQLearner(QLearner):
    """Q-learner that keeps two value tables, ``QA`` and ``QB``.

    Both tables are class attributes shared by all instances. Moves are
    chosen by the sum of both tables; each step updates one of them, picked
    by a coin flip.
    """

    name = 'DoubleQLearner'
    col = 11
    learningRate = 0.5
    discountFactor = 0.5
    random = False
    QA = {}
    QB = {}

    def __init__(self, x, y):
        """Create the learner at ``(x, y)`` with an empty action memory."""
        super(DoubleQLearner, self).__init__(x, y)
        self.viewD = 3
        self.lastAction = None
        self.lastState = None
        self.lastReward = 0

    def respawnUpdate(self, x, y, world: LabyrinthWorld):
        """Respawn at ``(x, y)``; everything is delegated to the parent class."""
        super(DoubleQLearner, self).respawnUpdate(x, y, world)

    def _open_directions(self, world):
        """Return the steps (left, right, up, down order) onto non-zero board cells."""
        x, y = self.x, self.y
        probes = (
            ((-1, 0), x - 1 >= 0),
            ((1, 0), x + 1 < world.board_shape[0]),
            ((0, -1), y - 1 >= 0),
            ((0, 1), y + 1 < world.board_shape[1]),
        )
        return [step for step, inside in probes
                if inside and world.board[x + step[0], y + step[1]] != 0]

    def update(self, world: LabyrinthWorld):
        """Choose a move from ``QA + QB``, update one table and execute the move."""
        directions = self._open_directions(world)
        if not directions or not self.alive:
            return

        state = self.createState(world)
        key = str(state)
        if key not in self.QA:
            self.QA[key] = {}
            self.QB[key] = {}
        table_a = self.QA[key]
        table_b = self.QB[key]

        # Unknown moves start with a random integer in [0, 5] in both tables.
        for step in directions:
            if step not in table_a:
                table_a[step] = random.randint(0, 5)
                table_b[step] = random.randint(0, 5)

        allowed_a = {step: value for step, value in table_a.items() if step in directions}
        allowed_b = {step: value for step, value in table_b.items() if step in directions}
        combined = {step: allowed_a[step] + allowed_b[step] for step in allowed_a}

        best_a = max(allowed_a, key=allowed_a.get)
        best_b = max(allowed_b, key=allowed_b.get)
        action = max(combined, key=combined.get)

        if self.learningRate != 0:
            # The updated table is bootstrapped from the other table,
            # evaluated at the updated table's own best move.
            if random.randint(0, 1) == 0:
                old = table_a[action]
                table_a[action] = old + self.learningRate * (
                    self.lastReward + self.discountFactor * table_b[best_a] - old)
            else:
                old = table_b[action]
                table_b[action] = old + self.learningRate * (
                    self.lastReward + self.discountFactor * table_a[best_b] - old)

        self.lastAction = action
        self.lastState = state
        self.lastReward = 0

        if len(action) == 2:
            target = (self.x + action[0], self.y + action[1])
            for sub in world.subjectDict[target]:
                if sub.alive:
                    self.kills += 1
                    sub.alive = False
                    self.alive = True
                    self.lastReward += 10
            world.subjectDict[(self.x, self.y)].remove(self)
            self.x += action[0]
            self.y += action[1]
            world.subjectDict[(self.x, self.y)].append(self)
# Switch read by NetLearner.generateSamples: when True the follow-up state of
# each sampled transition is run through the model again to obtain the
# bootstrap value; when False the value stored with the sample is used.
RECALCULATE = False
class NetLearner(Subject):
    """Agent whose move values come from a neural network.

    Subclasses supply ``executeAction``. Observed transitions are collected
    in ``self.samples`` and the model is retrained once enough of them exist.
    """

    right = (1, 0)
    left = (-1, 0)
    up = (0, -1)
    down = (0, 1)
    # Index of each move in the network output and in ``lastRewards``.
    act2IDict = {right: 0, left: 1, up: 2, down: 3}

    name = 'NetLearner'
    col = 15
    viewD = 3
    historyLength = 2
    channels = 4
    learningRate = 0.001
    discountFactor = 0.5
    randomBuffer = 0
    batchsize = 1000
    # Training starts only after at least four batches of samples exist.
    randomBuffer = max(4*batchsize, randomBuffer)
    randomChance = 9
    historySizeMul = 20

    def __init__(self, x, y, genes=None, genotype_class=None):
        """Create the agent at ``(x, y)`` together with its model and optimizer.

        ``genes`` and ``genotype_class`` are passed on to ``EvolutionModel``.
        """
        super(NetLearner, self).__init__(x, y)

        self.action = None
        self.state = None
        self.actDict = {}
        self.history = []
        self.lastAction = None
        self.lastState = None
        self.lastReward = 0
        self.lastVal = 0
        self.random = False
        self.nextTrain = self.randomBuffer

        self.samples = []

        self.x_in = []
        self.actions = []
        self.target = []

        self.model = EvolutionModel(self.viewD, 4, 4, genes=genes, genotype_class=genotype_class).to(device)
        self.optimizer = create_optimizer(self.model)

        # The sample list is still empty here, so the agent starts in random
        # mode whenever randomBuffer is positive.
        self.random = len(self.samples) < self.randomBuffer

        self.strikes = 0
        self.lastRewards = []
        self.accumulated_rewards = 0

    def respawnUpdate(self, x, y, world: LabyrinthWorld):
        """Respawn at ``(x, y)`` and decide anew whether to act randomly.

        Random mode is chosen while fewer than ``randomBuffer`` samples exist,
        or when a draw from ``[0, 10]`` exceeds ``randomChance``.
        """
        super(NetLearner, self).respawnUpdate(x, y, world)
        self.random = (len(self.samples) < self.randomBuffer
                       or random.randint(0, 10) > self.randomChance)
        self.strikes = 0

    def visualize(self):
        """Plot, per output, the result of pushing a one-hot vector back through the layers.

        NOTE(review): relies on ``self.model.get_weights()`` and a fixed
        layout of 20 weight arrays -- confirm the current model offers both.
        """
        # Imported here so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt

        print(self.name)
        layers = self.model.get_weights()
        layersN = [[0, 1, 8, 9, 16], [2, 3, 10, 11, 17], [4, 5, 12, 13, 18], [6, 7, 14, 15, 19]]
        headings = {0: 'right', 1: 'left', 2: 'up', 3: 'down'}
        side = 2 * self.viewD + 1

        for action in range(8):
            v = np.zeros((1, 2))
            v[0][0 if action < 4 else 1] = 1.0

            # Walk the weight arrays of this output from last to first.
            for n in reversed(layersN[action % 4]):
                weights = layers[n]
                if len(weights.shape) == 2:
                    v = np.dot(v, np.transpose(weights))
                else:
                    v = v + np.array([weights])

            # The final two entries belong to the last-action inputs and are
            # not plotted.
            v = np.reshape(v[0, :-2], (4, side, side))

            # Outputs 0-3 are labelled 'current', 4-7 'future'.
            time = 'current ' if action < 4 else 'future '

            fig, axs = plt.subplots(2, 2, figsize=(5, 5))
            fig.suptitle(time + headings[action % 4])
            for ax, channel, title in zip(axs.flat, v, ('board', 'subjects', 'trail', 'grass')):
                # Each colourbar now describes its own panel; previously all
                # four were drawn from the first panel's mesh.
                mesh = ax.pcolor(np.rot90(channel))
                fig.colorbar(mesh, ax=ax)
                ax.set_title(title)
            plt.show(block=True)

    def createState(self, world: LabyrinthWorld):
        """Return the network input as an array of shape ``(1, 4 * side * side + 2)``.

        ``side`` is ``2 * viewD + 1``. The four stacked channels hold the
        board, an occupancy mask, ``world.trailMix`` and
        ``world.hunter_grass`` around the agent; the last two entries are the
        previous action, or ``[0, 0]`` if there is none.
        """
        view = self.viewD
        side = 2 * view + 1

        reach_left = self.x - max(self.x - view, 0)
        reach_right = min(self.x + view, world.board_shape[0] - 1) - self.x
        reach_up = self.y - max(self.y - view, 0)
        reach_down = min(self.y + view, world.board_shape[1] - 1) - self.y

        # NOTE(review): the upper bounds are exclusive, so the row and column
        # at offset +reach_right / +reach_down are never copied -- confirm
        # whether that is intended.
        window = (slice(view - reach_left, view + reach_right),
                  slice(view - reach_up, view + reach_down))
        source = (slice(self.x - reach_left, self.x + reach_right),
                  slice(self.y - reach_up, self.y + reach_down))

        # Builtin float replaces the np.float alias that NumPy 1.24 removed.
        board_view = np.zeros((side, side), float)
        occupancy = np.zeros((side, side), float)
        trail_view = np.zeros((side, side), float)
        grass_view = np.zeros((side, side), float)

        board_view[window] = world.board[source]
        for dx in range(-reach_left, reach_right):
            for dy in range(-reach_up, reach_down):
                if world.subjectDict[(self.x + dx, self.y + dy)] != []:
                    # Centre the mask on the agent like the other channels.
                    # The former index (dx + reach_left) shifted it whenever
                    # the view was clipped at the left or top border.
                    occupancy[view + dx, view + dy] = 1
        trail_view[window] = world.trailMix[source]
        grass_view[window] = world.hunter_grass[source]

        area = np.reshape(np.stack((board_view, occupancy, trail_view, grass_view)),
                          (4 * side * side))
        action = [0, 0]
        if self.lastAction is not None:
            action = self.lastAction
        return np.reshape(np.concatenate((area, action)), (1, 4 * side * side + 2))

    def generate_valid_directions(self, world: LabyrinthWorld):
        """Return the moves (left, right, up, down order) onto non-zero board cells."""
        x, y = self.x, self.y
        probes = (
            (self.left, x - 1 >= 0),
            (self.right, x + 1 < world.board_shape[0]),
            (self.up, y - 1 >= 0),
            (self.down, y + 1 < world.board_shape[1]),
        )
        return [step for step, inside in probes
                if inside and world.board[x + step[0], y + step[1]] != 0]

    def calculateAction(self, world: LabyrinthWorld, vals=None, state=None):
        """Decide on the next move and store it in ``self.action``.

        ``state`` defaults to ``createState(world)`` and ``vals`` to the model
        output for that state. Nothing is changed when no move is possible or
        the agent is dead.
        """
        directions = self.generate_valid_directions(world)
        if directions == []:
            print('Wut?')
            return
        if not self.alive:
            return

        if state is None:
            state = self.createState(world)
        if vals is None:
            vals = self.model(from_numpy(state)).detach().numpy()
        # Reorder the (4, 2) output so that entries 0-3 and 4-7 each run over
        # the four moves.
        vals = np.reshape(np.transpose(np.reshape(vals, (4, 2)), (1, 0)), (1, 8))

        self.actDict = {
            self.right: vals[0][0] + vals[0][4],
            self.left: vals[0][1] + vals[0][5],
            self.up: vals[0][2] + vals[0][6],
            self.down: vals[0][3] + vals[0][7],
        }
        allowed = {step: value for step, value in self.actDict.items() if step in directions}

        # Random mode ends once the strike counter has run down.
        if self.strikes <= 0:
            self.random = False

        if not self.random:
            self.action = max(allowed, key=allowed.get)
        else:
            self.action = self.randomAct(world)

        self.state = state

    def update(self, world: LabyrinthWorld, doTrain=True):
        """Record the previous transition, train if due, then execute ``self.action``.

        ``calculateAction`` has to run first so that ``self.action``,
        ``self.state`` and ``self.actDict`` are current.
        """
        if self.lastAction is not None:
            if not self.random:
                # Undoing the previous move earns a strike; more than 100
                # strikes switch the agent to random mode.
                turned_back = (self.lastAction[0] + self.action[0] == 0
                               and self.lastAction[1] + self.action[1] == 0)
                if turned_back:
                    self.strikes += 1
                else:
                    self.strikes -= 1
                if self.strikes > 100:
                    self.random = True
            else:
                self.strikes -= 1

            if len(self.history) >= self.historyLength:
                self.history.pop(0)
            self.history.append((self.lastState.copy(),
                                 int(self.act2IDict[self.lastAction]),
                                 int(self.lastVal),
                                 float(self.lastReward),
                                 np.array(self.lastRewards)))

            if len(self.history) == self.historyLength:
                self.samples.append(self.history.copy())

            if len(self.samples) > self.nextTrain and doTrain:
                print('train', len(self.samples))
                self.train()
                self.nextTrain = min(self.batchsize + len(self.samples),
                                     (self.historySizeMul + 1) * self.batchsize)
                print(len(self.samples), self.nextTrain)

        if not self.random:
            self.accumulated_rewards += self.lastReward

        self.lastAction = self.action
        self.lastState = self.state
        self.lastReward = 0
        self.lastVal = self.actDict[self.action]

        self.executeAction(world, self.action)

    def randomAct(self, world: LabyrinthWorld):
        """Return a uniformly chosen valid move, or ``(0, 0)`` if there is none."""
        directions = self.generate_valid_directions(world)
        if not directions:
            return 0, 0
        return random.choice(directions)

    def executeAction(self, world: LabyrinthWorld, action):
        """Apply ``action`` to the world; a no-op here, implemented by subclasses."""
        pass

    def generateSamples(self):
        """Draw one training batch per move from the stored histories.

        A history is ``[t-2, t-1]`` and each entry is
        ``(state, action index, value, reward, per-move rewards)``.

        Returns:
            ``(states, targets)``: two lists with one item per move index.
            ``targets[i]`` has shape ``(batchsize / 4, 2, 1)``; row 1 holds
            the reward of the ``t-2`` entry and row 0 the value of the
            ``t-1`` entry.
        """
        true_batch = int(self.batchsize/4)
        # dtype=object is required: the tuples mix arrays and scalars, and
        # NumPy >= 1.24 refuses to build such a ragged array implicitly.
        # The newest `batchsize` histories are excluded; the pool does not
        # change inside the loop, so it is built once.
        pool = np.array(self.samples[:-self.batchsize], dtype=object)

        states = []
        targets = []
        for i in range(4):
            target = np.zeros((true_batch, 2, 1))

            chosen = np.array([entry[0, 1] == i for entry in pool], dtype=bool)
            samples = pool[chosen]

            partTwo = len(samples) > 0
            if not partTwo:
                # No history took move i: fall back to the whole pool and
                # leave the value row of the target at zero.
                print('No samples for:' + str(i))
                samples = pool

            index = np.random.choice(np.arange(len(samples)),
                                     size=true_batch,
                                     replace=True)
            samples = samples[index]

            target[:, 1, 0] = samples[:, 0, 3]  # reward t-2 got
            if partTwo:
                if RECALCULATE:
                    nextState = np.concatenate(samples[:, 1, 0])  # states of t-1
                    nextVals = self.model(from_numpy(nextState)).detach().numpy()
                    target[:, 0, 0] = np.max(nextVals[:, :, 0] + nextVals[:, :, 1], axis=1)
                else:
                    target[:, 0, 0] = samples[:, 1, 2]  # best q t-1

            targets.append(target)
            states.append(np.concatenate(samples[:, 0, 0]))  # states of t-2

        return states, targets

    def train(self):
        """Train the model on fresh batches and trim the sample store.

        Only the newest ``historySizeMul * batchsize`` histories are kept.
        """
        print(self.name)
        states, target = self.generateSamples()
        # The bare name refers to the module-level train() helper.
        train(states, target, self.model, self.optimizer)
        self.samples = self.samples[-self.historySizeMul*self.batchsize:]
class Herbivore(NetLearner):
    """Grass-eating agent that only moves onto unoccupied cells."""

    name = 'Herbivore'
    col = 9
    r = 255
    g = 255
    b = 0
    viewD = 3
    historyLength = 2
    learningRate = 0.001
    discountFactor = 0.5
    randomBuffer = 0
    batchsize = 1000
    # Training starts only after at least two batches of samples exist.
    randomBuffer = max(2 * batchsize, randomBuffer)
    randomChance = 9
    samples = []

    def createState(self, world: LabyrinthWorld):
        """Return the network input as an array of shape ``(1, 4 * side * side + 2)``.

        ``side`` is ``2 * viewD + 1``. The four stacked channels hold the
        board, an occupancy mask, ``world.trailMix`` and ``world.grass``
        around the agent; the last two entries are the previous action, or
        ``[0, 0]`` if there is none.
        """
        view = self.viewD
        side = 2 * view + 1

        reach_left = self.x - max(self.x - view, 0)
        reach_right = min(self.x + view, world.board_shape[0] - 1) - self.x
        reach_up = self.y - max(self.y - view, 0)
        reach_down = min(self.y + view, world.board_shape[1] - 1) - self.y

        # NOTE(review): the upper bounds are exclusive, so the row and column
        # at offset +reach_right / +reach_down are never copied -- confirm
        # whether that is intended.
        window = (slice(view - reach_left, view + reach_right),
                  slice(view - reach_up, view + reach_down))
        source = (slice(self.x - reach_left, self.x + reach_right),
                  slice(self.y - reach_up, self.y + reach_down))

        # Builtin float replaces the np.float alias that NumPy 1.24 removed.
        board_view = np.zeros((side, side), float)
        occupancy = np.zeros((side, side), float)
        trail_view = np.zeros((side, side), float)
        grass_view = np.zeros((side, side), float)

        board_view[window] = world.board[source]
        for dx in range(-reach_left, reach_right):
            for dy in range(-reach_up, reach_down):
                if world.subjectDict[(self.x + dx, self.y + dy)] != []:
                    # Centre the mask on the agent like the other channels.
                    # The former index (dx + reach_left) shifted it whenever
                    # the view was clipped at the left or top border.
                    occupancy[view + dx, view + dy] = 1
        trail_view[window] = world.trailMix[source]
        grass_view[window] = world.grass[source]

        area = np.reshape(np.stack((board_view, occupancy, trail_view, grass_view)),
                          (4 * side * side))
        action = [0, 0]
        if self.lastAction is not None:
            action = self.lastAction
        return np.reshape(np.concatenate((area, action)), (1, 4 * side * side + 2))

    def executeAction(self, world: LabyrinthWorld, action):
        """Move by ``action`` and eat the grass on the new cell.

        If a living ``Hunter`` occupies the target cell, the hunter is
        credited with a kill and 10 reward and this herbivore dies.
        ``lastRewards`` receives the grass on the right, left, up and down
        neighbours as seen before the move (0 for moves that are not valid).
        """
        directions = self.generate_valid_directions(world)
        if len(action) != 2:
            return

        for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]:
            if isinstance(sub, Hunter) and sub.alive:
                sub.kills += 1
                sub.lastReward += 10
                self.alive = False

        # Same order as NetLearner.act2IDict: right, left, up, down.
        self.lastRewards = [
            world.grass[self.x + step[0], self.y + step[1]] if step in directions else 0
            for step in (self.right, self.left, self.up, self.down)
        ]

        world.subjectDict[(self.x, self.y)].remove(self)
        self.x += action[0]
        self.y += action[1]
        world.subjectDict[(self.x, self.y)].append(self)

        # Leave a trail of at least 1.0 and consume both grass layers.
        world.trailMix[self.x, self.y] = max(1.0, world.trailMix[self.x, self.y])
        self.lastReward += (world.grass[self.x, self.y] - 0.0)
        world.grass[self.x, self.y] = 0
        world.hunter_grass[self.x, self.y] = 0

    def generate_valid_directions(self, world: LabyrinthWorld):
        """Return the moves (left, right, up, down order) onto free, non-zero board cells."""
        x, y = self.x, self.y
        probes = (
            (self.left, x - 1 >= 0),
            (self.right, x + 1 < world.board_shape[0]),
            (self.up, y - 1 >= 0),
            (self.down, y + 1 < world.board_shape[1]),
        )
        return [step for step, inside in probes
                if inside
                and world.board[x + step[0], y + step[1]] != 0
                and not world.subjectDict[(x + step[0], y + step[1])]]

    def randomAct(self, world: LabyrinthWorld):
        """Return the valid move with the most grass, or ``(0, 0)`` if none is valid.

        Despite the name no randomness is involved; on ties the first move in
        left, right, up, down order wins.
        """
        directions = self.generate_valid_directions(world)
        if not directions:
            return 0, 0
        grass_ahead = {step: world.grass[self.x + step[0], self.y + step[1]]
                       for step in directions}
        return max(grass_ahead, key=grass_ahead.get)

    def respawnUpdate(self, x, y, world: LabyrinthWorld):
        """Respawn at ``(x, y)``; everything is delegated to the parent class."""
        super(Herbivore, self).respawnUpdate(x, y, world)
class Hunter(NetLearner):
    """Predator agent rewarded for trails, hunter grass, nearby prey and kills.

    A subject counts as prey when its ``col`` differs from the hunter's own.
    """

    name = 'Hunter'
    hunterGrassScale = 0.5
    r = 0
    g = 255
    b = 255

    def _pursuit_bonus(self, world, cx, cy):
        """Return ``viewD`` minus the distance from ``(cx, cy)`` to the closest prey.

        The bonus is 0 when ``getClosestSubject`` finds no prey.
        """
        sub = self.getClosestSubject(world, cx, cy)
        dist = self.viewD
        if sub is not None:
            dist = np.sqrt(np.square(cx - sub.x) + np.square(cy - sub.y))
        return self.viewD - dist

    def randomAct(self, world: LabyrinthWorld):
        """Return the neighbouring move with the highest heuristic score.

        A cell scores its trail value, its hunter grass scaled by
        ``hunterGrassScale``, the pursuit bonus and 10 for every subject of
        another type standing on it. Only cells with a board value above
        0.01 are scored; if there is none, the parent's random choice is used.
        """
        x, y = self.x, self.y
        # Insertion order (left, right, up, down) decides ties in max().
        probes = (
            (self.left, x - 1 >= 0),
            (self.right, x + 1 < world.board_shape[0]),
            (self.up, y - 1 >= 0),
            (self.down, y + 1 < world.board_shape[1]),
        )

        scores = {}
        for step, inside in probes:
            cx, cy = x + step[0], y + step[1]
            if inside and world.board[cx, cy] > 0.01:
                bonus = self._pursuit_bonus(world, cx, cy)
                score = world.trailMix[cx, cy] + world.hunter_grass[cx, cy] * self.hunterGrassScale + bonus
                for sub in world.subjectDict[(cx, cy)]:
                    if sub.col != self.col:
                        score += 10
                scores[step] = score

        if not scores:
            return super(Hunter, self).randomAct(world)
        return max(scores, key=scores.get)

    def respawnUpdate(self, x, y, world: LabyrinthWorld):
        """Respawn at ``(x, y)`` and charge a reward penalty of 1."""
        super(Hunter, self).respawnUpdate(x, y, world)
        self.lastReward -= 1

    def getClosestSubject(self, world, x, y):
        """Return the first living prey on the square rings around ``(x, y)``.

        Rings with radius ``1 .. viewD - 1`` are searched from the inside
        out; the cell ``(x, y)`` itself is not inspected. Returns ``None``
        when nothing is found.
        """
        width, height = world.board_shape[0], world.board_shape[1]
        for dist in range(1, self.viewD):
            inner = range(-dist + 1, dist)
            ring = (
                # The +dist end was missing before, so the (+dist, +dist)
                # corner of every ring was never looked at.
                [(dx, dist) for dx in range(-dist, dist + 1)]
                + [(dx, -dist) for dx in range(-dist, dist + 1)]
                + [(dist, dy) for dy in inner]
                + [(-dist, dy) for dy in inner]
            )
            for dx, dy in ring:
                cx, cy = x + dx, y + dy
                if width > cx >= 0 and height > cy >= 0:
                    for sub in world.subjectDict[(cx, cy)]:
                        if sub.alive and sub.col != self.col:
                            return sub
        return None

    def executeAction(self, world: LabyrinthWorld, action):
        """Move by ``action``, kill what stands on the target cell and collect rewards.

        ``lastRewards`` receives, in right, left, up, down order, the
        heuristic value of each neighbour as seen from the old position
        (0 for moves that are not valid).
        """
        # NOTE(review): same value as hunterGrassScale but kept separate, as
        # in the original -- confirm whether they should be one setting.
        grass_factor = 0.5
        directions = self.generate_valid_directions(world)
        if len(action) != 2:
            return

        x, y = self.x, self.y

        # Must be evaluated before the kill below so that prey on the target
        # cell still counts towards that direction's reward.
        prey_ahead = {
            step: any(sub.alive and sub.col != self.col
                      for sub in world.subjectDict[(x + step[0], y + step[1])])
            for step in directions
        }

        for sub in world.subjectDict[(x + action[0], y + action[1])]:
            if sub.alive:
                self.kills += 1
                # Only prey of another type is rewarded.
                if sub.col != self.col:
                    self.lastReward += 10
                sub.alive = False
                self.alive = True

        self.lastRewards = []
        for step in (self.right, self.left, self.up, self.down):
            if step not in directions:
                self.lastRewards.append(0)
                continue
            cx, cy = x + step[0], y + step[1]
            bonus = self._pursuit_bonus(world, cx, cy)
            if prey_ahead[step]:
                value = 10 + world.trailMix[cx, cy] + world.hunter_grass[cx, cy] * grass_factor + bonus
            else:
                value = world.trailMix[cx, cy] + world.hunter_grass[cx, cy] * grass_factor + bonus
            self.lastRewards.append(value)

        world.subjectDict[(self.x, self.y)].remove(self)
        self.x += action[0]
        self.y += action[1]
        self.lastReward += world.trailMix[self.x, self.y]
        world.subjectDict[(self.x, self.y)].append(self)

        self.lastReward += (world.hunter_grass[self.x, self.y] * 0.1)
        world.hunter_grass[self.x, self.y] = 0

        self.lastReward += self._pursuit_bonus(world, self.x, self.y)