From 4a05baa10398eb81008ba13664da1af714ccff67 Mon Sep 17 00:00:00 2001
From: zomseffen <steffen@tom.bi>
Date: Fri, 11 Mar 2022 14:19:55 +0100
Subject: [PATCH] base evolution model. needs different memory handling

---
 labirinth_ai/LabyrinthWorld.py        |  64 ++++------
 labirinth_ai/Models/BaseModel.py      |   8 +-
 labirinth_ai/Models/EvolutionModel.py | 176 ++++++++++++++++++++++++++
 labirinth_ai/Subject.py               |  18 ++-
 4 files changed, 218 insertions(+), 48 deletions(-)
 create mode 100644 labirinth_ai/Models/EvolutionModel.py

diff --git a/labirinth_ai/LabyrinthWorld.py b/labirinth_ai/LabyrinthWorld.py
index 2a2e3e7..b22a0ea 100644
--- a/labirinth_ai/LabyrinthWorld.py
+++ b/labirinth_ai/LabyrinthWorld.py
@@ -146,23 +146,27 @@ class LabyrinthWorld(World):
 
         # adding subjects
         from labirinth_ai.Subject import Hunter, Herbivore
-        while len(self.subjects) < 2:
-            px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
-            py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
-            if self.board[px, py] == 1:
-                self.subjects.append(Hunter(px, py))
-                self.ins += self.subjects[-1].x_in
-                self.actions += self.subjects[-1].actions
-                self.targets += self.subjects[-1].target
+        for _ in range(10):
+            while True:
+                px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
+                py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
+                if self.board[px, py] == 1:
+                    self.subjects.append(Hunter(px, py))
+                    self.ins += self.subjects[-1].x_in
+                    self.actions += self.subjects[-1].actions
+                    self.targets += self.subjects[-1].target
+                    break
 
-        while len(self.subjects) < 10:
-            px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
-            py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
-            if self.board[px, py] == 1:
-                self.subjects.append(Herbivore(px, py))
-                self.ins += self.subjects[-1].x_in
-                self.actions += self.subjects[-1].actions
-                self.targets += self.subjects[-1].target
+        for _ in range(40):
+            while True:
+                px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
+                py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
+                if self.board[px, py] == 1:
+                    self.subjects.append(Herbivore(px, py))
+                    self.ins += self.subjects[-1].x_in
+                    self.actions += self.subjects[-1].actions
+                    self.targets += self.subjects[-1].target
+                    break
 
         for x in range(self.board_shape[0]):
             for y in range(self.board_shape[1]):
@@ -173,36 +177,14 @@ class LabyrinthWorld(World):
 
     def update(self):
         # start = time.time()
-        if self.model is None:
-            for sub in self.subjects:
-                sub.calculateAction(self)
-        else:
-            states = list(map(lambda e: e.createState(self), self.subjects))
-            states = sum(list(map(lambda e: [e, e, e, e], states)), [])
-            vals = self.model.predict(states)
-            vals = np.reshape(np.transpose(np.reshape(vals, (len(self.subjects), 4, 2)), (0, 2, 1)),
-                              (len(self.subjects), 1, 8))
-            list(map(lambda e: e[1].calculateAction(self, vals[e[0]], states[e[0]]), enumerate(self.subjects)))
+        for sub in self.subjects:
+            sub.calculateAction(self)
 
         for sub in self.subjects:
             if sub.alive:
-                sub.update(self, doTrain=self.model is None)
+                sub.update(self)
             sub.tick += 1
 
-        if self.model is not None:
-            if self.round >= self.nextTrain:
-                samples = list(map(lambda e: e.generateSamples(), self.subjects))
-                states = sum(list(map(lambda e: e[0], samples)), [])
-                targets = sum(list(map(lambda e: e[1], samples)), [])
-                self.model.fit(states, targets)
-                self.nextTrain = self.batchsize / 5
-                self.round = 0
-                for sub in self.subjects:
-                    if len(sub.samples) > 20*self.batchsize:
-                        sub.samples = sub.samples[:-20*self.batchsize]
-            else:
-                self.round += 1
-
         new_subjects = []
         kill_table = {}
         live_table = {}
diff --git a/labirinth_ai/Models/BaseModel.py b/labirinth_ai/Models/BaseModel.py
index e87a3c9..9678f50 100644
--- a/labirinth_ai/Models/BaseModel.py
+++ b/labirinth_ai/Models/BaseModel.py
@@ -13,6 +13,7 @@ print(f"Using {device} device")
 
 # Define model
 class BaseModel(nn.Module):
+    evolutionary = False
     def __init__(self, view_dimension, action_num, channels):
         super(BaseModel, self).__init__()
         self.flatten = nn.Flatten()
@@ -39,6 +40,7 @@ class BaseModel(nn.Module):
             actions.append(self.actions[action](x_flat))
         return torch.stack(actions, dim=1)
 
+
 class BaseDataSet(Dataset):
     def __init__(self, states, targets):
         assert len(states) == len(targets), "Needs to have as many states as targets!"
@@ -87,7 +89,7 @@ def train(states, targets, model, optimizer):
 
             # Backpropagation
             optimizer.zero_grad()
-            loss.backward()
+            loss.backward(retain_graph=True)
             optimizer.step()
 
             if batch % 100 == 0:
@@ -100,7 +102,7 @@ def train(states, targets, model, optimizer):
 
 
 if __name__ == '__main__':
-    sample = np.random.random((1, 4, 11, 11))
+    sample = np.random.random((1, 486))
 
     model = BaseModel(5, 4, 4).to(device)
     print(model)
@@ -109,7 +111,7 @@ if __name__ == '__main__':
     # test = test.cpu().detach().numpy()
     print(test)
 
-    state = np.random.random((4, 11, 11))
+    state = np.random.random((486,))
     target = np.random.random((4, 2))
     states = [
         [state],
diff --git a/labirinth_ai/Models/EvolutionModel.py b/labirinth_ai/Models/EvolutionModel.py
new file mode 100644
index 0000000..38276f6
--- /dev/null
+++ b/labirinth_ai/Models/EvolutionModel.py
@@ -0,0 +1,176 @@
+import torch
+from torch import nn
+import numpy as np
+import tqdm
+from torch.utils.data import Dataset, DataLoader
+from labirinth_ai.Models.BaseModel import device
+
+
+class NodeGene:
+    valid_types = ['sensor', 'hidden', 'output']
+
+    def __init__(self, node_id, node_type, bias=None):
+        assert node_type in self.valid_types, 'Unknown node type!'
+        self.node_id = node_id
+        self.node_type = node_type
+        if node_type == 'hidden':
+            assert bias is not None, 'Expected a bias for hidden node types!'
+            self.bias = bias
+        else:
+            self.bias = None
+
+
+class ConnectionGene:
+    def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False):
+        self.start = start
+        self.end = end
+        self.enabled = enabled
+        self.innvovation_num = innovation_num
+        self.recurrent = recurrent
+        if weight is None:
+            self.weight = np.random.random(1)[0] * 2 - 1.0
+        else:
+            self.weight = weight
+
+
+class EvolutionModel(nn.Module):
+    evolutionary = True
+
+    def __init__(self, view_dimension, action_num, channels, genes=None):
+        super(EvolutionModel, self).__init__()
+        self.flatten = nn.Flatten()
+
+        self.action_num = action_num
+        self.viewD = view_dimension
+        self.channels = channels
+
+        if genes is None:
+            self.num_input_nodes = channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2
+
+            self.genes = {'nodes': {}, 'connections': []}
+            node_id = 0
+            for _ in range(self.num_input_nodes):
+                self.genes['nodes'][node_id] = NodeGene(node_id, 'sensor')
+                node_id += 1
+            first_action = node_id
+            for _ in range(action_num * 2):
+                self.genes['nodes'][node_id] = NodeGene(node_id, 'output')
+                node_id += 1
+
+            for index in range(self.num_input_nodes):
+                for action in range(action_num * 2):
+                    self.genes['connections'].append(
+                        ConnectionGene(index, first_action + action, True, index*(action_num * 2) + action)
+                    )
+
+        self.incoming_connections = {}
+        for connection in self.genes['connections']:
+            if connection.end not in self.incoming_connections.keys():
+                self.incoming_connections[connection.end] = []
+            self.incoming_connections[connection.end].append(connection)
+
+        self.layers = {}
+        self.indices = {}
+
+        self.has_recurrent = False
+        non_recurrent_indices = {}
+        with torch.no_grad():
+            for key, value in self.incoming_connections.items():
+                value.sort(key=lambda element: element.start)
+
+                lin = nn.Linear(len(value), 1, bias=self.genes['nodes'][key].bias is not None)
+                for index, connection in enumerate(value):
+                    lin.weight[0, index] = value[index].weight
+                if self.genes['nodes'][key].bias is not None:
+                    lin.bias[0] = self.genes['nodes'][key].bias
+
+                non_lin = nn.ELU()
+                sequence = nn.Sequential(
+                    lin,
+                    non_lin
+                )
+                self.add_module('layer_' + str(key), sequence)
+                self.layers[key] = sequence
+                self.indices[key] = list(map(lambda element: element.start, value))
+
+                non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value))
+                if not self.has_recurrent and len(non_recurrent_indices[key]) != len(self.indices[key]):
+                    self.has_recurrent = True
+                non_recurrent_indices[key] = list(map(lambda element: element.start, non_recurrent_indices[key]))
+        rank_of_node = {}
+        for i in range(self.num_input_nodes):
+            rank_of_node[i] = 0
+
+        layers_to_add = list(non_recurrent_indices.items())
+        while len(layers_to_add) > 0:
+            for index, (key, incoming_nodes) in enumerate(list(layers_to_add)):
+                max_rank = -1
+                all_ranks_found = True
+
+                for incoming_node in incoming_nodes:
+                    if incoming_node in rank_of_node.keys():
+                        max_rank = max(max_rank, rank_of_node[incoming_node])
+                    else:
+                        all_ranks_found = False
+
+                if all_ranks_found:
+                    rank_of_node[key] = max_rank + 1
+
+            layers_to_add = list(filter(lambda element: element[0] not in rank_of_node.keys(), layers_to_add))
+        ranked_layers = list(rank_of_node.items())
+        ranked_layers.sort(key=lambda element: element[1])
+        ranked_layers = list(filter(lambda element: element[1] > 0, ranked_layers))
+        self.layer_order = list(map(lambda element: element[0], ranked_layers))
+        self.memory = torch.Tensor((max(map(lambda element: element[1].node_id, self.genes['nodes'].items())) + 1))
+
+    def forward(self, x, memory=None):
+        x_flat = self.flatten(x)
+        if memory is None:
+            memory = torch.Tensor(self.memory)
+            outs = []
+            for batch_element in x_flat:
+                memory[0:self.num_input_nodes] = batch_element
+                for layer_index in self.layer_order:
+                    memory[layer_index] = self.layers[layer_index](memory[self.indices[layer_index]])
+                outs.append(memory[self.num_input_nodes: self.num_input_nodes + self.action_num * 2])
+            outs = torch.stack(outs)
+            self.memory = torch.Tensor(memory)
+            return torch.reshape(outs, (x.shape[0], 4, 2))
+        else:
+            memory[:, 0:self.num_input_nodes] = x
+            for layer_index in self.layer_order:
+                memory[:, layer_index] = self.layers[layer_index](memory[:, self.indices[layer_index]])
+            return torch.reshape(
+                memory[:, self.num_input_nodes: self.num_input_nodes + self.action_num * 2],
+                (x.shape[0], 4, 2))
+
+
+if __name__ == '__main__':
+    sample = np.random.random((1, 486))
+
+    model = EvolutionModel(5, 4, 4).to(device)
+    print(model)
+    print(model.has_recurrent)
+
+    test = model(torch.tensor(sample, dtype=torch.float32))
+    # test = test.cpu().detach().numpy()
+    print(test)
+
+    state = np.random.random((1, 486))
+    target = np.random.random((4, 2))
+    states = [
+        [state],
+        [state],
+        [state],
+        [state],
+    ]
+    targets = [
+        [target],
+        [target],
+        [target],
+        [target],
+    ]
+
+    optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
+    from labirinth_ai.Models.BaseModel import train
+    train(states, targets, model, optimizer)
diff --git a/labirinth_ai/Subject.py b/labirinth_ai/Subject.py
index 5afa2a7..f9426eb 100644
--- a/labirinth_ai/Subject.py
+++ b/labirinth_ai/Subject.py
@@ -382,6 +382,8 @@ class NetLearner(Subject):
 
         self.lastRewards = []
 
+        self.accumulated_rewards = 0
+
     def visualize(self):
         print(self.name)
         layers = self.model.get_weights()
@@ -542,6 +544,8 @@ class NetLearner(Subject):
                 self.train()
                 self.nextTrain = min(self.batchsize + self.nextTrain, (self.historySizeMul + 1) * self.batchsize)
 
+        self.accumulated_rewards += self.lastReward
+
         self.lastAction = self.action
         self.lastState = self.state
         self.lastReward = 0
@@ -728,10 +732,12 @@ class Herbivore(NetLearner):
         if len(action) == 2:
             if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0:
                 for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]:
-                    if sub.alive:
-                        self.kills += 1
-                    sub.alive = False
-                    self.alive = True
+                    if isinstance(sub, Hunter):
+                        if sub.alive:
+                            sub.kills += 1
+                            sub.alive = True
+                            sub.lastReward += 10
+                            self.alive = False
 
             self.lastRewards = []
             if right in directions:
@@ -795,6 +801,10 @@ class Herbivore(NetLearner):
 
         return action
 
+    def respawnUpdate(self, x, y, world: LabyrinthWorld):
+        super(Herbivore, self).respawnUpdate(x, y, world)
+        self.lastReward -= 1
+
 
 class Hunter(NetLearner):
     name = 'Hunter'