FINALLY CLEAR GRAPHS AHAHAHAHA

2019-01-27 14:22:12 +01:00
parent 0be126e57b
commit a88fcfc1f2
2 changed files with 158 additions and 51 deletions
--- a/main.py
+++ b/main.py
@@ -10,10 +10,11 @@ import numpy as np
 import matplotlib as mpl
 mpl.use('TkAgg')  # fixes my macOS bug
 import matplotlib.pyplot as plt
+import matplotlib.colors as colors


 P = 0.1  # Slip probability
-ALPHA = 0.90  # Discount factor
+ALPHA = 0.8  # Discount factor

 A2 = np.array([  # Action index to action mapping
    [-1,  0],  # Up
@@ -34,12 +35,6 @@ G2_X = None  # The second cost function vector representation
 F_X_U_W = None  # The System Equation


-def h_matrix(j, g):
-    result = (PW_OF_X_U * (g[F_X_U_W] + ALPHA*j[F_X_U_W])).sum(axis=2)
-    result[~U_OF_X] = np.inf  # discard invalid policies
-    return result
-
-
 def _valid_target(target):
    return (
        0 <= target[0] < MAZE.shape[0] and
@@ -72,8 +67,9 @@ def init_global(maze_filename):
    maze_cost[MAZE == 'T'] = 50
    maze_cost[MAZE == 'G'] = -1
    G1_X = maze_cost.copy()[state_mask]
-    maze_cost[(MAZE=='0') | (MAZE=='S') | (MAZE=='G')] += 1
-    G2_X = maze_cost.copy()[state_mask]
+    # maze_cost[(MAZE=='0') | (MAZE=='S') | (MAZE=='G')] += 1
+    # G2_X = maze_cost.copy()[state_mask]
+    G2_X = G1_X + 1

    # Actual environment modelling
    U_OF_X = np.zeros((SN, len(A2)), dtype=np.bool)
@@ -97,26 +93,10 @@ def init_global(maze_filename):
                F_X_U_W[ix, iu, -1] = ij_to_s[tuple(x + u)]


-def plot_j_policy_on_maze(j, policy):
-    heatmap = np.full(MAZE.shape, np.nan)
-    heatmap[S_TO_IJ[:, 0], S_TO_IJ[:, 1]] = j
-    cmap = mpl.cm.get_cmap('coolwarm')
-    cmap.set_bad(color='black')
-    plt.imshow(heatmap, cmap=cmap)
-    # plt.colorbar()
-    # quiver has some weird behavior, the arrow y component must be flipped
-    plt.quiver(S_TO_IJ[:, 1], S_TO_IJ[:, 0], A2[policy, 1], -A2[policy, 0])
-    plt.gca().get_xaxis().set_visible(False)
-    plt.tick_params(axis='y', which='both', left=False, labelleft=False)
-
-
-def plot_cost_history(hist):
-    error = np.log10(
-        np.sqrt(np.square(hist[:-1] - hist[-1]).mean(axis=1))
-    )
-    plt.xticks(np.arange(0, len(error), len(error) // 5))
-    plt.yticks(np.linspace(error.min(), error.max(), 5))
-    plt.plot(error)
+def h_matrix(j, g):
+    """Sum PW_OF_X_U * (g + ALPHA*j)[F_X_U_W] over axis 2; inf if ~U_OF_X."""
+    expected = np.sum(PW_OF_X_U * (g[F_X_U_W] + ALPHA * j[F_X_U_W]), axis=2)
+    return np.where(U_OF_X, expected, np.inf)  # discard invalid policies


 def _policy_improvement(j, g):
@@ -159,7 +139,7 @@ def _terminate_vi(j, j_old, policy, policy_old):

 def dynamic_programming(optimizer_step, g, terminator, return_history=False):
    j = np.zeros(SN, dtype=np.float64)
-    policy = np.full(SN, -1, dtype=np.int32)  # idle policy
+    policy = np.full(SN, len(A2) - 1, dtype=np.int32)  # idle policy
    history = []
    while True:
        j_old = j
@@ -181,6 +161,43 @@ def dynamic_programming(optimizer_step, g, terminator, return_history=False):
        return history


+def plot_j_policy_on_maze(j, policy, normalize=True):
+    """Draw the cost vector ``j`` as a heatmap with ``policy`` arrows on top.
+
+    With ``normalize`` the costs are binned by a ``BoundaryNorm`` whose edges
+    are the sorted values of ``j`` minus both extremes; otherwise the raw
+    costs are shown. Cells that are not states stay NaN and render black.
+    """
+    heatmap = np.full(MAZE.shape, np.nan, dtype=np.float64)
+    shown, vmin, vmax = j, None, None
+    if normalize:
+        # Edges come from the costs themselves: non-linear but discrete colors
+        edges = np.sort(j)[1:-1]
+        shown = colors.BoundaryNorm(boundaries=edges, ncolors=256)(j)
+        vmin, vmax = 0, 256
+
+    rows, cols = S_TO_IJ[:, 0], S_TO_IJ[:, 1]
+    heatmap[rows, cols] = shown
+
+    cmap = mpl.cm.get_cmap('coolwarm')
+    cmap.set_bad(color='black')
+    plt.imshow(heatmap, vmin=vmin, vmax=vmax, cmap=cmap)
+
+    # quiver's +V points up, image rows grow down: negate the row component
+    plt.quiver(cols, rows, A2[policy, 1], -A2[policy, 0])
+    plt.gca().get_xaxis().set_visible(False)
+    plt.tick_params(axis='y', which='both', left=False, labelleft=False)
+
+
+def plot_cost_history(hist):
+    """Plot log10 RMS distance of each cost iterate in hist to the last one."""
+    error = np.log10(np.sqrt(np.square(hist[:-1] - hist[-1]).mean(axis=1)))
+    # max(1, ...): short histories would otherwise give np.arange a zero step
+    plt.xticks(np.arange(0, len(error), max(1, len(error) // 5)))
+    plt.yticks(np.linspace(error.min(), error.max(), 5))
+    plt.plot(error)
+
+
 if __name__ == '__main__':
    # Argument Parsing
    ap = ArgumentParser()
@@ -197,27 +214,31 @@ if __name__ == '__main__':
                  'Policy Iteration': policy_iteration}
    terminators = {'Value Iteration': _terminate_vi,
                   'Policy Iteration': _terminate_pi}
+    # cost_transform = {'g1': _neg_log_neg, 'g2': _gamma}

-    for a in [0.9, 0.5, 0.01]:
-        plt.figure(figsize=(9, 7))
-        plt.subplots_adjust(top=0.9, bottom=0.05, left=0.1, right=0.95,
-                            wspace=0.1)
-        plt.suptitle('DISCOUNT = ' + str(a))
-        i = 1
-        for opt in ['Value Iteration', 'Policy Iteration']:
-            for cost in ['g1', 'g2']:
-                name = '{} / {}'.format(opt, cost)
-                ALPHA = a
-                j, policy = dynamic_programming(optimizers[opt], costs[cost],
-                                                terminators[opt])
-                plt.subplot(2, 2, i)
-                plot_j_policy_on_maze(j, policy)
-                if i <= 2:
-                    plt.gca().set_title('Cost: {}'.format(cost),
-                                        fontsize='x-large')
-                if (i - 1) % 2 == 0:
-                    plt.ylabel(opt, fontsize='x-large')
-                i += 1
+    for normalize in [False, True]:
+        for a in [0.9, 0.5, 0.01]:
+            plt.figure(figsize=(9, 7))
+            plt.subplots_adjust(top=0.9, bottom=0.05, left=0.1, right=0.95,
+                                wspace=0.1)
+            plt.suptitle('DISCOUNT: {}'.format(a) +
+                         ('\nNormalized view' if normalize else ''))
+            i = 1
+            for opt in ['Value Iteration', 'Policy Iteration']:
+                for cost in ['g1', 'g2']:
+                    name = '{} / {}'.format(opt, cost)
+                    ALPHA = a
+                    j, policy = dynamic_programming(optimizers[opt],
+                                                    costs[cost],
+                                                    terminators[opt])
+                    plt.subplot(2, 2, i)
+                    plot_j_policy_on_maze(j, policy, normalize=normalize)
+                    if i <= 2:
+                        plt.gca().set_title('Cost: {}'.format(cost),
+                                            fontsize='x-large')
+                    if (i - 1) % 2 == 0:
+                        plt.ylabel(opt, fontsize='x-large')
+                    i += 1

    # Error graphs
    for opt in ['Value Iteration', 'Policy Iteration']: