diff --git a/main.py b/main.py
index 62eee2a..704e174 100644
--- a/main.py
+++ b/main.py
@@ -15,7 +15,8 @@ import matplotlib.pyplot as plt
 
 P = 0.1
 ALPHA = 0.90
-EPSILON = 1e-12  # Convergence criterium
+EPSILON = 1e-12
+# EPSILON = 1e-12  # Convergence criterium
 A2 = np.array([  # Action index to action mapping
     [-1, 0],  # Up
     [ 1, 0],  # Down
@@ -56,7 +57,7 @@ def init_global(maze_filename):
     # Basic maze structure initialization
     MAZE = np.genfromtxt(
         maze_filename,
-        dtype=str,
+        dtype='|S1',
     )
     state_mask = (MAZE != '1')
 
@@ -72,7 +73,7 @@ def init_global(maze_filename):
    maze_cost[MAZE == 'T'] = 50
    maze_cost[MAZE == 'G'] = -1
    G1_X = maze_cost.copy()[state_mask]
-    maze_cost[maze_cost < 1] += 1  # assert np.nan < whatever == False
+    maze_cost[(MAZE=='0') | (MAZE=='S') | (MAZE=='G')] += 1
     G2_X = maze_cost.copy()[state_mask]
 
     # Actual environment modelling
@@ -146,20 +147,23 @@ def policy_iteration(j, g):
     return policy, j
 
 
-def _terminate(j, j_old):
-    # TODO: DIS
-    return np.abs(j - j_old).max() < EPSILON
+def _terminate(j, j_old, policy, policy_old):
+    # eps = EPSILON
+    # return np.abs(j - j_old).max() < eps
+    return np.all(policy == policy_old)
 
 
 def dynamic_programming(optimizer_step, g, return_history=False):
     j = np.zeros(SN, dtype=np.float64)
+    policy = None
     history = []
     while True:
         j_old = j
+        policy_old = policy
         policy, j = optimizer_step(j, g)
         if return_history:
             history.append(j)
-        if _terminate(j, j_old):
+        if _terminate(j, j_old, policy, policy_old):
             break
     if not return_history:
         return j, policy
@@ -191,7 +195,9 @@ if __name__ == '__main__':
         name = ' / '.join([opt, cost])
         ALPHA = a
         j, policy = dynamic_programming(optimizers[opt], costs[cost])
-        print(name, j)
+        print(name)
+        print(j)
+        # print(name, j)
         plt.subplot(2, 2, i)
         plt.gca().set_title(name)
         plot_j_policy_on_maze(j, policy)
@@ -200,6 +206,7 @@ if __name__ == '__main__':
     # Error graphs
     for opt in ['Value Iteration', 'Policy Iteration']:
         plt.figure()
+        plt.subplots_adjust(wspace=0.45, hspace=0.45)
         plt.suptitle(opt)
         i = 1
         for cost in ['g1', 'g2']:
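
For reference, the rewritten _terminate switches the stopping rule from value-function convergence (max |j - j_old| < EPSILON) to policy stability: the dynamic-programming loop stops as soon as two consecutive sweeps return the same policy. A minimal standalone sketch of that criterion is below; the helper name policy_stable and the example arrays are illustrative only and are not part of main.py.

    import numpy as np

    def policy_stable(policy, policy_old):
        # First sweep: there is no previous policy yet, so keep iterating.
        if policy_old is None:
            return False
        # Stop once the chosen action is identical in every state.
        return bool(np.all(policy == policy_old))

    # Illustrative usage with small integer action arrays.
    print(policy_stable(np.array([0, 1, 2]), None))                  # False
    print(policy_stable(np.array([0, 1, 2]), np.array([0, 1, 2])))   # True

In the patched dynamic_programming, policy starts as None, so the first _terminate call compares an array against None elementwise; np.all over that result is False, which guarantees at least two sweeps before the loop can stop.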