diff --git a/reinforcement_learning/notebooks/Reinforcement_Q_Learning_from_Scratch_in_Python_with_OpenAI_Gym_Taxi.ipynb b/reinforcement_learning/notebooks/Reinforcement_Q_Learning_from_Scratch_in_Python_with_OpenAI_Gym_Taxi.ipynb new file mode 100644 index 0000000..e564b11 --- /dev/null +++ b/reinforcement_learning/notebooks/Reinforcement_Q_Learning_from_Scratch_in_Python_with_OpenAI_Gym_Taxi.ipynb @@ -0,0 +1,1256 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 689 + }, + "id": "T2YZUIm4tgmJ", + "outputId": "a4b83d45-5a86-4283-abfe-c2d226996aa0" + }, + "source": [ + "!pip install cmake 'gym[atari]==0.22.0' scipy" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.9/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", + " and should_run_async(code)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (3.25.2)\n", + "Collecting gym[atari]==0.22.0\n", + " Downloading gym-0.22.0.tar.gz (631 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m631.1/631.1 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (1.10.1)\n", + "Requirement already satisfied: numpy>=1.18.0 in /usr/local/lib/python3.9/dist-packages (from gym[atari]==0.22.0) (1.22.4)\n", + "Requirement already satisfied: importlib-metadata>=4.10.0 in /usr/local/lib/python3.9/dist-packages (from gym[atari]==0.22.0) (6.3.0)\n", + "Requirement already satisfied: gym-notices>=0.0.4 in /usr/local/lib/python3.9/dist-packages (from gym[atari]==0.22.0) (0.0.8)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.9/dist-packages (from gym[atari]==0.22.0) (2.2.1)\n", + "Collecting ale-py~=0.7.4\n", + " Downloading ale_py-0.7.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: importlib-resources in /usr/local/lib/python3.9/dist-packages (from ale-py~=0.7.4->gym[atari]==0.22.0) (5.12.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.9/dist-packages (from importlib-metadata>=4.10.0->gym[atari]==0.22.0) (3.15.0)\n", + "Building wheels for collected packages: gym\n", + " Building wheel for gym (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for gym: filename=gym-0.22.0-py3-none-any.whl size=708393 sha256=d6829a811711e7f91023bc5545600c609f35fea94f934d60d8ed621ffddc1e37\n", + " Stored in directory: /root/.cache/pip/wheels/c4/15/15/94c62e06887fb88768c5fa41482b80905ea71f3ede81040ffa\n", + "Successfully built gym\n", + "Installing collected packages: gym, ale-py\n", + " Attempting uninstall: gym\n", + " Found existing installation: gym 0.25.2\n", + " Uninstalling gym-0.25.2:\n", + " Successfully uninstalled gym-0.25.2\n", + "Successfully installed ale-py-0.7.5 gym-0.22.0\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "gym" + ] + } + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "https://www.gymlibrary.dev/environments/toy_text/taxi/" + ], + "metadata": { + "id": "1zPmMwbagW-J" + } + }, + { + "cell_type": "code", + "source": [ + "import time" + ], + "metadata": { + "id": "5AwJw8_qwmu4" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "R-fJA5uPthQx", + "outputId": "2935628d-99eb-46c9-9d44-16507cb55659" + }, + "source": [ + "import gym\n", + "\n", + "env = gym.make(\"Taxi-v3\").env\n", + "\n", + "env.reset() # reset environment to a new, random state\n", + "\n", + "env.render()" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------+\n", + "|R: |\u001b[43m \u001b[0m: :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "5 * 5 * 5 * 4\n", + "# row, col, pick (R,G,B,Y,Car), drop(R,G,B,Y)" + ], + "metadata": { + "id": "IAeH0kRedcXQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": 
"markdown", + "source": [ + "The filled square represents the taxi, which is **yellow** without a passenger and **green** with a passenger.\n", + "\n", + "The pipe (\"|\") represents a wall which the taxi cannot cross.\n", + "\n", + "- **R, G, Y, B** are the possible pickup and destination locations. \n", + "- The **blue** letter represents the current passenger pick-up location, and the **purple** letter is the current destination." + ], + "metadata": { + "id": "aOmdtV5hp5AG" + } + }, + { + "cell_type": "code", + "source": [ + "env.reset() # reset environment to a new, random state\n", + "env.render()\n", + "\n", + "print(\"Action Space {}\".format(env.action_space))\n", + "print(\"State Space {}\".format(env.observation_space))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OPO2pwrQpja0", + "outputId": "3fa309f5-6fa7-4831-b9cb-f3b0d55e7bd1" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------+\n", + "|\u001b[34;1mR\u001b[0m:\u001b[43m \u001b[0m| : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + "\n", + "Action Space Discrete(6)\n", + "State Space Discrete(500)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-cp0nKFQtmeW", + "outputId": "2c99ac8b-1745-49c5-9a81-f53d28084d1d" + }, + "source": [ + "print(env.step(5)) # actions: 0=south (down), 1=north (up), 2=east (right), 3=west (left), 4=pickup, 5=dropoff\n", + "# step() returns (observation, reward, done, info)\n", + "env.render()" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(475, 20, True, {'prob': 1.0})\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35m\u001b[34;1m\u001b[43mB\u001b[0m\u001b[0m\u001b[0m: |\n", + "+---------+\n", + " (Dropoff)\n" + ] + } + ] + }, + { +
"cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JvR2a-FAuNxl", + "outputId": "12a64971-cec0-4ac6-faaa-a595c47a6a3a" + }, + "source": [ + "env.reset() # reset environment to a new, random state\n", + "env.render()\n", + "\n", + "print(\"Action Space {}\".format(env.action_space))\n", + "print(\"State Space {}\".format(env.observation_space))" + ], + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : |\u001b[43m \u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + "\n", + "Action Space Discrete(6)\n", + "State Space Discrete(500)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# blue - pickup, purple - dropoff\n", + "steps = [0,3,3,1,1,4,0,0,2,2,2,2,1,1,5]\n", + "states = []\n", + "\n", + "for step in steps:\n", + " resp = env.step(step)\n", + " print(resp)\n", + " states.append(resp)\n", + " env.render()\n", + " time.sleep(1)" + ], + "metadata": { + "id": "k_B5TwhlsmwO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "env.reset()\n", + "env.render()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KN_GZrupvP8h", + "outputId": "cb02fcbc-1893-4472-c030-a2e8779ab7b9" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| :\u001b[43m \u001b[0m|\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vmN4Rjxqv1Vg", + "outputId": "9d905a7b-199a-4c1a-d2b9-b7340dfe4e38" + }, + "source": [ + "state = env.encode(3, 1, 2, 0) # (taxi row, taxi column, 
passenger index, destination index)\n", + "print(\"State:\", state)\n", + "\n", + "env.unwrapped.s = state # assign on the underlying TaxiEnv; setting `s` on the wrapper has no effect\n", + "env.render()" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "State: 328\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| :\u001b[43m \u001b[0m|\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "env.env.s = 328\n", + "env.render()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kpMfr0XJheef", + "outputId": "38d62669-15a8-437e-e1b4-78809ead749f" + }, + "execution_count": 28, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| |\u001b[43m \u001b[0m: | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## All possible states" + ], + "metadata": { + "id": "njQX5jhv8Kxc" + } + }, + { + "cell_type": "code", + "source": [ + "for i in range(50):\n", + " print(i)\n", + " env.env.s = i\n", + " env.render()\n", + " # time.sleep(1)" + ], + "metadata": { + "id": "KdTsHXnCwYAh" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "env.env.s = 328\n", + "env.render()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0MVGRfgwzI4-", + "outputId": "ea317787-e1f0-4597-b4f7-2b66234e9571" + }, + "execution_count": 31, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| |\u001b[43m \u001b[0m: | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ +
"## Reward Table\n", + "\n", + "The reward table, also known as the reward function, defines the rewards or penalties associated with each action in each state. It is a fixed table that specifies the immediate reward that the agent receives for taking a particular action in a particular state. The reward table is typically defined by the problem domain, and the Q-learning algorithm learns the optimal policy from the rewards it specifies." + ], + "metadata": { + "id": "ekg3LiN08ycz" + } + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2LL-qRKP1vwh", + "outputId": "e2f2fc6e-4b55-4b21-df78-02496f8ab21e" + }, + "source": [ + "env.P[328]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{0: [(1.0, 428, -1, False)],\n", + " 1: [(1.0, 228, -1, False)],\n", + " 2: [(1.0, 348, -1, False)],\n", + " 3: [(1.0, 328, -1, False)],\n", + " 4: [(1.0, 328, -10, False)],\n", + " 5: [(1.0, 328, -10, False)]}" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "This dictionary has the structure:\n", + "\n", + "`{action: [(probability, nextstate, reward, done)]}`" + ], + "metadata": { + "id": "qjBAsSMI9Jol" + } + }, + { + "cell_type": "code", + "source": [ + "for step in range(6):\n", + " env.env.s = 328\n", + " print(env.step(step))\n", + " env.render()" + ], + "metadata": { + "id": "zuM9bjmUzpQW" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "for i in range(10):\n", + " print(env.action_space.sample())" + ], + "metadata": { + "id": "8MaHb7EV2AmM", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ffe368a4-6dc6-4e47-9b07-707ef59f5868" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "4\n", + "3\n", + "5\n", + "4\n", + "4\n", + "5\n", + "2\n", + "4\n", + "0\n", +
"3\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Solving the environment without Reinforcement Learning\n", + "\n" + ], + "metadata": { + "id": "7DAMMYw69mWH" + } + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZxHflKla8Tn2", + "outputId": "591bb1ec-ba69-419a-a07b-66e3855446d9" + }, + "source": [ + "env.unwrapped.s = 328 # set the underlying TaxiEnv to the illustration's state (assigning `s` on the wrapper is a no-op)\n", + "\n", + "epochs = 0\n", + "penalties, reward = 0, 0\n", + "\n", + "frames = [] # for animation\n", + "\n", + "done = False\n", + "\n", + "while not done:\n", + "    action = env.action_space.sample()\n", + "    state, reward, done, info = env.step(action)\n", + "\n", + "    if reward == -10:\n", + "        penalties += 1\n", + " \n", + "    # Put each rendered frame into dict for animation\n", + "    frames.append({\n", + "        'frame': env.render(mode='ansi'),\n", + "        'state': state,\n", + "        'action': action,\n", + "        'reward': reward\n", + "        }\n", + "    )\n", + "\n", + "    epochs += 1\n", + " \n", + "print(\"Timesteps taken: {}\".format(epochs))\n", + "print(\"Penalties incurred: {}\".format(penalties))" + ], + "execution_count": 54, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Timesteps taken: 99\n", + "Penalties incurred: 29\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "len(frames)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cl_9msXV1vZu", + "outputId": "4f12e6cf-34f2-42c2-80fa-39573172dd8f" + }, + "execution_count": 47, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "568" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lXeu2sz6_IKj", + "outputId": "e79413d2-a91a-489f-f8fc-1dc7a6d8beaa" + }, + "source": [ + "from IPython.display import clear_output\n", + "from time import sleep\n", + "\n",
+ "def print_frames(frames, delay=0.5): # replay recorded frames as a text animation; `delay` is the pause in seconds between frames\n", + "    for i, frame in enumerate(frames):\n", + "        clear_output(wait=True)\n", + "        print(frame['frame'])\n", + "        print(f\"Timestep: {i + 1}\")\n", + "        print(f\"State: {frame['state']}\")\n", + "        print(f\"Action: {frame['action']}\")\n", + "        print(f\"Reward: {frame['reward']}\")\n", + "        sleep(delay)\n", + " \n", + "print_frames(frames)" + ], + "execution_count": 55, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+---------+\n", + "|\u001b[35m\u001b[34;1m\u001b[43mR\u001b[0m\u001b[0m\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "\n", + "Timestep: 99\n", + "State: 0\n", + "Action: 5\n", + "Reward: 20\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# DP\n", + "```\n", + "[1,2,3,4,5]\n", + "1 = 1\n", + "3 = 1+2\n", + "6 = 1+2+3\n", + "10 = 1+2+3+4\n", + "15 = 1+2+3+4+5\n", + "\n", + "1 = 1\n", + "3 = 1 + 2\n", + "6 = 3 + 3\n", + "10 = 6 + 4\n", + "15 = 10 + 5\n", + "\n", + "```\n" + ], + "metadata": { + "id": "4Pq1CCK1ltAh" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "E1Z7s6TClF1u" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Enter Reinforcement Learning\n", + "\n", + "We are going to use a simple RL algorithm called Q-learning which will give our agent some memory.\n", + "\n", + "## Intro to Q-learning\n", + "\n", + "Essentially, Q-learning lets the agent use the environment's rewards to learn, over time, the best action to take in a given state.\n", + "\n", + "\n", + "![](https://miro.medium.com/v2/resize:fit:1400/1*EQ-tDj-iMdsHlGKUR81Xgw.png)" + ], + "metadata": { + "id": "BQOGUzI0AZJk" + } + }, + { + "cell_type": "markdown", + "source": [ +
"![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAtcAAAAyCAYAAACXmPdFAAAABHNCSVQICAgIfAhkiAAAABl0RVh0U29mdHdhcmUAZ25vbWUtc2NyZWVuc2hvdO8Dvz4AAAAudEVYdENyZWF0aW9uIFRpbWUAU3VuZGF5IDE2IEFwcmlsIDIwMjMgMTA6MzU6NDcgUE1sL7zaAAAgAElEQVR4nO2dd3RU1dr/v2dKqFepmZl0QNd63yXJZBKQdvX6XiQFQlGaL0ICAUQEkWYQpaiErgKGJkWYAEoLSElmEvBFQeolJJPAdSmGJBOSmUlQuNwryZRzzu+P/PZ2JlNSBAK6P2u5XJycc2afXZ797Gc/z7M5u90ugsFgMBgMBoPBYPxuJM1dAAaDwWAwGAwG448CU64ZDAaDwWAwGIz7BFOuGQwGg8FgMBiM+wRTrhkMBoPBYDAYjPsEU64ZDAaDwWAwGIz7BFOuGQwGg0GxWq1wOBzNXQwGg8FoVgRBgM1ma9KzTLlmMBgMBgCguroab775Js6ePdvcRWEwGIxmhed5TJs2DT/99FOjn2XKNYPBYDBgs9kwc+ZMqNVq/PWvf23u4jAYDEazIpfLkZycjFWrVqGoqKhRzzLlmsFgMP7kCIKALVu2AAAmTZoEqVTazCViMBiM5qd3797QaDRYuXIlSktLG/wcU64ZDAbjT05paSl2796NNWvWoEWLFs1dHAaDwXgk4DgOkydPhtlsRmZmZoPjUZhyzWAwGH9i7HY7li9fjqSkJLRu3bq5i8NgMBiPFDKZDBMmTEB6ejpu3rzZoGeYcs1gMBh/UgRBQGlpKQwGA8aNG9fcxWEwGIxHkiFDhkChUCArKws1NTX13s+UawaDwfiTwvM8PvroI2g0Gma1ZjAYDC9IpVKMHz8e6enpMJlM9d7/UJVrm80Gnucf5k8yUDuBNsYRvzlxOByPTVl9YbVaH1pf53keVqv1ofwWwxWr1QpBEJq7GE3GaDQiPz8farUaEgmztTAeDjzPo6SkpLmLwWDAbrfDaDQ26N5BgwZBqVRCp9PV63vdJGnqcDhgtVphNBpRVFSE6upqiKLo85ni4mJMnz6dHU7QDJSXlyM1NfWRF2Y1NTWYMmVKk5O2PyoIgoDNmzdj8+bNsNvtjXrOZrOhpqYGxcXFKC0trVdpdjgcOHz4MDZu3PhYK3mPI6IoYtOmTdi4ceNj2WcFQUBOTg4AIDExsZlLw3gc4XkeNpsN9+7dQ1FREYxGY71GBSLni4uL69UbGIwHjdFoxNatW1FcXFzvvVKpFEqlEoWFhfXOt9JFixa939BC2O12XLx4ETNmzMD69evxr3/9C3q9HtnZ2bDZbHjqqac8pnCqqanBu+++i+TkZISFhTX05xj3iSeffBL+/v749NNPERERgXbt2jV3kdyw2+2YMWMG1Go14uLiwHGcy99FUYTRaMStW7fQoUOHZiplw+A4DtHR0Vi+fDl69eqF9u3bu32PM4IgwGq1YsmSJVi/fj1+/PFH6PV6HDx4EGfOnEH37t3Rvn17j8+dP38eGRkZ+OCDDyCTyR7kZzHqwHEcevbsiczMTHz//ffo2bOnz3Z+1KiursbOnTsBAGPHjn2sys5oXshu6Nq1a5GWlobr169Dp9MhIyMDt2/fhkKh8DjPOBwObNmyBSqVCsOHD2e7JYxmp3379vjhhx/w008/ITIy0mcaUolEArvdjgMHDmDAgAE+dZEGz8ZFRUVYtWoVLBYLUlJS0Lt3b3AcB5vNhu3bt2PlypWwWCx4/fXXXQYMz/PYunUrNBoNevbs2dCfY9xnnn32WVy5cgWZmZmYMmXKI5fH9ujRozCZTPj000/pJC8IAmpqaqDT6VBQUIDs7GwkJibiqaeeaubS1o9cLsf8+fMxd+5crFu3DqGhoR7v43keFy9exMqVKxEZGYn9+/dDLpeD53mUl5fjrbfewqxZs7B27Vq3
hWlpaSnmzp2L3bt3Qy6XP4SvYtRFKpVi4cKFGDlyJAYPHowuXbo06T2CIDx0RaOyshIFBQVITEz8Qys5giDA4XCA47hGjRNRFNmCwwN2ux3btm2DVqtFYmIiFixYAJlMBlEUcenSJaxcuRKFhYWYP3++m9wrLS3FiRMn8MUXXzxyc9CfCUEQwPM8OI6jRhkyTiQSyZ/KUENS7Q0fPhwKhQIvvfSSz3GvUqnAcRwMBgO6du3q9d56Jaooirh37x5Wr14NANi/fz/69esHqVQKiUSCli1bYvLkydBoNEhPT3fzXSH5U5OSkprcYE3dOhJF8ZHdKvdULkEQcPbs2QficyyRSDBo0CBkZ2c32L/oYWG1WrF69WqMHz/eJceuKIpYunQpDAYD/P39m7GETaNnz54QRRFZWVke27u6uhoXL17E7NmzMW/ePCxevBh+fn5U4IWGhiIlJQVmsxlZWVku262CIECv12Ps2LEICQlpUvl+z9h4VMcV4Fle2Gw2nDlz5oGUWy6XIzIyEjqdrsl+9qdPn37oR46ToJw/sgJptVoxdepUjBo1CqmpqQ1uf0EQsHbt2gZlBfgzce/ePapYr1mzBlOnToWfnx8kEgmkUin69OmDmJgY5OfnIzc312Us2u12rFy5EnFxcWjTpk0zfsVv3C950NT3NIeOwvM8PvzwQ4wYMQLTp0+H3W6n10aOHIlp06Y1yp2xKXjTf7777rtmibmSyWQ0WLE+/SgqKgqiKMJgMPhsu3qVa7vdjtmzZyM/Px/vvvsu/Pz83O7x8/ODWq0GAFy5coUOKIfDAb1eD5VK1aSDCQRBwIEDB3Do0KFGP2u327Fu3bqHPmE1hJKSEqSmprp1orKyMsyZMwfHjh17IL8bHBwMpVKJzMzMRg9oh8OBgwcPPhBBkJmZCQAYOHCgy3WpVIoPP/wQixYtQlRU1H3/3QeNVCpFUlISsrOz3ZQuQRBgMpkwe/ZsxMbGonfv3h6thz169IBSqUR6errLO27evAmtVguVSuVxTNaHtz7Y0GenTp36SAYn79+/Hxs2bHBTsLdu3Yq33377gcR8cByH+Pj4RuVArcu1a9ceqvVYFEXk5eUBACIiIh7a7z5s5HI55syZA7PZjMjIyAbXsSiK+P777//QFv3G4nA4kJ+fD61Wi6SkJLp7XRe1Wg1RFJGTk+My3srKylBQUPBIpHy02+1Yu3bt79YPeJ7Hvn37mvQem82GDz744KHrKFKpFCkpKVCpVFCr1ZBKpZBKpZg3bx5UKlW9rhG/l5KSErzxxhtusrisrAxz587F5cuXH9hv+2LQoEF098UXEokEKpUKZrO56cq13W7H9u3bkZeXh6SkJK9b2wCgUCgAAAaDgU5sgiAgPT0dsbGxTbJa2+12fPzxx6isrGz0s+Xl5di7d+8jKRx1Oh3y8vKgUqlcrgcHB+Pjjz/GlClTHsjvSiQSREZGwmAwNFrJsNls9a7UmoLD4aATnyclUSqVPlJbVKIooqamBoIgwG6317urEhgYCLPZjLKyMpfrDocDc+bMgVKpxLvvvuuzn5LfKC8vp9eIABo8eHCTvsNbH2zos2az+ZELRrLZbPjkk08glUrdJv1BgwZh7dq1D6wvRUdHQ6lUNlm5vh84HA44HI4GZWVydnkIDg5+GMVrFiQSCf75z38CqG2jhwUJ+rfZbG7jRBAENzlqs9no/eQeEtzsbEUkwYOeZA95xmq10nfV7Qc8z4PneTgcDuoGQK6Ta976TllZGWbNmgWlUonJkyd73fFQq9XgOA75+fm0jDzPQ6/XQ6lUolWrVvVV3wPnfukHZWVlWLNmDa5evdroZ2/evAm9Xt8s7jG3bt1Cfn4+ANA6qKqqcrv2INDpdDCZTG5jIDg4GB999BGGDh36wH7bFzKZDCqVClevXvWpH0kkEigUClgsFp9zoM+Zpry8HFqtFhzHITk52WfBLBYLANctRrLtSKza3uB5HqIoQiKRuDQqeT4+Pt7j
c4IggOM4j4P8ypUrAGotf96eBZreiURRpGX2BfFhIvfxPI/8/Hx07tzZ5VlSnn79+vn8TcC1jut+R33fFRkZCa1W2yglWRAEfP7554iMjLzvyokoikhPT0dSUtIjuRByxmq1Ytu2bcjOzoZKpYIgCJg0aRKioqKon7Qoii51RJQuk8mErl27Aqitz23btsFkMiExMbFeyzMZWxaLBV26dIEoirBYLFAqlfVu6XvqD976oDPe/E3Js3FxcV6fJb58TW1PX+PauXzkPvI7RF5oNBq394WEhHgNpvY0rshzzt/g7T6gdhFIBO7D9p/meR6XLl1CVlYWCgsLoVar0b17dzz77LMIDQ2FKIpwOBwu/saCINB+pVQqfb7fuS/4ki88z0MikXgMRm6s64kv+ercDr7KQ/5G+kVAQECDf5+M5aZQXFwMvV4PrVYLpVKJTz75BCEhIZDJZBAEARkZGdBoNDR2xGaz4Y033sCtW7cAACtWrMDu3bvpJK9Sqah72OrVq9GpUyd6YlyvXr3otxuNRsycORPh4eHgOA6FhYWIiYnBpEmTIJfL4XA4sGTJEphMJnAcB1EUMWDAAERFRWHFihX02jPPPIMZM2a41KnD4cDOnTvBcRxiYmJ8zgOVlZUe27ygoIAq3nWpO7bIAsTX7zgcDo8LaUEQIIoiVVo9yZP69IO61KejeNOPfMkyvV4PwPui70HqKBUVFQCAiRMn+rzWlHc730NkgrN+kp+fj9jYWI/6z1//+lef7/PU1kDD9R8yNzn3D4JEIoFara5XaQZq/a4NBgPsdrvX+dtrzyUCGQDi4uJ8BoI4K2r+/v5uljZfE3hxcTGysrJgNpsRERGBESNGoLi4GNnZ2cjLy4Moijh+/DgUCgWNLiaRytnZ2bDb7fTc9y5duuDMmTMwGAwoKCiAIAjYsmUL1Go1nn/+eQC1ClJFRQWOHz+OW7duISIiAsOGDWvw6lEURZSVleHIkSPgOA7jxo3DrVu3EBYWRt9BMlsQ945OnTphyJAhyMzMhMVioXllP/vsMwwaNAhBQUE4f/48vv76a4wdO9YlYE8URdjtdmzdupUqVZMmTUJoaChqamqwdetWSCQSTJ48GWVlZcjJyYEgCIiNjfUY+EcmmPLycnTr1q1B31xSUgKDwYDJkyc36P7GQCx9DZ3Imss/tLS0FMuWLQPHcTh48CA4jsPx48fx1ltvYc+ePQgLC0NaWhp69uzpskByXuU6HA7IZDLY7XYYDAYAta4wvr7JbDYDAN2uA37bnlUoFF4nH0EQcOPGDWRnZwOoHcNdunRBRkaGxz5IFE+bzYYtW7YAABWAwcHBkMlk2LBhAw3kEEUR69evx7Rp02i/t9ls2Lp1KyorK+Hv7+/y3obgPK47d+6M+Ph4VFZWugQJ8jwPo9GIo0ePorKyEtHR0YiIiEB2djYKCgogiiIuX74Mo9GIESNGwG63Y/PmzZDJZG7lIe86duwYqqqqoFarqSwoLi5GZmYmgoODERcXhy1btkAul0OhUGDIkCFu9c5xHE3RlJCQ0CRXnabA8zyOHDmCVatWYc2aNVi8eDHsdjteeeUV3Lp1C5MmTUJZWRmWLl2KLVu2uMgo0rd88d133yEnJwcajQYvvvgi0tPTAQCTJ0+m32i327FlyxY6+cXFxaFbt27geR6fffYZKisrMX78eISFhUEURZSUlKCiogJ9+/bFhQsXcPnyZbz++uuQy+UQBAFlZWU4evQoOI7D+PHjaR/gOA7FxcU4evQoQkJCEBMTg8uXL6OwsBCvv/46bRNSHkEQEB4eDoPB4HMx6AlPSltDqK6uxrJly1BYWAiFQgGz2YyZM2di3bp16NKlC+7du4cTJ0647DjJZDIkJycjLy8Pu3btwrx582gMxqVLlzBnzhzs3r0bALBu3ToolUrMnDkTWq0W0dHR8PPzo0GGZrMZc+fORd++fVFSUoIxY8ZAFEVMmTIFEokE48ePR1lZGT7++GMAtfN1
cHAwqqqqoFAoEBsb67Y4BWplDpElkyZN8lkHlZWVdDwQ7HY78vPzPQbPWq1WbN26lRrxTCYTcnJyUFFRgaSkJLegMef25TgOgwYNojKiuroaV65cwcmTJzFu3DiIogidTgd/f3+8/PLLOH/+PNUPRFGkmUtGjBjh8VvICaZERoSHh2PEiBE4e/YsCgoKqCzctm0b/P39MWLECHAc51OWnTlzBvn5+SgsLIQoiti6dSsiIiKojmKz2VBWVgadTofKykrExMSgT58+jdJRiIzkOA6JiYmoqqpCly5dIJFIIIoirly5ApVKRduCuIkplcp6FWaj0YgjR45AIpEgKSkJVVVVCAsLc3mXs47UuXNnDB06FFu2bIFEIqF1tnHjRgwdOhQBAQG4ePEicnNzkZCQ4KKXCIKAmzdv4tixY7BYLPD398eQIUMQEhKC6upqbNu2jeo/ZCdAFEUqg5wh8xPHcbQ/1P1WpVKJgoICN2OEMxKJhOq8FouFGs3c7vNWic45UBUKRb0VTu7VaDS0E5hMJoii6NViYLVasWLFCgQEBOCdd96BVqvFhQsX6Ari1q1biIyMRFRUFKKioqhivXnzZqxcuRLx8fGIj4/HCy+8AK1WS1dJkZGRqKysxMCBA9GjRw8a8EUqd9WqVZg4cSLGjx8Pg8HQqBy1paWlGDVqFAIDAxETE4M1a9bg1VdfpU7woiiitLQUM2bMQGBgICZMmIBTp05hx44dCAgIoJGmarUa0dHRCAkJwfnz55GdnY3u3btj9erVLluDRqMRzz33HFQqFQYPHgyVSoXp06ejpqYGy5cvh0qlQmFhId58801otVpMnjwZKpUKc+fO9fhdROARq5UveJ7HjRs38NZbb2H+/Pnw8/OjK9bG/OcL8veGuic0VAm/n+W02WxYvnw5LBYLPvroI/j5+UEul2Pw4MHUH7qkpASnTp3yaAnhOA4FBQX034IgwGAwQKlUIjAw0Od3kAWu80qbXPNmuSYp+lauXImJEyeiY8eOtF8FBgZ67IMAaL8lPsR79uzBq6++ips3b4LjOPTo0YO207hx49CzZ08qF0pKSvDyyy+D4zjMnz8farW6Ub79DocDmzdvxqpVqxAfHw+FQoH+/fvTcU3u+eyzz7BixQpMmjQJ4eHh2LlzJ3iepxYxpVKJ6Oho9OjRAw6HA8uXL0dUVBROnDhBfYyBWtmzZcsW+q6kpCScPHkSS5cupWNLo9Fg+/btSE1NRXBwMAYMGICcnBxcunTJrb9wHAeFQlHvyV1N6Zfe+qYoirh48SJWrVqFpKQk9OnTBzKZDK1atUJcXBy0Wi3Ky8uh0+kQFRXlMjk77354k+1WqxV6vR79+/fHzp07MXfuXCQmJqKgoIDGSRD/e1EUMXHiRMTGxmLZsmWwWq3YuHEjDURevnw5eJ6HIAiYNWsWNQJcvnwZe/bsoXPFxYsXMXr0aAQHByM+Ph6vvvoqxowZA6vVStszJiYGK1aswOzZsxEYGIhr165h27ZtAGrTvk6fPh1msxlDhgzBqVOn6j0kx1edN6Y9bDYbZsyYgYCAAJw6dQoHDhzA3r176Viw2+3Q6/UYMGAAWrZsSZ+TSCTo27cvHVsxMTHo27cv5HI53fkqKCjA/Pnz0aVLF7Rq1QpqtRr5+fnU2iiXyxEbGwu1Wk3HZVBQENRqNXJycujc2KVLFzz//PMYP348LBYLrl27hpKSEgiCgDVr1mD48OHo2rWrW10RQ5lara5395JYhZ2tk77clI4ePYrOnTvj6tWrmDVrFrRaLSZNmoTY2FjMmTPHZR4j/Q2oXeDFxsZix44dNN/2smXLUFVVhTFjxmDWrFmYM2cO/v73v2P37t3Izc2FRCKBRqNBZWUl4uPj0aNHD5/Wa7vdjuXLlyMoKAjvvPMOdu7ciUuXLtG6raqqQlxcHDQaDaKjo8FxnIssmzBhgossEwQBUqkUGo0GBQUFiIuLQ48ePdCrVy/ah5YvX051lKSkJJw4
caLBbpx1x1BcXBw++eQTvPrqq/TMBLvdjoKCAsTExLh8J7Eo+3r3hQsXMHr0aISEhCAuLg5z5syh4xOonXsuXLiAUaNGISgoCBMmTMDXX3+NrKwsREdH0z6emJiIZ599FiEhIbhw4QL0ej1u376N1atX075CduRGjRqFgIAATJw4ERzHYfr06aiurqa6465du/Dmm29Cr9dj0qRJ4DgOq1evdnGncjgcmD59OiwWC5KTk1FRUYFNmza5jWWlUom8vLx6DQ8NWXh7HSWCIFCFgFjMvFFSUgKTyYSAgAC3LQ5RFNG5c2ePz2VlZcFisSAuLg4mkwnBwcEICAhAWFgYgoKCYDKZqKAh77p06RJ27dpFU5PV1NS4OOb369cPpaWlMJvNiIuLQ+/evQHUNtT27duxe/dunDp1Cq1atQLHccjJycGCBQvqrSigtuPPmjULiYmJePnll10ELfFbtNlseOWVVxAbG4thw4bRsiQmJqJfv37YtGkTlEolXnvtNcjlctqp33vvPWi1WlgsFiqQbDYbUlNTER8fj2HDhkEikWDgwIFIT0/HsWPHIIoiEhISsHv3bigUCrz//vt0C8Zb5yCdoj4lwOFwYOPGjdizZw/UajXNh9tYBgwYgOeee87r38n23/3OBvJ///d/+Pbbbxt8f//+/fG3v/3N49+2b99OrS5t27Z1+ZtSqUR2djY6deqE2NhYN4slUfhMJhN4nodMJqMTIuB7y08URWi1WgC1ExVRjjiOozs9niBHWpMxIZFIMGDAAEilUvTt29etDwK/TSJKpRJTp06FRCJBREQE9Ho9goKCwHEcevfujc2bN0OpVFJFDqhVaGbNmoWBAwfi9ddfp1t/Dd2GF0URx44dw65du7B3716EhYXR/kC+gaT0TE9Px4EDB9CmTRu0aNECgiCga9eu6NatG+bOnUuVTKB2e75Tp07U750sLHmeR25uLrRaLfbt24fWrVvjqaeeQkREBE6cOIFDhw4hJiYGlZWVqKqqQkJCAnr37g2HwwGLxYKcnByPwVwcxyEvL8+rIiEIAr744gv8+OOPbn8zGAywWCx0q9gZb32zpqYGs2fPhlKpxIQJE1z+Rty/jh07hhMnTmD//v1uZTGZTFAqlV4XQEeOHEFERAR++eUXWCwWpKWl4erVq6ioqIBGo4HdbseKFSsQFBREU7AWFhYiKCiIWhWJfCKugQ6HAyaTCUlJSZBKpXQRp1AoYLfbMXPmTMybNw/Dhg2DIAiIjIykKfRsNhtUKhXu3r1LrZxSqRQ3b95EYmIilc8cx2Hx4sWQyWTQaDTIzs72GhBtt9uRlpaGO3fuuFwXRREFBQVYsmSJ2xjlOA4LFy50UzJFUaQxFCSAPywsDJMnT8abb76JMWPGYNeuXThw4IDHsngyNDjvNHiyXJrNZrob89xzz6FHjx4oLy/HlStXUFFRQeOV6vbVwYMHw2QyIT09HXl5efj000/RunVrj+UCav1kAe8LeoLD4aCGNqVSSeuIbLWT2Czn+3fv3o29e/fiyy+/hEKhwOLFiyGRSFBVVQWz2Qyz2YzQ0FDYbDba36ZMmQKO43Dy5Em6zf/VV18hPj4effv2RXV1NSwWC+bNm4d//vOfEEURGo0Gfn5+dE6OjY2lssIbZLd54MCBKC8vR3BwMFQqFUJDQxEYGOhRR3GWZS1btqR9jyzw+vbtS41x8fHxVEdxOBzYtm0bdDodTp8+DT8/P1RVVSEvL6/Buy52ux3Lli1DYmIihg0bRnUU55gms9lMF5ykfcxmMwwGg0/XTzJHOL+bLM7Ju4lfflJSEl566SWUlJTAbDZDoVBQ/UelUqF37950F3fXrl1Yt24dVqxY4aLs3rx500UekF2KXbt24fLly1T/Wb16NSIiIqgMUqlU0Gq1Lu/ieR4FBQUYO3YsJBIJfvnlF8TFxXn8TuLO4gvSj31Zrr0q12QCB3xbFQVBQHZ2NjiOw9y5c10ahvgaVVVVeUwXFhAQ
AIvFgjFjxiAyMhJvv/02FRTEXcBZsXc4HFi1ahXi4uJozmyDwQCz2eyylVVRUUE7lHM5SXaFZcuW0S3ML7/8skF+xKIoIjMzEyaTCYMGDaLvNBgMdIXO8zyWL18OAHSVFRYWhoyMDEgkErr6dN4JkMvleOmllwAAO3fupCkLeZ7H559/joKCAixcuJDeTxQEmUyGhQsXQhAEmM1mrFu3jm755OXlIT4+3ueArC9IlByE8vXXX0MQBMTExDQp8KK+3OZE+bvfPqpdunTxOVnUxZvfm91up8K7buyARCKh7XHy5EkcOHDAo78pUFufpP4KCwsB1PYDXxNVWVkZ9e9OTk6mdUS2rL21IcdxUKlU0Ov1MBgMePHFF138LslvO9f58ePHkZeXh3feeYf2ZWJZcd5ur+tvLYq1qQZNJhNu3ryJjRs3Ut/KwYMHN6hdHQ4HtFot4uLi6CLVeSfM+Z7Y2Fh6z5AhQ5CQkAC5XE5P13Juo5CQEEybNg2bN2+Gv7+/i1sNydLiLJcqKytRUVFBfZWXLVuGuLg4PPvsswB+U0h95YVWqVQ+x0mfPn08umMRI4Sn8eKtbxJrrydrIulXe/bswZw5c9yyNTmX39u39OnTByqVCkuWLEF8fDwCAgIQHByMw4cPQyqV4quvvkJ+fj4iIiLw2WefwWw2w9/fn2aVmjJlCg1YnjdvHqRSKY4fPw6O42hbhISEUNcF4nZFLInEwEMWhj///DMWLlxIDQuRkZFo0aIFDh8+DI7jYDQakZeXh3nz5tH6IEYEbztEEokEQ4cOdRtLRK56C8b3NG7lcjnef/99t/YPCgqCQqFAeno6BgwY4HW+Ie3grICShU9dpdQTxcXF2LlzJwwGA/WlvnbtmkcfUufUY1VVVfW+m5S5ITKLBKgnJCTQ64GBgR4Vc5lMhoMHD1I3pfHjx9N6IG6hZJGemZmJ/Px8xMTEYPPmzaisrIRSqcR7770HmUyG3r17U9lAFqmDBg2CTCbDsGHD6DcQ/cCT+0tdVCoVLBYLXnnlFWg0GqSkpNDEDkRHcV7YepJlJ06cAOAaC5KVlQUAbjpKeno6HXNArdvOunXrGqyjbN++HZWVlW46ivPOTVP8rUWxNi0u8QgAflNYybsdDgd27NgBANSFNDQ0FBkZGXTuIYZXRPAAABxYSURBVPoS6QdyuRyLFi2ihgWy6HZ+V0JCAr2f6KRVVVVYuHAh3UFLSEig7hr5+fkYOHCgi1wjYyAlJQX+/v50x8BTXyY66+89T8NrixHXDLPZjLy8PISFhdFodLPZjMDAQMjlcly6dAnp6elU4XX+ICIQPK0CBEFAVFQUUlJSUFhYCJ1Oh/z8fBw8eBAymYxGFjsLWpvNBpPJ5GLFI9tVgYGBVIDk5+dDpVJBLpfTlRu5b9y4cRg0aBANlqjrV0OU7rqIokgjaclig+d5mM1mJCUlUT8r4t8XFBT0WyX//4HB8zxVoKVSKbXuBAUFYdOmTRAEAUlJSdSaW1lZCY1G4zIx6HQ66ksol8vx1VdfAfjNl9pms7l0+LrfQzpnfcKa7AKsXbuWKl5vvPHGfY9s9vf3h0qlqteS3li6du3qdUXZGMhgJYsNb8ydO9erny35NtI/4+LiXAKIgNp2y83NRWBgIEJCQsDzPJYuXQoASElJcXk3seh5254mC93s7GwYDAbs2rULkZGR6Nu3L0RRdOuDAGjfJqt50t+JICSTH7FuEEFGLHwA8N5770Eul0MqlUIUXQM7yTj0pMhZrVaYzWZqpSbfKIoiHddkMnKeoEgKKeC3ydRZXhAhrdfr6SJBFEW6ba3RaOjY4HkeJpMJ8fHxCA0N9Rh4c/z4cQC11iZPQYukPbxZgiUSCbp16+ZRub5y5QqioqLqtaQ5Q/qlv7+/18mXxJQ0xX84ODgYNTU1dHImizoSmEcmZOcsLM4HUEilUhp3QhYNxDWHLGpIPcpkMmpNI7KMLGYCAgIgiiKCgoJo1qL4+Hj6O+T/
pA8ThYUoFmQx6Em2S6VSj23C8zy0Wi169erVYP95b4sUEndx6tQprFmzxqd7iq93Ov+97ndYrVbMnj0bJpOJ7szabDbs2rWLPltSUoLQ0FDqupCZmYn+/fvj2rVryMrKQnJysou7ijNk0UqUFJJpJDc3F0FBQQgJCaE7GUqlEmvWrHELoBVF0aOcl8vlOHz4MIDf0rFarVYUFBRQIxFRzADgnXfegUwmo8Y70v4hISG0bMQIIJPJ3OrQk37gLSBWo9Hg7bffRk5ODnQ6HSoqKpCWlgY/Pz9q+CByRRRFr7IMqF1kObsWKZVK6moJgOYFHzduHBISEsDzPKRSqVv/86WjkPmd6AxEriUlJdF7SJYoMiaAWvlDrNCe3u/c94hu46z/kO+sG99Adp3I38kYJ3MPcV/atGkTFAoFJkyY4DIn1Y2VcE6SIZfL3RbPNpsN2dnZSElJcfm+gIAA7N27Fzt37kR2dja0Wi0mTpzoVW7Wpx81JBjcq1mJ+C6TD7LZbDh06BCWL1+OHTt24Pz58zQ6WalUUp9cZ6KioqBSqdysAjzP48KFCxg+fDgGDx6MBQsWYNWqVbRjOPukSiQSFBcX48CBA/Q95MMdDge1rpHtPaLgkknx3LlzyMjIcKnkFi1aoFWrVpDL5SgtLaWNWVxcjPfffx8lJSUe8xKbzWYXoU4m26ioKKSlpcFoNEKhULgNVofDgZqaGlRUVFAljed5HDhwAA6Hg1oESVaUxYsXu2zbkncRRSEqKspFAXA+LpykuZkwYQLOnj3rlkOT1ENDLCFArQV48ODB+Prrr93Syd0PiIBr6OT/sAMaycToKe7Abre7CDNP/pjk25yj5GUyGfXtKi8vR3FxMVJTU3H+/Hl88MEHqK6uxpIlS2AwGJCSkoJnn33W5btlMhmNavaU0is1NRV5eXmYMmUK1q5dS2MQAHjsg8SnkWybArUuB2RH6PTp0yguLnaxbvA8T8ebQqGAWq2Gn58f/Pz86G4EsSY7HA6cP38ehw8fdkknSCDBRc5WajKx2mw2pKWl0f5a10WttLTUxQedBCMSa1hZWRksFgvi4+Nx+PBhnD9/nraRs6uA0WikcoMcikEmSeA3KyoZa2lpaS5+kMSH2dtE/SAg9eVpLBP5FRkZSceYJ8xms88tULKD6bwQIdeJkYEoAKTti4uLqTJFFGGgto6Iryupo61bt2LgwIEQBAFKpRIKhYIqJUQxj42NpYGaHMe5TNAEZ8WNKOck8FetVuPcuXONchO73yiVSkRGRvoM8CX1603GeYuvAGqVI5PJhLi4OJrqjihbHMfhxo0b2L59O21ro9GIPXv2YP78+YiJiUF6ejp27NjhdWFI/GUNBgOMRiOKioqQmpqKnJwcXLx4kfq6E5lVd6dEJpMhOjra426b8zzmDFnsnj9/HufOnYNaraauJiTuhfQ3ADh//jw2bNgAm81G0wBKJBIX3aDubjPRD+ricDjw1VdfYeTIkRg6dCg2btyIsWPHwmKxuBjSiBHLaDTi4MGDXmVZXFwcrFYr1q9fTxdupAynT59GRkYGjYUJCAiAn58fWrVqRd1YSLsUFRVh06ZNLtcIPM/DYrG47DKTnSK1Wo1169bRxXJMTAyMRiMyMjKobIuNjaXX6uLt3WQHad26dbDZbFAqlfD393fTf0gQM1Ardx0OB5WhJBaByIn3338fPM9TxdU51kiv1yMyMpLqP8StmEDcl3r06IHvvvsOxcXFVNesrKzEokWLsHv3bhfPDGfINW+uzABc5nlfXh0+Z4GEhASMHTsW6enpOHfuHE6cOIEFCxZg3rx5+PrrrzFq1CgkJSVh3759Hg+JqWs1dr6+c+dOjB8/nq6ACwsL6XarKIqoqKiAv78/3R4YOnQopFIpbTxBEHDx4kXo9Xr4+/sjKyuLrgzLy8vB8zxqamqwa9cuDBs2DEFBQdBoNNDr9fQ3z58/j9TUVKo0ZmVlITs7GytWrHCbcCQSCfUN53ke
JSUlyMnJgSiKaNeuHW7duoXg4GB6OhXJMVpTU4OpU6fi4sWLLg134cIFVFVVUSufyWSCv78/Ll++DIVCAblcjri4OOTn59PyHj16FBaLBfPnz6ep38i2bN1t0I4dO0Kr1bptMxOrXWMOZQkLC8O4ceNw/PjxRqXwawhkW9tTDm1BEFBdXY0bN24gNzcXQO3WfVFREYqKiu57WTwhl8sxYMAAWCwWlJSUwG63w26349ChQ5g+fTpN0SQIAvbt2+d2uhOZOAC4WNrWrFkDpVKJY8eOYevWrYiJicFbb72FZ555BsuWLUNeXh4++eQT2u+dIX3GbDa7Ba2SALbOnTvTfpWXl4eoqChwHEdX3M59kJwwWFFRAbvdjtLSUsyZMweiKKJ9+/bYs2cPgoODUVFRAaVSifLychw6dIjmII+Li0NlZSUcDgft8+vXr8fRo0ep/9rs2bOxevVqj5NCYGAgOI5DRUUFDWLR6/VQKBTIzMxEdHQ0oqOjER4ejrKyMpqT99y5c5gxYwYN0FGr1eB5HsuWLaPBRQaDASqVCh07dsTJkydpAJNGo8HNmzchirUZi1auXIl58+ZRFxCy3Ugs4Q6HA3l5eXjmmWdw5MgRBAYGurQLEfTOu2oPmoCAAERERKCwsJAuMu7du4e0tDSkp6dDqVTCbDbjxo0bOHjwoMuzxKVJFEWXRUJdyI5B3QOeOI6jCkRubi4EQUBNTQ3S0tLw+eefuyziyfa+0WikvsAkWJu4DEqlUroFL4oibty4gd27d0Oj0dA+LpFIcOXKFZjNZjf5RcpDjBvOOz+dO3eGVqulbdscEDnnzUJ67949Kpvz8vJQUlICm82Go0eP0tR5JSUlKC0tRXFxMR3H5eXlKCkpoTEKeXl5+PXXX2Gz2ZCZmekiA0hfP3jwIMaMGQO1Wo3WrVsjKSkJCoUCWq0W586d8yhb5XI5xo0bB47jcPjwYWRmZuLtt9/Gu+++i7y8PEyfPh0AsGbNGo8HYnHcb2kT6/Y3otyJokitnKQunnnmGej1evTq1YvupJMTBUl/++qrr2Cz2aDVanHr1i1cvHgRFosFCoUCgiDgyJEj0Gg0dLfNWT9IT0/HsGHDPLaXTqdDUlISHSNXr15FYmIitbqWl5ejU6dOsNvtWLp0KYYOHUplGdETiI5CZBkZM/7+/ujevTuKi4uxZs0a+mxkZCQdT3a7nco4h8MBURSh1+vpQqhuPRL9iBhdbty4gRMnTlCXs19++QUSiQQVFRV050KlUtE+Qq55cpfx9O6TJ08CADp06ICff/4ZLVq0wIABA2AwGGC1WqmeNHXqVOqyBdSOx4sXL1J3UCIr1Go1tm/fTl2nYmNjaT2SDDGVlZX49NNPIZPJqLLt7BZnMpkQGRkJu92O3bt303gbURQRHh4OoNalJCIiwqNibDKZoFAofO5WORtQfBn7pIsWLXrf6x+lUkRERKBz587YsmUL/vOf/+D69etIS0ujq4Bhw4bBz88PP/zwA9q3b++2BVNUVASz2Yz/+Z//odeJy0lubi6ysrJw4MABKJVKTJkyhW493r59G2fOnMHdu3fRr18/dO3aFU888QT69euHb775Btu3b4dSqUTfvn1x48YNFBQUYObMmZDJZLh9+zYMBgMMBgOWLl2KFi1aQCqVonv37sjKysLly5dx+vRp3LlzBwsXLkTHjh0B1KbMKyoqgslkwksvveRSwRzHoWXLlvjXv/6FK1eu4Mcff8SYMWNQXFwMnU6H0aNHIywsDG3atEHHjh2RkZGBb775Bt9//z0GDhyIfv36QalUIjc3F2fPnoUo1qZHIp3izp07MJlM+M9//oOpU6dCKpWibdu26NSpE/bt24f9+/ejTZs2WLFiBS2vw+HAgQMHkJycjHbt2gGo7bi//vorDh48iAkTJiAsLMylA5w6dQoWiwWjRo1qVM7q1q1b4+zZs3j++efvq2VOIpHQ9nL2rQJqhe6HH34InU6HqqoqKJVK/Pvf
/8bp06dx5swZDBo06KEoMoGBgejQoQM+/vhj/PDDD9i3bx/u3r2LWbNm4eWXX0ZeXh7Onj2Ltm3b4u9//7tLmcrKynDw4EGkpKSgQ4cO9Hq7du0QGhqKnJwcXLhwAWazGbm5uThy5AieeOIJfP7553R1XlRURNuccPv2bWRkZGDgwIF48skn6fW//OUvqK6uxq1bt5CZmQmdTod169YhJCSEuhA490GSwqx169YQBAE6nQ43b97E2LFjUVNTA51Oh+TkZISFhaFr16749ttvce7cOfzlL3+h4/WJJ57AL7/8Ar1ej9OnT2Pfvn1ISEhwSVnXsWNHVFdXo2PHjvjv//5vlz70xBNPICIiApcvX8b27dvh7++Pfv364aeffsLVq1dp4GV4eDiuX7+OU6dOYcWKFWjVqhVWrlyJli1bwmazQafT4e7du4iNjaUW7q5du+LMmTPgeR7PPfccTUf1zDPP4Pr168jOzsbhw4eRmJiI/v3704VLYWEhnnrqKfTo0YNe+/nnn1FSUgIAePXVV13a+ebNm0hLS0NkZGSTlLh//OMfUKlUjTrQRSaTITw8HDqdDhkZGdi/fz+uX7+Ofv36ITk5GW3atEFOTg5MJhOSk5NdtulFUcQPP/yAoqIiDBo0iMqPuly9ehVt27bFCy+84DbWnnzySYSHh0Ov1+Pbb7/Fhg0b8NRTT2HmzJmQy+V014e4C1itVqSkpODcuXM4evQozp49i9TUVLRo0QISiQShoaFo1aoVTp48iZ9++gmvvfYaSkpKsH//frz11lvo0KEDVdCTk5Pd5Bc5oGT//v24ffs2YmJi0Lp1a+oWRPI/NwQSYzNw4MDfLWNI/MzIkSPRvn17j3+fNm0a7t27B4VCgcrKSly/fh0///wzTpw4Qf2cz507B41GgxUrVuDXX3+FUqnETz/9hDt37qB///4IDw/Hr7/+ivXr1+PUqVNo2bIlpk+fjuLiYro7tmDBAty4cQNKpRJt27bF888/j+PHj9PMMQUFBfjmm2/cvpvjODz99NPo0qULdu3ahUuXLsFgMGDHjh24evUqoqOjsWzZMoSFhaG4uBi3b992+VYi59PT013iR4DaeWz//v1ISkpycRf68ccfkZOTg5EjRyI0NBTt2rWjCtXp06exYcMGPP3001SGkd224uJiLF68GGfPnqXBzyTFJtEtDAYD8vPzsWzZMo+GQZJVJDc3F5mZmTh48CDCw8PpuHeet/Ly8jBy5EiEhYVRWXbp0iV8/vnniIiIgFqtprKMyMxffvkFJ06coPoU2fXp3r07MjIyUFpaipUrV6JVq1ZYtWoVdR+x2+2orKzEjz/+iOHDh7uMaY7jEBISgrt37+Ly5cu4fv06xo0bh6KiImRlZdEMInfu3IHBYIBUKsX//u//unyLUqmkctAZ8u42bdrg1KlT9N3V1dXYt28fXnnlFYSGhqJNmzbw8/NDRkYGvv32W9y8eRPJycmIiIhAt27dkJubS3cPif4jiiJu376NvLw8dOvWDS+++CJkMhnatGmDVq1aYd++fUhLS0PXrl2xYsUKqpcZjUZ89913mDBhApVfnTp1wo0bN6DT6TB+/Hgae0W8AzIzM3H37l0sXLjQTYEm47Rv375uLs7OCIKA/fv3g+M4jBw50qt84Ox2e725zcipTsQC3aFDBwwYMABffvklunTpAqPRiNTUVJpP1pmNGzciPz8f69evd/kYYl2w2+2QSCRUGDt/AEn47fxOsqKTSCQukdTAb9sHpLzO/n8Eq9UKjuPoqqluBdpsNixZsgQLFizwOOjqJiEnJ2U5fxv5/bo+YeRvZOXj/NveyuxcD57K68n309NhJkCtEBs+fDhiY2PpIG8MxAfsfnP27FnMmTOH9idnHA4HrUfnbyU+8w8LYr0gwX7OvnykDzj7ABM2bdoEvV6PvXv3ejyZzGaz0YGvUqmQmZmJW7du0Qw2GzZsQFxcHP7rv/7L5Tmr1YrRo0dj3LhxGD58
uMvfnPuMcyAlwVsfJFv5AKi/MgCX+zxdI8+S5+uOB/L3s2fPIj8/H9OnT3dTcsjujHN5iUuN8+8QKya5zzmwknxX3TgKUmZPsoB8S91nyM6Vc92RE+w8yZUbN25g9uzZ2LNnD9q0aYPGsn79ekRHRzfK59q5rOQbnU80JX3WU3sAtbI5PT0d77zzjkfrHXm3J1lCIPVO2s6TzCSxOjKZjGZ+IePFk+wifYP42jq3KelnvvK7EwspsW4B8DgOfMHzPKZOneo2bzUFq9WKv/3tb9QX2hPOsoX0e9L3yc4YaQfSB53HsXNcD6kz4j9NUpI5H3RFrLgkeN5ZXgDuY4VAFDyS9UUURbpLMnXqVFitVkyfPh1paWlu31pdXY3Ro0fTnN/OeJvHBEFwWxSS6wDc5l0Sx0Tanud5N93C+Xt9nd9Rn47iXI66/uX1yTJfOoo3Gef895EjR2L//v0N0lGI3CJ1ReYHZx3K0zVPeBqfzu+uW7913+dt/iDvqesj71xPnvSfujoJaTMAbnqXN52MYDQaMXLkSHzxxRc+zwApKyvDyJEjoVAovLYBUM8JjQSiNJAB8euvvyIgIACrV6+Gv78/srOzsWbNGo8FnjRpEkaPHg2j0egSfUk+0FfktKeVg0Qi8foxdcvrifqeJRHA3gZd3fd6us/X7/u67ulv3urB+e8N/Y2ysjJwXO3BN01Rkh+UlbhXr16IiYmBTqfDG2+84fI35/7h/K0Py6+V4CyIG9IHgNpJTavVIiUlxeukSnxVycEzKpWK5v0lW3Akr2vd52JjY5GdnY2hQ4e61ZOv+vHWjnWf8TQ2GzteCSSQZvDgwR6thxKJpEFKjLd7vAlMX2X2JQs81ZG395BA5djY2HrlizfqZi5pDN5kR33Kg0ajQXp6uk/3qvrGfH1yHIDb333dW/f36t7bEHno/PemLsCJnLwfciYrKwsxMTE+28I5INQTzvVC7vEm+33JJ0/v8TVf1YXjOCqzunXrRhV5ktKvsrLSa0aUFi1aIDY2FjqdjqZNq1uW+r7F1zive7+3ftnQ762vb/uas+uTZb7KUN+zZWVlGDBgQIN1lIaMofrGVUPfTe7x9m2NldH1tVVD+0dD2jwzMxMRERH1nj1BDFD1Ha7YJMkhl8uhVCqRn5+PnJwcxMbGet0KlcvlNOk/sRQ9qpAE7o09zetxwOFwYOnSpYiJiXHL1dzckKN8DQYDDU75I7Bt2zao1WqaFqkhkGCsgoICWCwWvPfeex4VNo7jMHHiRJjNZhr49ShjNBpx7dq1Rrk9PC6QYMgJEyY0WZl7+umnm6yYNxWNRgOlUklPTGP8hkQiwXPPPfe7d8fIrpSv/MGPM86+9yTFKEn7WReJRIKEhAScOHECpaWlD7WcfxSsVitWrVrllmqO8fswGo3QarWYP3++14w5BBKP4+tgKqCBluu6yOVyjB07FuHh4YiMjETPnj29Cg6iBIhi7VGjU6dOfWSFTEVFBcaNG0eTuv9RIMcQBwQEYPLkyQ8940ZDCA4ORkREBI4fP47XXnvtoSsa95vS0lLo9Xqvwb7e6NGjBz7++GMatEVyqnrCz88P69evR2pqKiIjI922Wh8VRFHEP/7xjwbna32cIBmT5s2bV69QftTgeR4ajYZmg3rcx9yjCHHraEhO5ccRqVTqEuwaHx/vcxwEBwdj7dq1OHbsGBISEnxmT2G4U1FRgbFjx/qcFxiNgwSkzp07t0G7h84BmL5okM+1rx9pqKJGPmD+/PmPtBBvzDc9LpSWlmLLli1YvHjx7/YffJDY7XZs3rwZ4eHheOGFF5q7OE1GEARkZGQgMDCQntzVGHzlXvV0L8mmMnLkyD9c333U2b9/PwIDA9GnT5/HzpIkiiIOHz6MVatWYd++fWzCfkDYbLZGuV48jhCFoyHyh6S569y5s9dTcRne+SPqKM1JRkYGeJ7HiBEj6pXhJPjYbDbjwIEDPt1Cfpdy3ViIYzrrGA+fx2VAOgdC
PM487Pr2FBDEePA8LuPKGyQQMyYmxi3egfH4wPM8SktLIZHUHv/s5+f3SPfLxhgQGIwHSWP6YmlpKUaPHo3ExMR65eVD7dkkFQ7j4fO41Lun6OjHkeY47Ibx8HlcxpU3QkNDERMTA4PB4DPfNePRxeFwYNq0adDpdLh8+TJGjhyJQ4cONXexfPJHMKAw/hg0pi9mZWVBoVB4PSbeGda7GQwG40+KVCpFfHw8zGaz2wFIjEcfnueRmpoKpVKJ119/HUOGDIFEIvlDBg4zGM0JCXqMi4trkIsXU64ZDAbjT0xoaChiY2OxcuVKmhOZ8XjA8zz0ej00Gg3NIW42m+sNtmIwGI0jMzMTSqUS48ePb1BgPlOuGQwG40+MRCLBpEmTYLFYHou0jozfyM/PB/Db8fTZ2dmIj4+nB3wwGIzfD7Faz5s3z+uZFXVhyjWDwWD8yZHL5Vi9ejV0Oh3LQfwYoVQqoVKpIIoibDYbsrOz4e/vj88++4weiMZgMJoOOSMkMTER0dHRDX5OumjRovcfXLEYDAaD8ajDcRyefPJJVFRU4Pvvv4dSqUS7du2au1iMemjbti3u3LmD48eP49///jdatmwJg8EAiUSCoUOHPvYBtwxGc+JwOLB582ZIJBJMnz69UWcZPNRUfAwGg8F4dGE5iB8/eJ6nKSE5jqNpOVk2Dgbj92G1WrFlyxZMmjSpwe4gBKZcMxgMBoPCziNgMBiM38cf6yxiBoPBYPwu/sgnCTIYDMbDgO0bMRgMBoPBYDAY9wmmXDMYDAaDwWAwGPcJplwzGAwGg8FgMBj3CaZcMxgMBoPBYDAY9wmmXDMYDAaDwWAwGPeJ/wdi1+HRukCO/QAAAABJRU5ErkJggg==)" + ], + "metadata": { + "id": "Vsll17yDBkub" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Implementing Q-learning in python\n" + ], + "metadata": { + "id": "WZjbBG1nCy3e" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "q_table = np.zeros([env.observation_space.n, env.action_space.n]) " + ], + "metadata": { + "id": "i_qIgEQl_e_Z" + }, + "execution_count": 56, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "q_table.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "U-o-RjkMEYw0", + "outputId": "86d40cc3-513b-4753-dd25-4ff601ce14af" + }, + "execution_count": 57, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(500, 6)" + ] + }, + "metadata": {}, + "execution_count": 57 + } + ] + }, + { + "cell_type": "code", + "source": [ + "q_table" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VBDP4B9mEdQo", + "outputId": "0ba4f69c-5494-4f49-c2d7-53685cb9a470" + }, + "execution_count": 58, + "outputs": [ + { + "output_type": 
# %%
# Sanity check: np.argmax returns the *index* of the largest entry (2 here).
# The training loop below uses it to pick the greedy action from a Q-table row.
np.argmax([1,2,3])

# %%
import random
from IPython.display import clear_output

# %%
# %%time
"""Training the agent"""

# Hyperparameters
alpha = 0.1    # learning rate: weight given to the new estimate in each update
gamma = 0.6    # discount factor applied to the best next-state value
epsilon = 0.1  # probability of taking a random (exploratory) action

NUM_EPISODES = 100_000   # number of training episodes
PROGRESS_EVERY = 100     # refresh the progress line every this many episodes

# Per-episode metrics, one entry per finished episode (for plotting).
all_epochs = []
all_penalties = []

# NOTE(review): no RNG seed is set for `random` or the environment, so two
# runs of this cell will not produce identical Q-tables.
# NOTE: `q_table` is updated in place; re-running this cell continues training
# from the current table instead of starting from zeros.
for i in range(1, NUM_EPISODES + 1):
    state = env.reset()

    epochs, penalties, reward = 0, 0, 0
    done = False

    while not done:
        # Epsilon-greedy action selection.
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()  # Explore action space
        else:
            action = np.argmax(q_table[state])  # Exploit learned values

        next_state, reward, done, info = env.step(action)

        # Q-learning update:
        #   Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
        old_value = q_table[state, action]
        next_max = np.max(q_table[next_state])
        q_table[state, action] = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)

        # A reward of -10 is what this notebook counts as a penalty.
        if reward == -10:
            penalties += 1

        state = next_state
        epochs += 1

    # Record the finished episode so the metric lists are actually populated.
    all_epochs.append(epochs)
    all_penalties.append(penalties)

    if i % PROGRESS_EVERY == 0:
        clear_output(wait=True)
        print(f"Episode: {i}")

print("Training finished.\n")

# %%
# Show the learned Q-values for the first ten states.
for i in range(10):
    print(q_table[i])
# %%
# Inspect the learned Q-values for state 328.
q_table[328]

# %% [markdown]
# # Evaluating the agent

# %%
"""Evaluate agent's performance after Q-learning"""

episodes = 100
total_epochs = 0
total_penalties = 0

for _ in range(episodes):
    state = env.reset()
    epochs = 0
    penalties = 0
    reward = 0
    done = False

    # Act greedily with respect to the Q-table until the episode is reported done.
    while True:
        action = np.argmax(q_table[state])
        state, reward, done, info = env.step(action)

        # Count every -10 reward as one penalty.
        penalties += int(reward == -10)
        epochs += 1

        if done:
            break

    # Fold this episode into the running totals.
    total_epochs += epochs
    total_penalties += penalties

print(f"Results after {episodes} episodes:")
print(f"Average timesteps per episode: {total_epochs / episodes}")
print(f"Average penalties per episode: {total_penalties / episodes}")
# %% [markdown]
# ## Realtime testing

# %%
# State used in the illustration.
START_STATE = 328

# Show the starting position.
# The state has to be written to the base environment: assigning `env.s` only
# creates an attribute on the outer wrapper object (the board rendered after
# the original `env.s = 328` did not match state 328), while `env.env.s` only
# works when exactly one wrapper is present. `env.unwrapped` reaches the base
# environment regardless of how many wrappers there are.
# NOTE(review): how `env` is constructed is not visible here — confirm it is a
# gym Taxi environment whose current state is stored in `.s`.
env.reset()
env.unwrapped.s = START_STATE
env.render()

# %%
# Reset again so this cell can be re-run on its own, and keep `state` in sync
# with the environment. Previously `state` was whatever an earlier cell left
# behind, so the first greedy action was chosen for the wrong state.
env.reset()
env.unwrapped.s = START_STATE
state = START_STATE

epochs = 0
penalties, reward = 0, 0

frames = []  # for animation

done = False

while not done:
    # Always take the greedy action from the learned Q-table.
    action = np.argmax(q_table[state])
    state, reward, done, info = env.step(action)

    # A reward of -10 is what this notebook counts as a penalty.
    if reward == -10:
        penalties += 1

    # Put each rendered frame into dict for animation
    frames.append({
        'frame': env.render(mode='ansi'),
        'state': state,
        'action': action,
        'reward': reward
        }
    )

    epochs += 1

print("Timesteps taken: {}".format(epochs))
print("Penalties incurred: {}".format(penalties))

# %%
# Final board after the rollout.
env.render()

# %% [markdown]
# ## References
#
# https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/
#
# https://developer.nvidia.com/blog/deep-learning-nutshell-reinforcement-learning/
#
# https://medium.com/@MoneyAndData/ai-anyone-can-understand-part-1-reinforcement-learning-6c3b3d623a2d
#
# https://arshren.medium.com/deep-q-learning-a-deep-reinforcement-learning-algorithm-f1366cf1b53d
#
# https://www.coursera.org/specializations/reinforcement-learning
"nFefBluGFT6Z" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "my6tYnl6FV04" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file