"""Multi-armed bandit strategies compared on a "which restaurant?" toy problem.

This is the code of the notebook ``Multi Arm Bandit.ipynb``.  Every
strategy takes a list of :class:`Restaurant` arms and a number of days, eats
at exactly one restaurant per day, and returns the total score collected.
The strategies that start with a "try every restaurant once" round spend
``len(candidates)`` of the ``num_days`` on it, so ``num_days`` is expected to
be at least ``len(candidates)``.
"""

from random import choice

import numpy as np


class Restaurant:
    """A bandit arm whose score is drawn from a normal distribution."""

    def __init__(self, mu, dev):
        # mu: mean of the score distribution; dev: its standard deviation.
        self.mu = mu
        self.dev = dev

    def __repr__(self):
        return "%s(mu=%r, dev=%r)" % (type(self).__name__, self.mu, self.dev)

    def sample(self):
        """Return one random score drawn from ``Normal(mu, dev)``."""
        return np.random.normal(self.mu, self.dev)


def explore_only(candidates, num_days):
    """Pick a uniformly random restaurant every day.

    Returns the sum of the ``num_days`` sampled scores.
    """
    return sum(choice(candidates).sample() for _ in range(num_days))


def exploit_only(candidates, num_days):
    """Try every restaurant once, then commit to the best-looking one.

    The first ``len(candidates)`` days are the trial round; the restaurant
    with the highest single trial score is visited on all remaining days.
    Returns the sum of all sampled scores, trial round included.
    """
    scores = [c.sample() for c in candidates]
    best = candidates[int(np.argmax(scores))]
    scores.extend(best.sample() for _ in range(num_days - len(candidates)))
    return sum(scores)


def epsilon_greedy(candidates, num_days, epsilon=0.05):
    """Epsilon-greedy: explore with probability ``epsilon``, else exploit.

    Every restaurant is visited once first.  On each remaining day a random
    restaurant is chosen with probability ``epsilon``; otherwise the one with
    the highest average score so far is chosen.

    Returns the sum of all sampled scores.  The initial round is included:
    those visits use up days, so their rewards belong in the total that is
    compared against a ``num_days``-long optimum.
    """
    # Running per-arm reward totals and visit counts.  Arms are tracked by
    # position, so a list that contains equal/duplicate arms is handled
    # correctly and no per-day list scan or re-averaging is needed.
    totals = [c.sample() for c in candidates]
    counts = [1] * len(candidates)
    arms = range(len(candidates))

    def average(idx):
        return totals[idx] / counts[idx]

    for _ in range(num_days - len(candidates)):
        if np.random.random() < epsilon:
            # explore
            idx = choice(arms)
        else:
            # exploit; scanning from the end keeps the highest index on ties
            idx = max(reversed(arms), key=average)
        totals[idx] += candidates[idx].sample()
        counts[idx] += 1
    return sum(totals)


def ucb1(candidates, num_days):
    """UCB1: pick the arm maximising ``mean + sqrt(2 * ln(t) / n_arm)``.

    Every restaurant is visited once first; ``t`` is the number of visits
    made so far and ``n_arm`` the number of visits to that restaurant.

    Returns the sum of all sampled scores, initial round included (those
    visits consume days of the ``num_days`` budget).
    """
    totals = np.array([c.sample() for c in candidates], dtype=float)
    counts = np.ones(len(candidates))
    for t in range(len(candidates), num_days):
        upper_bounds = totals / counts + np.sqrt(2 * np.log(t) / counts)
        idx = int(np.argmax(upper_bounds))
        totals[idx] += candidates[idx].sample()
        counts[idx] += 1
    return float(totals.sum())


# --- Experiment configuration -------------------------------------------

dev_factor = 0.5
num_restaurants = 3

mu_vals = [3 * i for i in range(1, num_restaurants + 1)]
dev_vals = [mu * dev_factor for mu in mu_vals]
# Materialised as a list so the pairs can be iterated more than once.
mu_dev_pairs = list(zip(mu_vals, dev_vals))

candidates = [Restaurant(mu, dev) for mu, dev in mu_dev_pairs]

num_days = 300

# Expected total score when the best restaurant is visited every day.
optimal_average = max(mu_vals) * num_days


def _simulate(strategy, *extra_args, runs=1000):
    """Run ``strategy`` ``runs`` times on the module-level experiment."""
    return [strategy(candidates, num_days, *extra_args) for _ in range(runs)]


def _relative_regret(vals):
    """Return the mean shortfall from the optimum, as a fraction of it."""
    return (optimal_average - np.mean(vals)) / optimal_average


if __name__ == "__main__":
    explore_only_vals = _simulate(explore_only)
    print('Explore Only Mean Regret: %s' % _relative_regret(explore_only_vals))

    exploit_only_vals = _simulate(exploit_only)
    print('Exploit Only Mean Regret: %s' % _relative_regret(exploit_only_vals))

    epsilon_greedy_vals = _simulate(epsilon_greedy, 0.1)
    print('Epsilon Greedy Mean Regret (10%%): %s' % _relative_regret(epsilon_greedy_vals))

    ucb1_vals = _simulate(ucb1)
    print('UCB1 Mean Regret: %s' % _relative_regret(ucb1_vals))