Add files via upload

master
ritvikmath 2020-09-24 16:51:44 -07:00 committed by GitHub
parent 348639903f
commit c7918ac389
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 236 additions and 0 deletions

View File

@ -0,0 +1,236 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from random import choice"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"class Restaurant:\n",
"    \"\"\"A candidate whose scores are drawn from a normal distribution.\"\"\"\n",
"\n",
"    def __init__(self, mu, dev):\n",
"        # mu and dev are passed to np.random.normal as its mean and standard deviation.\n",
"        self.mu, self.dev = mu, dev\n",
"\n",
"    def sample(self):\n",
"        \"\"\"Draw one score from Normal(self.mu, self.dev).\"\"\"\n",
"        return np.random.normal(loc=self.mu, scale=self.dev)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"def explore_only(candidates, num_days):\n",
"    \"\"\"Return the total score from sampling a uniformly random candidate on each day.\"\"\"\n",
"    return sum(choice(candidates).sample() for _ in range(num_days))"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"def exploit_only(candidates, num_days):\n",
"    \"\"\"Sample every candidate once, then keep sampling the one that scored highest.\n",
"\n",
"    Returns the sum of all scores, including the initial one-per-candidate round.\n",
"    \"\"\"\n",
"    visits = [place.sample() for place in candidates]\n",
"    favourite = candidates[np.argmax(visits)]\n",
"    days_left = num_days - len(candidates)\n",
"    # range() is empty when days_left <= 0, so only the initial round is summed then.\n",
"    visits.extend(favourite.sample() for _ in range(days_left))\n",
"    return sum(visits)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"def epsilon_greedy(candidates, num_days, epsilon=0.05):\n",
"    \"\"\"Play an epsilon-greedy strategy and return the total score collected.\n",
"\n",
"    Every candidate is sampled once up front. On each remaining day a uniformly\n",
"    random candidate is sampled with probability epsilon; otherwise the candidate\n",
"    with the highest mean score so far is sampled.\n",
"\n",
"    The returned total includes the initial one-per-candidate round, so it spans\n",
"    the same days as exploit_only and can be compared with a num_days-long optimum.\n",
"    \"\"\"\n",
"    # history[i] holds every score observed so far for candidates[i].\n",
"    history = [[c.sample()] for c in candidates]\n",
"    total = sum(obs[0] for obs in history)\n",
"    arms = range(len(candidates))\n",
"    for _ in range(num_days - len(candidates)):\n",
"        if np.random.random() < epsilon:\n",
"            #explore\n",
"            idx = choice(arms)\n",
"        else:\n",
"            #exploit\n",
"            idx = max(arms, key=lambda i: np.mean(history[i]))\n",
"        # Track the index directly instead of searching the list for the chosen object.\n",
"        score = candidates[idx].sample()\n",
"        total += score\n",
"        history[idx].append(score)\n",
"    return total"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"def ucb1(candidates, num_days):\n",
"    \"\"\"Play the UCB1 strategy and return the total score collected.\n",
"\n",
"    Every candidate is sampled once up front. On each later day the candidate that\n",
"    maximises mean + sqrt(2*ln(t) / n) is sampled, where t is the number of samples\n",
"    drawn so far and n is how many of them came from that candidate.\n",
"\n",
"    The returned total includes the initial one-per-candidate round, so it spans\n",
"    the same days as exploit_only and can be compared with a num_days-long optimum.\n",
"    \"\"\"\n",
"    # history[i] holds every score observed so far for candidates[i].\n",
"    history = [[c.sample()] for c in candidates]\n",
"    total = sum(obs[0] for obs in history)\n",
"    for t in range(len(candidates), num_days):\n",
"        mu_plus_ucb = [np.mean(obs) + np.sqrt(2*np.log(t) / len(obs)) for obs in history]\n",
"        # Track the index directly instead of searching the list for the chosen object.\n",
"        idx = int(np.argmax(mu_plus_ucb))\n",
"        score = candidates[idx].sample()\n",
"        total += score\n",
"        history[idx].append(score)\n",
"    return total"
]
},
{
"cell_type": "code",
"execution_count": 171,
"metadata": {},
"outputs": [],
"source": [
"# Restaurant i (1-based) gets mean 3*i and standard deviation dev_factor times that mean.\n",
"num_restaurants = 3\n",
"dev_factor = 0.5\n",
"num_days = 300\n",
"\n",
"mu_vals = [3 * rank for rank in range(1, num_restaurants + 1)]\n",
"dev_vals = [dev_factor * mu for mu in mu_vals]\n",
"mu_dev_pairs = zip(mu_vals, dev_vals)\n",
"candidates = [Restaurant(*pair) for pair in mu_dev_pairs]\n",
"\n",
"# Despite its name this is a total: the best mean score times the number of days.\n",
"optimal_average = num_days * max(mu_vals)"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Explore Only Mean Regret: 0.33400345242040025\n"
]
}
],
"source": [
"# Totals from 1000 independent runs of the explore-only strategy.\n",
"explore_only_vals = [explore_only(candidates, num_days) for _ in range(1000)]\n",
"# Printed value: shortfall of the mean total against optimal_average, as a fraction of it.\n",
"print('Explore Only Mean Regret: %s'%((optimal_average - np.mean(explore_only_vals)) / optimal_average))"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exploit Only Mean Regret: 0.10974979914722435\n"
]
}
],
"source": [
"# Totals from 1000 independent runs of the exploit-only strategy.\n",
"exploit_only_vals = [exploit_only(candidates, num_days) for _ in range(1000)]\n",
"# Printed value: shortfall of the mean total against optimal_average, as a fraction of it.\n",
"print('Exploit Only Mean Regret: %s'%((optimal_average - np.mean(exploit_only_vals)) / optimal_average))"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epsilon Greedy Mean Regret (10%): 0.061901290618584424\n"
]
}
],
"source": [
"# Totals from 1000 independent runs of epsilon-greedy with epsilon = 0.1.\n",
"epsilon_greedy_vals = [epsilon_greedy(candidates, num_days, 0.1) for _ in range(1000)]\n",
"# Printed value: shortfall of the mean total against optimal_average, as a fraction of it.\n",
"print('Epsilon Greedy Mean Regret (10%%): %s'%((optimal_average - np.mean(epsilon_greedy_vals)) / optimal_average))"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"UCB1 Mean Regret: 0.05807450789812113\n"
]
}
],
"source": [
"# Totals from 1000 independent runs of the UCB1 strategy.\n",
"ucb1_vals = [ucb1(candidates, num_days) for _ in range(1000)]\n",
"# Printed value: shortfall of the mean total against optimal_average, as a fraction of it.\n",
"print('UCB1 Mean Regret: %s'%((optimal_average - np.mean(ucb1_vals)) / optimal_average))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}