Add files via upload
parent
348639903f
commit
c7918ac389
|
@ -0,0 +1,236 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 89,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"from random import choice"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
class Restaurant:
    """A restaurant whose per-visit score is drawn from a normal distribution.

    Attributes:
        mu: value passed to NumPy as the mean (``loc``) of the distribution.
        dev: value passed to NumPy as the standard deviation (``scale``).
    """

    def __init__(self, mu, dev):
        self.mu, self.dev = mu, dev

    def sample(self):
        """Return one score drawn from Normal(mu, dev) via NumPy's global RNG."""
        return np.random.normal(loc=self.mu, scale=self.dev)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
def explore_only(candidates, num_days):
    """Pick a random restaurant every day and return the summed scores.

    Args:
        candidates: sequence of objects exposing ``sample()``.
        num_days: number of visits to simulate.

    Returns:
        The sum of the ``num_days`` sampled scores (``0`` when no day is
        simulated).
    """
    # One independent random.choice() per day; nothing is learned between days.
    return sum(choice(candidates).sample() for _ in range(num_days))
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
def exploit_only(candidates, num_days):
    """Try each restaurant once, then commit to the best-looking one.

    Each candidate is sampled a single time (in order); the candidate with
    the highest trial score is then visited on every remaining day.  The
    trial visits count against the ``num_days`` budget, so the total number
    of visits never exceeds ``num_days`` -- if the budget is smaller than the
    number of candidates only the first ``num_days`` candidates are tried.

    Args:
        candidates: sequence of objects exposing ``sample()``.
        num_days: total number of visits to simulate.

    Returns:
        The sum of all sampled scores (``0`` when ``num_days <= 0``).

    Raises:
        ValueError: if ``candidates`` is empty while ``num_days > 0``.
    """
    if num_days <= 0:
        return 0
    if len(candidates) == 0:
        raise ValueError("candidates must contain at least one restaurant")

    # Trial phase: one visit per candidate, capped by the day budget.
    trial_days = min(len(candidates), num_days)
    scores = [c.sample() for c in candidates[:trial_days]]

    # Commit phase: np.argmax resolves ties in favour of the first candidate.
    chosen = candidates[int(np.argmax(scores))]
    for _ in range(num_days - trial_days):
        scores.append(chosen.sample())
    return sum(scores)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 93,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
def epsilon_greedy(candidates, num_days, epsilon=0.05):
    """Simulate an epsilon-greedy visiting policy and return the total score.

    Every candidate is first visited once (warm-up).  On each remaining day
    a random candidate is visited with probability ``epsilon`` (explore);
    otherwise the candidate with the highest mean score so far is visited
    (exploit).  The warm-up visits are part of the ``num_days`` budget and
    their scores are included in the returned total, so the result always
    covers exactly ``num_days`` visits (if the budget is smaller than the
    number of candidates, only the first ``num_days`` candidates are visited).

    Args:
        candidates: sequence of objects exposing ``sample()``.
        num_days: total number of visits to simulate.
        epsilon: probability of exploring on a given day.

    Returns:
        The sum of all sampled scores (``0`` when ``num_days <= 0``).

    Raises:
        ValueError: if ``candidates`` is empty while ``num_days > 0``.
    """
    if num_days <= 0:
        return 0
    num_candidates = len(candidates)
    if num_candidates == 0:
        raise ValueError("candidates must contain at least one restaurant")

    # Warm-up: one visit per candidate, capped by the day budget.
    warmup_days = min(num_candidates, num_days)
    totals = [c.sample() for c in candidates[:warmup_days]]
    counts = [1] * warmup_days
    reward = sum(totals)

    # Candidates are tracked by position (running total / visit count), so a
    # repeated or equal-comparing candidate can never be credited to the
    # wrong slot and no history has to be re-averaged each day.
    for _ in range(num_days - warmup_days):
        if np.random.random() < epsilon:
            # explore
            idx = choice(range(num_candidates))
        else:
            # exploit; scanning in reverse makes ties go to the highest index
            idx = max(
                reversed(range(num_candidates)),
                key=lambda i: totals[i] / counts[i],
            )
        score = candidates[idx].sample()
        totals[idx] += score
        counts[idx] += 1
        reward += score
    return reward
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 94,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
def ucb1(candidates, num_days):
    """Simulate the UCB1 visiting policy and return the total score.

    Every candidate is first visited once (warm-up).  On each later day
    ``t`` (``t`` = number of visits made so far) the candidate maximising
    ``mean_score + sqrt(2 * ln(t) / visits)`` is visited.  The warm-up visits
    are part of the ``num_days`` budget and their scores are included in the
    returned total, so the result always covers exactly ``num_days`` visits
    (if the budget is smaller than the number of candidates, only the first
    ``num_days`` candidates are visited).

    Args:
        candidates: sequence of objects whose ``sample()`` returns a scalar.
        num_days: total number of visits to simulate.

    Returns:
        The sum of all sampled scores (``0`` when ``num_days <= 0``).

    Raises:
        ValueError: if ``candidates`` is empty while ``num_days > 0``.
    """
    if num_days <= 0:
        return 0
    num_candidates = len(candidates)
    if num_candidates == 0:
        raise ValueError("candidates must contain at least one restaurant")

    # Warm-up: one visit per candidate, capped by the day budget.
    warmup_days = min(num_candidates, num_days)
    first_scores = [c.sample() for c in candidates[:warmup_days]]
    reward = sum(first_scores)

    # Per-candidate running totals and visit counts, indexed by position.
    totals = np.array(first_scores, dtype=float)
    counts = np.ones(warmup_days)

    for t in range(warmup_days, num_days):
        upper_bounds = totals / counts + np.sqrt(2 * np.log(t) / counts)
        # np.argmax resolves ties in favour of the first candidate.
        idx = int(np.argmax(upper_bounds))

        score = candidates[idx].sample()
        totals[idx] += score
        counts[idx] += 1
        reward += score
    return reward
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 171,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
# Experiment configuration: restaurant means are 3, 6, ..., 3 * num_restaurants
# and each standard deviation is dev_factor times the corresponding mean.
dev_factor = 0.5
num_restaurants = 3

mu_vals = list(range(3, 3 * num_restaurants + 1, 3))
dev_vals = [dev_factor * mu for mu in mu_vals]
# zip() yields a one-shot iterator; it is exhausted once candidates is built.
mu_dev_pairs = zip(mu_vals, dev_vals)

candidates = [Restaurant(*pair) for pair in mu_dev_pairs]

num_days = 300

# Despite the name, this is the expected *total* over num_days visits to the
# restaurant with the highest mean; the regret cells divide by it.
optimal_average = num_days * max(mu_vals)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 172,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Explore Only Mean Regret: 0.33400345242040025\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
# Run explore_only 1000 times and report the relative shortfall of its mean
# total against optimal_average.
explore_only_vals = [explore_only(candidates, num_days) for _ in range(1000)]
print(
    'Explore Only Mean Regret: %s'
    % ((optimal_average - np.mean(explore_only_vals)) / optimal_average)
)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 173,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Exploit Only Mean Regret: 0.10974979914722435\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
# Run exploit_only 1000 times and report the relative shortfall of its mean
# total against optimal_average.
exploit_only_vals = [exploit_only(candidates, num_days) for _ in range(1000)]
print(
    'Exploit Only Mean Regret: %s'
    % ((optimal_average - np.mean(exploit_only_vals)) / optimal_average)
)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 174,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Epsilon Greedy Mean Regret (10%): 0.061901290618584424\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
# Run epsilon_greedy (epsilon = 0.1) 1000 times and report the relative
# shortfall of its mean total against optimal_average.
epsilon_greedy_vals = [
    epsilon_greedy(candidates, num_days, 0.1) for _ in range(1000)
]
print(
    'Epsilon Greedy Mean Regret (10%%): %s'
    % ((optimal_average - np.mean(epsilon_greedy_vals)) / optimal_average)
)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 175,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"UCB1 Mean Regret: 0.05807450789812113\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
# Run ucb1 1000 times and report the relative shortfall of its mean total
# against optimal_average.
ucb1_vals = [ucb1(candidates, num_days) for _ in range(1000)]
print(
    'UCB1 Mean Regret: %s'
    % ((optimal_average - np.mean(ucb1_vals)) / optimal_average)
)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
Loading…
Reference in New Issue