From df2f1cdc44d773085caf3b639879d82f567021ef Mon Sep 17 00:00:00 2001 From: tommyod Date: Sun, 7 Jan 2018 18:52:48 +0100 Subject: [PATCH] Added missing brackets --- README.md | 2 +- data/Metadata_analysis.ipynb | 433 +++++++++++++++++++++++++ data/notebook.tex | 592 +++++++++++++++++++++++++++++++++++ data/output_9_1.png | Bin 0 -> 4161 bytes 4 files changed, 1026 insertions(+), 1 deletion(-) create mode 100644 data/Metadata_analysis.ipynb create mode 100644 data/notebook.tex create mode 100644 data/output_9_1.png diff --git a/README.md b/README.md index c394f48..77614ce 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ quantified roughly as follows: | Title | Speaker | Uploader | Time | Views | Year | Level | | ----- | ------- | -------- | ---- | ----- | ---- | ----- | | :star: [Pandas From The Ground Up](https://www.youtube.com/watch?v=5JnMutdy6Fw) [[repo](https://github.com/brandon-rhodes/pycon-pandas-tutorial)] | Brandon Rhodes | PyCon 2015 | 2:24 | 91000 | 2015 | :smiley: | -| [Introduction Into Pandas](https://www.youtube.com/watch?v=-NR-ynQg0YM) [repo](https://github.com/chendaniely/2016-pydata-carolinas-pandas) | Daniel Chen | Python Tutorial | 1:28 | 46000 | 2017 | :smiley: | +| [Introduction Into Pandas](https://www.youtube.com/watch?v=-NR-ynQg0YM) [[repo](https://github.com/chendaniely/2016-pydata-carolinas-pandas)] | Daniel Chen | Python Tutorial | 1:28 | 46000 | 2017 | :smiley: | | [Introduction To Data Analytics With Pandas](https://www.youtube.com/watch?v=5XGycFIe8qE) [[repo](https://github.com/QCaudron/pydata_pandas)] | Quentin Caudron | Python Tutorial | 1:51 | 25000 | 2017 | :smiley: | | [Pandas for Data Analysis](https://www.youtube.com/watch?v=oGzU688xCUs) [[repo](https://github.com/chendaniely/scipy-2017-tutorial-pandas)] | Daniel Chen | Enthought | 3:45 | 13000 | 2017 | :sweat_smile: | | [Optimizing Pandas Code](https://www.youtube.com/watch?v=HN5d490_KKk) [[repo](https://github.com/sversh/pycon2017-optimizing-pandas)] | Sofia Heisler | PyCon 2017 | 0:29 | 12000 | 2017 | :sweat_smile: | diff --git a/data/Metadata_analysis.ipynb b/data/Metadata_analysis.ipynb new file mode 100644 index 0000000..a442c3e --- /dev/null +++ b/data/Metadata_analysis.ipynb @@ -0,0 +1,433 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Metadata analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import bs4\n", + "import requests\n", + "import string\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def int_str(a_string):\n", + " try:\n", + " return int(a_string)\n", + " except:\n", + " return 0\n", + "\n", + "def info_from_youtube(youtube_video_url):\n", + " \"\"\"\n", + " Retrieve information from a YouTube url.\"\"\"\n", + " try:\n", + " soup = bs4.BeautifulSoup(requests.get(youtube_video_url).text, 'lxml')\n", + " title = soup.title.text.strip()\n", + " views_text = soup.find('div', attrs = {'class':'watch-view-count'}).text\n", + " views = int_str(''.join([c for c in views_text if c in string.digits]))\n", + " published_text = soup.find('strong', attrs = {'class':'watch-time-text'}).text\n", + " published = int_str(''.join([c for c in published_text[-4:] if c in string.digits]))\n", + " publisher = soup.find('div', attrs = {'class':'yt-user-info'}).text.strip('\\n')\n", + " num_likes_text = soup.find('button', attrs = {'class':'like-button-renderer-like-button'}).text\n", + " num_likes = int_str(''.join([c for c in num_likes_text if c in string.digits]))\n", + " percentage_likes_text = soup.find('div', attrs = {'class':'video-extras-sparkbar-likes'})['style']\n", + " percentage_likes = float(''.join([c for c in percentage_likes_text if c in string.digits + '.']))\n", + "\n", + " return {'url':youtube_video_url.strip(), \n", + " 'title':title, \n", + " 'views':views, \n", + " 'published_yr':published, \n", + " 'publisher':publisher,\n", + " 'num_likes':num_likes, \n", + " 'percentage_likes':percentage_likes}\n", + " except:\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "urls = [\n", + "'https://www.youtube.com/watch?v=OB1reY6IX-o',\n", + "'https://www.youtube.com/watch?v=80fZrVMurPM',\n", + "'https://www.youtube.com/watch?v=gtejJ3RCddE',\n", + "'https://www.youtube.com/watch?v=Ejh0ftSjk6g',\n", + "'https://www.youtube.com/watch?v=ZgHGCfwExw0',\n", + "'https://www.youtube.com/watch?v=6ohWS7J1hVA',\n", + "'https://www.youtube.com/watch?v=MKucn8NtVeI',\n", + "'https://www.youtube.com/watch?v=He9MCbs1wgE',\n", + "'https://www.youtube.com/watch?v=XbxIo7ScVzc',\n", + "'https://www.youtube.com/watch?v=EKUy0TSLg04',\n", + "'https://www.youtube.com/watch?v=2kT6QOVSgSg',\n", + "'https://www.youtube.com/watch?v=lKcwuPnSHIQ',\n", + "'https://www.youtube.com/watch?v=5Md_sSsN51k',\n", + "'https://www.youtube.com/watch?v=-lXfsWP7DJ8',\n", + "'https://www.youtube.com/watch?v=v5mrwq7yJc4',\n", + "'https://www.youtube.com/watch?v=p7Mj-4kASmI',\n", + "'https://www.youtube.com/watch?v=1AwG0T4gaO0',\n", + "'https://www.youtube.com/watch?v=8Jktm-Imt-I',\n", + "'https://www.youtube.com/watch?v=rARMKS8jE9g',\n", + "'https://www.youtube.com/watch?v=38R7jiCspkw',\n", + "'https://www.youtube.com/watch?v=HN5d490_KKk',\n", + "'https://www.youtube.com/watch?v=xn9sTXR3Cp8',\n", + "'https://www.youtube.com/watch?v=RA_2qdipVng',\n", + "'https://www.youtube.com/watch?v=zmfe2RaX-14',\n", + "'https://www.youtube.com/watch?v=TMmSESkhRtI',\n", + "'https://www.youtube.com/watch?v=W5WE9Db2RLU',\n", + "'https://www.youtube.com/watch?v=FytuB8nFHPQ',\n", + "'https://www.youtube.com/watch?v=u682UpVrMVM',\n", + "'https://www.youtube.com/watch?v=E92jDCmJNek',\n", + "'https://www.youtube.com/watch?v=ThS4juptJjQ',\n", + "'https://www.youtube.com/watch?v=gSVvxOchT8Y',\n", + "'https://www.youtube.com/watch?v=tJ-O3hk1vRw',\n", + "'https://www.youtube.com/watch?v=HC0J_SPm9co',\n", + "'https://www.youtube.com/watch?v=ZIEyHdvF474',\n", + "'https://www.youtube.com/watch?v=9fOWryQq9J8',\n", + "'https://www.youtube.com/watch?v=E9XTOnEgqRY',\n", + "'https://www.youtube.com/watch?v=GMKZD1Ohlzk',\n", + "'https://www.youtube.com/watch?v=dye7rDktJ2E',\n", + "'https://www.youtube.com/watch?v=39vJRxIPSxw',\n", + "'https://www.youtube.com/watch?v=rIofV14c0tc',\n", + "'https://www.youtube.com/watch?v=cKPlPJyQrt4',\n", + "'https://www.youtube.com/watch?v=bvHgESVuS6Q',\n", + "'https://www.youtube.com/watch?v=k7hSD_-gWMw',\n", + "'https://www.youtube.com/watch?v=YkVscKsV_qk',\n", + "'https://www.youtube.com/watch?v=aXR2d9k9-h4',\n", + "'https://www.youtube.com/watch?v=XJOt4QQgx0A',\n", + "'https://www.youtube.com/watch?v=HTLu2DFOdTg',\n", + "'https://www.youtube.com/watch?v=Ta1bAMOMFOI',\n", + "'https://www.youtube.com/watch?v=jKBwGlYb13w',\n", + "'https://www.youtube.com/watch?v=u2KZJzoz-qI',\n", + "'https://www.youtube.com/watch?v=OSGv2VnC0go',\n", + "'https://www.youtube.com/watch?v=9zinZmE3Ogk',\n", + "'https://www.youtube.com/watch?v=p33CVV29OG8',\n", + "'https://www.youtube.com/watch?v=9zinZmE3Ogk',\n", + "'https://www.youtube.com/watch?v=p33CVV29OG8',\n", + "'https://www.youtube.com/watch?v=voXVTjwnn-U',\n", + "'https://www.youtube.com/watch?v=9zinZmE3Ogk',\n", + "'https://www.youtube.com/watch?v=_Ek3A2b-nHU',\n", + "'https://www.youtube.com/watch?v=zhpWhkW8kcc',\n", + "'https://www.youtube.com/watch?v=Dgnp28Ijm_M',\n", + "'https://www.youtube.com/watch?v=7i6kBz1kZ-A',\n", + "'https://www.youtube.com/watch?v=g-YCaX3ml2Q',\n", + "'https://www.youtube.com/watch?v=rfdzOZkDqYk',\n", + "'https://www.youtube.com/watch?v=5-qadlG7tWo',\n", + "'https://www.youtube.com/watch?v=j6VSAsKAj98',\n", + "'https://www.youtube.com/watch?v=js_0wjzuMfc',\n", + "'https://www.youtube.com/watch?v=Z_OAlIhXziw',\n", + "'https://www.youtube.com/watch?v=Bm96RqNGbGo',\n", + "'https://www.youtube.com/watch?v=x1ndXuw7S0s',\n", + "'https://www.youtube.com/watch?v=2AXuhgid7E4',\n", + "'https://www.youtube.com/watch?v=5JnMutdy6Fw',\n", + "'https://www.youtube.com/watch?v=9d5-Ti6onew',\n", + "'https://www.youtube.com/watch?v=CowlcrtSyME',\n", + "'https://www.youtube.com/watch?v=YGk09nK_xnM',\n", + "'https://www.youtube.com/watch?v=5XGycFIe8qE',\n", + "'https://www.youtube.com/watch?v=-NR-ynQg0YM',\n", + "'https://www.youtube.com/watch?v=oGzU688xCUs',\n", + "'https://www.youtube.com/watch?v=R2rCYf3pv-M',\n", + "'https://www.youtube.com/watch?v=vl9La7wH7QI',\n", + "'https://www.youtube.com/watch?v=Vs7tdobwj1k',\n", + "'https://www.youtube.com/watch?v=C6ni_WoMXhU',\n", + "'https://www.youtube.com/watch?v=sunNXIxIGV8',\n", + "'https://www.youtube.com/watch?v=I9NeF9oAmbg',\n", + "'https://www.youtube.com/watch?v=0eYOhEF_aK0',\n", + "'https://www.youtube.com/watch?v=B4Hu_7_XBDM',\n", + "'https://www.youtube.com/watch?v=YrnHdgZ8n1U',\n", + "'https://www.youtube.com/watch?v=Cz_u2If7KbI',\n", + "'https://www.youtube.com/watch?v=mkQzl2v7BuI',\n", + "'https://www.youtube.com/watch?v=gS7kVg-4ZaU',\n", + "]\n", + "urls = list(set(urls))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error with: https://www.youtube.com/watch?v=5-qadlG7tWo\n" + ] + } + ], + "source": [ + "# Parse the URLS\n", + "data_inn = []\n", + "for url in urls:\n", + " info = info_from_youtube(url)\n", + " if info:\n", + " data_inn.append(info)\n", + " else:\n", + " print('Error with:', url)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_likespercentage_likespublished_yrpublishertitleurlviews
0116898.9830512013Next Day VideoPython's Class Development Toolkit - YouTubehttps://www.youtube.com/watch?v=HTLu2DFOdTg79848
112098.3606562016PyDataSebastian Raschka | Learning scikit learn - An...https://www.youtube.com/watch?v=9fOWryQq9J811883
2125.0000002016SF PythonDillon Niederhut, \"What to do when your data i...https://www.youtube.com/watch?v=g-YCaX3ml2Q485
\n", + "
" + ], + "text/plain": [ + " num_likes percentage_likes published_yr publisher \\\n", + "0 1168 98.983051 2013 Next Day Video \n", + "1 120 98.360656 2016 PyData \n", + "2 1 25.000000 2016 SF Python \n", + "\n", + " title \\\n", + "0 Python's Class Development Toolkit - YouTube \n", + "1 Sebastian Raschka | Learning scikit learn - An... \n", + "2 Dillon Niederhut, \"What to do when your data i... \n", + "\n", + " url views \n", + "0 https://www.youtube.com/watch?v=HTLu2DFOdTg 79848 \n", + "1 https://www.youtube.com/watch?v=9fOWryQq9J8 11883 \n", + "2 https://www.youtube.com/watch?v=g-YCaX3ml2Q 485 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data_inn)\n", + "df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def rating_func(df):\n", + " \"\"\"\n", + " A rating heuristic.\n", + " \"\"\"\n", + " num_likes, percentage_likes = df.num_likes, df.percentage_likes\n", + " views, published_yr = df.views/10, df.published_yr\n", + " a, b = num_likes*percentage_likes, np.log(views)\n", + " age = abs(2018 - df.published_yr)\n", + " return (a*b / (a + b)) - age**0.8\n", + "\n", + "df = df.assign(rating = rating_func)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "85" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| [EuroSciPy 2017: Advanced Pandas - YouTube](https://www.youtube.com/watch?v=gS7kVg-4ZaU) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Cython - YouTube](https://www.youtube.com/watch?v=vl9La7wH7QI) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Git - YouTube](https://www.youtube.com/watch?v=mkQzl2v7BuI) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Matplotlib - YouTube](https://www.youtube.com/watch?v=YrnHdgZ8n1U) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: NumPy (1/2) - YouTube](https://www.youtube.com/watch?v=R2rCYf3pv-M) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: NumPy (2/2) - YouTube](https://www.youtube.com/watch?v=sunNXIxIGV8) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Pandas - YouTube](https://www.youtube.com/watch?v=Cz_u2If7KbI) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Python (1/2) - YouTube](https://www.youtube.com/watch?v=I9NeF9oAmbg) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Python (2/2) - YouTube](https://www.youtube.com/watch?v=B4Hu_7_XBDM) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: SciPy - YouTube](https://www.youtube.com/watch?v=C6ni_WoMXhU) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Scikit-learn (1/2) - YouTube](https://www.youtube.com/watch?v=Vs7tdobwj1k) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n", + "| [EuroSciPy 2017: Scikit-learn (2/2) - YouTube](https://www.youtube.com/watch?v=0eYOhEF_aK0) | NAME | EuroSciPy | DURATION | 0 | None | 2017 | Novice |\n" + ] + } + ], + "source": [ + "temp = df.sort_values(['rating'], ascending = False).drop_duplicates()\n", + "temp = temp[temp.publisher == 'EuroSciPy'].sort_values('title')\n", + "\n", + "for i in range(len(temp)):\n", + " values_dict = temp.iloc[i, :].to_dict()\n", + " string = '| [{Title}]({url}) | {Speaker} | {Uploader} | {Duration} | {Views} | {Keywords} | {Year} | {Level} |'\n", + " print(string.format(\n", + " Title = values_dict['title'].replace('|',' '), \n", + " Speaker = 'NAME', \n", + " Uploader = values_dict['publisher'],\n", + " Duration = 'DURATION', \n", + " Views = int(round(values_dict['views']/1000)*1000), \n", + " Keywords = 'None', \n", + " Year = values_dict['published_yr'],\n", + " url = values_dict['url'],\n", + " Level = 'Novice'))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAD55JREFUeJzt3X9sXfV9xvHnGaEiiSFhAl2xBM2dVEVC8UbJFaNDYnYzthRQW02rCmoZbEz+oxujXaYuaJqq/bENaU3VSps2RcBgKoq1BqpVQDuiNh5DAjo7pTUQunZdBgk0acUINYvEsn72h08k201y7/nhe3w/e78kK77nnvv1k+vrR+d+7/nhiBAAYPj9VNsBAADNoNABIAkKHQCSoNABIAkKHQCSoNABIAkKHQCSoNABIAkKHQCSWDPIH3bJJZfE6OjokmVvvfWW1q9fP8gYjSD34AxjZoncg5Y59+zs7A8j4tKeg0XEwL62bdsWyx04cOAnlg0Dcg/OMGaOIPegZc4taSb66FimXAAgCQodAJKg0AEgCQodAJKg0AEgiZ6Fbvt+28dtP79o2V/afsn2t2x/0fbGlY0JAOilny30ByTtWLZsv6StEfHzkv5N0t0N5wIAlNSz0CPiSUmvL1v2REScKm4+I2nzCmQDAJTQxBz6b0v6cgPjAABqcPRxkWjbo5IejYity5b/saSupF+Pswxke1LSpCR1Op1tU1NTS+6fn5/XyMhIleytIvfgDFvmuaMnJEmdtdKxk73XH9u0YYUTlTNsz/dpmXNPTEzMRkS311iVz+Vi+zZJN0nafrYyl6SI2CNpjyR1u90YHx9fcv/09LSWLxsG5B6cYct8+67HJEk7x05p91zvP7HDHxlf4UTlDNvzfRq5Kxa67R2S/kjSL0fEfzeSBABQSz+7Le6V9LSkLbaP2L5D0l9JulDSftvP2f7bFc4JAOih5xZ6RNxyhsX3rUAWAEANHCkKAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQRM9Ct32/7eO2n1+07Kdt77f9neLfi1c2JgCgl3620B+QtGPZsl2SvhoR75L01eI2AKBFPQs9Ip6U9PqyxR+Q9GDx/YOSPthwLgBASY6I3ivZo5IejYitxe03ImLjovv/KyLOOO1ie1LSpCR1Op1tU1NTS+6fn5/XyMhI1fytIffgDFvmuaMnJEmdtdKxk73XH9u0odL4/So7/rA936dlzj0xMTEbEd1eY61pLNVZRMQeSXskqdvtxvj4+JL7p6entXzZMCD34Axb5tt3PSZJ2jl2Srvnev+JHf7IeKXx+1V2/GF7vk8jd/W9XI7ZvkySin+PN5IGAFBZ1UL/kqTbiu9vk/SPzcQBAFTVz26LeyU9LWmL7SO275B0j6TrbX9H0vXFbQBAi3pO8EXELWe5a3vDWQAANXCkKAAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkUavQbX/C9gu2n7e91/YFTQUDAJRTudBtb5L0+5K6EbFV0nmSbm4qGACgnLpTLmskrbW9RtI6Sa/WjwQAqKJyoUfEUUmflvSypNcknYiIJ5oKBgAoxxFR7YH2xZIelvRhSW9I+oKkfRHx+WXrTUqalKROp7NtampqyTjz8/MaGRmplKFN5B6cYcs8d/SEJKmzVjp2svf6Y5s2VBq/X2XHH7bn+7TMuScmJmYjottrrDqF/iFJOyLijuL2b0q6JiI+drbHdLvdmJmZWbJsenpa4+PjlTK0idyDM2yZR3c9JknaOXZKu+fW9Fz/8D03Vhq/X2XHH7bn+7TMuW33Veh15tBflnSN7XW2LWm7pEM1xgMA1FBnDv1ZSfskHZQ0V4y1p6FcAICSer8fPIeI+JSkTzWUBQBQA0eKAkASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJEGhA0ASFDoAJFGr0G1vtL3P9ku2D9l+T1PBAADlrKn5+M9J+kpE/Ibtd0ha10AmAEAFlQvd9kWSrpN0uyRFxNuS3m4mFgCgLEdEtQfaV0raI+lFSb8gaVbSXRHx1rL1JiVNSlKn09k2NTW1ZJz5+XmNjIxUytAmcg9Ok5nnjp5oZJx+dNZKx072Xm9s04ZS4670/6Hf3KeVzb9ShvG1LfWXe2JiYjYiur3GqlPoXUnPSLo2Ip61/TlJb0bEn5ztMd1uN2ZmZpYsm56e1vj4eKUMbSL34DSZeXTXY42M04+dY6e0e673m+DD99xYatyV/j/0m/u0svlXyjC+tqX+ctvuq9DrfCh6RNKRiHi2uL1P0lU1xgMA1FC50CPi+5Jesb2lWLRdC9MvAIAW1N3L5U5JDxV7uHxP0m/VjwQAqKJWoUfEc5J6zusAAFYeR4oCQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkQaEDQBIUOgAkUfdsiwBqGuRFN5AbW+gAkASFDgBJUOgAkASFDgBJUOgAkASFDgBJUOgAkASFDgBJUOgAkASFDgBJUOgAkASFDgBJ1C502+fZ/obtR5sIBACopokt9LskHWpgHABADbUK3fZmSTdKureZOACAqupuoX9W0icl/biBLACAGhwR1R5o3yTphoj4mO1xSX8YETedYb1JSZOS1Ol0tk1NTS25f35+XiMjI5UytIncg9Nk5rmjJxoZpx+dtdKxkwP7cY0pm3ts04bSP2Mlfg+Lc1fJ1JZ+Xt8TExOzEdHtNVadQv8LSbdKOiXpAkkXSXokIj56tsd0u92YmZlZsmx6elrj4+OVMrSJ3IPTZOZBXh1o59gp7Z4bvouClc19+J4bS/+Mlfg9LM5dJVNb+nl92+6r0CtPuUTE3RGxOSJGJd0s6WvnKnMAwMpiP3QASKKR94MRMS1puomxAADVsIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQBIUOAElQ6ACQxPCdrBlDpex5r8uex3qQ5zcHVju20AEgCQodAJKg0AEgCQodAJKg0AEgCQodAJKg0AEgCQodAJKg0AEgCQodAJKg0AEgCQodAJKg0AEgicqFbvty2wdsH7L9gu27mgwGACinzulzT0naGREHbV8oadb2/oh4saFsAIASKm+hR8RrEXGw+P5Hkg5J2tRUMABAOY6I+oPYo5KelLQ1It5cdt+kpElJ6nQ626amppY8dn5+XiMjI7UzDBq5+zN39ESp9cc2bfiJZefKXHb8QeqslY6dbDtFeWVzn+l31stK/N4W566SqS39/E1OTEzMRkS311i1C932iKR/lvRnEfHIudbtdrsxMzOzZNn09LTGx8drZWgDufvTxBWLzpV5NV+xaOfYKe2eG76LgpXNXfYqU9LK/N4W566SqS39/E3a7qvQa+3lYvt8SQ9LeqhXmQMAVladvVws6T5JhyLiM81FAgBUUWcL/VpJt0p6r+3niq8bGsoFACip8gRfRDwlyQ1mAQDUwJGiAJAEhQ4ASVDoAJAEhQ4ASVDoAJAEhQ4ASVDoAJAEhQ4ASVDoAJAEhQ4ASVDoAJAEhQ4ASQzN2fcHcSGD1XZS/CYuDtHk+MCZZHgdZekXttABIAkKHQCSoNABIAkKHQCSoNABIAkKHQCSoNABIAkKHQCSoNABIAkKHQCSoNABIAkKHQCSqFXotnfY/rbt79re1VQoAEB5lQvd9nmS/lrS+yRdIekW21c0FQwAUE6dLfSrJX03Ir4XEW9LmpL0gWZiAQDKqlPomyS9suj2kWIZAKAFjohqD7Q/JOnXIuJ3itu3Sro6Iu5ctt6kpMni5hZJ31421CWSflgpRLvIPTjDmFki96Blzv2zEXFpr4HqXLHoiKTLF93eLOnV5StFxB5Je842iO2ZiOjWyNEKcg/OMGaWyD1o5K435fKvkt5l+5223yHpZklfaiIUAKC8ylvoEXHK9u9J+idJ50m6PyJeaCwZAKCUWheJjojHJT1eM8NZp2NWOXIPzjBmlsg9aP/vc1f+UBQAsLpw6D8AJNFaodu+3/Zx28+3laEs25fbPmD7kO0XbN/VdqZ+2L7A9tdtf7PI/adtZyrD9nm2v2H70baz9Mv2Ydtztp+zPdN2nn7Z3mh7n+2Xitf5e9rO1IvtLcXzfPrrTdsfbztXL7Y/Ufw9Pm97r+0Lao/Z1pSL7eskzUv6+4jY2kqIkmxfJumyiDho+0JJs5I+GBEvthztnGxb0vqImLd9vqSnJN0VEc+0HK0vtv9AUlfSRRFxU9t5+mH7sKRuRAzVftG2H5T0LxFxb7H32rqIeKPtXP0qTklyVNIvRsR/tp3nbGxv0sLf4RURcdL2P0h6PCIeqDNua1voEfGkpNfb+vlVRMRrEXGw+P5Hkg5pCI6OjQXzxc3zi6+h+PDE9mZJN0q6t+0s2dm+SNJ1ku6TpIh4e5jKvLBd0r+v5jJfZI2ktbbXSFqnMxzHUxZz6BXZHpX0bknPtpukP8W0xXOSjkvaHxFDkVvSZyV9UtKP2w5SUkh6wvZscbT0MPg5ST+Q9HfFFNe9tte3HaqkmyXtbTtELxFxVNKnJb0s6TVJJyLiibrjUugV2B6R9LCkj0fEm23n6UdE/G9EXKmFI3qvtr3qp7ls3yTpeETMtp2lgmsj4iotnI30d4spxtVujaSrJP1NRLxb0luShua02MUU0fslfaHtLL3YvlgLJzN8p6SfkbTe9kfrjkuhl1TMQT8s6aGIeKTtPGUVb6GnJe1oOUo/rpX0/mI+ekrSe21/vt1I/YmIV4t/j0v6ohbOTrraHZF0ZNG7t31aKPhh8T5JByPiWNtB+vArkv4jIn4QEf8j6RFJv1R3UAq9hOLDxfskHYqIz7Sdp1+2L7W9sfh+rRZeTC+1m6q3iLg7IjZHxKgW3kp/LSJqb8WsNNvriw/NVUxZ/KqkVb83V0R8X9IrtrcUi7ZLWtUf+C9zi4ZguqXwsqRrbK8remW7Fj6Tq6XN3Rb3Snpa0hbbR2zf0VaWEq6VdKsWthRP7yJ1Q9uh+nCZpAO2v6WFc/Dsj4ih2QVwCHUkPWX7m5K+LumxiPhKy5n6daekh4rXypWS/rzlPH2xvU7S9VrY0l31indB+yQdlDSnhS6ufcQoR4oCQBJMuQBAEhQ6ACRBoQNAEhQ6ACRBoQNAEhQ6ACRBoQNAEhQ6ACTxf08cTBmCJ8NTAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.sort_values(['rating'], ascending = False).rating.hist(bins = 25)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[1](http://tomaugspurger.github.io/modern-1-intro.html)\n", + "[2](http://tomaugspurger.github.io/method-chaining.html)\n", + "[3](http://tomaugspurger.github.io/modern-3-indexes.html)\n", + "[4](http://tomaugspurger.github.io/modern-4-performance.html)\n", + "[5](http://tomaugspurger.github.io/modern-5-tidy.html)\n", + "[6](http://tomaugspurger.github.io/modern-6-visualization.html)\n", + "[7](http://tomaugspurger.github.io/modern-7-timeseries.html)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/notebook.tex b/data/notebook.tex new file mode 100644 index 0000000..e964a27 --- /dev/null +++ b/data/notebook.tex @@ -0,0 +1,592 @@ + +% Default to the notebook output style + + + + +% Inherit from the specified cell style. + + + + + +\documentclass[11pt]{article} + + + + \usepackage[T1]{fontenc} + % Nicer default font (+ math font) than Computer Modern for most use cases + \usepackage{mathpazo} + + % Basic figure setup, for now with no caption control since it's done + % automatically by Pandoc (which extracts ![](path) syntax from Markdown). + \usepackage{graphicx} + % We will generate all images so they have a width \maxwidth. This means + % that they will get their normal width if they fit onto the page, but + % are scaled down if they would overflow the margins. + \makeatletter + \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth + \else\Gin@nat@width\fi} + \makeatother + \let\Oldincludegraphics\includegraphics + % Set max figure width to be 80% of text width, for now hardcoded. + \renewcommand{\includegraphics}[1]{\Oldincludegraphics[width=.8\maxwidth]{#1}} + % Ensure that by default, figures have no caption (until we provide a + % proper Figure object with a Caption API and a way to capture that + % in the conversion process - todo). + \usepackage{caption} + \DeclareCaptionLabelFormat{nolabel}{} + \captionsetup{labelformat=nolabel} + + \usepackage{adjustbox} % Used to constrain images to a maximum size + \usepackage{xcolor} % Allow colors to be defined + \usepackage{enumerate} % Needed for markdown enumerations to work + \usepackage{geometry} % Used to adjust the document margins + \usepackage{amsmath} % Equations + \usepackage{amssymb} % Equations + \usepackage{textcomp} % defines textquotesingle + % Hack from http://tex.stackexchange.com/a/47451/13684: + \AtBeginDocument{% + \def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code + } + \usepackage{upquote} % Upright quotes for verbatim code + \usepackage{eurosym} % defines \euro + \usepackage[mathletters]{ucs} % Extended unicode (utf-8) support + \usepackage[utf8x]{inputenc} % Allow utf-8 characters in the tex document + \usepackage{fancyvrb} % verbatim replacement that allows latex + \usepackage{grffile} % extends the file name processing of package graphics + % to support a larger range + % The hyperref package gives us a pdf with properly built + % internal navigation ('pdf bookmarks' for the table of contents, + % internal cross-reference links, web links for URLs, etc.) + \usepackage{hyperref} + \usepackage{longtable} % longtable support required by pandoc >1.10 + \usepackage{booktabs} % table support for pandoc > 1.12.2 + \usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment) + \usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout) + % normalem makes italics be italics, not underlines + + + + + % Colors for the hyperref package + \definecolor{urlcolor}{rgb}{0,.145,.698} + \definecolor{linkcolor}{rgb}{.71,0.21,0.01} + \definecolor{citecolor}{rgb}{.12,.54,.11} + + % ANSI colors + \definecolor{ansi-black}{HTML}{3E424D} + \definecolor{ansi-black-intense}{HTML}{282C36} + \definecolor{ansi-red}{HTML}{E75C58} + \definecolor{ansi-red-intense}{HTML}{B22B31} + \definecolor{ansi-green}{HTML}{00A250} + \definecolor{ansi-green-intense}{HTML}{007427} + \definecolor{ansi-yellow}{HTML}{DDB62B} + \definecolor{ansi-yellow-intense}{HTML}{B27D12} + \definecolor{ansi-blue}{HTML}{208FFB} + \definecolor{ansi-blue-intense}{HTML}{0065CA} + \definecolor{ansi-magenta}{HTML}{D160C4} + \definecolor{ansi-magenta-intense}{HTML}{A03196} + \definecolor{ansi-cyan}{HTML}{60C6C8} + \definecolor{ansi-cyan-intense}{HTML}{258F8F} + \definecolor{ansi-white}{HTML}{C5C1B4} + \definecolor{ansi-white-intense}{HTML}{A1A6B2} + + % commands and environments needed by pandoc snippets + % extracted from the output of `pandoc -s` + \providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} + \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} + % Add ',fontsize=\small' for more characters per line + \newenvironment{Shaded}{}{} + \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}} + \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}} + \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + \newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}} + \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}} + \newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} + \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}} + \newcommand{\RegionMarkerTok}[1]{{#1}} + \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} + \newcommand{\NormalTok}[1]{{#1}} + + % Additional commands for more recent versions of Pandoc + \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}} + \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}} + \newcommand{\ImportTok}[1]{{#1}} + \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}} + \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}} + \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}} + \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}} + \newcommand{\BuiltInTok}[1]{{#1}} + \newcommand{\ExtensionTok}[1]{{#1}} + \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}} + \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}} + \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + + + % Define a nice break command that doesn't care if a line doesn't already + % exist. + \def\br{\hspace*{\fill} \\* } + % Math Jax compatability definitions + \def\gt{>} + \def\lt{<} + % Document parameters + \title{Metadata\_analysis} + + + + + % Pygments definitions + +\makeatletter +\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax% + \let\PY@ul=\relax \let\PY@tc=\relax% + \let\PY@bc=\relax \let\PY@ff=\relax} +\def\PY@tok#1{\csname PY@tok@#1\endcsname} +\def\PY@toks#1+{\ifx\relax#1\empty\else% + \PY@tok{#1}\expandafter\PY@toks\fi} +\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{% + \PY@it{\PY@bf{\PY@ff{#1}}}}}}} +\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}} + +\expandafter\def\csname PY@tok@w\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}} +\expandafter\def\csname PY@tok@c\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}} +\expandafter\def\csname PY@tok@k\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}} +\expandafter\def\csname PY@tok@o\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@ow\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} +\expandafter\def\csname PY@tok@nb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@nf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@nc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@nn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@ne\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}} +\expandafter\def\csname PY@tok@nv\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@no\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@nl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}} +\expandafter\def\csname PY@tok@ni\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}} +\expandafter\def\csname PY@tok@na\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}} +\expandafter\def\csname PY@tok@nt\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@nd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} +\expandafter\def\csname PY@tok@s\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sd\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@si\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}} +\expandafter\def\csname PY@tok@se\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}} +\expandafter\def\csname PY@tok@sr\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}} +\expandafter\def\csname PY@tok@ss\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@sx\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@m\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@gh\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@gu\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@gd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@gi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}} +\expandafter\def\csname PY@tok@gr\endcsname{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@ge\endcsname{\let\PY@it=\textit} +\expandafter\def\csname PY@tok@gs\endcsname{\let\PY@bf=\textbf} +\expandafter\def\csname PY@tok@gp\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@go\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}} +\expandafter\def\csname PY@tok@gt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}} +\expandafter\def\csname PY@tok@err\endcsname{\def\PY@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}} +\expandafter\def\csname PY@tok@kc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kd\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kr\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@bp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@fm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@vc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vg\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@sa\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@dl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@s2\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@s1\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@mb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@il\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mo\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@ch\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cm\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cpf\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@c1\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cs\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} + +\def\PYZbs{\char`\\} +\def\PYZus{\char`\_} +\def\PYZob{\char`\{} +\def\PYZcb{\char`\}} +\def\PYZca{\char`\^} +\def\PYZam{\char`\&} +\def\PYZlt{\char`\<} +\def\PYZgt{\char`\>} +\def\PYZsh{\char`\#} +\def\PYZpc{\char`\%} +\def\PYZdl{\char`\$} +\def\PYZhy{\char`\-} +\def\PYZsq{\char`\'} +\def\PYZdq{\char`\"} +\def\PYZti{\char`\~} +% for compatibility with earlier versions +\def\PYZat{@} +\def\PYZlb{[} +\def\PYZrb{]} +\makeatother + + + % Exact colors from NB + \definecolor{incolor}{rgb}{0.0, 0.0, 0.5} + \definecolor{outcolor}{rgb}{0.545, 0.0, 0.0} + + + + + % Prevent overflowing lines due to hard-to-break entities + \sloppy + % Setup hyperref package + \hypersetup{ + breaklinks=true, % so long urls are correctly broken across lines + colorlinks=true, + urlcolor=urlcolor, + linkcolor=linkcolor, + citecolor=citecolor, + } + % Slightly bigger margins than the latex defaults + + \geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in} + + + + \begin{document} + + + \maketitle + + + + + \section{Metadata analysis}\label{metadata-analysis} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}1}]:} \PY{k+kn}{import} \PY{n+nn}{pandas} \PY{k}{as} \PY{n+nn}{pd} + \PY{k+kn}{import} \PY{n+nn}{numpy} \PY{k}{as} \PY{n+nn}{np} + \PY{k+kn}{import} \PY{n+nn}{bs4} + \PY{k+kn}{import} \PY{n+nn}{requests} + \PY{k+kn}{import} \PY{n+nn}{string} + \PY{o}{\PYZpc{}}\PY{k}{matplotlib} inline +\end{Verbatim} + + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}2}]:} \PY{k}{def} \PY{n+nf}{int\PYZus{}str}\PY{p}{(}\PY{n}{a\PYZus{}string}\PY{p}{)}\PY{p}{:} + \PY{k}{try}\PY{p}{:} + \PY{k}{return} \PY{n+nb}{int}\PY{p}{(}\PY{n}{a\PYZus{}string}\PY{p}{)} + \PY{k}{except}\PY{p}{:} + \PY{k}{return} \PY{l+m+mi}{0} + + \PY{k}{def} \PY{n+nf}{info\PYZus{}from\PYZus{}youtube}\PY{p}{(}\PY{n}{youtube\PYZus{}video\PYZus{}url}\PY{p}{)}\PY{p}{:} + \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}} + \PY{l+s+sd}{ Retrieve information from a YouTube url.\PYZdq{}\PYZdq{}\PYZdq{}} + \PY{k}{try}\PY{p}{:} + \PY{n}{soup} \PY{o}{=} \PY{n}{bs4}\PY{o}{.}\PY{n}{BeautifulSoup}\PY{p}{(}\PY{n}{requests}\PY{o}{.}\PY{n}{get}\PY{p}{(}\PY{n}{youtube\PYZus{}video\PYZus{}url}\PY{p}{)}\PY{o}{.}\PY{n}{text}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{lxml}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)} + \PY{n}{title} \PY{o}{=} \PY{n}{soup}\PY{o}{.}\PY{n}{title}\PY{o}{.}\PY{n}{text}\PY{o}{.}\PY{n}{strip}\PY{p}{(}\PY{p}{)} + \PY{n}{views\PYZus{}text} \PY{o}{=} \PY{n}{soup}\PY{o}{.}\PY{n}{find}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{div}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{attrs} \PY{o}{=} \PY{p}{\PYZob{}}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{class}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{watch\PYZhy{}view\PYZhy{}count}\PY{l+s+s1}{\PYZsq{}}\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{text} + \PY{n}{views} \PY{o}{=} \PY{n}{int\PYZus{}str}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{join}\PY{p}{(}\PY{p}{[}\PY{n}{c} \PY{k}{for} \PY{n}{c} \PY{o+ow}{in} \PY{n}{views\PYZus{}text} \PY{k}{if} \PY{n}{c} \PY{o+ow}{in} \PY{n}{string}\PY{o}{.}\PY{n}{digits}\PY{p}{]}\PY{p}{)}\PY{p}{)} + \PY{n}{published\PYZus{}text} \PY{o}{=} \PY{n}{soup}\PY{o}{.}\PY{n}{find}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{strong}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{attrs} \PY{o}{=} \PY{p}{\PYZob{}}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{class}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{watch\PYZhy{}time\PYZhy{}text}\PY{l+s+s1}{\PYZsq{}}\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{text} + \PY{n}{published} \PY{o}{=} \PY{n}{int\PYZus{}str}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{join}\PY{p}{(}\PY{p}{[}\PY{n}{c} \PY{k}{for} \PY{n}{c} \PY{o+ow}{in} \PY{n}{published\PYZus{}text}\PY{p}{[}\PY{o}{\PYZhy{}}\PY{l+m+mi}{4}\PY{p}{:}\PY{p}{]} \PY{k}{if} \PY{n}{c} \PY{o+ow}{in} \PY{n}{string}\PY{o}{.}\PY{n}{digits}\PY{p}{]}\PY{p}{)}\PY{p}{)} + \PY{n}{publisher} \PY{o}{=} \PY{n}{soup}\PY{o}{.}\PY{n}{find}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{div}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{attrs} \PY{o}{=} \PY{p}{\PYZob{}}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{class}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{yt\PYZhy{}user\PYZhy{}info}\PY{l+s+s1}{\PYZsq{}}\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{text}\PY{o}{.}\PY{n}{strip}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+se}{\PYZbs{}n}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)} + \PY{n}{num\PYZus{}likes\PYZus{}text} \PY{o}{=} \PY{n}{soup}\PY{o}{.}\PY{n}{find}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{button}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{attrs} \PY{o}{=} \PY{p}{\PYZob{}}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{class}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{like\PYZhy{}button\PYZhy{}renderer\PYZhy{}like\PYZhy{}button}\PY{l+s+s1}{\PYZsq{}}\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{text} + \PY{n}{num\PYZus{}likes} \PY{o}{=} \PY{n}{int\PYZus{}str}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{join}\PY{p}{(}\PY{p}{[}\PY{n}{c} \PY{k}{for} \PY{n}{c} \PY{o+ow}{in} \PY{n}{num\PYZus{}likes\PYZus{}text} \PY{k}{if} \PY{n}{c} \PY{o+ow}{in} \PY{n}{string}\PY{o}{.}\PY{n}{digits}\PY{p}{]}\PY{p}{)}\PY{p}{)} + \PY{n}{percentage\PYZus{}likes\PYZus{}text} \PY{o}{=} \PY{n}{soup}\PY{o}{.}\PY{n}{find}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{div}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{attrs} \PY{o}{=} \PY{p}{\PYZob{}}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{class}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{video\PYZhy{}extras\PYZhy{}sparkbar\PYZhy{}likes}\PY{l+s+s1}{\PYZsq{}}\PY{p}{\PYZcb{}}\PY{p}{)}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{style}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]} + \PY{n}{percentage\PYZus{}likes} \PY{o}{=} \PY{n+nb}{float}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{\PYZsq{}}\PY{o}{.}\PY{n}{join}\PY{p}{(}\PY{p}{[}\PY{n}{c} \PY{k}{for} \PY{n}{c} \PY{o+ow}{in} \PY{n}{percentage\PYZus{}likes\PYZus{}text} \PY{k}{if} \PY{n}{c} \PY{o+ow}{in} \PY{n}{string}\PY{o}{.}\PY{n}{digits} \PY{o}{+} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{.}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{)}\PY{p}{)} + + \PY{k}{return} \PY{p}{\PYZob{}}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{url}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{n}{youtube\PYZus{}video\PYZus{}url}\PY{o}{.}\PY{n}{strip}\PY{p}{(}\PY{p}{)}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{title}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{n}{title}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{views}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{n}{views}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{published\PYZus{}yr}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{n}{published}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{publisher}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{n}{publisher}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{num\PYZus{}likes}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{n}{num\PYZus{}likes}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{percentage\PYZus{}likes}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:}\PY{n}{percentage\PYZus{}likes}\PY{p}{\PYZcb{}} + \PY{k}{except}\PY{p}{:} + \PY{k}{return} \PY{k+kc}{None} +\end{Verbatim} + + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}3}]:} \PY{n}{urls} \PY{o}{=} \PY{p}{[} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=OB1reY6IX\PYZhy{}o}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=80fZrVMurPM}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=gtejJ3RCddE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=Ejh0ftSjk6g}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=ZgHGCfwExw0}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=6ohWS7J1hVA}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=MKucn8NtVeI}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=He9MCbs1wgE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=XbxIo7ScVzc}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=EKUy0TSLg04}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=2kT6QOVSgSg}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=lKcwuPnSHIQ}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=5Md\PYZus{}sSsN51k}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=\PYZhy{}lXfsWP7DJ8}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=v5mrwq7yJc4}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=p7Mj\PYZhy{}4kASmI}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=1AwG0T4gaO0}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=8Jktm\PYZhy{}Imt\PYZhy{}I}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=rARMKS8jE9g}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=38R7jiCspkw}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=HN5d490\PYZus{}KKk}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=xn9sTXR3Cp8}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=RA\PYZus{}2qdipVng}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=zmfe2RaX\PYZhy{}14}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=TMmSESkhRtI}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=W5WE9Db2RLU}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=FytuB8nFHPQ}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=u682UpVrMVM}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=E92jDCmJNek}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=ThS4juptJjQ}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=gSVvxOchT8Y}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=tJ\PYZhy{}O3hk1vRw}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=HC0J\PYZus{}SPm9co}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=ZIEyHdvF474}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=9fOWryQq9J8}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=E9XTOnEgqRY}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=GMKZD1Ohlzk}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=dye7rDktJ2E}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=39vJRxIPSxw}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=rIofV14c0tc}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=cKPlPJyQrt4}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=bvHgESVuS6Q}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=k7hSD\PYZus{}\PYZhy{}gWMw}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=YkVscKsV\PYZus{}qk}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=aXR2d9k9\PYZhy{}h4}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=XJOt4QQgx0A}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=HTLu2DFOdTg}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=Ta1bAMOMFOI}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=jKBwGlYb13w}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=u2KZJzoz\PYZhy{}qI}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=OSGv2VnC0go}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=9zinZmE3Ogk}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=p33CVV29OG8}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=9zinZmE3Ogk}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=p33CVV29OG8}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=voXVTjwnn\PYZhy{}U}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=9zinZmE3Ogk}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=\PYZus{}Ek3A2b\PYZhy{}nHU}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=zhpWhkW8kcc}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=Dgnp28Ijm\PYZus{}M}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=7i6kBz1kZ\PYZhy{}A}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=g\PYZhy{}YCaX3ml2Q}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=rfdzOZkDqYk}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=5\PYZhy{}qadlG7tWo}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=j6VSAsKAj98}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=js\PYZus{}0wjzuMfc}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=Z\PYZus{}OAlIhXziw}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=Bm96RqNGbGo}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=x1ndXuw7S0s}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=2AXuhgid7E4}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=5JnMutdy6Fw}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=9d5\PYZhy{}Ti6onew}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=CowlcrtSyME}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=YGk09nK\PYZus{}xnM}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=5XGycFIe8qE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=\PYZhy{}NR\PYZhy{}ynQg0YM}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{https://www.youtube.com/watch?v=oGzU688xCUs}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]} + \PY{n}{urls} \PY{o}{=} \PY{n+nb}{list}\PY{p}{(}\PY{n+nb}{set}\PY{p}{(}\PY{n}{urls}\PY{p}{)}\PY{p}{)} +\end{Verbatim} + + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}4}]:} \PY{c+c1}{\PYZsh{} Parse the URLS} + \PY{n}{data\PYZus{}inn} \PY{o}{=} \PY{p}{[}\PY{p}{]} + \PY{k}{for} \PY{n}{url} \PY{o+ow}{in} \PY{n}{urls}\PY{p}{:} + \PY{n}{info} \PY{o}{=} \PY{n}{info\PYZus{}from\PYZus{}youtube}\PY{p}{(}\PY{n}{url}\PY{p}{)} + \PY{k}{if} \PY{n}{info}\PY{p}{:} + \PY{n}{data\PYZus{}inn}\PY{o}{.}\PY{n}{append}\PY{p}{(}\PY{n}{info}\PY{p}{)} + \PY{k}{else}\PY{p}{:} + \PY{n+nb}{print}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Error with:}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{url}\PY{p}{)} +\end{Verbatim} + + + \begin{Verbatim}[commandchars=\\\{\}] +Error with: https://www.youtube.com/watch?v=5-qadlG7tWo + + \end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}11}]:} \PY{n}{df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{DataFrame}\PY{p}{(}\PY{n}{data\PYZus{}inn}\PY{p}{)} + \PY{n}{df}\PY{o}{.}\PY{n}{head}\PY{p}{(}\PY{l+m+mi}{3}\PY{p}{)} +\end{Verbatim} + + +\begin{Verbatim}[commandchars=\\\{\}] +{\color{outcolor}Out[{\color{outcolor}11}]:} num\_likes percentage\_likes published\_yr publisher \textbackslash{} + 0 6527 98.953911 2013 Next Day Video + 1 142 94.666667 2016 PyData + 2 88 100.000000 2015 PyCon Australia + + title \textbackslash{} + 0 Transforming Code into Beautiful, Idiomatic Py{\ldots} + 1 Christopher Roach | Visualizing Geographic Dat{\ldots} + 2 Predicting sports winners using data analytics{\ldots} + + url views + 0 https://www.youtube.com/watch?v=OSGv2VnC0go 340228 + 1 https://www.youtube.com/watch?v=ZIEyHdvF474 13912 + 2 https://www.youtube.com/watch?v=k7hSD\_-gWMw 12867 +\end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}12}]:} \PY{k}{def} \PY{n+nf}{rating\PYZus{}func}\PY{p}{(}\PY{n}{df}\PY{p}{)}\PY{p}{:} + \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}} + \PY{l+s+sd}{ A rating heuristic.} + \PY{l+s+sd}{ \PYZdq{}\PYZdq{}\PYZdq{}} + \PY{n}{num\PYZus{}likes}\PY{p}{,} \PY{n}{percentage\PYZus{}likes} \PY{o}{=} \PY{n}{df}\PY{o}{.}\PY{n}{num\PYZus{}likes}\PY{p}{,} \PY{n}{df}\PY{o}{.}\PY{n}{percentage\PYZus{}likes} + \PY{n}{views}\PY{p}{,} \PY{n}{published\PYZus{}yr} \PY{o}{=} \PY{n}{df}\PY{o}{.}\PY{n}{views}\PY{o}{/}\PY{l+m+mi}{10}\PY{p}{,} \PY{n}{df}\PY{o}{.}\PY{n}{published\PYZus{}yr} + \PY{n}{a}\PY{p}{,} \PY{n}{b} \PY{o}{=} \PY{n}{num\PYZus{}likes}\PY{o}{*}\PY{n}{percentage\PYZus{}likes}\PY{p}{,} \PY{n}{np}\PY{o}{.}\PY{n}{log}\PY{p}{(}\PY{n}{views}\PY{p}{)} + \PY{n}{age} \PY{o}{=} \PY{n+nb}{abs}\PY{p}{(}\PY{l+m+mi}{2018} \PY{o}{\PYZhy{}} \PY{n}{df}\PY{o}{.}\PY{n}{published\PYZus{}yr}\PY{p}{)} + \PY{k}{return} \PY{p}{(}\PY{n}{a}\PY{o}{*}\PY{n}{b} \PY{o}{/} \PY{p}{(}\PY{n}{a} \PY{o}{+} \PY{n}{b}\PY{p}{)}\PY{p}{)} \PY{o}{\PYZhy{}} \PY{n}{age}\PY{o}{*}\PY{o}{*}\PY{l+m+mf}{0.8} + + \PY{n}{df} \PY{o}{=} \PY{n}{df}\PY{o}{.}\PY{n}{assign}\PY{p}{(}\PY{n}{rating} \PY{o}{=} \PY{n}{rating\PYZus{}func}\PY{p}{)} +\end{Verbatim} + + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}13}]:} \PY{n+nb}{len}\PY{p}{(}\PY{n}{df}\PY{p}{)} +\end{Verbatim} + + +\begin{Verbatim}[commandchars=\\\{\}] +{\color{outcolor}Out[{\color{outcolor}13}]:} 73 +\end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}15}]:} \PY{n}{temp} \PY{o}{=} \PY{n}{df}\PY{o}{.}\PY{n}{sort\PYZus{}values}\PY{p}{(}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{rating}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{ascending} \PY{o}{=} \PY{k+kc}{False}\PY{p}{)}\PY{o}{.}\PY{n}{drop\PYZus{}duplicates}\PY{p}{(}\PY{p}{)} + + \PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n+nb}{range}\PY{p}{(}\PY{n+nb}{len}\PY{p}{(}\PY{n}{temp}\PY{p}{)}\PY{p}{)}\PY{p}{:} + \PY{n}{values\PYZus{}dict} \PY{o}{=} \PY{n}{temp}\PY{o}{.}\PY{n}{iloc}\PY{p}{[}\PY{n}{i}\PY{p}{,} \PY{p}{:}\PY{p}{]}\PY{o}{.}\PY{n}{to\PYZus{}dict}\PY{p}{(}\PY{p}{)} + \PY{n}{string} \PY{o}{=} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{| [}\PY{l+s+si}{\PYZob{}Title\PYZcb{}}\PY{l+s+s1}{](}\PY{l+s+si}{\PYZob{}url\PYZcb{}}\PY{l+s+s1}{) | }\PY{l+s+si}{\PYZob{}Speaker\PYZcb{}}\PY{l+s+s1}{ | }\PY{l+s+si}{\PYZob{}Uploader\PYZcb{}}\PY{l+s+s1}{ | }\PY{l+s+si}{\PYZob{}Duration\PYZcb{}}\PY{l+s+s1}{ | }\PY{l+s+si}{\PYZob{}Views\PYZcb{}}\PY{l+s+s1}{ | }\PY{l+s+si}{\PYZob{}Keywords\PYZcb{}}\PY{l+s+s1}{ | }\PY{l+s+si}{\PYZob{}Year\PYZcb{}}\PY{l+s+s1}{ | }\PY{l+s+si}{\PYZob{}Level\PYZcb{}}\PY{l+s+s1}{ |}\PY{l+s+s1}{\PYZsq{}} + \PY{n+nb}{print}\PY{p}{(}\PY{n}{string}\PY{o}{.}\PY{n}{format}\PY{p}{(} + \PY{n}{Title} \PY{o}{=} \PY{n}{values\PYZus{}dict}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{title}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{o}{.}\PY{n}{replace}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{|}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{ }\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}\PY{p}{,} + \PY{n}{Speaker} \PY{o}{=} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{NAME}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{n}{Uploader} \PY{o}{=} \PY{n}{values\PYZus{}dict}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{publisher}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} + \PY{n}{Duration} \PY{o}{=} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{DURATION}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{n}{Views} \PY{o}{=} \PY{n+nb}{int}\PY{p}{(}\PY{n+nb}{round}\PY{p}{(}\PY{n}{values\PYZus{}dict}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{views}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{o}{/}\PY{l+m+mi}{1000}\PY{p}{)}\PY{o}{*}\PY{l+m+mi}{1000}\PY{p}{)}\PY{p}{,} + \PY{n}{Keywords} \PY{o}{=} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{None}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} + \PY{n}{Year} \PY{o}{=} \PY{n}{values\PYZus{}dict}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{published\PYZus{}yr}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} + \PY{n}{url} \PY{o}{=} \PY{n}{values\PYZus{}dict}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{url}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} + \PY{n}{Level} \PY{o}{=} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Novice}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}\PY{p}{)} +\end{Verbatim} + + + \begin{Verbatim}[commandchars=\\\{\}] +| [Tetiana Ivanova - How to become a Data Scientist in 6 months a hacker’s approach to career planning - YouTube](https://www.youtube.com/watch?v=rIofV14c0tc) | NAME | PyData | DURATION | 148000 | None | 2016 | Novice | +| [Introduction Into Pandas: Python Tutorial - YouTube](https://www.youtube.com/watch?v=-NR-ynQg0YM) | NAME | Python Tutorial | DURATION | 46000 | None | 2017 | Novice | +| [James Powell - So you want to be a Python expert? - YouTube](https://www.youtube.com/watch?v=cKPlPJyQrt4) | NAME | PyData | DURATION | 28000 | None | 2017 | Novice | +| [NumPy Beginner SciPy 2016 Tutorial Alexandre Chabot LeClerc - YouTube](https://www.youtube.com/watch?v=gtejJ3RCddE) | NAME | Enthought | DURATION | 56000 | None | 2016 | Novice | +| [Introduction To Data Analytics With Pandas - YouTube](https://www.youtube.com/watch?v=5XGycFIe8qE) | NAME | Python Tutorial | DURATION | 25000 | None | 2017 | Novice | +| [Transforming Code into Beautiful, Idiomatic Python - YouTube](https://www.youtube.com/watch?v=OSGv2VnC0go) | NAME | Next Day Video | DURATION | 340000 | None | 2013 | Novice | +| [Machine Learning Part 1 SciPy 2016 Tutorial Andreas Mueller \& Sebastian Raschka - YouTube](https://www.youtube.com/watch?v=OB1reY6IX-o) | NAME | Enthought | DURATION | 47000 | None | 2016 | Novice | +| [Brandon Rhodes - Pandas From The Ground Up - PyCon 2015 - YouTube](https://www.youtube.com/watch?v=5JnMutdy6Fw) | NAME | PyCon 2015 | DURATION | 91000 | None | 2015 | Novice | +| [Modern Dictionaries by Raymond Hettinger - YouTube](https://www.youtube.com/watch?v=p33CVV29OG8) | NAME | SF Python | DURATION | 44000 | None | 2016 | Novice | +| [Jake VanderPlas The Python Visualization Landscape PyCon 2017 - YouTube](https://www.youtube.com/watch?v=FytuB8nFHPQ) | NAME | PyCon 2017 | DURATION | 21000 | None | 2017 | Novice | +| [Raymond Hettinger, Keynote on Concurrency, PyBay 2017 - YouTube](https://www.youtube.com/watch?v=9zinZmE3Ogk) | NAME | SF Python | DURATION | 15000 | None | 2017 | Novice | +| [Pandas for Data Analysis SciPy 2017 Tutorial Daniel Chen - YouTube](https://www.youtube.com/watch?v=oGzU688xCUs) | NAME | Enthought | DURATION | 13000 | None | 2017 | Novice | +| [JupyterLab: Building Blocks for Interactive Computing SciPy 2016 Brian Granger - YouTube](https://www.youtube.com/watch?v=Ejh0ftSjk6g) | NAME | Enthought | DURATION | 28000 | None | 2016 | Novice | +| [Sofia Heisler No More Sad Pandas Optimizing Pandas Code for Speed and Efficiency PyCon 2017 - YouTube](https://www.youtube.com/watch?v=HN5d490\_KKk) | NAME | PyCon 2017 | DURATION | 12000 | None | 2017 | Novice | +| [A Visual Guide To Pandas - YouTube](https://www.youtube.com/watch?v=9d5-Ti6onew) | NAME | Next Day Video | DURATION | 49000 | None | 2015 | Novice | +| [Machine Learning with Scikit Learn SciPy 2015 Tutorial Andreas Mueller \& Kyle Kastner Part I - YouTube](https://www.youtube.com/watch?v=80fZrVMurPM) | NAME | Enthought | DURATION | 48000 | None | 2015 | Novice | +| [Machine Learning for Time Series Data in Python SciPy 2016 Brett Naul - YouTube](https://www.youtube.com/watch?v=ZgHGCfwExw0) | NAME | Enthought | DURATION | 24000 | None | 2016 | Novice | +| [The Fun of Reinvention (Screencast) - YouTube](https://www.youtube.com/watch?v=js\_0wjzuMfc) | NAME | David Beazley | DURATION | 11000 | None | 2017 | Novice | +| [Analyzing and Manipulating Data with Pandas Beginner SciPy 2016 Tutorial Jonathan Rocher - YouTube](https://www.youtube.com/watch?v=6ohWS7J1hVA) | NAME | Enthought | DURATION | 22000 | None | 2016 | Novice | +| [Super Advanced Python - YouTube](https://www.youtube.com/watch?v=u2KZJzoz-qI) | NAME | Next Day Video | DURATION | 143000 | None | 2013 | Novice | +| [Computational Statistics SciPy 2017 Tutorial Allen Downey - YouTube](https://www.youtube.com/watch?v=He9MCbs1wgE) | NAME | Enthought | DURATION | 10000 | None | 2017 | Novice | +| [Raymond Hettinger, "Being a Core Developer in Python", PyBay2016 - YouTube](https://www.youtube.com/watch?v=voXVTjwnn-U) | NAME | SF Python | DURATION | 19000 | None | 2016 | Novice | +| [Aileen Nielsen - Time Series Analysis - PyCon 2017 - YouTube](https://www.youtube.com/watch?v=zmfe2RaX-14) | NAME | PyCon 2017 | DURATION | 9000 | None | 2017 | Novice | +| [Learning TensorFlow - YouTube](https://www.youtube.com/watch?v=bvHgESVuS6Q) | NAME | PyCon Australia | DURATION | 18000 | None | 2016 | Novice | +| [JupyterHub: Deploying Jupyter Notebooks for students and researchers - YouTube](https://www.youtube.com/watch?v=gSVvxOchT8Y) | NAME | PyData | DURATION | 17000 | None | 2016 | Novice | +| [Jeffrey Yau Applied Time Series Econometrics in Python and R - YouTube](https://www.youtube.com/watch?v=tJ-O3hk1vRw) | NAME | PyData | DURATION | 17000 | None | 2016 | Novice | +| [Machine Learning with scikit learn Part One SciPy 2017 Tutorial Andreas Mueller \& Alexandre Gram - YouTube](https://www.youtube.com/watch?v=2kT6QOVSgSg) | NAME | Enthought | DURATION | 8000 | None | 2017 | Novice | +| [Introduction to Numerical Computing with NumPy SciPy 2017 Tutorial Dillon Niederhut - YouTube](https://www.youtube.com/watch?v=lKcwuPnSHIQ) | NAME | Enthought | DURATION | 8000 | None | 2017 | Novice | +| [Matthew Rocklin Dask A Pythonic Distributed Data Science Framework PyCon 2017 - YouTube](https://www.youtube.com/watch?v=RA\_2qdipVng) | NAME | PyCon 2017 | DURATION | 7000 | None | 2017 | Novice | +| [Christopher Fonnesbeck - Introduction to Statistical Modeling with Python - PyCon 2017 - YouTube](https://www.youtube.com/watch?v=TMmSESkhRtI) | NAME | PyCon 2017 | DURATION | 7000 | None | 2017 | Novice | +| [Fully Convolutional Networks for Image Segmentation SciPy 2017 Daniil Pakhomov - YouTube](https://www.youtube.com/watch?v=-lXfsWP7DJ8) | NAME | Enthought | DURATION | 7000 | None | 2017 | Novice | +| [Chloe Mawer, Jonathan Whitmore - Exploratory data analysis in python - PyCon 2017 - YouTube](https://www.youtube.com/watch?v=W5WE9Db2RLU) | NAME | PyCon 2017 | DURATION | 7000 | None | 2017 | Novice | +| [Christopher Roach Visualizing Geographic Data With Python - YouTube](https://www.youtube.com/watch?v=ZIEyHdvF474) | NAME | PyData | DURATION | 14000 | None | 2016 | Novice | +| [Builtin Superheroes (Screencast) - YouTube](https://www.youtube.com/watch?v=j6VSAsKAj98) | NAME | David Beazley | DURATION | 12000 | None | 2016 | Novice | +| [Python's Class Development Toolkit - YouTube](https://www.youtube.com/watch?v=HTLu2DFOdTg) | NAME | Next Day Video | DURATION | 80000 | None | 2013 | Novice | +| [Sebastian Raschka Learning scikit learn - An Introduction to Machine Learning in Python - YouTube](https://www.youtube.com/watch?v=9fOWryQq9J8) | NAME | PyData | DURATION | 12000 | None | 2016 | Novice | +| [Alex Rubinsteyn: Python Libraries for Deep Learning with Sequences - YouTube](https://www.youtube.com/watch?v=E92jDCmJNek) | NAME | PyData | DURATION | 23000 | None | 2015 | Novice | +| [The Other Async (Threads + Async = ❤️) - YouTube](https://www.youtube.com/watch?v=x1ndXuw7S0s) | NAME | David Beazley | DURATION | 5000 | None | 2017 | Novice | +| [Daniel Chen Introduction to Pandas - YouTube](https://www.youtube.com/watch?v=dye7rDktJ2E) | NAME | PyData | DURATION | 10000 | None | 2016 | Novice | +| [Numba - Tell Those C++ Bullies to Get Lost SciPy 2017 Tutorial Gil Forsyth \& Lorena Barba - YouTube](https://www.youtube.com/watch?v=1AwG0T4gaO0) | NAME | Enthought | DURATION | 5000 | None | 2017 | Novice | +| [Deploying Interactive Jupyter Dashboards for Visualizing Hundreds of Millions of Datapoints, in 30 L - YouTube](https://www.youtube.com/watch?v=8Jktm-Imt-I) | NAME | Enthought | DURATION | 5000 | None | 2017 | Novice | +| [Divya Sardana Building Recommender Systems Using Python - YouTube](https://www.youtube.com/watch?v=39vJRxIPSxw) | NAME | PyData | DURATION | 10000 | None | 2016 | Novice | +| [Joel Grus: Learning Data Science Using Functional Python - YouTube](https://www.youtube.com/watch?v=ThS4juptJjQ) | NAME | PyData | DURATION | 18000 | None | 2015 | Novice | +| [Stephen Simmons Pandas from the Inside - YouTube](https://www.youtube.com/watch?v=CowlcrtSyME) | NAME | PyData | DURATION | 9000 | None | 2016 | Novice | +| [Curious Course on Coroutines and Concurrency - YouTube](https://www.youtube.com/watch?v=Z\_OAlIhXziw) | NAME | David Beazley | DURATION | 9000 | None | 2016 | Novice | +| [Anatomy of matplotlib SciPy 2015 Tutorial Benjamin Root and Joe Kington - YouTube](https://www.youtube.com/watch?v=MKucn8NtVeI) | NAME | Enthought | DURATION | 18000 | None | 2015 | Novice | +| [Anatomy of Matplotlib SciPy 2017 Tutorial Ben Root - YouTube](https://www.youtube.com/watch?v=rARMKS8jE9g) | NAME | Enthought | DURATION | 4000 | None | 2017 | Novice | +| [Data Science is Software SciPy 2016 Tutorial Peter Bull \& Isaac Slavitt - YouTube](https://www.youtube.com/watch?v=EKUy0TSLg04) | NAME | Enthought | DURATION | 9000 | None | 2016 | Novice | +| [Jake VanderPlas: Machine Learning with Scikit Learn - YouTube](https://www.youtube.com/watch?v=HC0J\_SPm9co) | NAME | PyData | DURATION | 16000 | None | 2015 | Novice | +| [Using Jupyter notebooks to develop and share interactive data displays - YouTube](https://www.youtube.com/watch?v=aXR2d9k9-h4) | NAME | PyCon Australia | DURATION | 8000 | None | 2016 | Novice | +| [Parallel Python: Analyzing Large Datasets Intermediate SciPy 2016 Tutorial Matthew Rocklin \& Mi - YouTube](https://www.youtube.com/watch?v=5Md\_sSsN51k) | NAME | Enthought | DURATION | 7000 | None | 2016 | Novice | +| [Functional Programming with Python - YouTube](https://www.youtube.com/watch?v=Ta1bAMOMFOI) | NAME | Next Day Video | DURATION | 44000 | None | 2013 | Novice | +| [Predicting sports winners using data analytics with pandas and scikit-learn by Robert Layton - YouTube](https://www.youtube.com/watch?v=k7hSD\_-gWMw) | NAME | PyCon Australia | DURATION | 13000 | None | 2015 | Novice | +| [Keynote: Project Jupyter SciPy 2016 Brian Granger - YouTube](https://www.youtube.com/watch?v=v5mrwq7yJc4) | NAME | Enthought | DURATION | 7000 | None | 2016 | Novice | +| [matplotlib (Python Plotting Library) Beginner SciPy 2016 Tutorial Nicolas Rougier - YouTube](https://www.youtube.com/watch?v=p7Mj-4kASmI) | NAME | Enthought | DURATION | 6000 | None | 2016 | Novice | +| [Awesome Big Data Algorithms - YouTube](https://www.youtube.com/watch?v=jKBwGlYb13w) | NAME | Next Day Video | DURATION | 41000 | None | 2013 | Novice | +| [Stephen Simmons - Pandas from the Inside / "Big Pandas" - YouTube](https://www.youtube.com/watch?v=YGk09nK\_xnM) | NAME | PyData | DURATION | 3000 | None | 2017 | Novice | +| [Fear and Awaiting in Async (Screencast) - YouTube](https://www.youtube.com/watch?v=Bm96RqNGbGo) | NAME | David Beazley | DURATION | 5000 | None | 2016 | Novice | +| [Brian Granger: All About Jupyter - YouTube](https://www.youtube.com/watch?v=GMKZD1Ohlzk) | NAME | PyData | DURATION | 11000 | None | 2015 | Novice | +| [Anusua Trivedi: An example of Predictive Analytics: Building a Recommendation Engine using Python - YouTube](https://www.youtube.com/watch?v=E9XTOnEgqRY) | NAME | PyData | DURATION | 11000 | None | 2015 | Novice | +| [Sarah Guido The Wild West of Data Wrangling PyCon 2017 - YouTube](https://www.youtube.com/watch?v=xn9sTXR3Cp8) | NAME | PyCon 2017 | DURATION | 3000 | None | 2017 | Novice | +| [Adventures in scikit-learn's Random Forest by Gregory Saunders - YouTube](https://www.youtube.com/watch?v=YkVscKsV\_qk) | NAME | PyCon Australia | DURATION | 9000 | None | 2015 | Novice | +| [Doing Math with Python - YouTube](https://www.youtube.com/watch?v=XJOt4QQgx0A) | NAME | PyCon Australia | DURATION | 5000 | None | 2016 | Novice | +| [Iterations of Evolution: The Unauthorized Biography of the For-Loop - YouTube](https://www.youtube.com/watch?v=2AXuhgid7E4) | NAME | David Beazley | DURATION | 2000 | None | 2017 | Novice | +| [Alex Martelli, ""Good Enough" IS Good Enough!", PyBay2016 - YouTube](https://www.youtube.com/watch?v=\_Ek3A2b-nHU) | NAME | SF Python | DURATION | 4000 | None | 2016 | Novice | +| [Renee Chu - Python for Social Scientists: Cleaning and Prepping Data - PyCon 2016 - YouTube](https://www.youtube.com/watch?v=u682UpVrMVM) | NAME | PyCon 2016 | DURATION | 3000 | None | 2016 | Novice | +| [PyMC: Markov Chain Monte Carlo in Python SciPy 2014 Chris Fonnesbeck - YouTube](https://www.youtube.com/watch?v=XbxIo7ScVzc) | NAME | Enthought | DURATION | 9000 | None | 2014 | Novice | +| [Alex Martelli, ""The Tower of Abstraction", PyBay2016 - YouTube](https://www.youtube.com/watch?v=zhpWhkW8kcc) | NAME | SF Python | DURATION | 3000 | None | 2016 | Novice | +| [Jupyter Advanced Topics Tutorial SciPy 2015 Tutorial Jonathan Frederic, Matthias Bussonier \& Tho - YouTube](https://www.youtube.com/watch?v=38R7jiCspkw) | NAME | Enthought | DURATION | 4000 | None | 2015 | Novice | +| [Rachel Thomas, "Using randomness to make code much faster", PyBay2017 - YouTube](https://www.youtube.com/watch?v=7i6kBz1kZ-A) | NAME | SF Python | DURATION | 1000 | None | 2017 | Novice | +| [Mahmoud Hashemi, Python Profiling \& Performance: elementary to enterprise, PyBay2016 - YouTube](https://www.youtube.com/watch?v=Dgnp28Ijm\_M) | NAME | SF Python | DURATION | 1000 | None | 2016 | Novice | +| [Dillon Niederhut, "What to do when your data is large, but not big", PyBay2016 - YouTube](https://www.youtube.com/watch?v=g-YCaX3ml2Q) | NAME | SF Python | DURATION | 0 | None | 2016 | Novice | +| [Cynthia Lin, "Opening Up to Open Source", PyBay2017 - YouTube](https://www.youtube.com/watch?v=rfdzOZkDqYk) | NAME | SF Python | DURATION | 0 | None | 2017 | Novice | + + \end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}9}]:} \PY{n}{df}\PY{o}{.}\PY{n}{sort\PYZus{}values}\PY{p}{(}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{rating}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{ascending} \PY{o}{=} \PY{k+kc}{False}\PY{p}{)}\PY{o}{.}\PY{n}{rating}\PY{o}{.}\PY{n}{hist}\PY{p}{(}\PY{n}{bins} \PY{o}{=} \PY{l+m+mi}{25}\PY{p}{)} +\end{Verbatim} + + +\begin{Verbatim}[commandchars=\\\{\}] +{\color{outcolor}Out[{\color{outcolor}9}]:} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_9_1.png} + \end{center} + { \hspace*{\fill} \\} + + + % Add a bibliography block to the postdoc + + + + \end{document} diff --git a/data/output_9_1.png b/data/output_9_1.png new file mode 100644 index 0000000000000000000000000000000000000000..c9a867726053b8f0cf8f5ce3da7d67132a53586c GIT binary patch literal 4161 zcmchac~leU`o~9cE1)#3prC+;0LrRGKtL8X2oq$HeMb@2%8rj&>f#l$zN;I;oe_xZ$3coz+b< zjiYNVt*T7FY)KB<@Wp}Xj!mYyta_60J69CPIdT5OFXFp@}Hg1Jv@ygts1D7Nl zB!3B<6s>r2)&M~7je#}0zK~Z3p#Ko~nsjLg%5z%KDts|cG zy&VYr$vd+vF2qSloKBo=d(?Qy1nH@;37a)2d_u|l0+_9;G!$$^@qJ-zJFObA@E(#n zvnMxm*8)XD4Fg&;AXsZ0AVj0f^x|x^qTkDh%K=3Ltes-KlQ>cC1pvgv3la*WIJY2; zJaTnU@oF8dh89xLpctO+&ZeQxSeD~FJUpDib#l9= zW=P_yK)@(A-m&3Vjk3_53`tEV3bg8UiCzkvx2=1usT%SN`lTZ0eWqHozDf&U9g;e- zjU8B7=FmF8&B?aa@GV8apc%piV{J#-Az+Ieo5blKmvWT*WD7#&su0o76EAiwRokfm zcAIzZl?On#S>W=z3tRrO0NsD2DYKpb08QIL z{u7Z%1WD+0dSk};zK=B=@QRaStpiS2Bx72+#MNwdX;*61g2XF%IW)b^RV!tY8JIub zb{xvdDlmsO0B(N6M^+&kCubgiMNrVoS_60v_s8X?Yg-#e=km4S`OAa?+8i#+fxXy) z1MG5%F7NDFhNqij(;xc*fN4B!4}dyMm9zV5tL0x3oVY)L%lc8M09Q6KM}_O1*UP%Y zqj>;_hXfL+1ySe4PJ`m&^tJ;x%33^(Vo}wN%wudzuy&+i9dPGnBGrL)0<0_+)YC>$ zgzo=(IG{B0B!->a#r z-Yud!e`j5`8hn%&48KHkw!$(wj2D4x21S^e`Ts{5{052ch{UGWG#TfMRMShB-|EQe~V7H#4 zFe$W$YOWAkQT8kjG!>_#0^BfgKsRoj@WD)I@$A9&UD4j`AjHB*{i2SxGsd0%Y~0M$ zv@yKw!-urIOP4&m2XqpZT&|RG^DCml%SeO~Os9)57>|8+(fvh@V3j)koKER&cfYvM zXt;PaJuEED4B5hba?fKn2<&%kF+)}~u2_KhW30+D{{G20^T^1^VuR9(iV=}0wMr2u z6VL-CX@t>Jd6%9Ql%JwA(rcDVe}265+&AVqMQST3l`!f*_;l}UgR7*8oVWJ}<}&sW z(dM8OQ7I7W|c1^Na2|=a8_k#vgf4>sHB|!eTFL=Km4ggy=5hu z1QGk@0Ki-0^SGT@!sY@oYQNf>U)~Y$Su#3gMBs48r5E`QrJH1Zb35Bm(27!zIk{7& z&W-Yu!bs2@)6!IO5IB49bsROzo2{$8^8L34z*)7Oh+&PWX%z1wS#*t`0A_7Rn3sn# zLGTJR#3R7Y&d!F-StL@hmX_9Ub^k2<juOi-_*PO$f^R^gt64-tp&G%L=@77_-|TDmiln8dK7nTFTY|(TkbhZ zZ>FeXhtujMipbiaFG$-XK%AEO5tqJeGnzGtsIx#G)1EZo#d7tMfJfb7wgkZOX5rF# z#D2r*mh3tJ7=HL!`w{D1`2*FXRd3R2dn!7Km$~K}y{+K?!K(bZi^=YmS&Kx@MyzK2 z$~k`Pbw!zg(ffEwuHb|!Vk0hOPxOdTN=~h#Vu7EUJ76pKiBD3UrrTfm7S&wYTM zMLt=4e(V-hQN%DsP6Z}U(`X?0nqbMfefxItq2%A_G@2-?HA_rV1n9eqon|;ub_BOO zFnRg#>j#wla;%koT0~MMFJ+_$vpl83t%2?ut}Hvd{L@G@${LzX%PJO}Ml5tSkMNUQ z_nVkBgpbQC#FNg>*oY{(X2~;_a&8|b%`~L$kA#ARdLHcVKjfT{@rmWjd`SkV;gAlCA^Ikh{~y4M4H?*3z42>Our)6J1< zfT2f)H$TTsDE5voZA{pGemL2ES;uJ0;IE=bCE=;+5xi}m$M25UESxeXh57RnQ^THn z0K2r0(w7EF{n-*!&~Baz$LD~uBH-pyZYa2~ooSA2+lIh9U55nUMux2cx@CG*ielE! z3Jb$jfg9`d$uUX{TNG~;Ig^sZX~MKvBHsqWQ#9JsnO7>{bgi*D_*KKS^W zAM&GVU1>lhMAv@%Im-(t$0jgj2pgiqt0y0fk{kgW zAe-$7V|XXO3>&}}ldm;pw$bRQNo7PoV`sH7ylUs}!?s5xcyQ9hp^}HRIJDw0X%N;d z5*Q;A@O4WQE#%Z&{J<4G7ckM2ZG4QC1($QNSUmg3>0gu)32dRQBk>SYd94!oBpVrQFXBTj` zxVt+Ne-vU97=nEw3()qmAf;pLC=(uSQolzl)#l@@(6YIBvgz7P1D+|Ui=4@BdEq{* zJf=}7ct|U7V3p#yJO}hL9vyi1f)=qBX1=C3V`U1Tiv9^MuR0g2HKS)(mM2Kb-TP!X zE{}E}wc?EaA%GnKU3|qD>)1Ld(axmu4VYtXz6!b~8FX4@A!ADDFqiK{z3TbXVOs+p zm7GfBABs@MQ|a3LC<}~j4MiXgvms*G>A0-6Nyn9JvQKVSfs^>AN}o&1gnA(%e6Y1C z(|?aBbioGPs5RruIV^ss4@tF%&c3 zb73d{VNH>7Qtri*EmiO8X|dQ^ePTqbip`b#RlSocOBkX*TZQ9*ogVZq{;GyTxUT|l zb)83^Im)4m<4b6AaC_B-q<3!$$kg%m@te+|tRp0)P@562jx56Tl!(vJY2W3Hc2gkf z4=)d8)-he!i~eKmpv9b&!=G#&{en?VCTq_pGjO~a_^(cGA