diff --git a/DataFrames/ALL.ipynb b/DataFrames/ALL.ipynb
new file mode 100644
index 0000000..95b169b
--- /dev/null
+++ b/DataFrames/ALL.ipynb
@@ -0,0 +1,1258 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:14:45.083313Z",
+     "start_time": "2020-11-17T23:14:44.818195Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "UserWarning: The Dask Engine for Modin is experimental.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import gc\n",
+    "from time import time, sleep\n",
+    "\n",
+    "import pandas as pd\n",
+    "import dask.dataframe as dd\n",
+    "import modin.pandas as mpd\n",
+    "import vaex\n",
+    "from pyspark.sql import SparkSession\n",
+    "from pyspark.sql.functions import sum, avg\n",
+    "# pandas on ray has moved to Modin\n",
+    "# import ray.dataframe as rpd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:55:27.722264Z",
+     "start_time": "2020-11-17T22:55:27.699399Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# data based on https://www.kaggle.com/c/ieee-fraud-detection/data\n",
+    "# Set the FRAUD_DATA_DIR environment variable to point at a local copy of the data;\n",
+    "# the original author's directory is kept as the fallback so existing runs still work.\n",
+    "folder = os.environ.get(\"FRAUD_DATA_DIR\", \"/home/vaclav/Data/Kaggle/EEE-CIS_Fraud_Detection\")\n",
+    "files = [\"train_transaction.csv\", \"train_identity.csv\"]\n",
+    "# paths[0] -> transactions, paths[1] -> identity (order is relied on by the benchmark cells)\n",
+    "paths = [os.path.join(folder, f) for f in files]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:55:27.733967Z",
+     "start_time": "2020-11-17T22:55:27.727006Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "stats = {}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:55:27.750077Z",
+     "start_time": "2020-11-17T22:55:27.737957Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'1.1.4'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:56:19.465362Z",
+     "start_time": "2020-11-17T22:55:27.767439Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Baseline: time each step with plain pandas and store the durations (seconds) under stats[\"pandas\"]\n",
+    "stats[\"pandas\"] = {}\n",
+    "s = stats[\"pandas\"]\n",
+    "\n",
+    "ts = time()\n",
+    "df = pd.read_csv(paths[0])\n",
+    "te = time()\n",
+    "s[\"load_transactions\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "df2 = pd.read_csv(paths[1])\n",
+    "te = time()\n",
+    "s[\"load_identity\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "dff = df.merge(df2, on=\"TransactionID\")\n",
+    "te = time()\n",
+    "s[\"merge\"] = te-ts\n",
+    "\n",
+    "# Column selection needs a list (double brackets); a bare tuple is looked up as one key.\n",
+    "# The trailing backslash continues the statement onto the .groupby line.\n",
+    "# Missing values are replaced with \"\" before grouping on the key columns.\n",
+    "ts = time()\n",
+    "grp = dff[[\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\",\"TransactionAmt\"]].fillna(\"\")\\\n",
+    ".groupby([\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"])[\"TransactionAmt\"].agg([\"mean\",\"sum\"])\n",
+    "te = time()\n",
+    "s[\"aggregation\"] = te-ts\n",
+    "\n",
+    "# Three successive in-place sorts with the key order rotated each time\n",
+    "ts = time()\n",
+    "dff.sort_values(by=[\"card1\",\"addr1\",\"D9\"], inplace=True)\n",
+    "dff.sort_values(by=[\"addr1\",\"D9\",\"card1\"], inplace=True)\n",
+    "dff.sort_values(by=[\"D9\",\"card1\",\"addr1\"], inplace=True)\n",
+    "te = time()\n",
+    "s[\"sorting\"] = te-ts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:56:19.519391Z",
+     "start_time": "2020-11-17T22:56:19.472412Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>pandas</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>aggregation</th>\n",
+       "      <td>0.075788</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>load_identity</th>\n",
+       "      <td>0.682109</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>load_transactions</th>\n",
+       "      <td>18.279765</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>merge</th>\n",
+       "      <td>3.196074</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sorting</th>\n",
+       "      <td>2.224164</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      pandas\n",
+       "aggregation         0.075788\n",
+       "load_identity       0.682109\n",
+       "load_transactions  18.279765\n",
+       "merge               3.196074\n",
+       "sorting             2.224164"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(stats)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dff.to_pickle(\"data/dff.pkl\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4553, 2)"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Because Julia's groupby includes N/A values, let's just check that the number of groups matches\n",
+    "grp = dff[[\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\",\"TransactionAmt\"]].fillna(\"~U~\")\\\n",
+    ".groupby([\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"])[\"TransactionAmt\"].agg([\"mean\",\"sum\"])\n",
+    "grp.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:56:19.550559Z",
+     "start_time": "2020-11-17T22:56:19.524963Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def clean(wait_time: int=15, names=(\"df\", \"df2\", \"dff\", \"grp\", \"dff_s\")):\n",
+    "    \"\"\"Release the benchmark DataFrames, run the garbage collector, then pause.\n",
+    "\n",
+    "    Assigning to `df`, `df2`, ... inside a function only creates local variables,\n",
+    "    so the notebook-level objects would stay referenced. The names are therefore\n",
+    "    rebound to None in the notebook's global namespace via globals().\n",
+    "\n",
+    "    wait_time: seconds to sleep after the collection (15 by default).\n",
+    "    names: global variable names to reset; names that do not exist are skipped.\n",
+    "    Always returns None.\n",
+    "    \"\"\"\n",
+    "    namespace = globals()\n",
+    "    for name in names:\n",
+    "        if name in namespace:\n",
+    "            namespace[name] = None\n",
+    "    gc.collect()\n",
+    "    sleep(wait_time)\n",
+    "    return None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:56:34.718818Z",
+     "start_time": "2020-11-17T22:56:19.559830Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "clean()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:56:34.813222Z",
+     "start_time": "2020-11-17T22:56:34.780299Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def list_variables_memory_usage() -> dict:\n",
+    "    \"\"\"Map each notebook-level variable name to its size as reported by sys.getsizeof.\n",
+    "\n",
+    "    locals() inside this function would only see the function's own (empty) scope,\n",
+    "    so the notebook's global namespace is inspected instead. Names starting with an\n",
+    "    underscore are skipped. sys.getsizeof is whatever each object's __sizeof__ reports,\n",
+    "    which for containers is typically a shallow size.\n",
+    "    \"\"\"\n",
+    "    # list(...) snapshots the namespace so it cannot change size while we iterate\n",
+    "    global_vars = list(globals().items())\n",
+    "    return {var: sys.getsizeof(obj) for var, obj in global_vars if not var.startswith(\"_\")}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dask\n",
+    "When to use dask - https://docs.dask.org/en/latest/dataframe.html#common-uses-and-anti-uses"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:57:00.280745Z",
+     "start_time": "2020-11-17T22:56:34.844985Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "stats[\"dask\"] = {}\n",
+    "s = stats[\"dask\"]\n",
+    "\n",
+    "ts = time()\n",
+    "df = dd.read_csv(paths[0])\n",
+    "te = time()\n",
+    "s[\"load_transactions\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "df2 = dd.read_csv(paths[1])\n",
+    "te = time()\n",
+    "s[\"load_identity\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "dff = df.merge(df2, on=\"TransactionID\")\n",
+    "te = time()\n",
+    "s[\"merge\"] = te-ts\n",
+    "\n",
+    "# the difference is that we call compute method, which runs all the computations at this point\n",
+    "ts = time()\n",
+    "grp = dff.groupby([\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"])[\"TransactionAmt\"]\\\n",
+    "    .agg([\"mean\",\"sum\"])\\\n",
+    "    .compute()\n",
+    "te = time()\n",
+    "s[\"aggregation\"] = te-ts\n",
+    "\n",
+    "# Parallel sorting is tricky, which is why dask only offers workarounds such as set_index.\n",
+    "ts = time()\n",
+    "dff.set_index(\"card1\").compute()\n",
+    "te = time()\n",
+    "s[\"sorting\"] = te-ts\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:57:15.539155Z",
+     "start_time": "2020-11-17T22:57:00.286799Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "clean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:57:15.580380Z",
+     "start_time": "2020-11-17T22:57:15.546567Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>pandas</th>\n",
+       "      <th>dask</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>load_transactions</th>\n",
+       "      <td>18.279765</td>\n",
+       "      <td>0.083901</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>load_identity</th>\n",
+       "      <td>0.682109</td>\n",
+       "      <td>0.028268</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>merge</th>\n",
+       "      <td>3.196074</td>\n",
+       "      <td>0.073891</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>aggregation</th>\n",
+       "      <td>0.075788</td>\n",
+       "      <td>20.837958</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sorting</th>\n",
+       "      <td>2.224164</td>\n",
+       "      <td>71.282675</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      pandas       dask\n",
+       "load_transactions  18.279765   0.083901\n",
+       "load_identity       0.682109   0.028268\n",
+       "merge               3.196074   0.073891\n",
+       "aggregation         0.075788  20.837958\n",
+       "sorting             2.224164  71.282675"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(stats)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stats[\"dask_indexed\"] = {}\n",
+    "s = stats[\"dask_indexed\"]\n",
+    "\n",
+    "ts = time()\n",
+    "df = dd.read_csv(paths[0]).set_index(\"TransactionID\")\n",
+    "te = time()\n",
+    "s[\"load_transactions\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "df2 = dd.read_csv(paths[1]).set_index(\"TransactionID\")\n",
+    "te = time()\n",
+    "s[\"load_identity\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "dff = df.merge(df2, left_index=True, right_index=True)\n",
+    "te = time()\n",
+    "s[\"merge\"] = te-ts\n",
+    "\n",
+    "# the difference is that we call compute method, which runs all the computations at this point\n",
+    "ts = time()\n",
+    "grp = dff.groupby([\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"])[\"TransactionAmt\"]\\\n",
+    "    .agg([\"mean\",\"sum\"])\\\n",
+    "    .compute()\n",
+    "te = time()\n",
+    "s[\"aggregation\"] = te-ts\n",
+    "\n",
+    "# Parallel sorting is tricky, which is why dask only offers workarounds such as set_index.\n",
+    "ts = time()\n",
+    "dff.set_index(\"card1\").compute()\n",
+    "te = time()\n",
+    "s[\"sorting\"] = te-ts\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>pandas</th>\n",
+       "      <th>dask</th>\n",
+       "      <th>dask_indexed</th>\n",
+       "      <th>vaex</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>load_transactions</th>\n",
+       "      <td>18.279765</td>\n",
+       "      <td>0.083901</td>\n",
+       "      <td>14.930128</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>load_identity</th>\n",
+       "      <td>0.682109</td>\n",
+       "      <td>0.028268</td>\n",
+       "      <td>0.761821</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>merge</th>\n",
+       "      <td>3.196074</td>\n",
+       "      <td>0.073891</td>\n",
+       "      <td>0.078762</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>aggregation</th>\n",
+       "      <td>0.075788</td>\n",
+       "      <td>20.837958</td>\n",
+       "      <td>23.130105</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sorting</th>\n",
+       "      <td>2.224164</td>\n",
+       "      <td>71.282675</td>\n",
+       "      <td>75.393628</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      pandas       dask  dask_indexed  vaex\n",
+       "load_transactions  18.279765   0.083901     14.930128   NaN\n",
+       "load_identity       0.682109   0.028268      0.761821   NaN\n",
+       "merge               3.196074   0.073891      0.078762   NaN\n",
+       "aggregation         0.075788  20.837958     23.130105   NaN\n",
+       "sorting             2.224164  71.282675     75.393628   NaN"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clean()\n",
+    "pd.DataFrame(stats)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:29:19.438715Z",
+     "start_time": "2020-11-17T22:29:19.429209Z"
+    }
+   },
+   "source": [
+    "# Vaex"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:03:06.560013Z",
+     "start_time": "2020-11-17T23:03:06.545427Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'vaex-core': '2.0.3',\n",
+       " 'vaex-viz': '0.4.0',\n",
+       " 'vaex-hdf5': '0.6.0',\n",
+       " 'vaex-server': '0.3.1',\n",
+       " 'vaex-astro': '0.7.0',\n",
+       " 'vaex-jupyter': '0.5.2',\n",
+       " 'vaex-ml': '0.9.0',\n",
+       " 'vaex-arrow': '0.5.1'}"
+      ]
+     },
+     "execution_count": 97,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "vaex.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:04:01.165275Z",
+     "start_time": "2020-11-17T23:03:06.562006Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "tool = \"vaex\"\n",
+    "stats[tool] = {}\n",
+    "s = stats[tool]\n",
+    "\n",
+    "\n",
+    "ts = time()\n",
+    "df = vaex.open(paths[0])\n",
+    "te = time()\n",
+    "s[\"load_transactions\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "df2 = vaex.open(paths[1])\n",
+    "te = time()\n",
+    "s[\"load_identity\"] = te-ts\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:04:01.642707Z",
+     "start_time": "2020-11-17T23:04:01.176085Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "ts = time()\n",
+    "dff = df.join(df2, on=\"TransactionID\")\n",
+    "te = time()\n",
+    "s[\"merge\"] = te-ts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:04:03.395316Z",
+     "start_time": "2020-11-17T23:04:01.645742Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# vaex groupby with explicit mean/sum aggregators; unlike the dask cells, no .compute() call is made here\n",
+    "ts = time()\n",
+    "grp = dff.groupby([dff[\"isFraud\"],dff[\"ProductCD\"],dff[\"card4\"],dff[\"card6\"],dff[\"id_15\"],dff[\"id_31\"]], \n",
+    "                  agg=[vaex.agg.mean('TransactionAmt'), vaex.agg.sum('TransactionAmt')])\n",
+    "te = time()\n",
+    "s[\"aggregation\"] = te-ts\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# three sorts with the key order rotated, mirroring the pandas benchmark; each result is assigned to dff_s\n",
+    "ts = time()\n",
+    "dff_s = dff.sort(by=[\"card1\",\"addr1\",\"D9\"])\n",
+    "dff_s = dff.sort(by=[\"addr1\",\"D9\",\"card1\"])\n",
+    "dff_s = dff.sort(by=[\"D9\",\"card1\",\"addr1\"])\n",
+    "te = time()\n",
+    "s[\"sorting\"] = te-ts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:04:03.469428Z",
+     "start_time": "2020-11-17T23:04:03.423857Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>pandas</th>\n",
+       "      <th>dask</th>\n",
+       "      <th>dask_indexed</th>\n",
+       "      <th>vaex</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>load_transactions</th>\n",
+       "      <td>18.279765</td>\n",
+       "      <td>0.083901</td>\n",
+       "      <td>14.930128</td>\n",
+       "      <td>18.734002</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>load_identity</th>\n",
+       "      <td>0.682109</td>\n",
+       "      <td>0.028268</td>\n",
+       "      <td>0.761821</td>\n",
+       "      <td>1.023915</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>merge</th>\n",
+       "      <td>3.196074</td>\n",
+       "      <td>0.073891</td>\n",
+       "      <td>0.078762</td>\n",
+       "      <td>0.131490</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>aggregation</th>\n",
+       "      <td>0.075788</td>\n",
+       "      <td>20.837958</td>\n",
+       "      <td>23.130105</td>\n",
+       "      <td>0.383996</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sorting</th>\n",
+       "      <td>2.224164</td>\n",
+       "      <td>71.282675</td>\n",
+       "      <td>75.393628</td>\n",
+       "      <td>1.035000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sort</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.329828</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      pandas       dask  dask_indexed       vaex\n",
+       "load_transactions  18.279765   0.083901     14.930128  18.734002\n",
+       "load_identity       0.682109   0.028268      0.761821   1.023915\n",
+       "merge               3.196074   0.073891      0.078762   0.131490\n",
+       "aggregation         0.075788  20.837958     23.130105   0.383996\n",
+       "sorting             2.224164  71.282675     75.393628   1.035000\n",
+       "sort                     NaN        NaN           NaN   0.329828"
+      ]
+     },
+     "execution_count": 113,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(stats)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:04:18.930053Z",
+     "start_time": "2020-11-17T23:04:03.543914Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "clean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# PySpark"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:04:27.010489Z",
+     "start_time": "2020-11-17T23:04:18.932048Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from pyspark import SparkContext\n",
+    "# Start a throwaway context only to read the Spark version, and stop it again\n",
+    "# before the SparkSession is built in the next cell.\n",
+    "sc = SparkContext()\n",
+    "spark_version = sc.version\n",
+    "sc.stop()\n",
+    "# A bare `sc.version` in the middle of the cell is discarded; make it the last expression so it is displayed\n",
+    "spark_version"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:04:29.416261Z",
+     "start_time": "2020-11-17T23:04:27.011485Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Create my_spark\n",
+    "my_spark = SparkSession.builder \\\n",
+    "    .master(\"local\") \\\n",
+    "    .appName(\"Pandas Alternative\") \\\n",
+    "    .config(\"spark.some.config.option\", \"some-value\") \\\n",
+    "    .getOrCreate()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:05:33.528531Z",
+     "start_time": "2020-11-17T23:04:29.419253Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "tool = \"spark\"\n",
+    "stats[tool] = {}\n",
+    "s = stats[tool]\n",
+    "\n",
+    "\n",
+    "ts = time()\n",
+    "df = my_spark.read.csv(paths[0],inferSchema = True,header= True) \n",
+    "te = time()\n",
+    "s[\"load_transactions\"] = te-ts\n",
+    "\n",
+    "ts = time()\n",
+    "df2 = my_spark.read.csv(paths[1],inferSchema = True,header= True) \n",
+    "te = time()\n",
+    "s[\"load_identity\"] = te-ts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:05:33.849489Z",
+     "start_time": "2020-11-17T23:05:33.534687Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "ts = time()\n",
+    "dff = df.join(df2, \"TransactionID\")\n",
+    "te = time()\n",
+    "s[\"merge\"] = te-ts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:06:01.148952Z",
+     "start_time": "2020-11-17T23:05:33.851490Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# the difference is that we call collect method, which runs all the computations at this point\n",
+    "#ts = time()\n",
+    "#grp = dff.groupby([dff[\"isFraud\"],dff[\"ProductCD\"],dff[\"card4\"],dff[\"card6\"],dff[\"id_15\"],dff[\"id_31\"]]) \\\n",
+    "#        .agg(avg(\"TransactionAmt\"), sum(\"TransactionAmt\"))\\\n",
+    "#        .collect()\n",
+    "#te = time()\n",
+    "#s[\"aggregation\"] = te-ts\n",
+    "#s[\"all\"] = te-tss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:13:24.332254Z",
+     "start_time": "2020-11-17T23:13:03.641149Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# the difference is that we call collect method, which runs all the computations at this point\n",
+    "ts = time()\n",
+    "grp = dff.groupby([\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"]) \\\n",
+    "        .agg(avg(\"TransactionAmt\"), sum(\"TransactionAmt\"))\\\n",
+    "        .collect()\n",
+    "te = time()\n",
+    "s[\"aggregation\"] = te-ts\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:06:25.157340Z",
+     "start_time": "2020-11-17T23:06:25.118349Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>pandas</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>aggregation</th>\n",
+       "      <td>0.060114</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>all</th>\n",
+       "      <td>19.908346</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>load_identity</th>\n",
+       "      <td>0.480164</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>load_transactions</th>\n",
+       "      <td>17.354527</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>merge</th>\n",
+       "      <td>2.013150</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Total</th>\n",
+       "      <td>39.816302</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      pandas\n",
+       "aggregation         0.060114\n",
+       "all                19.908346\n",
+       "load_identity       0.480164\n",
+       "load_transactions  17.354527\n",
+       "merge               2.013150\n",
+       "Total              39.816302"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "stats_df = pd.DataFrame(stats)\n",
+    "stats_df.loc['Total'] = stats_df.sum()\n",
+    "stats_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:16:19.569545Z",
+     "start_time": "2020-11-17T22:16:19.559625Z"
+    }
+   },
+   "source": [
+    "# Modin"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:57:15.609009Z",
+     "start_time": "2020-11-17T22:57:15.586070Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'0.8.2'"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mpd.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:00:06.292260Z",
+     "start_time": "2020-11-17T22:58:42.702035Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n",
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n",
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n",
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n",
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n",
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n"
+     ]
+    },
+    {
+     "ename": "KilledWorker",
+     "evalue": "('lambda-dc847cac2df298f0ded2b3e426e3824d', <Worker 'tcp://127.0.0.1:56445', name: 7, memory: 0, processing: 3>)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKilledWorker\u001b[0m                              Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-15-7c0f6249d513>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[0mts\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpaths\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      8\u001b[0m \u001b[0mte\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      9\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"load_transactions\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mte\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mts\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\modin\\pandas\\io.py\u001b[0m in \u001b[0;36mparser_func\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[0;32m    107\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"sep\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msep\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    108\u001b[0m             \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"sep\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"\\t\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 109\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    110\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    111\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mparser_func\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\modin\\pandas\\io.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(**kwargs)\u001b[0m\n\u001b[0;32m    125\u001b[0m     \u001b[1;32mfrom\u001b[0m \u001b[0mmodin\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata_management\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfactories\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdispatcher\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mEngineDispatcher\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    126\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 127\u001b[1;33m     \u001b[0mpd_obj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mEngineDispatcher\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    128\u001b[0m     \u001b[1;31m# This happens when `read_csv` returns a TextFileReader object for iterating through\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    129\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpd_obj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparsers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTextFileReader\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\modin\\data_management\\factories\\dispatcher.py\u001b[0m in \u001b[0;36mread_csv\u001b[1;34m(cls, **kwargs)\u001b[0m\n\u001b[0;32m    102\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    103\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 104\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_read_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    105\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    106\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\modin\\data_management\\factories\\factories.py\u001b[0m in \u001b[0;36m_read_csv\u001b[1;34m(cls, **kwargs)\u001b[0m\n\u001b[0;32m     85\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     86\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_read_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 87\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mio_cls\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     88\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     89\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\modin\\engines\\base\\io\\file_reader.py\u001b[0m in \u001b[0;36mread\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m     27\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     28\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 29\u001b[1;33m         \u001b[0mquery_compiler\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     30\u001b[0m         \u001b[1;31m# TODO (devin-petersohn): Make this section more general for non-pandas kernel\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     31\u001b[0m         \u001b[1;31m# implementations.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\modin\\engines\\base\\io\\text\\csv_reader.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(cls, filepath_or_buffer, **kwargs)\u001b[0m\n\u001b[0;32m    174\u001b[0m         \u001b[1;31m# or based on the column(s) that were requested.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    175\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mindex_col\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 176\u001b[1;33m             \u001b[0mrow_lengths\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmaterialize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindex_ids\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    177\u001b[0m             \u001b[0mnew_index\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mRangeIndex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrow_lengths\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    178\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\modin\\engines\\dask\\task_wrapper.py\u001b[0m in \u001b[0;36mmaterialize\u001b[1;34m(cls, future)\u001b[0m\n\u001b[0;32m     28\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mmaterialize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     29\u001b[0m         \u001b[0mclient\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_global_client\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 30\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mclient\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgather\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfuture\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36mgather\u001b[1;34m(self, futures, errors, direct, asynchronous)\u001b[0m\n\u001b[0;32m   1990\u001b[0m                 \u001b[0mdirect\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdirect\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1991\u001b[0m                 \u001b[0mlocal_worker\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlocal_worker\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1992\u001b[1;33m                 \u001b[0masynchronous\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0masynchronous\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1993\u001b[0m             )\n\u001b[0;32m   1994\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36msync\u001b[1;34m(self, func, asynchronous, callback_timeout, *args, **kwargs)\u001b[0m\n\u001b[0;32m    831\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    832\u001b[0m             return sync(\n\u001b[1;32m--> 833\u001b[1;33m                 \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback_timeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcallback_timeout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    834\u001b[0m             )\n\u001b[0;32m    835\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\distributed\\utils.py\u001b[0m in \u001b[0;36msync\u001b[1;34m(loop, func, callback_timeout, *args, **kwargs)\u001b[0m\n\u001b[0;32m    338\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0merror\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    339\u001b[0m         \u001b[0mtyp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0merror\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 340\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    341\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    342\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\distributed\\utils.py\u001b[0m in \u001b[0;36mf\u001b[1;34m()\u001b[0m\n\u001b[0;32m    322\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mcallback_timeout\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    323\u001b[0m                 \u001b[0mfuture\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0masyncio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwait_for\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfuture\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback_timeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 324\u001b[1;33m             \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32myield\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    325\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    326\u001b[0m             \u001b[0merror\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\tornado\\gen.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    733\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    734\u001b[0m                     \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 735\u001b[1;33m                         \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    736\u001b[0m                     \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    737\u001b[0m                         \u001b[0mexc_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\envs\\big_tables\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36m_gather\u001b[1;34m(self, futures, errors, direct, local_worker)\u001b[0m\n\u001b[0;32m   1849\u001b[0m                             \u001b[0mexc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mCancelledError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1850\u001b[0m                         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1851\u001b[1;33m                             \u001b[1;32mraise\u001b[0m \u001b[0mexception\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1852\u001b[0m                         \u001b[1;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1853\u001b[0m                     \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"skip\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mKilledWorker\u001b[0m: ('lambda-dc847cac2df298f0ded2b3e426e3824d', <Worker 'tcp://127.0.0.1:56445', name: 7, memory: 0, processing: 3>)"
+     ]
+    }
+   ],
+   "source": [
+    "# Benchmark modin: time the two CSV loads, the merge and the grouped aggregation,\n",
+    "# storing the wall-clock seconds of each step under stats[\"modin\"].\n",
+    "tool = \"modin\"\n",
+    "stats[tool] = {}\n",
+    "s = stats[tool]\n",
+    "\n",
+    "# load the transactions table\n",
+    "ts = time()\n",
+    "df = mpd.read_csv(paths[0])\n",
+    "te = time()\n",
+    "s[\"load_transactions\"] = te-ts\n",
+    "\n",
+    "# load the identity table\n",
+    "ts = time()\n",
+    "df2 = mpd.read_csv(paths[1])\n",
+    "te = time()\n",
+    "s[\"load_identity\"] = te-ts\n",
+    "\n",
+    "# merge both tables on the shared TransactionID key\n",
+    "ts = time()\n",
+    "dff = df.merge(df2, on=\"TransactionID\")\n",
+    "te = time()\n",
+    "s[\"merge\"] = te-ts\n",
+    "\n",
+    "# modin defaults to pandas for multiple column aggregation and then fails on KeyError, though the key is available\n",
+    "# A timing is recorded only when the aggregation succeeds; a failed run is stored as NaN\n",
+    "# so the comparison table never reports time-to-failure as a benchmark result.\n",
+    "ts = time()\n",
+    "try:\n",
+    "    grp = dff.groupby([\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"])[\"TransactionAmt\"].agg([\"mean\",\"sum\"])\n",
+    "except Exception as e:\n",
+    "    print(e)\n",
+    "    s[\"aggregation\"] = float(\"nan\")\n",
+    "else:\n",
+    "    te = time()\n",
+    "    s[\"aggregation\"] = te-ts\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T22:58:20.241434Z",
+     "start_time": "2020-11-17T22:55:17.025Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pd.DataFrame(stats)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-17T23:01:01.211911Z",
+     "start_time": "2020-11-17T23:00:45.850513Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n",
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n",
+      "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n",
+      "distributed.nanny - WARNING - Restarting worker\n"
+     ]
+    }
+   ],
+   "source": [
+    "clean()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "big-tables",
+   "language": "python",
+   "name": "big-tables"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/DataFrames/Julia.ipynb b/DataFrames/Julia.ipynb
new file mode 100644
index 0000000..7d7eeb8
--- /dev/null
+++ b/DataFrames/Julia.ipynb
@@ -0,0 +1,593 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-15T23:25:27.615000+01:00",
+     "start_time": "2020-11-15T22:25:00.718Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "using CSV\n",
+    "using DataFrames\n",
+    "using Dates\n",
+    "using Statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "v\"1.4.1\""
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Julia version of the running kernel, recorded so the timings below can be reproduced\n",
+    "VERSION"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-15T23:25:31.536000+01:00",
+     "start_time": "2020-11-15T22:25:00.722Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element Array{String,1}:\n",
+       " \"dff.pkl\"\n",
+       " \"sales_data_sample.csv\""
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# list the entries of the relative `data` directory (the benchmark itself reads its CSVs from `folder`, not from here)\n",
+    "readdir(\"data\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element Array{String,1}:\n",
+       " \"train_transaction.csv\"\n",
+       " \"train_identity.csv\""
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Directory holding the Kaggle CSVs. Set the FRAUD_DATA_DIR environment variable to run the\n",
+    "# notebook on another machine; without it the original location is used.\n",
+    "folder = get(ENV, \"FRAUD_DATA_DIR\", \"/home/vaclav/Data/Kaggle/EEE-CIS_Fraud_Detection\")\n",
+    "# transactions table first, identity table second (indexed below as files[1] and files[2])\n",
+    "files = [\"train_transaction.csv\", \"train_identity.csv\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"/home/vaclav/Data/Kaggle/EEE-CIS_Fraud_Detection/train_transaction.csv\""
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# sanity check: full path of the first entry of `files` (Julia arrays are 1-indexed)\n",
+    "joinpath(folder,files[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Dict{Any,Any} with 5 entries:\n",
+       "  \"merge\"             => 0.771\n",
+       "  \"sort\"              => 5.032\n",
+       "  \"load_transactions\" => 8.045\n",
+       "  \"aggregation\"       => 0.034\n",
+       "  \"load_identity\"     => 0.502"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Benchmark DataFrames.jl: each step is timed with @elapsed (seconds as Float64)\n",
+    "# and stored in `s` under the step name.\n",
+    "s = Dict()\n",
+    "\n",
+    "# load transactions ~600MB\n",
+    "s[\"load_transactions\"] = @elapsed df = CSV.read(joinpath(folder,files[1]), DataFrame)\n",
+    "\n",
+    "# load identity ~25MB\n",
+    "s[\"load_identity\"] = @elapsed df2 = CSV.read(joinpath(folder,files[2]), DataFrame)\n",
+    "\n",
+    "# join: innerjoin replaces join(...; kind = :inner), which is deprecated in DataFrames 0.21 and removed in 0.22\n",
+    "s[\"merge\"] = @elapsed dff = innerjoin(df, df2, on = \"TransactionID\")\n",
+    "\n",
+    "# group by\n",
+    "# NOTE(review): this computes maximum and mean, while the modin cell in ALL.ipynb computes mean and sum — confirm the mismatch is intended\n",
+    "s[\"aggregation\"] = @elapsed grp = combine(groupby(dff, [\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"]),\n",
+    "    :TransactionAmt=>maximum=>:TransactionAmountMax,\n",
+    "    :TransactionAmt=>mean=>:TransactionAmountMean)\n",
+    "\n",
+    "# sort: three in-place sorts, each with a different column priority, timed together\n",
+    "s[\"sort\"] = @elapsed begin\n",
+    "    sort!(dff, [\"card1\",\"addr1\",\"D9\"])\n",
+    "    sort!(dff, [\"addr1\",\"D9\",\"card1\"])\n",
+    "    sort!(dff, [\"D9\",\"card1\",\"addr1\"])\n",
+    "end\n",
+    "\n",
+    "# display the collected timings, as the original trailing push! did\n",
+    "s"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"data-frame\"><thead><tr><th></th><th>aggregation</th><th>load_identity</th><th>load_transactions</th><th>merge</th><th>sort</th></tr><tr><th></th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th><th>Float64</th></tr></thead><tbody><p>1 rows × 5 columns</p><tr><th>1</th><td>0.034</td><td>0.502</td><td>8.045</td><td>0.771</td><td>5.032</td></tr></tbody></table>"
+      ],
+      "text/latex": [
+       "\\begin{tabular}{r|ccccc}\n",
+       "\t& aggregation & load\\_identity & load\\_transactions & merge & sort\\\\\n",
+       "\t\\hline\n",
+       "\t& Float64 & Float64 & Float64 & Float64 & Float64\\\\\n",
+       "\t\\hline\n",
+       "\t1 & 0.034 & 0.502 & 8.045 & 0.771 & 5.032 \\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/plain": [
+       "1×5 DataFrame\n",
+       "│ Row │ aggregation │ load_identity │ load_transactions │ merge   │ sort    │\n",
+       "│     │ \u001b[90mFloat64\u001b[39m     │ \u001b[90mFloat64\u001b[39m       │ \u001b[90mFloat64\u001b[39m           │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+       "├─────┼─────────────┼───────────────┼───────────────────┼─────────┼─────────┤\n",
+       "│ 1   │ 0.034       │ 0.502         │ 8.045             │ 0.771   │ 5.032   │"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# one-row table of the timings, one column per benchmark step\n",
+    "DataFrame(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(144233, 434, 4553, 8)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# dimensions of the joined table and of the aggregate, as (rows, cols, rows, cols)\n",
+    "(size(dff)..., size(grp)...)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Groupby Details\n",
+    "https://dataframes.juliadata.org/stable/man/split_apply_combine/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<p><b>GroupedDataFrame with 4553 groups based on keys: isFraud, ProductCD, card4, card6, id_15, id_31</b></p><p><i>First Group (136 rows): isFraud = 0, ProductCD = \"H\", card4 = \"visa\", card6 = \"debit\", id_15 = \"Found\", id_31 = \"firefox 57.0\"</i></p><table class=\"data-frame\"><thead><tr><th></th><th>TransactionID</th><th>isFraud</th><th>TransactionDT</th><th>TransactionAmt</th><th>ProductCD</th><th>card1</th><th>card2</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>Int64</th><th>Float64</th><th>String</th><th>Int64</th><th>Float64?</th></tr></thead><tbody><tr><th>1</th><td>3067453</td><td>0</td><td>1729328</td><td>200.0</td><td>H</td><td>1030</td><td>157.0</td></tr><tr><th>2</th><td>3073296</td><td>0</td><td>1816710</td><td>100.0</td><td>H</td><td>1675</td><td>174.0</td></tr><tr><th>3</th><td>3061872</td><td>0</td><td>1642116</td><td>100.0</td><td>H</td><td>1974</td><td>111.0</td></tr><tr><th>4</th><td>3078930</td><td>0</td><td>1902540</td><td>100.0</td><td>H</td><td>6697</td><td>111.0</td></tr><tr><th>5</th><td>3038788</td><td>0</td><td>1211592</td><td>75.0</td><td>H</td><td>7508</td><td>321.0</td></tr><tr><th>6</th><td>3056336</td><td>0</td><td>1556463</td><td>25.0</td><td>H</td><td>9500</td><td>321.0</td></tr><tr><th>7</th><td>3026050</td><td>0</td><td>951882</td><td>50.0</td><td>H</td><td>9500</td><td>321.0</td></tr><tr><th>8</th><td>2999258</td><td>0</td><td>348274</td><td>50.0</td><td>H</td><td>10680</td><td>373.0</td></tr><tr><th>9</th><td>2995902</td><td>0</td><td>260116</td><td>40.0</td><td>H</td><td>12526</td><td>381.0</td></tr><tr><th>10</th><td>3124444</td><td>0</td><td>2767769</td><td>50.0</td><td>H</td><td>12839</td><td>321.0</td></tr><tr><th>11</th><td>3091878</td><td>0</td><td>2077478</td><td>40.0</td><td>H</td><td>2884</td><td>490.0</td></tr><tr><th>12</th><td>3011076</td><td>0</td><td>608878</td><td>50.0</td><td>H</td><td>7508</td><td>321.0</td></tr><tr><th>13</th><td>3170944</td><td>0</td><td>4067304</td><td>100.0</td><td>
H</td><td>7664</td><td>490.0</td></tr><tr><th>14</th><td>3170950</td><td>0</td><td>4067403</td><td>100.0</td><td>H</td><td>7664</td><td>490.0</td></tr><tr><th>15</th><td>3170951</td><td>0</td><td>4067438</td><td>100.0</td><td>H</td><td>7664</td><td>490.0</td></tr><tr><th>16</th><td>3145133</td><td>0</td><td>3289750</td><td>150.0</td><td>H</td><td>9112</td><td>250.0</td></tr><tr><th>17</th><td>3034874</td><td>0</td><td>1130093</td><td>300.0</td><td>H</td><td>10294</td><td>555.0</td></tr><tr><th>18</th><td>3170845</td><td>0</td><td>4065306</td><td>35.0</td><td>H</td><td>12695</td><td>490.0</td></tr><tr><th>19</th><td>3114169</td><td>0</td><td>2516387</td><td>30.0</td><td>H</td><td>5822</td><td>555.0</td></tr><tr><th>20</th><td>3264648</td><td>0</td><td>6746645</td><td>40.0</td><td>H</td><td>15497</td><td>490.0</td></tr><tr><th>21</th><td>3043704</td><td>0</td><td>1304889</td><td>25.0</td><td>H</td><td>16993</td><td>555.0</td></tr><tr><th>22</th><td>3191704</td><td>0</td><td>4678752</td><td>25.0</td><td>H</td><td>1323</td><td>268.0</td></tr><tr><th>23</th><td>3092620</td><td>0</td><td>2087380</td><td>100.0</td><td>H</td><td>2772</td><td>512.0</td></tr><tr><th>24</th><td>3023170</td><td>0</td><td>876581</td><td>50.0</td><td>H</td><td>17188</td><td>321.0</td></tr><tr><th>25</th><td>3062921</td><td>0</td><td>1654005</td><td>50.0</td><td>H</td><td>17496</td><td>554.0</td></tr><tr><th>26</th><td>3098962</td><td>0</td><td>2176776</td><td>75.0</td><td>H</td><td>12501</td><td>490.0</td></tr><tr><th>27</th><td>3099033</td><td>0</td><td>2177727</td><td>75.0</td><td>H</td><td>12501</td><td>490.0</td></tr><tr><th>28</th><td>3039596</td><td>0</td><td>1226005</td><td>50.0</td><td>H</td><td>14084</td><td>257.0</td></tr><tr><th>29</th><td>3057383</td><td>0</td><td>1570940</td><td>25.0</td><td>H</td><td>15377</td><td>555.0</td></tr><tr><th>30</th><td>3044025</td><td>0</td><td>1311408</td><td>25.0</td><td>H</td><td>15497</td><td>490.0</td></tr><tr><th>&vellip;</th><td>&vellip;</td><td>&
vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td></tr></tbody></table><p>&vellip;</p><p><i>Last Group (1 row): isFraud = 1, ProductCD = \"S\", card4 = \"visa\", card6 = \"credit\", id_15 = \"New\", id_31 = \"mobile safari 11.0\"</i></p><table class=\"data-frame\"><thead><tr><th></th><th>TransactionID</th><th>isFraud</th><th>TransactionDT</th><th>TransactionAmt</th><th>ProductCD</th><th>card1</th><th>card2</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>Int64</th><th>Float64</th><th>String</th><th>Int64</th><th>Float64?</th></tr></thead><tbody><tr><th>1</th><td>3216693</td><td>1</td><td>5439563</td><td>25.0</td><td>S</td><td>18375</td><td>174.0</td></tr></tbody></table>"
+      ],
+      "text/latex": [
+       "GroupedDataFrame with 4553 groups based on keys: isFraud, ProductCD, card4, card6, id\\_15, id\\_31\n",
+       "\n",
+       "First Group (136 rows): isFraud = 0, ProductCD = \"H\", card4 = \"visa\", card6 = \"debit\", id\\_15 = \"Found\", id\\_31 = \"firefox 57.0\"\n",
+       "\n",
+       "\\begin{tabular}{r|cccccccc}\n",
+       "\t& TransactionID & isFraud & TransactionDT & TransactionAmt & ProductCD & card1 & card2 & \\\\\n",
+       "\t\\hline\n",
+       "\t& Int64 & Int64 & Int64 & Float64 & String & Int64 & Float64? & \\\\\n",
+       "\t\\hline\n",
+       "\t1 & 3067453 & 0 & 1729328 & 200.0 & H & 1030 & 157.0 & $\\dots$ \\\\\n",
+       "\t2 & 3073296 & 0 & 1816710 & 100.0 & H & 1675 & 174.0 & $\\dots$ \\\\\n",
+       "\t3 & 3061872 & 0 & 1642116 & 100.0 & H & 1974 & 111.0 & $\\dots$ \\\\\n",
+       "\t4 & 3078930 & 0 & 1902540 & 100.0 & H & 6697 & 111.0 & $\\dots$ \\\\\n",
+       "\t5 & 3038788 & 0 & 1211592 & 75.0 & H & 7508 & 321.0 & $\\dots$ \\\\\n",
+       "\t6 & 3056336 & 0 & 1556463 & 25.0 & H & 9500 & 321.0 & $\\dots$ \\\\\n",
+       "\t7 & 3026050 & 0 & 951882 & 50.0 & H & 9500 & 321.0 & $\\dots$ \\\\\n",
+       "\t8 & 2999258 & 0 & 348274 & 50.0 & H & 10680 & 373.0 & $\\dots$ \\\\\n",
+       "\t9 & 2995902 & 0 & 260116 & 40.0 & H & 12526 & 381.0 & $\\dots$ \\\\\n",
+       "\t10 & 3124444 & 0 & 2767769 & 50.0 & H & 12839 & 321.0 & $\\dots$ \\\\\n",
+       "\t11 & 3091878 & 0 & 2077478 & 40.0 & H & 2884 & 490.0 & $\\dots$ \\\\\n",
+       "\t12 & 3011076 & 0 & 608878 & 50.0 & H & 7508 & 321.0 & $\\dots$ \\\\\n",
+       "\t13 & 3170944 & 0 & 4067304 & 100.0 & H & 7664 & 490.0 & $\\dots$ \\\\\n",
+       "\t14 & 3170950 & 0 & 4067403 & 100.0 & H & 7664 & 490.0 & $\\dots$ \\\\\n",
+       "\t15 & 3170951 & 0 & 4067438 & 100.0 & H & 7664 & 490.0 & $\\dots$ \\\\\n",
+       "\t16 & 3145133 & 0 & 3289750 & 150.0 & H & 9112 & 250.0 & $\\dots$ \\\\\n",
+       "\t17 & 3034874 & 0 & 1130093 & 300.0 & H & 10294 & 555.0 & $\\dots$ \\\\\n",
+       "\t18 & 3170845 & 0 & 4065306 & 35.0 & H & 12695 & 490.0 & $\\dots$ \\\\\n",
+       "\t19 & 3114169 & 0 & 2516387 & 30.0 & H & 5822 & 555.0 & $\\dots$ \\\\\n",
+       "\t20 & 3264648 & 0 & 6746645 & 40.0 & H & 15497 & 490.0 & $\\dots$ \\\\\n",
+       "\t21 & 3043704 & 0 & 1304889 & 25.0 & H & 16993 & 555.0 & $\\dots$ \\\\\n",
+       "\t22 & 3191704 & 0 & 4678752 & 25.0 & H & 1323 & 268.0 & $\\dots$ \\\\\n",
+       "\t23 & 3092620 & 0 & 2087380 & 100.0 & H & 2772 & 512.0 & $\\dots$ \\\\\n",
+       "\t24 & 3023170 & 0 & 876581 & 50.0 & H & 17188 & 321.0 & $\\dots$ \\\\\n",
+       "\t25 & 3062921 & 0 & 1654005 & 50.0 & H & 17496 & 554.0 & $\\dots$ \\\\\n",
+       "\t26 & 3098962 & 0 & 2176776 & 75.0 & H & 12501 & 490.0 & $\\dots$ \\\\\n",
+       "\t27 & 3099033 & 0 & 2177727 & 75.0 & H & 12501 & 490.0 & $\\dots$ \\\\\n",
+       "\t28 & 3039596 & 0 & 1226005 & 50.0 & H & 14084 & 257.0 & $\\dots$ \\\\\n",
+       "\t29 & 3057383 & 0 & 1570940 & 25.0 & H & 15377 & 555.0 & $\\dots$ \\\\\n",
+       "\t30 & 3044025 & 0 & 1311408 & 25.0 & H & 15497 & 490.0 & $\\dots$ \\\\\n",
+       "\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ &  \\\\\n",
+       "\\end{tabular}\n",
+       "\n",
+       "$\\dots$\n",
+       "\n",
+       "Last Group (1 row): isFraud = 1, ProductCD = \"S\", card4 = \"visa\", card6 = \"credit\", id\\_15 = \"New\", id\\_31 = \"mobile safari 11.0\"\n",
+       "\n",
+       "\\begin{tabular}{r|cccccccc}\n",
+       "\t& TransactionID & isFraud & TransactionDT & TransactionAmt & ProductCD & card1 & card2 & \\\\\n",
+       "\t\\hline\n",
+       "\t& Int64 & Int64 & Int64 & Float64 & String & Int64 & Float64? & \\\\\n",
+       "\t\\hline\n",
+       "\t1 & 3216693 & 1 & 5439563 & 25.0 & S & 18375 & 174.0 & $\\dots$ \\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/plain": [
+       "GroupedDataFrame with 4553 groups based on keys: isFraud, ProductCD, card4, card6, id_15, id_31\n",
+       "First Group (136 rows): isFraud = 0, ProductCD = \"H\", card4 = \"visa\", card6 = \"debit\", id_15 = \"Found\", id_31 = \"firefox 57.0\". Omitted printing of 429 columns\n",
+       "│ Row │ TransactionID │ isFraud │ TransactionDT │ TransactionAmt │ ProductCD │\n",
+       "│     │ \u001b[90mInt64\u001b[39m         │ \u001b[90mInt64\u001b[39m   │ \u001b[90mInt64\u001b[39m         │ \u001b[90mFloat64\u001b[39m        │ \u001b[90mString\u001b[39m    │\n",
+       "├─────┼───────────────┼─────────┼───────────────┼────────────────┼───────────┤\n",
+       "│ 1   │ 3067453       │ 0       │ 1729328       │ 200.0          │ H         │\n",
+       "│ 2   │ 3073296       │ 0       │ 1816710       │ 100.0          │ H         │\n",
+       "│ 3   │ 3061872       │ 0       │ 1642116       │ 100.0          │ H         │\n",
+       "│ 4   │ 3078930       │ 0       │ 1902540       │ 100.0          │ H         │\n",
+       "│ 5   │ 3038788       │ 0       │ 1211592       │ 75.0           │ H         │\n",
+       "│ 6   │ 3056336       │ 0       │ 1556463       │ 25.0           │ H         │\n",
+       "│ 7   │ 3026050       │ 0       │ 951882        │ 50.0           │ H         │\n",
+       "│ 8   │ 2999258       │ 0       │ 348274        │ 50.0           │ H         │\n",
+       "│ 9   │ 2995902       │ 0       │ 260116        │ 40.0           │ H         │\n",
+       "│ 10  │ 3124444       │ 0       │ 2767769       │ 50.0           │ H         │\n",
+       "⋮\n",
+       "│ 126 │ 3002736       │ 0       │ 426917        │ 25.0           │ H         │\n",
+       "│ 127 │ 3066817       │ 0       │ 1723040       │ 50.0           │ H         │\n",
+       "│ 128 │ 3096864       │ 0       │ 2154200       │ 25.0           │ H         │\n",
+       "│ 129 │ 3097062       │ 0       │ 2156024       │ 50.0           │ H         │\n",
+       "│ 130 │ 2998791       │ 0       │ 341460        │ 75.0           │ H         │\n",
+       "│ 131 │ 3090745       │ 0       │ 2067079       │ 30.0           │ H         │\n",
+       "│ 132 │ 3022302       │ 0       │ 861372        │ 50.0           │ H         │\n",
+       "│ 133 │ 3056137       │ 0       │ 1553999       │ 125.0          │ H         │\n",
+       "│ 134 │ 3085711       │ 0       │ 1988068       │ 30.0           │ H         │\n",
+       "│ 135 │ 3099225       │ 0       │ 2180952       │ 50.0           │ H         │\n",
+       "│ 136 │ 3051141       │ 0       │ 1453322       │ 75.0           │ H         │\n",
+       "⋮\n",
+       "Last Group (1 row): isFraud = 1, ProductCD = \"S\", card4 = \"visa\", card6 = \"credit\", id_15 = \"New\", id_31 = \"mobile safari 11.0\". Omitted printing of 429 columns\n",
+       "│ Row │ TransactionID │ isFraud │ TransactionDT │ TransactionAmt │ ProductCD │\n",
+       "│     │ \u001b[90mInt64\u001b[39m         │ \u001b[90mInt64\u001b[39m   │ \u001b[90mInt64\u001b[39m         │ \u001b[90mFloat64\u001b[39m        │ \u001b[90mString\u001b[39m    │\n",
+       "├─────┼───────────────┼─────────┼───────────────┼────────────────┼───────────┤\n",
+       "│ 1   │ 3216693       │ 1       │ 5439563       │ 25.0           │ S         │"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "groupby(dff, [\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"data-frame\"><thead><tr><th></th><th>isFraud</th><th>ProductCD</th><th>card4</th><th>card6</th><th>id_15</th><th>id_31</th><th>TransactionAmountMax</th><th>TransactionAmountMean</th></tr><tr><th></th><th>Int64</th><th>String</th><th>String?</th><th>String?</th><th>String?</th><th>String?</th><th>Float64</th><th>Float64</th></tr></thead><tbody><p>4,553 rows × 8 columns</p><tr><th>1</th><td>0</td><td>H</td><td>visa</td><td>debit</td><td>Found</td><td>firefox 57.0</td><td>300.0</td><td>71.1765</td></tr><tr><th>2</th><td>0</td><td>R</td><td>visa</td><td>credit</td><td>Found</td><td>ie 11.0 for desktop</td><td>1000.0</td><td>208.58</td></tr><tr><th>3</th><td>1</td><td>R</td><td>visa</td><td>debit</td><td>Found</td><td>chrome 63.0 for android</td><td>300.0</td><td>216.667</td></tr><tr><th>4</th><td>0</td><td>C</td><td>visa</td><td>credit</td><td>Found</td><td>chrome 65.0</td><td>410.373</td><td>49.3293</td></tr><tr><th>5</th><td>0</td><td>R</td><td>visa</td><td>debit</td><td>Found</td><td>chrome 62.0 for android</td><td>200.0</td><td>98.9583</td></tr><tr><th>6</th><td>1</td><td>C</td><td>mastercard</td><td>credit</td><td>New</td><td>chrome 63.0</td><td>225.504</td><td>47.2223</td></tr><tr><th>7</th><td>0</td><td>C</td><td>mastercard</td><td>credit</td><td>Found</td><td>chrome 62.0 for android</td><td>154.071</td><td>44.5909</td></tr><tr><th>8</th><td>0</td><td>C</td><td>mastercard</td><td>credit</td><td>Found</td><td>chrome 63.0</td><td>302.111</td><td>50.8962</td></tr><tr><th>9</th><td>1</td><td>C</td><td>mastercard</td><td>credit</td><td>Found</td><td>chrome 63.0</td><td>265.498</td><td>44.8802</td></tr><tr><th>10</th><td>0</td><td>C</td><td>mastercard</td><td>credit</td><td>New</td><td>chrome 63.0</td><td>302.111</td><td>48.2015</td></tr><tr><th>11</th><td>0</td><td>C</td><td>mastercard</td><td>credit</td><td>Found</td><td>safari 
generic</td><td>141.158</td><td>39.4613</td></tr><tr><th>12</th><td>0</td><td>C</td><td>mastercard</td><td>credit</td><td>Unknown</td><td>safari generic</td><td>221.54</td><td>51.347</td></tr><tr><th>13</th><td>0</td><td>C</td><td>mastercard</td><td>credit</td><td>Found</td><td>mobile safari generic</td><td>162.676</td><td>53.6838</td></tr><tr><th>14</th><td>0</td><td>R</td><td>visa</td><td>credit</td><td>New</td><td>chrome 64.0</td><td>350.0</td><td>138.889</td></tr><tr><th>15</th><td>0</td><td>R</td><td>visa</td><td>credit</td><td>Found</td><td>chrome 64.0</td><td>450.0</td><td>153.112</td></tr><tr><th>16</th><td>0</td><td>R</td><td>visa</td><td>credit</td><td>Found</td><td>chrome 65.0</td><td>900.0</td><td>177.778</td></tr><tr><th>17</th><td>0</td><td>C</td><td>mastercard</td><td>credit</td><td>Found</td><td>chrome 64.0</td><td>220.171</td><td>50.7401</td></tr><tr><th>18</th><td>1</td><td>C</td><td>mastercard</td><td>credit</td><td>Found</td><td>chrome 65.0 for android</td><td>185.67</td><td>53.1863</td></tr><tr><th>19</th><td>0</td><td>C</td><td>visa</td><td>debit</td><td>Found</td><td>chrome 63.0</td><td>422.558</td><td>43.4618</td></tr><tr><th>20</th><td>0</td><td>C</td><td>visa</td><td>debit</td><td>Found</td><td>chrome 64.0 for android</td><td>268.265</td><td>28.9167</td></tr><tr><th>21</th><td>0</td><td>C</td><td>visa</td><td>debit</td><td>Found</td><td>firefox 57.0</td><td>204.633</td><td>44.2996</td></tr><tr><th>22</th><td>0</td><td>C</td><td>visa</td><td>debit</td><td>Found</td><td>chrome 65.0 for android</td><td>230.079</td><td>32.405</td></tr><tr><th>23</th><td>0</td><td>C</td><td>visa</td><td>debit</td><td>Found</td><td>chrome 60.0 for android</td><td>96.37</td><td>31.5289</td></tr><tr><th>24</th><td>0</td><td>C</td><td>visa</td><td>credit</td><td>Found</td><td>chrome 62.0</td><td>412.764</td><td>48.7786</td></tr><tr><th>25</th><td>0</td><td>C</td><td>mastercard</td><td>debit</td><td>Found</td><td>chrome 64.0 for 
android</td><td>162.953</td><td>26.765</td></tr><tr><th>26</th><td>0</td><td>C</td><td>mastercard</td><td>debit</td><td>Found</td><td>chrome 65.0 for android</td><td>283.37</td><td>32.5836</td></tr><tr><th>27</th><td>0</td><td>R</td><td>visa</td><td>credit</td><td>Found</td><td>mobile safari generic</td><td>1000.0</td><td>167.918</td></tr><tr><th>28</th><td>0</td><td>H</td><td>visa</td><td>credit</td><td>Found</td><td>chrome 62.0 for ios</td><td>50.0</td><td>40.0</td></tr><tr><th>29</th><td>0</td><td>H</td><td>visa</td><td>credit</td><td>Found</td><td>chrome 64.0</td><td>450.0</td><td>96.2881</td></tr><tr><th>30</th><td>0</td><td>S</td><td>visa</td><td>credit</td><td>New</td><td>chrome generic</td><td>200.0</td><td>47.7391</td></tr><tr><th>&vellip;</th><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td></tr></tbody></table>"
+      ],
+      "text/latex": [
+       "\\begin{tabular}{r|cccccccc}\n",
+       "\t& isFraud & ProductCD & card4 & card6 & id\\_15 & id\\_31 & TransactionAmountMax & TransactionAmountMean\\\\\n",
+       "\t\\hline\n",
+       "\t& Int64 & String & String? & String? & String? & String? & Float64 & Float64\\\\\n",
+       "\t\\hline\n",
+       "\t1 & 0 & H & visa & debit & Found & firefox 57.0 & 300.0 & 71.1765 \\\\\n",
+       "\t2 & 0 & R & visa & credit & Found & ie 11.0 for desktop & 1000.0 & 208.58 \\\\\n",
+       "\t3 & 1 & R & visa & debit & Found & chrome 63.0 for android & 300.0 & 216.667 \\\\\n",
+       "\t4 & 0 & C & visa & credit & Found & chrome 65.0 & 410.373 & 49.3293 \\\\\n",
+       "\t5 & 0 & R & visa & debit & Found & chrome 62.0 for android & 200.0 & 98.9583 \\\\\n",
+       "\t6 & 1 & C & mastercard & credit & New & chrome 63.0 & 225.504 & 47.2223 \\\\\n",
+       "\t7 & 0 & C & mastercard & credit & Found & chrome 62.0 for android & 154.071 & 44.5909 \\\\\n",
+       "\t8 & 0 & C & mastercard & credit & Found & chrome 63.0 & 302.111 & 50.8962 \\\\\n",
+       "\t9 & 1 & C & mastercard & credit & Found & chrome 63.0 & 265.498 & 44.8802 \\\\\n",
+       "\t10 & 0 & C & mastercard & credit & New & chrome 63.0 & 302.111 & 48.2015 \\\\\n",
+       "\t11 & 0 & C & mastercard & credit & Found & safari generic & 141.158 & 39.4613 \\\\\n",
+       "\t12 & 0 & C & mastercard & credit & Unknown & safari generic & 221.54 & 51.347 \\\\\n",
+       "\t13 & 0 & C & mastercard & credit & Found & mobile safari generic & 162.676 & 53.6838 \\\\\n",
+       "\t14 & 0 & R & visa & credit & New & chrome 64.0 & 350.0 & 138.889 \\\\\n",
+       "\t15 & 0 & R & visa & credit & Found & chrome 64.0 & 450.0 & 153.112 \\\\\n",
+       "\t16 & 0 & R & visa & credit & Found & chrome 65.0 & 900.0 & 177.778 \\\\\n",
+       "\t17 & 0 & C & mastercard & credit & Found & chrome 64.0 & 220.171 & 50.7401 \\\\\n",
+       "\t18 & 1 & C & mastercard & credit & Found & chrome 65.0 for android & 185.67 & 53.1863 \\\\\n",
+       "\t19 & 0 & C & visa & debit & Found & chrome 63.0 & 422.558 & 43.4618 \\\\\n",
+       "\t20 & 0 & C & visa & debit & Found & chrome 64.0 for android & 268.265 & 28.9167 \\\\\n",
+       "\t21 & 0 & C & visa & debit & Found & firefox 57.0 & 204.633 & 44.2996 \\\\\n",
+       "\t22 & 0 & C & visa & debit & Found & chrome 65.0 for android & 230.079 & 32.405 \\\\\n",
+       "\t23 & 0 & C & visa & debit & Found & chrome 60.0 for android & 96.37 & 31.5289 \\\\\n",
+       "\t24 & 0 & C & visa & credit & Found & chrome 62.0 & 412.764 & 48.7786 \\\\\n",
+       "\t25 & 0 & C & mastercard & debit & Found & chrome 64.0 for android & 162.953 & 26.765 \\\\\n",
+       "\t26 & 0 & C & mastercard & debit & Found & chrome 65.0 for android & 283.37 & 32.5836 \\\\\n",
+       "\t27 & 0 & R & visa & credit & Found & mobile safari generic & 1000.0 & 167.918 \\\\\n",
+       "\t28 & 0 & H & visa & credit & Found & chrome 62.0 for ios & 50.0 & 40.0 \\\\\n",
+       "\t29 & 0 & H & visa & credit & Found & chrome 64.0 & 450.0 & 96.2881 \\\\\n",
+       "\t30 & 0 & S & visa & credit & New & chrome generic & 200.0 & 47.7391 \\\\\n",
+       "\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/plain": [
+       "4553×8 DataFrame. Omitted printing of 1 columns\n",
+       "│ Row  │ isFraud │ ProductCD │ card4            │ card6   │ id_15   │ id_31                   │ TransactionAmountMax │\n",
+       "│      │ \u001b[90mInt64\u001b[39m   │ \u001b[90mString\u001b[39m    │ \u001b[90mString?\u001b[39m          │ \u001b[90mString?\u001b[39m │ \u001b[90mString?\u001b[39m │ \u001b[90mUnion{Missing, String}\u001b[39m  │ \u001b[90mFloat64\u001b[39m              │\n",
+       "├──────┼─────────┼───────────┼──────────────────┼─────────┼─────────┼─────────────────────────┼──────────────────────┤\n",
+       "│ 1    │ 0       │ H         │ visa             │ debit   │ Found   │ firefox 57.0            │ 300.0                │\n",
+       "│ 2    │ 0       │ R         │ visa             │ credit  │ Found   │ ie 11.0 for desktop     │ 1000.0               │\n",
+       "│ 3    │ 1       │ R         │ visa             │ debit   │ Found   │ chrome 63.0 for android │ 300.0                │\n",
+       "│ 4    │ 0       │ C         │ visa             │ credit  │ Found   │ chrome 65.0             │ 410.373              │\n",
+       "│ 5    │ 0       │ R         │ visa             │ debit   │ Found   │ chrome 62.0 for android │ 200.0                │\n",
+       "│ 6    │ 1       │ C         │ mastercard       │ credit  │ New     │ chrome 63.0             │ 225.504              │\n",
+       "│ 7    │ 0       │ C         │ mastercard       │ credit  │ Found   │ chrome 62.0 for android │ 154.071              │\n",
+       "│ 8    │ 0       │ C         │ mastercard       │ credit  │ Found   │ chrome 63.0             │ 302.111              │\n",
+       "│ 9    │ 1       │ C         │ mastercard       │ credit  │ Found   │ chrome 63.0             │ 265.498              │\n",
+       "│ 10   │ 0       │ C         │ mastercard       │ credit  │ New     │ chrome 63.0             │ 302.111              │\n",
+       "⋮\n",
+       "│ 4543 │ 0       │ R         │ mastercard       │ credit  │ New     │ chrome 60.0 for android │ 100.0                │\n",
+       "│ 4544 │ 0       │ H         │ \u001b[90mmissing\u001b[39m          │ debit   │ New     │ mobile safari 11.0      │ 50.0                 │\n",
+       "│ 4545 │ 0       │ R         │ visa             │ credit  │ Unknown │ chrome 61.0             │ 100.0                │\n",
+       "│ 4546 │ 0       │ H         │ visa             │ credit  │ New     │ edge 17.0               │ 75.0                 │\n",
+       "│ 4547 │ 0       │ H         │ visa             │ credit  │ New     │ mobile                  │ 50.0                 │\n",
+       "│ 4548 │ 0       │ R         │ american express │ credit  │ New     │ opera 49.0              │ 200.0                │\n",
+       "│ 4549 │ 0       │ R         │ visa             │ debit   │ Found   │ opera                   │ 100.0                │\n",
+       "│ 4550 │ 1       │ C         │ mastercard       │ credit  │ New     │ chrome 59.0             │ 205.682              │\n",
+       "│ 4551 │ 0       │ C         │ \u001b[90mmissing\u001b[39m          │ \u001b[90mmissing\u001b[39m │ New     │ samsung browser 6.4     │ 32.707               │\n",
+       "│ 4552 │ 1       │ R         │ visa             │ credit  │ New     │ chrome 64.0 for android │ 300.0                │\n",
+       "│ 4553 │ 1       │ S         │ visa             │ credit  │ New     │ mobile safari 11.0      │ 25.0                 │"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ENV[\"COLUMNS\"]=120\n",
+    "# groupby treats `missing` as a regular key value, so rows with missing keys form their own groups\n",
+    "combine(groupby(dff, [\"isFraud\",\"ProductCD\",\"card4\",\"card6\",\"id_15\",\"id_31\"]), \n",
+    "    :TransactionAmt=>maximum=>:TransactionAmountMax, \n",
+    "    :TransactionAmt=>mean=>:TransactionAmountMean)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Sorting Details"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"data-frame\"><thead><tr><th></th><th>TransactionID</th><th>isFraud</th><th>TransactionDT</th><th>TransactionAmt</th><th>ProductCD</th><th>card1</th><th>card2</th><th>card3</th><th>card4</th><th>card5</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>Int64</th><th>Float64</th><th>String</th><th>Int64</th><th>Float64?</th><th>Float64?</th><th>String?</th><th>Float64?</th></tr></thead><tbody><p>144,233 rows × 434 columns (omitted printing of 424 columns)</p><tr><th>1</th><td>3230924</td><td>0</td><td>5787419</td><td>23.443</td><td>C</td><td>1000</td><td>555.0</td><td>185.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>2</th><td>3020767</td><td>0</td><td>842821</td><td>150.0</td><td>R</td><td>1004</td><td>583.0</td><td>150.0</td><td>visa</td><td>226.0</td></tr><tr><th>3</th><td>3028973</td><td>0</td><td>1022173</td><td>30.0</td><td>H</td><td>1004</td><td>583.0</td><td>150.0</td><td>visa</td><td>226.0</td></tr><tr><th>4</th><td>3386444</td><td>0</td><td>10082484</td><td>50.0</td><td>H</td><td>1004</td><td>583.0</td><td>150.0</td><td>visa</td><td>226.0</td></tr><tr><th>5</th><td>3038871</td><td>0</td><td>1212802</td><td>50.0</td><td>H</td><td>1005</td><td>543.0</td><td>150.0</td><td>mastercard</td><td>117.0</td></tr><tr><th>6</th><td>3234681</td><td>0</td><td>5883179</td><td>150.0</td><td>R</td><td>1006</td><td>399.0</td><td>150.0</td><td>american express</td><td>146.0</td></tr><tr><th>7</th><td>3436647</td><td>0</td><td>11468973</td><td>100.0</td><td>R</td><td>1006</td><td>399.0</td><td>150.0</td><td>american express</td><td>146.0</td></tr><tr><th>8</th><td>3095681</td><td>0</td><td>2145214</td><td>150.0</td><td>R</td><td>1006</td><td>399.0</td><td>150.0</td><td>american 
express</td><td>146.0</td></tr><tr><th>9</th><td>3021401</td><td>0</td><td>850730</td><td>23.203</td><td>C</td><td>1007</td><td>555.0</td><td>135.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>10</th><td>3226241</td><td>0</td><td>5651177</td><td>55.164</td><td>C</td><td>1007</td><td>555.0</td><td>135.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>11</th><td>3039439</td><td>0</td><td>1222657</td><td>200.0</td><td>R</td><td>1009</td><td>399.0</td><td>150.0</td><td>american express</td><td>146.0</td></tr><tr><th>12</th><td>3069943</td><td>0</td><td>1786013</td><td>50.0</td><td>H</td><td>1009</td><td>399.0</td><td>150.0</td><td>american express</td><td>146.0</td></tr><tr><th>13</th><td>3054866</td><td>0</td><td>1539008</td><td>50.0</td><td>H</td><td>1009</td><td>399.0</td><td>150.0</td><td>american express</td><td>146.0</td></tr><tr><th>14</th><td>3024078</td><td>0</td><td>926293</td><td>150.0</td><td>R</td><td>1009</td><td>399.0</td><td>150.0</td><td>american express</td><td>146.0</td></tr><tr><th>15</th><td>2999616</td><td>0</td><td>354228</td><td>200.0</td><td>R</td><td>1009</td><td>399.0</td><td>150.0</td><td>american 
express</td><td>146.0</td></tr><tr><th>16</th><td>3378279</td><td>0</td><td>9826142</td><td>277.932</td><td>C</td><td>1010</td><td>555.0</td><td>121.0</td><td>visa</td><td>226.0</td></tr><tr><th>17</th><td>3319221</td><td>0</td><td>8183988</td><td>25.0</td><td>H</td><td>1011</td><td>543.0</td><td>150.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>18</th><td>3015198</td><td>0</td><td>702331</td><td>50.0</td><td>H</td><td>1012</td><td>479.0</td><td>150.0</td><td>visa</td><td>162.0</td></tr><tr><th>19</th><td>3008268</td><td>0</td><td>562205</td><td>100.0</td><td>H</td><td>1012</td><td>479.0</td><td>150.0</td><td>visa</td><td>162.0</td></tr><tr><th>20</th><td>3524124</td><td>0</td><td>14154216</td><td>75.0</td><td>H</td><td>1012</td><td>479.0</td><td>150.0</td><td>visa</td><td>162.0</td></tr><tr><th>21</th><td>3098185</td><td>0</td><td>2166984</td><td>175.0</td><td>R</td><td>1012</td><td>479.0</td><td>150.0</td><td>visa</td><td>162.0</td></tr><tr><th>22</th><td>3569931</td><td>0</td><td>15573121</td><td>19.92</td><td>C</td><td>1014</td><td>555.0</td><td>117.0</td><td>visa</td><td>226.0</td></tr><tr><th>23</th><td>3563146</td><td>0</td><td>15300736</td><td>43.651</td><td>C</td><td>1014</td><td>555.0</td><td>117.0</td><td>visa</td><td>226.0</td></tr><tr><th>24</th><td>3455404</td><td>1</td><td>12076280</td><td>450.0</td><td>R</td><td>1015</td><td>555.0</td><td>144.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>25</th><td>3393951</td><td>0</td><td>10275141</td><td>23.564</td><td>C</td><td>1015</td><td>555.0</td><td>144.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>26</th><td>3068829</td><td>0</td><td>1751791</td><td>63.6</td><td>C</td><td>1015</td><td>555.0</td><td>144.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>27</th><td>3080080</td><td>0</td><td>1919379</td><td>108.205</td><td>C</td><td>1015</td><td>555.0</td><td>144.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>28</th><td>3093683</td><td>0</td><td>2128731</td><td>31.132</td><td>C</td><td>
1015</td><td>555.0</td><td>144.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>29</th><td>3159857</td><td>0</td><td>3734666</td><td>22.037</td><td>C</td><td>1015</td><td>555.0</td><td>144.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>30</th><td>3331629</td><td>0</td><td>8498739</td><td>22.96</td><td>C</td><td>1015</td><td>555.0</td><td>144.0</td><td>mastercard</td><td>224.0</td></tr><tr><th>&vellip;</th><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td><td>&vellip;</td></tr></tbody></table>"
+      ],
+      "text/latex": [
+       "\\begin{tabular}{r|ccccccccccc}\n",
+       "\t& TransactionID & isFraud & TransactionDT & TransactionAmt & ProductCD & card1 & card2 & card3 & card4 & card5 & \\\\\n",
+       "\t\\hline\n",
+       "\t& Int64 & Int64 & Int64 & Float64 & String & Int64 & Float64? & Float64? & String? & Float64? & \\\\\n",
+       "\t\\hline\n",
+       "\t1 & 3230924 & 0 & 5787419 & 23.443 & C & 1000 & 555.0 & 185.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t2 & 3020767 & 0 & 842821 & 150.0 & R & 1004 & 583.0 & 150.0 & visa & 226.0 & $\\dots$ \\\\\n",
+       "\t3 & 3028973 & 0 & 1022173 & 30.0 & H & 1004 & 583.0 & 150.0 & visa & 226.0 & $\\dots$ \\\\\n",
+       "\t4 & 3386444 & 0 & 10082484 & 50.0 & H & 1004 & 583.0 & 150.0 & visa & 226.0 & $\\dots$ \\\\\n",
+       "\t5 & 3038871 & 0 & 1212802 & 50.0 & H & 1005 & 543.0 & 150.0 & mastercard & 117.0 & $\\dots$ \\\\\n",
+       "\t6 & 3234681 & 0 & 5883179 & 150.0 & R & 1006 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t7 & 3436647 & 0 & 11468973 & 100.0 & R & 1006 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t8 & 3095681 & 0 & 2145214 & 150.0 & R & 1006 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t9 & 3021401 & 0 & 850730 & 23.203 & C & 1007 & 555.0 & 135.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t10 & 3226241 & 0 & 5651177 & 55.164 & C & 1007 & 555.0 & 135.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t11 & 3039439 & 0 & 1222657 & 200.0 & R & 1009 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t12 & 3069943 & 0 & 1786013 & 50.0 & H & 1009 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t13 & 3054866 & 0 & 1539008 & 50.0 & H & 1009 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t14 & 3024078 & 0 & 926293 & 150.0 & R & 1009 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t15 & 2999616 & 0 & 354228 & 200.0 & R & 1009 & 399.0 & 150.0 & american express & 146.0 & $\\dots$ \\\\\n",
+       "\t16 & 3378279 & 0 & 9826142 & 277.932 & C & 1010 & 555.0 & 121.0 & visa & 226.0 & $\\dots$ \\\\\n",
+       "\t17 & 3319221 & 0 & 8183988 & 25.0 & H & 1011 & 543.0 & 150.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t18 & 3015198 & 0 & 702331 & 50.0 & H & 1012 & 479.0 & 150.0 & visa & 162.0 & $\\dots$ \\\\\n",
+       "\t19 & 3008268 & 0 & 562205 & 100.0 & H & 1012 & 479.0 & 150.0 & visa & 162.0 & $\\dots$ \\\\\n",
+       "\t20 & 3524124 & 0 & 14154216 & 75.0 & H & 1012 & 479.0 & 150.0 & visa & 162.0 & $\\dots$ \\\\\n",
+       "\t21 & 3098185 & 0 & 2166984 & 175.0 & R & 1012 & 479.0 & 150.0 & visa & 162.0 & $\\dots$ \\\\\n",
+       "\t22 & 3569931 & 0 & 15573121 & 19.92 & C & 1014 & 555.0 & 117.0 & visa & 226.0 & $\\dots$ \\\\\n",
+       "\t23 & 3563146 & 0 & 15300736 & 43.651 & C & 1014 & 555.0 & 117.0 & visa & 226.0 & $\\dots$ \\\\\n",
+       "\t24 & 3455404 & 1 & 12076280 & 450.0 & R & 1015 & 555.0 & 144.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t25 & 3393951 & 0 & 10275141 & 23.564 & C & 1015 & 555.0 & 144.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t26 & 3068829 & 0 & 1751791 & 63.6 & C & 1015 & 555.0 & 144.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t27 & 3080080 & 0 & 1919379 & 108.205 & C & 1015 & 555.0 & 144.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t28 & 3093683 & 0 & 2128731 & 31.132 & C & 1015 & 555.0 & 144.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t29 & 3159857 & 0 & 3734666 & 22.037 & C & 1015 & 555.0 & 144.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t30 & 3331629 & 0 & 8498739 & 22.96 & C & 1015 & 555.0 & 144.0 & mastercard & 224.0 & $\\dots$ \\\\\n",
+       "\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ &  \\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/plain": [
+       "144233×434 DataFrame. Omitted printing of 426 columns\n",
+       "│ Row    │ TransactionID │ isFraud │ TransactionDT │ TransactionAmt │ ProductCD │ card1 │ card2    │ card3    │\n",
+       "│        │ \u001b[90mInt64\u001b[39m         │ \u001b[90mInt64\u001b[39m   │ \u001b[90mInt64\u001b[39m         │ \u001b[90mFloat64\u001b[39m        │ \u001b[90mString\u001b[39m    │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64?\u001b[39m │ \u001b[90mFloat64?\u001b[39m │\n",
+       "├────────┼───────────────┼─────────┼───────────────┼────────────────┼───────────┼───────┼──────────┼──────────┤\n",
+       "│ 1      │ 3230924       │ 0       │ 5787419       │ 23.443         │ C         │ 1000  │ 555.0    │ 185.0    │\n",
+       "│ 2      │ 3020767       │ 0       │ 842821        │ 150.0          │ R         │ 1004  │ 583.0    │ 150.0    │\n",
+       "│ 3      │ 3028973       │ 0       │ 1022173       │ 30.0           │ H         │ 1004  │ 583.0    │ 150.0    │\n",
+       "│ 4      │ 3386444       │ 0       │ 10082484      │ 50.0           │ H         │ 1004  │ 583.0    │ 150.0    │\n",
+       "│ 5      │ 3038871       │ 0       │ 1212802       │ 50.0           │ H         │ 1005  │ 543.0    │ 150.0    │\n",
+       "│ 6      │ 3234681       │ 0       │ 5883179       │ 150.0          │ R         │ 1006  │ 399.0    │ 150.0    │\n",
+       "│ 7      │ 3436647       │ 0       │ 11468973      │ 100.0          │ R         │ 1006  │ 399.0    │ 150.0    │\n",
+       "│ 8      │ 3095681       │ 0       │ 2145214       │ 150.0          │ R         │ 1006  │ 399.0    │ 150.0    │\n",
+       "│ 9      │ 3021401       │ 0       │ 850730        │ 23.203         │ C         │ 1007  │ 555.0    │ 135.0    │\n",
+       "│ 10     │ 3226241       │ 0       │ 5651177       │ 55.164         │ C         │ 1007  │ 555.0    │ 135.0    │\n",
+       "⋮\n",
+       "│ 144223 │ 3449153       │ 0       │ 11892549      │ 4.795          │ C         │ 18377 │ 555.0    │ 131.0    │\n",
+       "│ 144224 │ 3453522       │ 0       │ 12008182      │ 24.715         │ C         │ 18377 │ 555.0    │ 131.0    │\n",
+       "│ 144225 │ 3464285       │ 0       │ 12352490      │ 3.873          │ C         │ 18377 │ 555.0    │ 131.0    │\n",
+       "│ 144226 │ 3464986       │ 0       │ 12401894      │ 13.034         │ C         │ 18377 │ 555.0    │ 131.0    │\n",
+       "│ 144227 │ 3469775       │ 0       │ 12544573      │ 45.372         │ C         │ 18377 │ 555.0    │ 131.0    │\n",
+       "│ 144228 │ 3472353       │ 0       │ 12628755      │ 67.383         │ C         │ 18377 │ 555.0    │ 131.0    │\n",
+       "│ 144229 │ 3140430       │ 0       │ 3178997       │ 100.0          │ H         │ 18378 │ 111.0    │ 150.0    │\n",
+       "│ 144230 │ 3077370       │ 0       │ 1887400       │ 150.0          │ R         │ 18383 │ 128.0    │ 150.0    │\n",
+       "│ 144231 │ 3304239       │ 0       │ 7910291       │ 25.0           │ S         │ 18384 │ 543.0    │ 150.0    │\n",
+       "│ 144232 │ 3058006       │ 0       │ 1604565       │ 100.0          │ H         │ 18388 │ 555.0    │ 150.0    │\n",
+       "│ 144233 │ 3102181       │ 0       │ 2231501       │ 100.0          │ R         │ 18396 │ 111.0    │ 150.0    │"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sort!(dff, [\"card1\",\"addr1\",\"D9\"])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.4.1",
+   "language": "julia",
+   "name": "julia-1.4"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.4.1"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Julia/CSV/Read_CSV.ipynb b/Julia/CSV/Read_CSV.ipynb
index 06e27f8..4b60d4a 100644
--- a/Julia/CSV/Read_CSV.ipynb
+++ b/Julia/CSV/Read_CSV.ipynb
@@ -5,6 +5,9 @@
    "metadata": {},
    "source": [
     "## Reading a CSV file to a DataFrame in Julia (programming lang)\n",
+    "\n",
+    "Article: https://towardsdatascience.com/read-csv-to-data-frame-in-julia-programming-lang-77f3d0081c14\n",
+    "\n",
     "Julia often offers several ways to do the same thing, and reading a CSV file is one example. In all cases, you will need the `CSV` and `DataFrames` packages. If you don't have them installed, run the following in the Julia REPL: `import Pkg; Pkg.add(\"CSV\"); Pkg.add(\"DataFrames\")`"
    ]
   },

	pandas
aggregation	0.075788
load_identity	0.682109
load_transactions	18.279765
merge	3.196074
sorting	2.224164
	pandas	dask
load_transactions	18.279765	0.083901
load_identity	0.682109	0.028268
merge	3.196074	0.073891
aggregation	0.075788	20.837958
sorting	2.224164	71.282675
	pandas
aggregation	0.060114
all	19.908346
load_identity	0.480164
load_transactions	17.354527
merge	2.013150
Total	39.816302
	aggregation	load_identity	load_transactions	merge	sort
	Float64	Float64	Float64	Float64	Float64
1	0.034	0.502	8.045	0.771	5.032
	TransactionID	isFraud	TransactionDT	TransactionAmt	ProductCD	card1	card2
	Int64	Int64	Int64	Float64	String	Int64	Float64?
1	3067453	0	1729328	200.0	H	1030	157.0
2	3073296	0	1816710	100.0	H	1675	174.0
3	3061872	0	1642116	100.0	H	1974	111.0
4	3078930	0	1902540	100.0	H	6697	111.0
5	3038788	0	1211592	75.0	H	7508	321.0
6	3056336	0	1556463	25.0	H	9500	321.0
7	3026050	0	951882	50.0	H	9500	321.0
8	2999258	0	348274	50.0	H	10680	373.0
9	2995902	0	260116	40.0	H	12526	381.0
10	3124444	0	2767769	50.0	H	12839	321.0
11	3091878	0	2077478	40.0	H	2884	490.0
12	3011076	0	608878	50.0	H	7508	321.0
13	3170944	0	4067304	100.0	H	7664	490.0
14	3170950	0	4067403	100.0	H	7664	490.0
15	3170951	0	4067438	100.0	H	7664	490.0
16	3145133	0	3289750	150.0	H	9112	250.0
17	3034874	0	1130093	300.0	H	10294	555.0
18	3170845	0	4065306	35.0	H	12695	490.0
19	3114169	0	2516387	30.0	H	5822	555.0
20	3264648	0	6746645	40.0	H	15497	490.0
21	3043704	0	1304889	25.0	H	16993	555.0
22	3191704	0	4678752	25.0	H	1323	268.0
23	3092620	0	2087380	100.0	H	2772	512.0
24	3023170	0	876581	50.0	H	17188	321.0
25	3062921	0	1654005	50.0	H	17496	554.0
26	3098962	0	2176776	75.0	H	12501	490.0
27	3099033	0	2177727	75.0	H	12501	490.0
28	3039596	0	1226005	50.0	H	14084	257.0
29	3057383	0	1570940	25.0	H	15377	555.0
30	3044025	0	1311408	25.0	H	15497	490.0
⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮
	isFraud	ProductCD	card4	card6	id_15	id_31	TransactionAmountMax	TransactionAmountMean
	Int64	String	String?	String?	String?	String?	Float64	Float64
1	0	H	visa	debit	Found	firefox 57.0	300.0	71.1765
2	0	R	visa	credit	Found	ie 11.0 for desktop	1000.0	208.58
3	1	R	visa	debit	Found	chrome 63.0 for android	300.0	216.667
4	0	C	visa	credit	Found	chrome 65.0	410.373	49.3293
5	0	R	visa	debit	Found	chrome 62.0 for android	200.0	98.9583
6	1	C	mastercard	credit	New	chrome 63.0	225.504	47.2223
7	0	C	mastercard	credit	Found	chrome 62.0 for android	154.071	44.5909
8	0	C	mastercard	credit	Found	chrome 63.0	302.111	50.8962
9	1	C	mastercard	credit	Found	chrome 63.0	265.498	44.8802
10	0	C	mastercard	credit	New	chrome 63.0	302.111	48.2015
11	0	C	mastercard	credit	Found	safari generic	141.158	39.4613
12	0	C	mastercard	credit	Unknown	safari generic	221.54	51.347
13	0	C	mastercard	credit	Found	mobile safari generic	162.676	53.6838
14	0	R	visa	credit	New	chrome 64.0	350.0	138.889
15	0	R	visa	credit	Found	chrome 64.0	450.0	153.112
16	0	R	visa	credit	Found	chrome 65.0	900.0	177.778
17	0	C	mastercard	credit	Found	chrome 64.0	220.171	50.7401
18	1	C	mastercard	credit	Found	chrome 65.0 for android	185.67	53.1863
19	0	C	visa	debit	Found	chrome 63.0	422.558	43.4618
20	0	C	visa	debit	Found	chrome 64.0 for android	268.265	28.9167
21	0	C	visa	debit	Found	firefox 57.0	204.633	44.2996
22	0	C	visa	debit	Found	chrome 65.0 for android	230.079	32.405
23	0	C	visa	debit	Found	chrome 60.0 for android	96.37	31.5289
24	0	C	visa	credit	Found	chrome 62.0	412.764	48.7786
25	0	C	mastercard	debit	Found	chrome 64.0 for android	162.953	26.765
26	0	C	mastercard	debit	Found	chrome 65.0 for android	283.37	32.5836
27	0	R	visa	credit	Found	mobile safari generic	1000.0	167.918
28	0	H	visa	credit	Found	chrome 62.0 for ios	50.0	40.0
29	0	H	visa	credit	Found	chrome 64.0	450.0	96.2881
30	0	S	visa	credit	New	chrome generic	200.0	47.7391
⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮	⋮