Files committed to the personality detection project

master
mehulnagpurkar 2020-03-01 17:38:48 +11:00
parent 185f66c5db
commit b7d348b735
11 changed files with 386 additions and 0 deletions

BIN
.DS_Store vendored

Binary file not shown.

BIN
PersonalityDetection/.DS_Store vendored 100644

Binary file not shown.

View File

@ -0,0 +1,193 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Myers-Briggs Type Indicator demo\n",
"\n",
"Intructions: Execute the two cells below to load the functions and then enter text in space provided to estimate the MBTI personality\n",
"\n",
"**Note: This notebook requires SpaCy and IPython widgets to be installed**"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import spacy\n",
"from spacy.lang.en.stop_words import STOP_WORDS\n",
"import re\n",
"import pickle\n",
"import numpy as np\n",
"from ipywidgets import widgets, interact\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"plt.style.use(\"ggplot\")\n",
"\n",
"# python -m spacy download en_core_web_sm\n",
"nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
"\n",
"def tokeniser(sentence):\n",
" \n",
" # Remove ||| from kaggle dataset\n",
" sentence = re.sub(\"[]|||[]\", \" \", sentence)\n",
"\n",
" # remove reddit subreddit urls\n",
" sentence = re.sub(\"/r/[0-9A-Za-z]\", \"\", sentence)\n",
"\n",
" # remove MBTI types\n",
" MBTI_types = ['INFJ', 'ENTP', 'INTP', 'INTJ', 'ENTJ', 'ENFJ', 'INFP', 'ENFP',\n",
" 'ISFP', 'ISTP', 'ISFJ', 'ISTJ', 'ESTP', 'ESFP', 'ESTJ', 'ESFJ',\n",
" 'MBTI']\n",
" MBTI_types = [ti.lower() for ti in MBTI_types] + [ti.lower() + 's' for ti in MBTI_types]\n",
"\n",
" tokens = nlp(sentence)\n",
"\n",
" tokens = [ti for ti in tokens if ti.lower_ not in STOP_WORDS]\n",
" tokens = [ti for ti in tokens if not ti.is_space]\n",
" tokens = [ti for ti in tokens if not ti.is_punct]\n",
" tokens = [ti for ti in tokens if not ti.like_num]\n",
" tokens = [ti for ti in tokens if not ti.like_url]\n",
" tokens = [ti for ti in tokens if not ti.like_email]\n",
" tokens = [ti for ti in tokens if ti.lower_ not in MBTI_types]\n",
"\n",
"\n",
" # lemmatize\n",
" tokens = [ti.lemma_ for ti in tokens if ti.lemma_ not in STOP_WORDS]\n",
" tokens = [ti for ti in tokens if len(ti) > 1]\n",
"\n",
" return tokens\n",
"\n",
"dummy_fn = lambda x:x\n",
"\n",
"\n",
"with open('./pickle files/cv.pickle', 'rb') as f:\n",
" cv = pickle.load(f)\n",
" \n",
"with open('./pickle files/idf_transformer.pickle', 'rb') as f:\n",
" idf_transformer = pickle.load(f)\n",
" \n",
"# loading the pickle files with the classifiers\n",
"with open('./pickle files/LR_clf_IE_kaggle.pickle', 'rb') as f:\n",
" lr_ie = pickle.load(f)\n",
"with open('./pickle files/LR_clf_JP_kaggle.pickle', 'rb') as f:\n",
" lr_jp = pickle.load(f)\n",
"with open('./pickle files/LR_clf_NS_kaggle.pickle', 'rb') as f:\n",
" lr_ns = pickle.load(f)\n",
"with open('./pickle files/LR_clf_TF_kaggle.pickle', 'rb') as f:\n",
" lr_tf = pickle.load(f)\n",
"\n",
"\n",
"def eval_string(my_post, show_graph=False):\n",
" c = cv.transform([tokeniser(my_post)])\n",
" x = idf_transformer.transform(c)\n",
" \n",
" ie = lr_ie.predict_proba(x).flatten()\n",
" ns = lr_ns.predict_proba(x).flatten()\n",
" tf = lr_tf.predict_proba(x).flatten()\n",
" jp = lr_jp.predict_proba(x).flatten()\n",
" \n",
" probs = np.vstack([ie, ns, tf, jp])\n",
" \n",
" names = [\"Introversion - Extroversion\", \n",
" \"Intuiting - Sensing\", \n",
" \"Thinking - Feeling\", \n",
" \"Judging - Perceiving\"]\n",
" \n",
" for i, dim in enumerate(names):\n",
" print(f\"{dim:28s}: {probs[i,1]:.3f} - {probs[i, 0]:.3f}\")\n",
" \n",
" if show_graph:\n",
" fig = plt.figure(figsize=(6,6))\n",
" ax = fig.gca()\n",
" \n",
" xlabels = [\"Introversion (I)\", \"Intuiting (N)\", \"Thinking (T)\", \"Judging (J)\"]\n",
" ax.barh(xlabels, [1, 1, 1, 1])\n",
" ax.barh(xlabels, [ie[1], ns[1], tf[1], jp[1]])\n",
" \n",
" ax.set_xlim([0, 1])\n",
" ax.set_xlabel(\"Propensity\")\n",
" \n",
" plt.show(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Type in some text"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "942446e84f5f4ee0945435bd80296d2c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(Textarea(value='', description='Input:', placeholder='Enter in some text'), Checkbox(val…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<function __main__.eval_string(my_post, show_graph=False)>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interact(eval_string, my_post=widgets.Textarea( value='', \n",
" placeholder='Enter in some text', \n",
" description='Input:',\n",
" disabled=False)\n",
" )\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,193 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Myers-Briggs Type Indicator demo\n",
"\n",
"Intructions: Execute the two cells below to load the functions and then enter text in space provided to estimate the MBTI personality\n",
"\n",
"**Note: This notebook requires SpaCy and IPython widgets to be installed**"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import spacy\n",
"from spacy.lang.en.stop_words import STOP_WORDS\n",
"import re\n",
"import pickle\n",
"import numpy as np\n",
"from ipywidgets import widgets, interact\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"plt.style.use(\"ggplot\")\n",
"\n",
"# python -m spacy download en_core_web_sm\n",
"nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
"\n",
"def tokeniser(sentence):\n",
" \n",
" # Remove ||| from kaggle dataset\n",
" sentence = re.sub(\"[]|||[]\", \" \", sentence)\n",
"\n",
" # remove reddit subreddit urls\n",
" sentence = re.sub(\"/r/[0-9A-Za-z]\", \"\", sentence)\n",
"\n",
" # remove MBTI types\n",
" MBTI_types = ['INFJ', 'ENTP', 'INTP', 'INTJ', 'ENTJ', 'ENFJ', 'INFP', 'ENFP',\n",
" 'ISFP', 'ISTP', 'ISFJ', 'ISTJ', 'ESTP', 'ESFP', 'ESTJ', 'ESFJ',\n",
" 'MBTI']\n",
" MBTI_types = [ti.lower() for ti in MBTI_types] + [ti.lower() + 's' for ti in MBTI_types]\n",
"\n",
" tokens = nlp(sentence)\n",
"\n",
" tokens = [ti for ti in tokens if ti.lower_ not in STOP_WORDS]\n",
" tokens = [ti for ti in tokens if not ti.is_space]\n",
" tokens = [ti for ti in tokens if not ti.is_punct]\n",
" tokens = [ti for ti in tokens if not ti.like_num]\n",
" tokens = [ti for ti in tokens if not ti.like_url]\n",
" tokens = [ti for ti in tokens if not ti.like_email]\n",
" tokens = [ti for ti in tokens if ti.lower_ not in MBTI_types]\n",
"\n",
"\n",
" # lemmatize\n",
" tokens = [ti.lemma_ for ti in tokens if ti.lemma_ not in STOP_WORDS]\n",
" tokens = [ti for ti in tokens if len(ti) > 1]\n",
"\n",
" return tokens\n",
"\n",
"dummy_fn = lambda x:x\n",
"\n",
"\n",
"with open('./pickle files/cv.pickle', 'rb') as f:\n",
" cv = pickle.load(f)\n",
" \n",
"with open('./pickle files/idf_transformer.pickle', 'rb') as f:\n",
" idf_transformer = pickle.load(f)\n",
" \n",
"# loading the pickle files with the classifiers\n",
"with open('./pickle files/LR_clf_IE_kaggle.pickle', 'rb') as f:\n",
" lr_ie = pickle.load(f)\n",
"with open('./pickle files/LR_clf_JP_kaggle.pickle', 'rb') as f:\n",
" lr_jp = pickle.load(f)\n",
"with open('./pickle files/LR_clf_NS_kaggle.pickle', 'rb') as f:\n",
" lr_ns = pickle.load(f)\n",
"with open('./pickle files/LR_clf_TF_kaggle.pickle', 'rb') as f:\n",
" lr_tf = pickle.load(f)\n",
"\n",
"\n",
"def eval_string(my_post, show_graph=False):\n",
" c = cv.transform([tokeniser(my_post)])\n",
" x = idf_transformer.transform(c)\n",
" \n",
" ie = lr_ie.predict_proba(x).flatten()\n",
" ns = lr_ns.predict_proba(x).flatten()\n",
" tf = lr_tf.predict_proba(x).flatten()\n",
" jp = lr_jp.predict_proba(x).flatten()\n",
" \n",
" probs = np.vstack([ie, ns, tf, jp])\n",
" \n",
" names = [\"Introversion - Extroversion\", \n",
" \"Intuiting - Sensing\", \n",
" \"Thinking - Feeling\", \n",
" \"Judging - Perceiving\"]\n",
" \n",
" for i, dim in enumerate(names):\n",
" print(f\"{dim:28s}: {probs[i,1]:.3f} - {probs[i, 0]:.3f}\")\n",
" \n",
" if show_graph:\n",
" fig = plt.figure(figsize=(6,6))\n",
" ax = fig.gca()\n",
" \n",
" xlabels = [\"Introversion (I)\", \"Intuiting (N)\", \"Thinking (T)\", \"Judging (J)\"]\n",
" ax.barh(xlabels, [1, 1, 1, 1])\n",
" ax.barh(xlabels, [ie[1], ns[1], tf[1], jp[1]])\n",
" \n",
" ax.set_xlim([0, 1])\n",
" ax.set_xlabel(\"Propensity\")\n",
" \n",
" plt.show(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Type in some text"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "942446e84f5f4ee0945435bd80296d2c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(Textarea(value='', description='Input:', placeholder='Enter in some text'), Checkbox(val…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<function __main__.eval_string(my_post, show_graph=False)>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interact(eval_string, my_post=widgets.Textarea( value='', \n",
" placeholder='Enter in some text', \n",
" description='Input:',\n",
" disabled=False)\n",
" )\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}