Files committed to the personality detection project
parent
185f66c5db
commit
b7d348b735
Binary file not shown.
|
@ -0,0 +1,193 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Myers-Briggs Type Indicator demo\n",
|
||||
"\n",
|
||||
"Intructions: Execute the two cells below to load the functions and then enter text in space provided to estimate the MBTI personality\n",
|
||||
"\n",
|
||||
"**Note: This notebook requires SpaCy and IPython widgets to be installed**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import spacy\n",
|
||||
"from spacy.lang.en.stop_words import STOP_WORDS\n",
|
||||
"import re\n",
|
||||
"import pickle\n",
|
||||
"import numpy as np\n",
|
||||
"from ipywidgets import widgets, interact\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline\n",
|
||||
"plt.style.use(\"ggplot\")\n",
|
||||
"\n",
|
||||
"# python -m spacy download en_core_web_sm\n",
|
||||
"nlp = spacy.load(\"en_core_web_sm\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def tokeniser(sentence):\n",
|
||||
" \n",
|
||||
" # Remove ||| from kaggle dataset\n",
|
||||
" sentence = re.sub(\"[]|||[]\", \" \", sentence)\n",
|
||||
"\n",
|
||||
" # remove reddit subreddit urls\n",
|
||||
" sentence = re.sub(\"/r/[0-9A-Za-z]\", \"\", sentence)\n",
|
||||
"\n",
|
||||
" # remove MBTI types\n",
|
||||
" MBTI_types = ['INFJ', 'ENTP', 'INTP', 'INTJ', 'ENTJ', 'ENFJ', 'INFP', 'ENFP',\n",
|
||||
" 'ISFP', 'ISTP', 'ISFJ', 'ISTJ', 'ESTP', 'ESFP', 'ESTJ', 'ESFJ',\n",
|
||||
" 'MBTI']\n",
|
||||
" MBTI_types = [ti.lower() for ti in MBTI_types] + [ti.lower() + 's' for ti in MBTI_types]\n",
|
||||
"\n",
|
||||
" tokens = nlp(sentence)\n",
|
||||
"\n",
|
||||
" tokens = [ti for ti in tokens if ti.lower_ not in STOP_WORDS]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.is_space]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.is_punct]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.like_num]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.like_url]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.like_email]\n",
|
||||
" tokens = [ti for ti in tokens if ti.lower_ not in MBTI_types]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # lemmatize\n",
|
||||
" tokens = [ti.lemma_ for ti in tokens if ti.lemma_ not in STOP_WORDS]\n",
|
||||
" tokens = [ti for ti in tokens if len(ti) > 1]\n",
|
||||
"\n",
|
||||
" return tokens\n",
|
||||
"\n",
|
||||
"dummy_fn = lambda x:x\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open('./pickle files/cv.pickle', 'rb') as f:\n",
|
||||
" cv = pickle.load(f)\n",
|
||||
" \n",
|
||||
"with open('./pickle files/idf_transformer.pickle', 'rb') as f:\n",
|
||||
" idf_transformer = pickle.load(f)\n",
|
||||
" \n",
|
||||
"# loading the pickle files with the classifiers\n",
|
||||
"with open('./pickle files/LR_clf_IE_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_ie = pickle.load(f)\n",
|
||||
"with open('./pickle files/LR_clf_JP_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_jp = pickle.load(f)\n",
|
||||
"with open('./pickle files/LR_clf_NS_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_ns = pickle.load(f)\n",
|
||||
"with open('./pickle files/LR_clf_TF_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_tf = pickle.load(f)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def eval_string(my_post, show_graph=False):\n",
|
||||
" c = cv.transform([tokeniser(my_post)])\n",
|
||||
" x = idf_transformer.transform(c)\n",
|
||||
" \n",
|
||||
" ie = lr_ie.predict_proba(x).flatten()\n",
|
||||
" ns = lr_ns.predict_proba(x).flatten()\n",
|
||||
" tf = lr_tf.predict_proba(x).flatten()\n",
|
||||
" jp = lr_jp.predict_proba(x).flatten()\n",
|
||||
" \n",
|
||||
" probs = np.vstack([ie, ns, tf, jp])\n",
|
||||
" \n",
|
||||
" names = [\"Introversion - Extroversion\", \n",
|
||||
" \"Intuiting - Sensing\", \n",
|
||||
" \"Thinking - Feeling\", \n",
|
||||
" \"Judging - Perceiving\"]\n",
|
||||
" \n",
|
||||
" for i, dim in enumerate(names):\n",
|
||||
" print(f\"{dim:28s}: {probs[i,1]:.3f} - {probs[i, 0]:.3f}\")\n",
|
||||
" \n",
|
||||
" if show_graph:\n",
|
||||
" fig = plt.figure(figsize=(6,6))\n",
|
||||
" ax = fig.gca()\n",
|
||||
" \n",
|
||||
" xlabels = [\"Introversion (I)\", \"Intuiting (N)\", \"Thinking (T)\", \"Judging (J)\"]\n",
|
||||
" ax.barh(xlabels, [1, 1, 1, 1])\n",
|
||||
" ax.barh(xlabels, [ie[1], ns[1], tf[1], jp[1]])\n",
|
||||
" \n",
|
||||
" ax.set_xlim([0, 1])\n",
|
||||
" ax.set_xlabel(\"Propensity\")\n",
|
||||
" \n",
|
||||
" plt.show(fig)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Type in some text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "942446e84f5f4ee0945435bd80296d2c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"interactive(children=(Textarea(value='', description='Input:', placeholder='Enter in some text'), Checkbox(val…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<function __main__.eval_string(my_post, show_graph=False)>"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"interact(eval_string, my_post=widgets.Textarea( value='', \n",
|
||||
" placeholder='Enter in some text', \n",
|
||||
" description='Input:',\n",
|
||||
" disabled=False)\n",
|
||||
" )\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Myers-Briggs Type Indicator demo\n",
|
||||
"\n",
|
||||
"Intructions: Execute the two cells below to load the functions and then enter text in space provided to estimate the MBTI personality\n",
|
||||
"\n",
|
||||
"**Note: This notebook requires SpaCy and IPython widgets to be installed**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import spacy\n",
|
||||
"from spacy.lang.en.stop_words import STOP_WORDS\n",
|
||||
"import re\n",
|
||||
"import pickle\n",
|
||||
"import numpy as np\n",
|
||||
"from ipywidgets import widgets, interact\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline\n",
|
||||
"plt.style.use(\"ggplot\")\n",
|
||||
"\n",
|
||||
"# python -m spacy download en_core_web_sm\n",
|
||||
"nlp = spacy.load(\"en_core_web_sm\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def tokeniser(sentence):\n",
|
||||
" \n",
|
||||
" # Remove ||| from kaggle dataset\n",
|
||||
" sentence = re.sub(\"[]|||[]\", \" \", sentence)\n",
|
||||
"\n",
|
||||
" # remove reddit subreddit urls\n",
|
||||
" sentence = re.sub(\"/r/[0-9A-Za-z]\", \"\", sentence)\n",
|
||||
"\n",
|
||||
" # remove MBTI types\n",
|
||||
" MBTI_types = ['INFJ', 'ENTP', 'INTP', 'INTJ', 'ENTJ', 'ENFJ', 'INFP', 'ENFP',\n",
|
||||
" 'ISFP', 'ISTP', 'ISFJ', 'ISTJ', 'ESTP', 'ESFP', 'ESTJ', 'ESFJ',\n",
|
||||
" 'MBTI']\n",
|
||||
" MBTI_types = [ti.lower() for ti in MBTI_types] + [ti.lower() + 's' for ti in MBTI_types]\n",
|
||||
"\n",
|
||||
" tokens = nlp(sentence)\n",
|
||||
"\n",
|
||||
" tokens = [ti for ti in tokens if ti.lower_ not in STOP_WORDS]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.is_space]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.is_punct]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.like_num]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.like_url]\n",
|
||||
" tokens = [ti for ti in tokens if not ti.like_email]\n",
|
||||
" tokens = [ti for ti in tokens if ti.lower_ not in MBTI_types]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # lemmatize\n",
|
||||
" tokens = [ti.lemma_ for ti in tokens if ti.lemma_ not in STOP_WORDS]\n",
|
||||
" tokens = [ti for ti in tokens if len(ti) > 1]\n",
|
||||
"\n",
|
||||
" return tokens\n",
|
||||
"\n",
|
||||
"dummy_fn = lambda x:x\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open('./pickle files/cv.pickle', 'rb') as f:\n",
|
||||
" cv = pickle.load(f)\n",
|
||||
" \n",
|
||||
"with open('./pickle files/idf_transformer.pickle', 'rb') as f:\n",
|
||||
" idf_transformer = pickle.load(f)\n",
|
||||
" \n",
|
||||
"# loading the pickle files with the classifiers\n",
|
||||
"with open('./pickle files/LR_clf_IE_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_ie = pickle.load(f)\n",
|
||||
"with open('./pickle files/LR_clf_JP_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_jp = pickle.load(f)\n",
|
||||
"with open('./pickle files/LR_clf_NS_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_ns = pickle.load(f)\n",
|
||||
"with open('./pickle files/LR_clf_TF_kaggle.pickle', 'rb') as f:\n",
|
||||
" lr_tf = pickle.load(f)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def eval_string(my_post, show_graph=False):\n",
|
||||
" c = cv.transform([tokeniser(my_post)])\n",
|
||||
" x = idf_transformer.transform(c)\n",
|
||||
" \n",
|
||||
" ie = lr_ie.predict_proba(x).flatten()\n",
|
||||
" ns = lr_ns.predict_proba(x).flatten()\n",
|
||||
" tf = lr_tf.predict_proba(x).flatten()\n",
|
||||
" jp = lr_jp.predict_proba(x).flatten()\n",
|
||||
" \n",
|
||||
" probs = np.vstack([ie, ns, tf, jp])\n",
|
||||
" \n",
|
||||
" names = [\"Introversion - Extroversion\", \n",
|
||||
" \"Intuiting - Sensing\", \n",
|
||||
" \"Thinking - Feeling\", \n",
|
||||
" \"Judging - Perceiving\"]\n",
|
||||
" \n",
|
||||
" for i, dim in enumerate(names):\n",
|
||||
" print(f\"{dim:28s}: {probs[i,1]:.3f} - {probs[i, 0]:.3f}\")\n",
|
||||
" \n",
|
||||
" if show_graph:\n",
|
||||
" fig = plt.figure(figsize=(6,6))\n",
|
||||
" ax = fig.gca()\n",
|
||||
" \n",
|
||||
" xlabels = [\"Introversion (I)\", \"Intuiting (N)\", \"Thinking (T)\", \"Judging (J)\"]\n",
|
||||
" ax.barh(xlabels, [1, 1, 1, 1])\n",
|
||||
" ax.barh(xlabels, [ie[1], ns[1], tf[1], jp[1]])\n",
|
||||
" \n",
|
||||
" ax.set_xlim([0, 1])\n",
|
||||
" ax.set_xlabel(\"Propensity\")\n",
|
||||
" \n",
|
||||
" plt.show(fig)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Type in some text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "942446e84f5f4ee0945435bd80296d2c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"interactive(children=(Textarea(value='', description='Input:', placeholder='Enter in some text'), Checkbox(val…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<function __main__.eval_string(my_post, show_graph=False)>"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"interact(eval_string, my_post=widgets.Textarea( value='', \n",
|
||||
" placeholder='Enter in some text', \n",
|
||||
" description='Input:',\n",
|
||||
" disabled=False)\n",
|
||||
" )\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue