diff --git a/qual_analysis.ipynb b/qual_analysis.ipynb
new file mode 100644
index 0000000..6181fc9
--- /dev/null
+++ b/qual_analysis.ipynb
@@ -0,0 +1,6167 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Massive change testing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import plotly.express as px\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import hdbscan\n",
+ "import umap\n",
+ "import re\n",
+ "from gensim.test.utils import datapath\n",
+ "import gensim.downloader as api\n",
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "from topic_extractor import TopicExtractor\n",
+ "from sklearn.feature_extraction.text import TfidfVectorizer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wv = api.load('word2vec-google-news-300')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vul_list = ['temperature', 'weather', 'soil', 'employment', 'vaccine', 'education', 'jobs', 'income', 'discrimination', \n",
+ "'racism', 'ethnicity', 'minorities', 'orientation', 'immigrants', 'disabled', 'women', 'attitudes', 'imports', 'production', 'irrigation',\n",
+ "'economic', 'rain', 'water']\n",
+ "acc_list = ['price', 'cost', 'expensive', 'gdp', 'food', 'distance', 'rural']\n",
+ "sho_list = ['earthquake', 'disaster', 'conflict', 'war', 'politics', 'social', 'storms', 'volcanoes']\n",
+ "ava_list = ['enough', 'aid', 'share', 'amount', 'donations', 'grants', 'market']\n",
+ "\n",
+ "vul_df = pd.DataFrame({'pillar': 'vulnerability', 'target_words': vul_list})\n",
+ "acc_df = pd.DataFrame({'pillar': 'access', 'target_words': acc_list})\n",
+ "sho_df = pd.DataFrame({'pillar': 'shocks', 'target_words': sho_list})\n",
+ "ava_df = pd.DataFrame({'pillar': 'availability', 'target_words': ava_list})\n",
+ "\n",
+ "pdList = [vul_df, acc_df, sho_df, ava_df]\n",
+ "nom_cluster_words = pd.concat(pdList)\n",
+ "nom_cluster_words['word'] = [np.array(wv.most_similar(row , topn = 10)).T[0] for row in nom_cluster_words['target_words']]\n",
+ "\n",
+ "nom_cluster_words = nom_cluster_words.explode('word').reset_index(drop = True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Prompt | \n",
+ " text | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10 | \n",
+ " 2.4\\tWhat would you say are the main reasons w... | \n",
+ " Having no income, when those families do not h... | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2.4\\tWhat would you say are the main reasons w... | \n",
+ " As I said the poor people, they are poor becau... | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2.4\\tWhat would you say are the main reasons w... | \n",
+ " Economic state i think; Sometimes social probl... | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2.4\\tWhat would you say are the main reasons w... | \n",
+ " I think the leading cause is that, the governm... | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2.4\\tWhat would you say are the main reasons w... | \n",
+ " ? I‘d say this goes back the city not having j... | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2.5 Cause 3: | \n",
+ " Having an illness | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2.5 Cause 3: | \n",
+ " Cause 3: Social reasons\\nCause 4: Educational ... | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2.5 Cause 3: | \n",
+ " And lastly not having a job | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2.5 Cause 3: | \n",
+ " Cause 3: Not having an active economy\\nCause 4... | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2.5 Cause 3: | \n",
+ " Under-performing of the community. Yes. And th... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
95 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Prompt \\\n",
+ "10 2.4\\tWhat would you say are the main reasons w... \n",
+ "10 2.4\\tWhat would you say are the main reasons w... \n",
+ "10 2.4\\tWhat would you say are the main reasons w... \n",
+ "10 2.4\\tWhat would you say are the main reasons w... \n",
+ "10 2.4\\tWhat would you say are the main reasons w... \n",
+ ".. ... \n",
+ "13 2.5 Cause 3: \n",
+ "13 2.5 Cause 3: \n",
+ "13 2.5 Cause 3: \n",
+ "13 2.5 Cause 3: \n",
+ "13 2.5 Cause 3: \n",
+ "\n",
+ " text \n",
+ "10 Having no income, when those families do not h... \n",
+ "10 As I said the poor people, they are poor becau... \n",
+ "10 Economic state i think; Sometimes social probl... \n",
+ "10 I think the leading cause is that, the governm... \n",
+ "10 ? I‘d say this goes back the city not having j... \n",
+ ".. ... \n",
+ "13 Having an illness \n",
+ "13 Cause 3: Social reasons\\nCause 4: Educational ... \n",
+ "13 And lastly not having a job \n",
+ "13 Cause 3: Not having an active economy\\nCause 4... \n",
+ "13 Under-performing of the community. Yes. And th... \n",
+ "\n",
+ "[95 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data = pd.read_csv('Iraq Qual Analyses.csv').loc[:,:'P4.2']\n",
+ "raw_data['text'] = [row for row in raw_data[raw_data.columns[1:]].values]\n",
+ "raw_data = raw_data[10:14]\n",
+ "raw_data.drop(raw_data.columns[1:-1],axis = 1,inplace=True)\n",
+ "raw_data = raw_data.explode('text').dropna()\n",
+ "raw_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "8.584315002489348"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.linalg.norm(wv['opportunities'])**2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/4220633183.py:2: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ " word_list = word_list.str.replace(',','').str.replace('.','').str.lower()\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "True\n"
+ ]
+ }
+ ],
+ "source": [
+ "word_list = raw_data.text.str.split(' ').explode()\n",
+ "word_list = word_list.str.replace(',','').str.replace('.','').str.lower()\n",
+ "stop_words = [(\"don't\",.34),('reasons',.34),('foods',.6),('unable',.3),('citizens',.4),('my',.3),('hunger',.3),('cause',.2),('factories',.3),('leads',.3),('expired',.3),('living',.4),('low',.2)]\n",
+ "for stop_word, similarity in stop_words:\n",
+ " word_list = word_list[[wv.similarity(word.lower(), stop_word)< similarity if word in wv else True for word in word_list]]\n",
+ "print('children' in word_list.to_list())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "topic_extractorer = TopicExtractor(wv,0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "topic_extractorer.load_seed_clusters(seed_words=nom_cluster_words['word'],known_labels=nom_cluster_words['pillar'],threshold=7)\n",
+ "topic_extractorer.load_words(word_list,6)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "w:\\DARPA_Habitus\\CONSULTING\\Analytics\\HABITUS\\Max\\Habitus\\MICRO_model\\qual_analyses\\topic_extractor.py:63: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " temp['pillar'] = temp.pillar.replace(np.NaN,'None')\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "hovertemplate": "%{hovertext}
pillar=vulnerability
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}",
+ "hovertext": [
+ "females",
+ "irrigational",
+ "handicapped",
+ "girls",
+ "mothers",
+ "wintry_conditions",
+ "employment",
+ "import",
+ "Floridan_aquifer",
+ "economy",
+ "invidious_discrimination",
+ "husbands",
+ "irrigations",
+ "imported",
+ "Latino_immigrants",
+ "wheelchair_bound",
+ "jobs",
+ "importing",
+ "export",
+ "exporting",
+ "dewpoint",
+ "blacks",
+ "institutionalized_discrimination",
+ "freshwater",
+ "migrant",
+ "gender",
+ "racial_bigotry",
+ "immigrant",
+ "racist",
+ "unemployed",
+ "discriminatory",
+ "Disabled",
+ "societal_attitudes",
+ "ethnic_minorities",
+ "bigotry",
+ "Economic",
+ "downpour",
+ "stormy_weather",
+ "inclement_weather",
+ "institutionalized_racism",
+ "discriminations",
+ "sprinkler_irrigation",
+ "soils",
+ "wet_weather",
+ "homophobia",
+ "°_C",
+ "visually_impaired",
+ "disabilities",
+ "macroeconomic",
+ "sewage",
+ "°_F",
+ "Rain",
+ "macro_economic",
+ "racial_intolerance",
+ "irrigated",
+ "discriminated",
+ "potable_water",
+ "racial_minorities",
+ "physically_challenged",
+ "exported",
+ "wastewater",
+ "discriminated_against",
+ "degrees_Fahrenheit",
+ "gender_bias",
+ "racial_discrimination",
+ "torrential_rain",
+ "groundwater",
+ "potable",
+ "wintery_weather",
+ "mentally_disabled",
+ "irrigating",
+ "migrants",
+ "gender_discrimination",
+ "rains",
+ "temperatures",
+ "vaccines",
+ "exports",
+ "sandy_soil",
+ "unemployment",
+ "racial_prejudice",
+ "heavy_rain",
+ "heavy_rains",
+ "Immigrants",
+ "physically_handicapped",
+ "ambient_temperature",
+ "Racism",
+ "wintry_weather",
+ "Discrimination",
+ "irrigated_agriculture",
+ "downpours",
+ "illegals",
+ "illegal_immigrants",
+ "undocumented_workers",
+ "undocumented_immigrants",
+ "rain_showers",
+ "ambient_temperatures",
+ "gender_ethnicity",
+ "drip_irrigation",
+ "flu_vaccines",
+ "daytime_temperatures",
+ "rainfall",
+ "H#N#_influenza_vaccine",
+ "influenza_vaccines",
+ "H#N#_vaccines",
+ "smallpox_vaccine",
+ "Exports",
+ "influenza_vaccine",
+ "flu_vaccine",
+ "H#N#_vaccine",
+ "swine_flu_vaccine"
+ ],
+ "legendgroup": "vulnerability",
+ "marker": {
+ "color": [
+ 9,
+ 14,
+ 6,
+ 9,
+ 9,
+ 17,
+ 25,
+ 28,
+ 14,
+ 32,
+ 24,
+ 9,
+ 16,
+ 28,
+ 23,
+ 6,
+ 25,
+ 28,
+ 28,
+ 28,
+ 15,
+ 24,
+ 24,
+ 14,
+ 23,
+ 24,
+ 24,
+ 23,
+ 24,
+ 26,
+ 24,
+ 6,
+ 24,
+ 24,
+ 24,
+ 32,
+ 17,
+ 17,
+ 17,
+ 24,
+ 24,
+ 16,
+ 14,
+ 17,
+ 24,
+ 15,
+ 6,
+ 6,
+ 32,
+ 14,
+ 15,
+ 17,
+ 32,
+ 24,
+ 16,
+ 24,
+ 14,
+ 24,
+ 6,
+ 28,
+ 14,
+ 24,
+ 15,
+ 24,
+ 24,
+ 17,
+ 14,
+ 14,
+ 17,
+ 6,
+ 16,
+ 23,
+ 24,
+ 17,
+ 15,
+ 3,
+ 28,
+ 14,
+ 27,
+ 24,
+ 17,
+ 17,
+ 23,
+ 6,
+ 15,
+ 24,
+ 17,
+ 24,
+ 16,
+ 17,
+ 23,
+ 23,
+ 23,
+ 23,
+ 17,
+ 15,
+ 24,
+ 16,
+ 3,
+ 15,
+ 17,
+ 3,
+ 3,
+ 3,
+ 3,
+ 28,
+ 3,
+ 3,
+ 3,
+ 3
+ ],
+ "coloraxis": "coloraxis",
+ "symbol": "circle"
+ },
+ "mode": "markers",
+ "name": "vulnerability",
+ "orientation": "v",
+ "showlegend": true,
+ "type": "scatter",
+ "x": [
+ 9.16235637664795,
+ 12.767671585083008,
+ 9.896053314208984,
+ 9.240710258483887,
+ 9.290543556213379,
+ 15.923059463500977,
+ 10.54406452178955,
+ 0.19626043736934662,
+ 12.633519172668457,
+ 24.75111198425293,
+ 6.978339672088623,
+ 9.35623836517334,
+ 12.579814910888672,
+ 0.2165854126214981,
+ 8.579086303710938,
+ 9.92792797088623,
+ 10.26561450958252,
+ 0.2808363735675812,
+ 0.3294012248516083,
+ 0.3225804567337036,
+ 15.000429153442383,
+ 7.427166938781738,
+ 6.9527812004089355,
+ 12.820655822753906,
+ 8.830239295959473,
+ 7.550509452819824,
+ 6.663220405578613,
+ 8.71061897277832,
+ 6.589083194732666,
+ -12.985361099243164,
+ 7.014978885650635,
+ 10.140372276306152,
+ 7.797515869140625,
+ 7.644662380218506,
+ 6.5979766845703125,
+ 24.93074607849121,
+ 16.063474655151367,
+ 16.031957626342773,
+ 15.953804969787598,
+ 6.765969753265381,
+ 7.031929016113281,
+ 12.582038879394531,
+ 12.48460578918457,
+ 15.775157928466797,
+ 6.689704418182373,
+ 15.040907859802246,
+ 9.99012279510498,
+ 10.115378379821777,
+ 24.975109100341797,
+ 12.808707237243652,
+ 15.066388130187988,
+ 15.964011192321777,
+ 25.05222511291504,
+ 6.5349225997924805,
+ 12.527203559875488,
+ 7.084386348724365,
+ 12.771050453186035,
+ 7.503295421600342,
+ 9.904162406921387,
+ 0.27075278759002686,
+ 12.786842346191406,
+ 6.9771599769592285,
+ 15.13479232788086,
+ 7.086657524108887,
+ 7.041834831237793,
+ 16.11159896850586,
+ 12.757672309875488,
+ 12.691655158996582,
+ 15.938840866088867,
+ 9.794981956481934,
+ 12.484210968017578,
+ 8.815749168395996,
+ 7.036043643951416,
+ 16.088163375854492,
+ 15.223651885986328,
+ 7.790129661560059,
+ 0.28267717361450195,
+ 12.45325756072998,
+ -13.279583930969238,
+ 6.629983901977539,
+ 16.08289337158203,
+ 16.163053512573242,
+ 8.72900390625,
+ 9.8550386428833,
+ 15.07657527923584,
+ 6.595930576324463,
+ 15.868152618408203,
+ 6.896994113922119,
+ 12.628324508666992,
+ 16.199296951293945,
+ 8.62634563446045,
+ 8.719016075134277,
+ 8.639336585998535,
+ 8.64566707611084,
+ 16.028329849243164,
+ 15.079449653625488,
+ 7.499907970428467,
+ 12.532830238342285,
+ 7.790839672088623,
+ 15.261857986450195,
+ 16.089712142944336,
+ 7.75033712387085,
+ 7.71716833114624,
+ 7.733931064605713,
+ 7.802220821380615,
+ 0.2456071823835373,
+ 7.667672157287598,
+ 7.674630165100098,
+ 7.77892541885376,
+ 7.838179111480713
+ ],
+ "xaxis": "x",
+ "y": [
+ 10.112516403198242,
+ -5.146145820617676,
+ 11.058342933654785,
+ 10.490525245666504,
+ 10.366625785827637,
+ -0.464988112449646,
+ 31.472095489501953,
+ -0.9640982151031494,
+ -4.6123480796813965,
+ -1.4137349128723145,
+ 9.959176063537598,
+ 10.348305702209473,
+ -5.20917272567749,
+ -0.9652140736579895,
+ 10.339306831359863,
+ 11.163494110107422,
+ 31.355772018432617,
+ -1.0373811721801758,
+ -1.0338189601898193,
+ -0.9910680055618286,
+ -1.5478788614273071,
+ 9.836847305297852,
+ 9.878887176513672,
+ -4.773446083068848,
+ 10.140910148620605,
+ 9.163508415222168,
+ 9.847591400146484,
+ 10.160124778747559,
+ 9.885452270507812,
+ 19.87122917175293,
+ 9.854803085327148,
+ 11.271401405334473,
+ 9.030710220336914,
+ 9.786548614501953,
+ 9.713510513305664,
+ -1.3317567110061646,
+ -0.8841704726219177,
+ -0.4798450469970703,
+ -0.5381402969360352,
+ 9.801292419433594,
+ 9.792922973632812,
+ -5.284242153167725,
+ -5.0293498039245605,
+ -0.7814459800720215,
+ 9.88368034362793,
+ -1.4927150011062622,
+ 11.138364791870117,
+ 11.193595886230469,
+ -1.2993342876434326,
+ -4.733755588531494,
+ -1.5095746517181396,
+ -0.8039936423301697,
+ -1.204244613647461,
+ 9.878316879272461,
+ -5.346510410308838,
+ 9.674796104431152,
+ -4.668487071990967,
+ 9.848109245300293,
+ 11.130931854248047,
+ -0.9126441478729248,
+ -4.695742130279541,
+ 9.772257804870605,
+ -1.4167968034744263,
+ 9.531572341918945,
+ 9.645428657531738,
+ -0.5683789849281311,
+ -4.690822124481201,
+ -4.621603488922119,
+ -0.44945886731147766,
+ 10.966743469238281,
+ -5.313103199005127,
+ 10.161568641662598,
+ 9.559785842895508,
+ -0.9048455357551575,
+ -1.3202414512634277,
+ -5.146547317504883,
+ -0.9798607230186462,
+ -5.107876300811768,
+ 20.058378219604492,
+ 9.778946876525879,
+ -0.7409117817878723,
+ -0.5987833738327026,
+ 10.21772289276123,
+ 11.271647453308105,
+ -1.4616918563842773,
+ 9.737491607666016,
+ -0.48458540439605713,
+ 9.76830005645752,
+ -5.537435054779053,
+ -0.7557460069656372,
+ 10.260096549987793,
+ 10.268465995788574,
+ 10.198100090026855,
+ 10.28207015991211,
+ -0.7947467565536499,
+ -1.4168654680252075,
+ 8.679947853088379,
+ -5.318938732147217,
+ -5.1545796394348145,
+ -1.3082618713378906,
+ -0.9046620726585388,
+ -5.186274528503418,
+ -5.219858169555664,
+ -5.20323371887207,
+ -5.135258674621582,
+ -0.9587400555610657,
+ -5.270003795623779,
+ -5.261002540588379,
+ -5.158375263214111,
+ -5.098591327667236
+ ],
+ "yaxis": "y"
+ },
+ {
+ "hovertemplate": "%{hovertext}
pillar=availability
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}",
+ "hovertext": [
+ "industry",
+ "amout",
+ "marketplace",
+ "markets",
+ "amounts",
+ "funding",
+ "sums",
+ "contributions",
+ "donators",
+ "donated",
+ "donors",
+ "charitable_donations",
+ "donation",
+ "Donation",
+ "Donations",
+ "Monetary_donations"
+ ],
+ "legendgroup": "availability",
+ "marker": {
+ "color": [
+ 29,
+ 4,
+ 29,
+ 29,
+ 4,
+ 4,
+ 4,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5
+ ],
+ "coloraxis": "coloraxis",
+ "symbol": "diamond"
+ },
+ "mode": "markers",
+ "name": "availability",
+ "orientation": "v",
+ "showlegend": true,
+ "type": "scatter",
+ "x": [
+ 0.18043620884418488,
+ -4.6981987953186035,
+ 0.21258105337619781,
+ 0.13186922669410706,
+ -4.652432441711426,
+ -4.505962371826172,
+ -4.539834976196289,
+ -4.1242194175720215,
+ -4.087623119354248,
+ -4.140650272369385,
+ -4.089099884033203,
+ -4.198858737945557,
+ -4.072911739349365,
+ -4.0027756690979,
+ -4.094690799713135,
+ -4.215901851654053
+ ],
+ "xaxis": "x",
+ "y": [
+ -1.8738614320755005,
+ -2.429811716079712,
+ -1.8311617374420166,
+ -1.8468598127365112,
+ -2.321540594100952,
+ -2.174583911895752,
+ -2.2341742515563965,
+ -2.505549669265747,
+ -2.4823150634765625,
+ -2.439889907836914,
+ -2.5200045108795166,
+ -2.392725706100464,
+ -2.5460383892059326,
+ -2.6063902378082275,
+ -2.5252842903137207,
+ -2.5158116817474365
+ ],
+ "yaxis": "y"
+ },
+ {
+ "hovertemplate": "%{hovertext}
pillar=access
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}",
+ "hovertext": [
+ "costly",
+ "pricey",
+ "price",
+ "outrageously_expensive",
+ "cheaper",
+ "pricy",
+ "pricing",
+ "prices",
+ "prohibitively_expensive",
+ "pricier",
+ "GDP",
+ "Prices"
+ ],
+ "legendgroup": "access",
+ "marker": {
+ "color": [
+ 22,
+ 22,
+ 29,
+ 22,
+ 22,
+ 22,
+ 29,
+ 29,
+ 22,
+ 22,
+ 32,
+ 29
+ ],
+ "coloraxis": "coloraxis",
+ "symbol": "square"
+ },
+ "mode": "markers",
+ "name": "access",
+ "orientation": "v",
+ "showlegend": true,
+ "type": "scatter",
+ "x": [
+ -1.1036288738250732,
+ -0.9784208536148071,
+ -0.37404417991638184,
+ -1.067224144935608,
+ -0.8902236223220825,
+ -1.0903464555740356,
+ -0.11353441327810287,
+ -0.14094533026218414,
+ -1.045573353767395,
+ -0.9335117340087891,
+ 24.91689109802246,
+ -0.18147997558116913
+ ],
+ "xaxis": "x",
+ "y": [
+ -1.630856990814209,
+ -1.627647876739502,
+ -1.6491870880126953,
+ -1.5814464092254639,
+ -1.6146478652954102,
+ -1.5424124002456665,
+ -1.7577154636383057,
+ -1.7066618204116821,
+ -1.6164547204971313,
+ -1.679885983467102,
+ -1.156358003616333,
+ -1.6531304121017456
+ ],
+ "yaxis": "y"
+ },
+ {
+ "hovertemplate": "%{hovertext}
pillar=shocks
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}",
+ "hovertext": [
+ "war",
+ "societal",
+ "conflict",
+ "internecine_conflict",
+ "strife",
+ "undersea_volcanoes",
+ "politician",
+ "catastrophe",
+ "calamity",
+ "conflicts",
+ "Social",
+ "disasters",
+ "hostilities",
+ "natural_disasters",
+ "catastrophes",
+ "War",
+ "Conflict",
+ "civil_strife",
+ "volcanic",
+ "Persian_Gulf_War",
+ "partisan_politics",
+ "hurricanes",
+ "rainstorms",
+ "devastating_earthquake",
+ "tornados",
+ "volcano",
+ "earthquakes",
+ "Earthquake",
+ "quake",
+ "volcanic_eruptions",
+ "aftershock",
+ "lava_flows",
+ "volcanic_activity",
+ "thunderstorms",
+ "tornadoes",
+ "temblor",
+ "magnitude_earthquake",
+ "severe_thunderstorms",
+ "#.#_magnitude_earthquake",
+ "#.#_magnitude_quake",
+ "quakes",
+ "active_volcanoes",
+ "volcanos"
+ ],
+ "legendgroup": "shocks",
+ "marker": {
+ "color": [
+ 12,
+ 31,
+ 12,
+ 12,
+ 12,
+ 13,
+ 30,
+ 20,
+ 20,
+ 12,
+ 31,
+ 20,
+ 12,
+ 20,
+ 20,
+ 12,
+ 12,
+ 12,
+ 13,
+ 12,
+ 30,
+ 17,
+ 17,
+ 21,
+ 17,
+ 13,
+ 21,
+ 21,
+ 21,
+ 13,
+ 21,
+ 13,
+ 13,
+ 17,
+ 17,
+ 21,
+ 21,
+ 17,
+ 21,
+ 21,
+ 21,
+ 13,
+ 13
+ ],
+ "coloraxis": "coloraxis",
+ "symbol": "x"
+ },
+ "mode": "markers",
+ "name": "shocks",
+ "orientation": "v",
+ "showlegend": true,
+ "type": "scatter",
+ "x": [
+ 23.775033950805664,
+ 7.8632049560546875,
+ 23.73737907409668,
+ 23.739322662353516,
+ 23.762481689453125,
+ 12.668766975402832,
+ 24.632911682128906,
+ 21.732084274291992,
+ 21.78840446472168,
+ 23.85695457458496,
+ 7.8252129554748535,
+ 21.57056427001953,
+ 23.72748565673828,
+ 21.626258850097656,
+ 21.661327362060547,
+ 23.777233123779297,
+ 23.88597297668457,
+ 23.608903884887695,
+ 12.627827644348145,
+ 23.643224716186523,
+ 24.67068862915039,
+ 21.654014587402344,
+ 16.230154037475586,
+ 20.910120010375977,
+ 16.63974380493164,
+ 12.589360237121582,
+ 20.940082550048828,
+ 20.907955169677734,
+ 20.704904556274414,
+ 12.658271789550781,
+ 20.754716873168945,
+ 12.632294654846191,
+ 12.61149787902832,
+ 16.41225242614746,
+ 16.693267822265625,
+ 20.801971435546875,
+ 20.7946720123291,
+ 16.38044548034668,
+ 20.90024185180664,
+ 20.796903610229492,
+ 20.873451232910156,
+ 12.596431732177734,
+ 12.615105628967285
+ ],
+ "xaxis": "x",
+ "y": [
+ -0.5166358947753906,
+ 8.860580444335938,
+ -0.6228737235069275,
+ -0.6622812151908875,
+ -0.6967945694923401,
+ -2.5611214637756348,
+ -0.8868563175201416,
+ -0.571590781211853,
+ -0.6115420460700989,
+ -0.7369848489761353,
+ 8.780749320983887,
+ -0.4452560544013977,
+ -0.5958192944526672,
+ -0.4690714478492737,
+ -0.5017377138137817,
+ -0.5338945388793945,
+ -0.7218263745307922,
+ -0.5466699004173279,
+ -2.572164297103882,
+ -0.5221036076545715,
+ -0.8606185913085938,
+ -0.5041481852531433,
+ -0.7453354597091675,
+ -0.6226603984832764,
+ -0.6450382471084595,
+ -2.5973639488220215,
+ -0.5227365493774414,
+ -0.5900933742523193,
+ -0.6559988260269165,
+ -2.5386602878570557,
+ -0.5897109508514404,
+ -2.617889642715454,
+ -2.5297799110412598,
+ -0.7741866111755371,
+ -0.6501337289810181,
+ -0.5965899229049683,
+ -0.6158409714698792,
+ -0.7104356288909912,
+ -0.7185001373291016,
+ -0.6174815893173218,
+ -0.5477422475814819,
+ -2.517280340194702,
+ -2.5789031982421875
+ ],
+ "yaxis": "y"
+ },
+ {
+ "hovertemplate": "%{hovertext}
pillar=None
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}",
+ "hovertext": [
+ "money",
+ "money",
+ "money",
+ "money",
+ "money",
+ "money",
+ "money",
+ "money",
+ "money",
+ "health",
+ "health",
+ "health",
+ "children",
+ "children",
+ "children",
+ "children",
+ "children",
+ "experience",
+ "experience",
+ "financial",
+ "financial",
+ "financial",
+ "financial",
+ "financial",
+ "financial",
+ "financial",
+ "market",
+ "market",
+ "market",
+ "receiving",
+ "social",
+ "social",
+ "social",
+ "social",
+ "social",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "job",
+ "care",
+ "care",
+ "care",
+ "care",
+ "water",
+ "buy",
+ "buy",
+ "buy",
+ "buy",
+ "economic",
+ "economic",
+ "economic",
+ "economic",
+ "economic",
+ "economic",
+ "economic",
+ "economic",
+ "political",
+ "political",
+ "political",
+ "political",
+ "receive",
+ "receive",
+ "received",
+ "received",
+ "received",
+ "received",
+ "received",
+ "opportunity",
+ "opportunity",
+ "opportunity",
+ "opportunity",
+ "obtain",
+ "markets",
+ "disabled",
+ "crisis",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "opportunities",
+ "agricultural",
+ "agricultural",
+ "agricultural",
+ "geographic",
+ "geographic",
+ "ducks",
+ "geography",
+ "geography",
+ "geographical",
+ "geographical",
+ "disaster",
+ "agriculture",
+ "agriculture",
+ "agriculture",
+ "agriculture",
+ "agriculture",
+ "weather",
+ "crop",
+ "geographically",
+ "disability",
+ "disability",
+ "disability",
+ "disability",
+ "disability",
+ "chickens",
+ "birds",
+ "crops",
+ "crops",
+ "crops",
+ "crops",
+ "salaries",
+ "salaries",
+ "salaries",
+ "salaries",
+ "medications",
+ "turkeys",
+ "wheat",
+ "wheat",
+ "wheat",
+ "wheat",
+ "rain",
+ "rain",
+ "geese",
+ "salary",
+ "salary",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "unemployment",
+ "jobless",
+ "jobless",
+ "jobless",
+ "jobless"
+ ],
+ "legendgroup": "None",
+ "marker": {
+ "color": [
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 1,
+ 1,
+ 1,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 0,
+ 0,
+ 33,
+ 33,
+ 33,
+ 33,
+ 33,
+ 33,
+ 33,
+ 29,
+ 29,
+ 29,
+ 2,
+ 31,
+ 31,
+ 31,
+ 31,
+ 31,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 1,
+ 1,
+ 1,
+ 1,
+ 14,
+ 11,
+ 11,
+ 11,
+ 11,
+ 32,
+ 32,
+ 32,
+ 32,
+ 32,
+ 32,
+ 32,
+ 32,
+ 30,
+ 30,
+ 30,
+ 30,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 2,
+ 29,
+ 6,
+ 12,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18,
+ 18,
+ 18,
+ 7,
+ 7,
+ 8,
+ 7,
+ 7,
+ 7,
+ 7,
+ 20,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 17,
+ 19,
+ 7,
+ 6,
+ 6,
+ 6,
+ 6,
+ 6,
+ 8,
+ 8,
+ 19,
+ 19,
+ 19,
+ 19,
+ 10,
+ 10,
+ 10,
+ 10,
+ 3,
+ 8,
+ 19,
+ 19,
+ 19,
+ 19,
+ 17,
+ 17,
+ 8,
+ 10,
+ 10,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 26,
+ 26,
+ 26,
+ 26
+ ],
+ "coloraxis": "coloraxis",
+ "symbol": "cross"
+ },
+ "mode": "markers",
+ "name": "None",
+ "orientation": "v",
+ "showlegend": true,
+ "type": "scatter",
+ "x": [
+ -4.507212162017822,
+ -4.4262800216674805,
+ -4.376980781555176,
+ -4.472561359405518,
+ -4.478466033935547,
+ -4.406641960144043,
+ -4.5273919105529785,
+ -4.566632270812988,
+ -4.462923526763916,
+ 8.432709693908691,
+ 8.542923927307129,
+ 8.438213348388672,
+ 9.243868827819824,
+ 9.323732376098633,
+ 9.353984832763672,
+ 9.28091812133789,
+ 9.46688175201416,
+ 33.436790466308594,
+ 33.42896270751953,
+ 24.834928512573242,
+ 24.603927612304688,
+ 24.81510353088379,
+ 24.914033889770508,
+ 24.714937210083008,
+ 24.85848617553711,
+ 24.783843994140625,
+ 0.13158822059631348,
+ 0.2060222327709198,
+ 0.07041709870100021,
+ -3.8944013118743896,
+ 7.815195560455322,
+ 7.9211320877075195,
+ 7.829451084136963,
+ 7.823032379150391,
+ 7.851646900177002,
+ 10.382811546325684,
+ 10.526248931884766,
+ 10.266364097595215,
+ 10.403772354125977,
+ 10.074569702148438,
+ 10.3519868850708,
+ 10.058606147766113,
+ 10.351016998291016,
+ 9.907017707824707,
+ 10.072925567626953,
+ 10.131407737731934,
+ 10.245551109313965,
+ 10.093221664428711,
+ 10.352201461791992,
+ 9.824226379394531,
+ 10.205928802490234,
+ 10.347371101379395,
+ 9.96969223022461,
+ 10.00643539428711,
+ 10.362354278564453,
+ 9.932232856750488,
+ 10.579630851745605,
+ 10.070685386657715,
+ 10.280411720275879,
+ 9.908019065856934,
+ 10.055655479431152,
+ 10.26353931427002,
+ 10.193639755249023,
+ 10.36439037322998,
+ 10.125931739807129,
+ 9.86143684387207,
+ 10.180801391601562,
+ 9.962239265441895,
+ 10.22084903717041,
+ 10.006352424621582,
+ 10.262260437011719,
+ 8.452844619750977,
+ 8.473775863647461,
+ 8.484114646911621,
+ 8.491806030273438,
+ 12.699315071105957,
+ -0.5915045142173767,
+ -0.5873561501502991,
+ -0.5466613173484802,
+ -0.6931774020195007,
+ 24.807939529418945,
+ 24.832786560058594,
+ 24.81983757019043,
+ 24.947004318237305,
+ 24.587072372436523,
+ 24.792675018310547,
+ 24.696651458740234,
+ 24.700870513916016,
+ 24.622825622558594,
+ 24.710262298583984,
+ 24.635618209838867,
+ 24.629207611083984,
+ -3.8478801250457764,
+ -3.9143288135528564,
+ -3.906141519546509,
+ -3.936976909637451,
+ -3.858661413192749,
+ -3.932265520095825,
+ -3.8614954948425293,
+ 33.24429702758789,
+ 33.2432746887207,
+ 33.252357482910156,
+ 33.05552291870117,
+ -3.7636804580688477,
+ 0.21506711840629578,
+ 10.071990966796875,
+ 23.506977081298828,
+ 33.159996032714844,
+ 33.235191345214844,
+ 33.110774993896484,
+ 33.03934860229492,
+ 33.20603561401367,
+ 33.24473571777344,
+ 33.170082092285156,
+ 33.01203918457031,
+ 33.145511627197266,
+ 33.04566955566406,
+ 33.01129913330078,
+ 33.11540603637695,
+ 33.05546188354492,
+ 12.637762069702148,
+ 12.703325271606445,
+ 12.683088302612305,
+ 7.131596565246582,
+ 7.1713032722473145,
+ 9.540665626525879,
+ 7.1630048751831055,
+ 7.180014610290527,
+ 7.161986351013184,
+ 7.191754341125488,
+ 21.568710327148438,
+ 12.767961502075195,
+ 12.709783554077148,
+ 12.733067512512207,
+ 12.74810791015625,
+ 12.725667953491211,
+ 15.861473083496094,
+ 11.574446678161621,
+ 7.154047966003418,
+ 10.197831153869629,
+ 10.304410934448242,
+ 10.185552597045898,
+ 10.260804176330566,
+ 10.31605339050293,
+ 9.557987213134766,
+ 9.517667770385742,
+ 11.821714401245117,
+ 11.849270820617676,
+ 11.85051155090332,
+ 11.834342002868652,
+ 0.5366057753562927,
+ 0.5434686541557312,
+ 0.493112713098526,
+ 0.5365191102027893,
+ 8.457159996032715,
+ 9.551088333129883,
+ 11.37149429321289,
+ 11.353878021240234,
+ 11.38868522644043,
+ 11.386590003967285,
+ 15.934645652770996,
+ 16.065654754638672,
+ 9.50425910949707,
+ 0.5356476902961731,
+ 0.5355372428894043,
+ -13.246667861938477,
+ -12.950582504272461,
+ -13.320332527160645,
+ -13.17508602142334,
+ -13.222073554992676,
+ -13.138623237609863,
+ -13.177305221557617,
+ -13.256484031677246,
+ -13.399938583374023,
+ -13.151272773742676,
+ -13.078429222106934,
+ -13.153911590576172,
+ -13.203241348266602,
+ -13.046936988830566,
+ -13.121779441833496
+ ],
+ "xaxis": "x",
+ "y": [
+ -2.2750625610351562,
+ -2.1617109775543213,
+ -2.093186140060425,
+ -2.203427791595459,
+ -2.178464412689209,
+ -2.307161331176758,
+ -2.301379680633545,
+ -2.287383556365967,
+ -2.2355751991271973,
+ -4.484686851501465,
+ -4.379030704498291,
+ -4.488071918487549,
+ 10.510049819946289,
+ 10.52122974395752,
+ 10.431391716003418,
+ 10.450465202331543,
+ 10.560538291931152,
+ 3.770805597305298,
+ 3.8158607482910156,
+ -1.5887807607650757,
+ -1.4921249151229858,
+ -1.4737590551376343,
+ -1.5524905920028687,
+ -1.5470755100250244,
+ -1.5884498357772827,
+ -1.5273369550704956,
+ -1.806786060333252,
+ -1.8529914617538452,
+ -1.8813849687576294,
+ -0.04487733170390129,
+ 8.809576034545898,
+ 8.828706741333008,
+ 8.794428825378418,
+ 8.889230728149414,
+ 8.825891494750977,
+ 31.368797302246094,
+ 31.796459197998047,
+ 32.00880432128906,
+ 31.591533660888672,
+ 31.790260314941406,
+ 31.943866729736328,
+ 31.922914505004883,
+ 31.77692222595215,
+ 31.576095581054688,
+ 31.818601608276367,
+ 31.540224075317383,
+ 31.738788604736328,
+ 31.65993309020996,
+ 31.488407135009766,
+ 31.782691955566406,
+ 31.734996795654297,
+ 31.52668571472168,
+ 31.755765914916992,
+ 31.368860244750977,
+ 31.66207504272461,
+ 31.645532608032227,
+ 31.674280166625977,
+ 31.610933303833008,
+ 31.75507926940918,
+ 31.54774284362793,
+ 31.65562629699707,
+ 31.70794677734375,
+ 32.01081848144531,
+ 31.8942813873291,
+ 31.771577835083008,
+ 31.659448623657227,
+ 31.668880462646484,
+ 31.751672744750977,
+ 31.550310134887695,
+ 31.7370548248291,
+ 31.567859649658203,
+ -4.4550862312316895,
+ -4.4339470863342285,
+ -4.422357559204102,
+ -4.416126728057861,
+ -4.625192642211914,
+ -1.5523978471755981,
+ -1.5859959125518799,
+ -1.5279690027236938,
+ -1.5441169738769531,
+ -1.1945008039474487,
+ -1.232282280921936,
+ -1.1883515119552612,
+ -1.232890248298645,
+ -1.417033314704895,
+ -1.2373076677322388,
+ -1.3513730764389038,
+ -1.3807910680770874,
+ -1.008764624595642,
+ -0.9554536938667297,
+ -1.0258835554122925,
+ -1.0276527404785156,
+ -0.08584870398044586,
+ -0.07352712005376816,
+ -0.08116650581359863,
+ -0.11972293257713318,
+ -0.08955511450767517,
+ -0.10064183175563812,
+ -0.13496297597885132,
+ 3.7353227138519287,
+ 3.6970198154449463,
+ 3.764241933822632,
+ 3.722795248031616,
+ -0.08996137976646423,
+ -1.7895642518997192,
+ 11.262763023376465,
+ -0.888943076133728,
+ 3.5394551753997803,
+ 3.785940408706665,
+ 3.5987133979797363,
+ 3.652507781982422,
+ 3.6828837394714355,
+ 3.749213933944702,
+ 3.5954811573028564,
+ 3.589816093444824,
+ 3.562051296234131,
+ 3.556161642074585,
+ 3.498551368713379,
+ 3.641343593597412,
+ 3.5875024795532227,
+ -5.698921203613281,
+ -5.7093281745910645,
+ -5.761826515197754,
+ 7.662810325622559,
+ 7.6909356117248535,
+ 10.086607933044434,
+ 7.696770668029785,
+ 7.719559669494629,
+ 7.686873435974121,
+ 7.705811977386475,
+ -0.5955846905708313,
+ -5.893669128417969,
+ -5.8075737953186035,
+ -5.848629951477051,
+ -5.869546890258789,
+ -5.901583671569824,
+ -0.45020514726638794,
+ -4.952081203460693,
+ 7.6730875968933105,
+ 11.240777015686035,
+ 11.395076751708984,
+ 11.296164512634277,
+ 11.360045433044434,
+ 11.377558708190918,
+ 10.074575424194336,
+ 10.084754943847656,
+ -5.053019046783447,
+ -5.046474933624268,
+ -5.074513912200928,
+ -5.067023754119873,
+ -8.25075626373291,
+ -8.257633209228516,
+ -8.207293510437012,
+ -8.250665664672852,
+ -4.401479244232178,
+ 10.055825233459473,
+ -4.8598313331604,
+ -4.82229471206665,
+ -4.862067699432373,
+ -4.853354454040527,
+ -0.8348350524902344,
+ -0.9088714122772217,
+ 10.160237312316895,
+ -8.249314308166504,
+ -8.249642372131348,
+ 20.04449462890625,
+ 20.085220336914062,
+ 19.971458435058594,
+ 20.079345703125,
+ 20.09954071044922,
+ 20.16891098022461,
+ 20.200408935546875,
+ 20.2004337310791,
+ 20.07594871520996,
+ 20.181053161621094,
+ 20.227428436279297,
+ 19.99015998840332,
+ 20.114471435546875,
+ 19.939321517944336,
+ 20.05548667907715
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "coloraxis": {
+ "colorbar": {
+ "title": {
+ "text": "labels"
+ }
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "legend": {
+ "orientation": "h",
+ "title": {
+ "text": "pillar"
+ },
+ "tracegroupgap": 0
+ },
+ "margin": {
+ "t": 60
+ },
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "principal component of text embeddings"
+ }
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "title": {
+ "text": "secondary component of text embeddings"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "topic_extractorer.view_clusters()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10 As I said the poor people they are poor becaus...\n",
+ "11 Unemployment\n",
+ "12 Poverty \n",
+ "13 Neglecting Agriculture fields \n",
+ "Name: C2, dtype: object\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/3078728154.py:2: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "raw_data = pd.read_csv('Iraq Qual Analyses.csv').loc[:,:'P4.2']\n",
+ "responses = raw_data.C2.str.replace(',','').str.replace('.','')\n",
+ "print(responses[10:14])\n",
+ "word_list = responses[10:14].str.split(\" \").explode()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample = topic_extractorer.pre_trained.word.sample(200)\n",
+ "sims = np.array([wv.similarity(word1,word2) for i,word1 in enumerate(sample[:-1]) for word2 in sample[i+1:]])\n",
+ "sims.sort()\n",
+ "median_sim = sims[sims.shape[0]//2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Prompt', 'C1', 'C2', 'C3', 'P5.2', 'P3.2', 'P1', 'C7', 'C8', 'C9',\n",
+ " 'C1.2', 'C2.2', 'C3.2', 'C4', 'C4.2', 'C5', '5.2', 'C6', 'C6.2', 'C7.2',\n",
+ " 'C8.2', 'C9.2', 'C10', 'C10.2', 'P2', 'P3', 'P4', 'P5', 'P1.2', 'P2.2',\n",
+ " 'P4.2'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C1's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 9 4.398752 [children] \n",
+ "\n",
+ " location \n",
+ "0 [0.012939453, 0.0016098022, -0.04321289, 0.177... \n",
+ "C2's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 4 0.000000 [money] \n",
+ "2 18 0.000000 [agriculture] \n",
+ "3 27 0.000000 [unemployment] \n",
+ "0 -1 -0.093495 [grow, fields, neglecting] \n",
+ "\n",
+ " location \n",
+ "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n",
+ "3 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "0 [0.0133463545, 0.23860677, 0.060221355, -0.062... \n",
+ "C3's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 32 1.759501 [economic] \n",
+ "0 31 0.000000 [social] \n",
+ "\n",
+ " location \n",
+ "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "0 [0.099121094, -0.09765625, -0.123535156, 0.163... \n",
+ "P5.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "11 19 5.588203 [crop, crops, wheat] \n",
+ "3 2 4.638495 [received, receive] \n",
+ "6 8 1.838235 [chickens, ducks, geese] \n",
+ "1 0 1.759501 [experience] \n",
+ "2 1 1.759501 [care] \n",
+ "7 11 1.759501 [buy] \n",
+ "9 17 1.759501 [rain] \n",
+ "13 33 1.759501 [financial] \n",
+ "10 18 1.759501 [agriculture] \n",
+ "4 3 0.000000 [medications] \n",
+ "5 4 0.000000 [money] \n",
+ "8 14 0.000000 [water] \n",
+ "12 25 0.000000 [job] \n",
+ "0 -1 -2.185442 [mine, surprise, servants] \n",
+ "\n",
+ " location \n",
+ "11 [0.13769531, 0.24978298, -0.03390842, 0.280870... \n",
+ "3 [0.056274414, -0.08089193, -0.13297527, -0.182... \n",
+ "6 [0.020507812, 0.18432617, -0.30004883, 0.13378... \n",
+ "1 [0.037841797, -0.060058594, -0.05810547, -0.15... \n",
+ "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n",
+ "7 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n",
+ "9 [-0.05102539, 0.045898438, -0.2734375, -0.2597... \n",
+ "13 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n",
+ "10 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n",
+ "4 [-0.18066406, 0.16796875, -0.16992188, 0.22363... \n",
+ "5 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "8 [-0.15136719, 0.13671875, 0.11669922, -0.17871... \n",
+ "12 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "0 [0.0067859357, 0.074561484, 0.043162655, 0.031... \n",
+ "P3.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 0 1.759501 [opportunities] \n",
+ "4 25 1.759501 [job] \n",
+ "6 29 1.759501 [market] \n",
+ "2 7 1.014648 [geography, geographic] \n",
+ "3 11 0.000000 [buy] \n",
+ "5 27 0.000000 [unemployment] \n",
+ "0 -1 -0.027927 [connected, situation, surroundings] \n",
+ "\n",
+ " location \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "6 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n",
+ "2 [0.06036377, -0.033958435, -0.033325195, 0.248... \n",
+ "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n",
+ "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "0 [0.037760418, 0.038523357, -0.09503852, 0.0713... \n",
+ "P1's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 27 1.759501 [unemployment] \n",
+ "0 -1 1.148143 [society, unfair] \n",
+ "\n",
+ " location \n",
+ "1 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "0 [-0.13769531, 0.036376953, -0.11740112, -0.020... \n",
+ "C7's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 6 0.0 [disability] \n",
+ "1 25 0.0 [job] \n",
+ "\n",
+ " location \n",
+ "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n",
+ "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "C8's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 0 1.759501 [opportunities] \n",
+ "2 25 1.759501 [job] \n",
+ "0 -1 0.153353 [stay, secure, fate] \n",
+ "\n",
+ " location \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "0 [0.038635254, 0.014607747, -0.10839844, 0.0579... \n",
+ "C9's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 25 1.759501 [job] \n",
+ "\n",
+ " location \n",
+ "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C1.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 0 2.639251 [opportunity] \n",
+ "3 25 2.639251 [job] \n",
+ "2 18 2.384640 [agricultural, agriculture] \n",
+ "5 32 1.759501 [economic] \n",
+ "0 -1 0.191559 [preparing, economics, sectors] \n",
+ "4 29 0.000000 [market] \n",
+ "6 33 0.000000 [financial] \n",
+ "\n",
+ " location \n",
+ "1 [-0.025634766, 0.20410156, 0.044189453, -0.007... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "2 [-0.060872395, 0.06437174, -0.025227865, 0.007... \n",
+ "5 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "0 [-0.0045700073, 0.09932709, 0.11424255, 0.0610... \n",
+ "4 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n",
+ "6 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n",
+ "C2.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "3 33 3.519001 [financial] \n",
+ "1 12 0.000000 [crisis] \n",
+ "2 18 0.000000 [agriculture] \n",
+ "0 -1 -0.107507 [states, eats, available] \n",
+ "\n",
+ " location \n",
+ "3 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n",
+ "1 [0.11669922, 0.03466797, -0.11816406, 0.4375, ... \n",
+ "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n",
+ "0 [-0.032714844, 0.068359375, -0.06858317, 0.103... \n",
+ "C3.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "3 25 3.519001 [job] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "2 7 0.000000 [geographically] \n",
+ "0 -1 -0.049945 [shares, profits, resources] \n",
+ "\n",
+ " location \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [-0.107910156, -0.21875, -0.13085938, 0.089355... \n",
+ "0 [0.03427476, 0.031355638, 0.033590462, 0.10567... \n",
+ "C4's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 4 1.759501 [money] \n",
+ "6 26 1.759501 [jobless] \n",
+ "2 7 1.501686 [geographic, geographical] \n",
+ "0 -1 1.020174 [location, environment, achieve] \n",
+ "3 11 0.000000 [buy] \n",
+ "4 17 0.000000 [weather] \n",
+ "5 25 0.000000 [job] \n",
+ "\n",
+ " location \n",
+ "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "6 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n",
+ "2 [0.0032958984, 0.0063476562, -0.042663574, 0.1... \n",
+ "0 [-0.0859375, 0.083789065, -0.0803711, 0.026171... \n",
+ "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n",
+ "4 [-0.18652344, 0.16796875, -0.30273438, -0.0559... \n",
+ "5 [-0.014587402, -0.048339844, -0.13671875, -0.1... "
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "C4.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 31 0.000000 [social] \n",
+ "0 -1 -0.001679 [cultivation, aspect] \n",
+ "\n",
+ " location \n",
+ "1 [0.099121094, -0.09765625, -0.123535156, 0.163... \n",
+ "0 [0.22607422, 0.1387024, 0.028686523, 0.0827331... \n",
+ "C5's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 0 1.759501 [opportunities] \n",
+ "3 25 1.759501 [job] \n",
+ "2 7 0.000000 [geographical] \n",
+ "0 -1 -0.437343 [himself, border, location] \n",
+ "\n",
+ " location \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "2 [0.022583008, 0.07861328, 0.015258789, 0.15039... \n",
+ "0 [0.013224284, 0.08122762, 0.061604816, 0.05407... \n",
+ "5.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 25 2.639251 [job] \n",
+ "\n",
+ " location \n",
+ "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "C6's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 6 0.906088 [disabled, disability] \n",
+ "\n",
+ " location \n",
+ "0 [0.15576172, -0.06085205, -0.15185547, 0.11047... \n",
+ "C6.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "2 1 1.759501 [care] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "3 25 0.000000 [job] \n",
+ "4 27 0.000000 [unemployment] \n",
+ "0 -1 -0.285983 [designated, body, cases] \n",
+ "\n",
+ " location \n",
+ "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "4 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "0 [-0.025497437, 0.045959473, 0.14416504, 0.1986... \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C7.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "4 30 1.759501 [political] \n",
+ "3 25 1.759501 [job] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "2 20 0.000000 [disaster] \n",
+ "0 -1 -0.396100 [provider, manpower, active] \n",
+ "\n",
+ " location \n",
+ "4 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [0.23339844, -0.0006713867, -0.050048828, 0.18... \n",
+ "0 [-0.06995985, 0.06215922, 0.015842438, 0.06583... \n",
+ "C8.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "2 26 1.759501 [jobless] \n",
+ "0 -1 0.000000 [closed] \n",
+ "1 7 0.000000 [geography] \n",
+ "\n",
+ " location \n",
+ "2 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n",
+ "0 [-0.041015625, 0.016723633, 0.21484375, 0.1298... \n",
+ "1 [0.13671875, -0.0019989014, 0.033935547, 0.296... \n",
+ "C9.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 6 0.000000 [disability] \n",
+ "0 -1 -0.079731 [achieve, suitable, laziness] \n",
+ "\n",
+ " location \n",
+ "1 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n",
+ "0 [-0.11816406, -0.007975261, -0.07828776, 0.149... \n",
+ "C10's mental model:\n",
+ " cluster strength examples \\\n",
+ "3 4 2.639251 [money] \n",
+ "4 25 1.759501 [job] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "2 2 0.000000 [obtain] \n",
+ "0 -1 -0.224280 [vacant, efficacious, filled] \n",
+ "\n",
+ " location \n",
+ "3 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [-0.079589844, -0.15234375, -0.42382812, -0.13... \n",
+ "0 [0.0040893555, 0.08319092, 0.07827759, 0.03198... \n",
+ "C10.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "2 31 2.639251 [social] \n",
+ "1 30 1.759501 [political] \n",
+ "3 32 1.759501 [economic] \n",
+ "0 -1 0.902344 [connections, educational] \n",
+ "\n",
+ " location \n",
+ "2 [0.099121094, -0.09765625, -0.123535156, 0.163... \n",
+ "1 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n",
+ "3 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "0 [-0.119140625, 0.0053710938, -0.0052083335, 0.... \n",
+ "P2's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 1 2.639251 [health] \n",
+ "3 25 1.759501 [job] \n",
+ "0 -1 0.000000 [conditions] \n",
+ "2 2 0.000000 [receiving] \n",
+ "\n",
+ " location \n",
+ "1 [-0.07421875, 0.11279297, 0.09472656, 0.071777... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "0 [-0.27539062, 0.21386719, 0.0390625, 0.1088867... \n",
+ "2 [0.095703125, -0.015991211, -0.18359375, -0.09... \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "P3's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 -1 0.307454 [unavailability, response, items] \n",
+ "1 10 0.000000 [salary] \n",
+ "2 29 0.000000 [markets] \n",
+ "\n",
+ " location \n",
+ "0 [-0.04616928, 0.2263794, -0.121520996, 0.08703... \n",
+ "1 [0.3984375, -0.056152344, -0.16894531, 0.18359... \n",
+ "2 [-0.052001953, -0.08544922, -0.13574219, 0.106... \n",
+ "P4's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 6 1.759501 [disability] \n",
+ "1 32 1.759501 [economic] \n",
+ "\n",
+ " location \n",
+ "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n",
+ "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "P5's mental model:\n",
+ " cluster strength examples \\\n",
+ "0 0 1.759501 [opportunities] \n",
+ "1 25 1.759501 [job] \n",
+ "\n",
+ " location \n",
+ "0 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "P1.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "3 25 1.759501 [job] \n",
+ "1 0 1.084699 [opportunities, opportunity] \n",
+ "0 -1 0.232175 [active, borders, opened] \n",
+ "2 18 0.000000 [agricultural] \n",
+ "\n",
+ " location \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.1348877, 0.24072266, -0.017700195, -0.0401... \n",
+ "0 [0.01599884, 0.118759155, 0.024902344, -0.0052... \n",
+ "2 [-0.07421875, 0.107421875, -0.016357422, 0.024... \n",
+ "P2.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "1 25 3.519001 [job] \n",
+ "0 -1 0.399209 [mediation, organizations, sustenance] \n",
+ "\n",
+ " location \n",
+ "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "0 [-0.002090454, 0.02607727, -0.022460938, 0.057... \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n",
+ "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n",
+ "\n",
+ "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "P4.2's mental model:\n",
+ " cluster strength examples \\\n",
+ "5 27 5.278502 [unemployment] \n",
+ "4 10 4.055685 [salaries, salary] \n",
+ "2 4 1.759501 [money] \n",
+ "1 2 0.000000 [receive] \n",
+ "3 8 0.000000 [birds] \n",
+ "0 -1 -0.143925 [society, skills, seriously] \n",
+ "\n",
+ " location \n",
+ "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "4 [0.24140625, 0.009472656, -0.020996094, 0.2265... \n",
+ "2 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "1 [0.052001953, -0.22167969, -0.21191406, 0.0996... \n",
+ "3 [0.07324219, 0.18261719, -0.33984375, -0.02404... \n",
+ "0 [-0.05913086, 0.08886719, -0.105773926, 0.0845... \n"
+ ]
+ }
+ ],
+ "source": [
+ "mental_models = {}\n",
+ "for participant in raw_data.columns[1:]:\n",
+ " responses = raw_data[participant].str.replace(',','').str.replace('.','').str.lower()\n",
+ " word_list = responses[10:14].str.split(\" \").explode()\n",
+ " stop_words = [(\"don't\",.34),('reasons',.34),('foods',.6),('unable',.3),('citizens',.4),('my',.3),('hunger',.3),('cause',.2),('factories',.3),('leads',.3),('expired',.3),('living',.4),('low',.2)]\n",
+ " for stop_word, similarity in stop_words:\n",
+ " word_list = word_list[[wv.similarity(word.lower(), stop_word)< similarity if word in wv else True for word in word_list]]\n",
+ " \n",
+ " overall_clusters = topic_extractorer.get_current_vecs()\n",
+ " overall_clusters = overall_clusters[overall_clusters.pillar.isna()]\n",
+ " mental_model = []\n",
+ " for word in word_list:\n",
+ " if word in overall_clusters.word.to_list():\n",
+ " mental_model.append(overall_clusters[overall_clusters.word == word].iloc[0])\n",
+ " mental_model = pd.DataFrame(mental_model)\n",
+ " \n",
+ " coalescence = []\n",
+ " for label in sorted(mental_model.labels.unique()):\n",
+ " words = mental_model[mental_model.labels == label].word.to_numpy()\n",
+ " location = mental_model[mental_model.labels == label].loc[:,\"v0\":\"v299\"].mean(axis=0).to_numpy()\n",
+ " tally = 0\n",
+ " num_entries = 0\n",
+ " for i,word1 in enumerate(words[:-1]):\n",
+ " for word2 in words[i+1:]:\n",
+ " #print(word1,word2)\n",
+ " tally += wv.similarity(word1,word2) - median_sim # adjusting for the median similarity between randomly picked words\n",
+ " num_entries += 1\n",
+ " if num_entries == 0: num_entries += 1\n",
+ " coalescence.append({'cluster':label,'strength':tally/num_entries*len(words), 'examples': list(set(words))[:3],'location':location})\n",
+ " try:\n",
+ " coalescence_df = pd.DataFrame(coalescence).sort_values(by='strength',ascending=False)\n",
+ " print(f\"{participant}'s mental model:\")\n",
+ " print(coalescence_df)\n",
+ " mental_models[participant] = coalescence_df\n",
+ " except Exception:\n",
+ " print(f\"{participant}'s mental model was not detectable\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.7698261\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[[(\"['money'] -> ['crop', 'crops', 'wheat']\", 0.13513015),\n",
+ " (\"['money'] -> ['received', 'receive']\", 0.0568099),\n",
+ " (\"['money'] -> ['chickens', 'ducks', 'geese']\", 0.15346186),\n",
+ " (\"['money'] -> ['experience']\", 0.11808307),\n",
+ " (\"['money'] -> ['care']\", 0.18036431),\n",
+ " (\"['money'] -> ['buy']\", 0.31760776),\n",
+ " (\"['money'] -> ['rain']\", 0.11101911),\n",
+ " (\"['money'] -> ['financial']\", 0.27252272),\n",
+ " (\"['money'] -> ['agriculture']\", 0.110064715),\n",
+ " (\"['money'] -> ['medications']\", 0.17193604),\n",
+ " (\"['money'] -> ['money']\", 1.0),\n",
+ " (\"['money'] -> ['water']\", 0.24552587),\n",
+ " (\"['money'] -> ['job']\", 0.24213025)],\n",
+ " [(\"['agriculture'] -> ['crop', 'crops', 'wheat']\", 0.5422642),\n",
+ " (\"['agriculture'] -> ['received', 'receive']\", 0.044078436),\n",
+ " (\"['agriculture'] -> ['chickens', 'ducks', 'geese']\", 0.20737456),\n",
+ " (\"['agriculture'] -> ['experience']\", 0.0825472),\n",
+ " (\"['agriculture'] -> ['care']\", 0.120561525),\n",
+ " (\"['agriculture'] -> ['buy']\", -0.025091732),\n",
+ " (\"['agriculture'] -> ['rain']\", 0.13316438),\n",
+ " (\"['agriculture'] -> ['financial']\", 0.18856841),\n",
+ " (\"['agriculture'] -> ['agriculture']\", 1.0000001),\n",
+ " (\"['agriculture'] -> ['medications']\", 0.09028715),\n",
+ " (\"['agriculture'] -> ['money']\", 0.110064715),\n",
+ " (\"['agriculture'] -> ['water']\", 0.2595156),\n",
+ " (\"['agriculture'] -> ['job']\", 0.16164827)],\n",
+ " [(\"['unemployment'] -> ['crop', 'crops', 'wheat']\", 0.17200448),\n",
+ " (\"['unemployment'] -> ['received', 'receive']\", 0.044345416),\n",
+ " (\"['unemployment'] -> ['chickens', 'ducks', 'geese']\", 0.09846855),\n",
+ " (\"['unemployment'] -> ['experience']\", 0.038453504),\n",
+ " (\"['unemployment'] -> ['care']\", 0.10824404),\n",
+ " (\"['unemployment'] -> ['buy']\", 0.00931779),\n",
+ " (\"['unemployment'] -> ['rain']\", 0.13327138),\n",
+ " (\"['unemployment'] -> ['financial']\", 0.22514835),\n",
+ " (\"['unemployment'] -> ['agriculture']\", 0.26194343),\n",
+ " (\"['unemployment'] -> ['medications']\", 0.14619951),\n",
+ " (\"['unemployment'] -> ['money']\", 0.14758553),\n",
+ " (\"['unemployment'] -> ['water']\", 0.14522398),\n",
+ " (\"['unemployment'] -> ['job']\", 0.30947816)]]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mm2 = mental_models['P5.2'].query('cluster>-1')\n",
+ "mm1 = mental_models['C2'].query('cluster>-1')\n",
+ "\n",
+ "def compare_mental_models(mm1,mm2):\n",
+ " mm1_filtered = mm1.query('cluster>-1')\n",
+ " mm2_filtered = mm2.query('cluster>-1')\n",
+ " if mm1_filtered.empty or mm2_filtered.empty:\n",
+ " return 0\n",
+ " return np.mean([max([np.dot(row1.location,row2.location)/np.linalg.norm(row1.location)/np.linalg.norm(row2.location) for _,row2 in mm2_filtered.iterrows()]) for _,row1 in mm1_filtered.iterrows()])\n",
+ "\n",
+ "print(compare_mental_models(mm1,mm2))\n",
+ "[[(f\"{row1.examples} -> {row2.examples}\",np.dot(row1.location,row2.location)/np.linalg.norm(row1.location)/np.linalg.norm(row2.location)) for _,row2 in mm2.iterrows()] for _,row1 in mm1.iterrows()]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results = []\n",
+ "for name1, mm1 in mental_models.items():\n",
+ " row = {'name':name1}\n",
+ " for name2, mm2 in mental_models.items():\n",
+ " row[name2] = min(compare_mental_models(mm1, mm2),compare_mental_models(mm2, mm1))\n",
+ " results.append(row)\n",
+ " \n",
+ "similarity_df = pd.DataFrame(results).set_index('name')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C2 and C2.2 agree\n",
+ "C2 and P4.2 agree\n",
+ "C3 and C4.2 agree\n",
+ "C3 and C10.2 agree\n",
+ "C3 and P4 agree\n",
+ "P3.2 and C1.2 agree\n",
+ "P3.2 and C3.2 agree\n",
+ "P3.2 and C4 agree\n",
+ "P3.2 and C5 agree\n",
+ "P3.2 and C6.2 agree\n",
+ "P3.2 and C10 agree\n",
+ "C7 and C8 agree\n",
+ "C7 and C9 agree\n",
+ "C7 and 5.2 agree\n",
+ "C7 and C6 agree\n",
+ "C7 and C9.2 agree\n",
+ "C7 and P2 agree\n",
+ "C7 and P5 agree\n",
+ "C7 and P2.2 agree\n",
+ "C7.2 and P1.2 agree\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'C1'},\n",
+ " {'C2', 'C2.2', 'P4.2'},\n",
+ " {'C10.2', 'C3', 'C4.2', 'P4'},\n",
+ " {'P5.2'},\n",
+ " {'C1.2', 'C10', 'C3.2', 'C4', 'C5', 'C6.2', 'P3.2'},\n",
+ " {'P1'},\n",
+ " {'5.2', 'C6', 'C7', 'C8', 'C9', 'C9.2', 'P2', 'P2.2', 'P5'},\n",
+ " {'C7.2', 'P1.2'},\n",
+ " {'C8.2'},\n",
+ " {'P3'}]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "name_set = set(pd.DataFrame(results).name)\n",
+ "groups = []\n",
+ "for name1, row in similarity_df.iterrows():\n",
+ " if name1 in name_set:\n",
+ " group = {name1}\n",
+ " for name2 in row.index:\n",
+ " if row[name2]>.5 and name1 != name2 and name2 in name_set:\n",
+ " print(f'{name1} and {name2} agree')\n",
+ " group.add(name2)\n",
+ " name_set.remove(name2)\n",
+ " name_set.remove(name1)\n",
+ " groups.append(group)\n",
+ "groups"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "group 0:\n",
+ "\t C1\n",
+ " cluster strength examples \\\n",
+ "0 9 4.398752 [children] \n",
+ "\n",
+ " location \n",
+ "0 [0.012939453, 0.0016098022, -0.04321289, 0.177... \n",
+ "group 1:\n",
+ "\t C2.2\n",
+ " cluster strength examples \\\n",
+ "3 33 3.519001 [financial] \n",
+ "1 12 0.000000 [crisis] \n",
+ "2 18 0.000000 [agriculture] \n",
+ "\n",
+ " location \n",
+ "3 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n",
+ "1 [0.11669922, 0.03466797, -0.11816406, 0.4375, ... \n",
+ "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n",
+ "\t P4.2\n",
+ " cluster strength examples \\\n",
+ "5 27 5.278502 [unemployment] \n",
+ "4 10 4.055685 [salaries, salary] \n",
+ "2 4 1.759501 [money] \n",
+ "1 2 0.000000 [receive] \n",
+ "3 8 0.000000 [birds] \n",
+ "\n",
+ " location \n",
+ "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "4 [0.24140625, 0.009472656, -0.020996094, 0.2265... \n",
+ "2 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "1 [0.052001953, -0.22167969, -0.21191406, 0.0996... \n",
+ "3 [0.07324219, 0.18261719, -0.33984375, -0.02404... \n",
+ "\t C2\n",
+ " cluster strength examples \\\n",
+ "1 4 0.0 [money] \n",
+ "2 18 0.0 [agriculture] \n",
+ "3 27 0.0 [unemployment] \n",
+ "\n",
+ " location \n",
+ "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n",
+ "3 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "group 2:\n",
+ "\t C4.2\n",
+ " cluster strength examples \\\n",
+ "1 31 0.0 [social] \n",
+ "\n",
+ " location \n",
+ "1 [0.099121094, -0.09765625, -0.123535156, 0.163... \n",
+ "\t C3\n",
+ " cluster strength examples \\\n",
+ "1 32 1.759501 [economic] \n",
+ "0 31 0.000000 [social] \n",
+ "\n",
+ " location \n",
+ "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "0 [0.099121094, -0.09765625, -0.123535156, 0.163... \n",
+ "\t C10.2\n",
+ " cluster strength examples \\\n",
+ "2 31 2.639251 [social] \n",
+ "1 30 1.759501 [political] \n",
+ "3 32 1.759501 [economic] \n",
+ "\n",
+ " location \n",
+ "2 [0.099121094, -0.09765625, -0.123535156, 0.163... \n",
+ "1 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n",
+ "3 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "\t P4\n",
+ " cluster strength examples \\\n",
+ "0 6 1.759501 [disability] \n",
+ "1 32 1.759501 [economic] \n",
+ "\n",
+ " location \n",
+ "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n",
+ "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "group 3:\n",
+ "\t P5.2\n",
+ " cluster strength examples \\\n",
+ "11 19 5.588203 [crop, crops, wheat] \n",
+ "3 2 4.638495 [received, receive] \n",
+ "6 8 1.838235 [chickens, ducks, geese] \n",
+ "1 0 1.759501 [experience] \n",
+ "2 1 1.759501 [care] \n",
+ "7 11 1.759501 [buy] \n",
+ "9 17 1.759501 [rain] \n",
+ "13 33 1.759501 [financial] \n",
+ "10 18 1.759501 [agriculture] \n",
+ "4 3 0.000000 [medications] \n",
+ "5 4 0.000000 [money] \n",
+ "8 14 0.000000 [water] \n",
+ "12 25 0.000000 [job] \n",
+ "\n",
+ " location \n",
+ "11 [0.13769531, 0.24978298, -0.03390842, 0.280870... \n",
+ "3 [0.056274414, -0.08089193, -0.13297527, -0.182... \n",
+ "6 [0.020507812, 0.18432617, -0.30004883, 0.13378... \n",
+ "1 [0.037841797, -0.060058594, -0.05810547, -0.15... \n",
+ "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n",
+ "7 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n",
+ "9 [-0.05102539, 0.045898438, -0.2734375, -0.2597... \n",
+ "13 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n",
+ "10 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n",
+ "4 [-0.18066406, 0.16796875, -0.16992188, 0.22363... \n",
+ "5 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "8 [-0.15136719, 0.13671875, 0.11669922, -0.17871... \n",
+ "12 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "group 4:\n",
+ "\t C5\n",
+ " cluster strength examples \\\n",
+ "1 0 1.759501 [opportunities] \n",
+ "3 25 1.759501 [job] \n",
+ "2 7 0.000000 [geographical] \n",
+ "\n",
+ " location \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "2 [0.022583008, 0.07861328, 0.015258789, 0.15039... \n",
+ "\t C1.2\n",
+ " cluster strength examples \\\n",
+ "1 0 2.639251 [opportunity] \n",
+ "3 25 2.639251 [job] \n",
+ "2 18 2.384640 [agricultural, agriculture] \n",
+ "5 32 1.759501 [economic] \n",
+ "4 29 0.000000 [market] \n",
+ "6 33 0.000000 [financial] \n",
+ "\n",
+ " location \n",
+ "1 [-0.025634766, 0.20410156, 0.044189453, -0.007... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "2 [-0.060872395, 0.06437174, -0.025227865, 0.007... \n",
+ "5 [0.051757812, 0.003753662, -0.125, 0.032226562... \n",
+ "4 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n",
+ "6 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n",
+ "\t P3.2\n",
+ " cluster strength examples \\\n",
+ "1 0 1.759501 [opportunities] \n",
+ "4 25 1.759501 [job] \n",
+ "6 29 1.759501 [market] \n",
+ "2 7 1.014648 [geography, geographic] \n",
+ "3 11 0.000000 [buy] \n",
+ "5 27 0.000000 [unemployment] \n",
+ "\n",
+ " location \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "6 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n",
+ "2 [0.06036377, -0.033958435, -0.033325195, 0.248... \n",
+ "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n",
+ "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "\t C3.2\n",
+ " cluster strength examples \\\n",
+ "3 25 3.519001 [job] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "2 7 0.000000 [geographically] \n",
+ "\n",
+ " location \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [-0.107910156, -0.21875, -0.13085938, 0.089355... \n",
+ "\t C4\n",
+ " cluster strength examples \\\n",
+ "1 4 1.759501 [money] \n",
+ "6 26 1.759501 [jobless] \n",
+ "2 7 1.501686 [geographic, geographical] \n",
+ "3 11 0.000000 [buy] \n",
+ "4 17 0.000000 [weather] \n",
+ "5 25 0.000000 [job] \n",
+ "\n",
+ " location \n",
+ "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "6 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n",
+ "2 [0.0032958984, 0.0063476562, -0.042663574, 0.1... \n",
+ "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n",
+ "4 [-0.18652344, 0.16796875, -0.30273438, -0.0559... \n",
+ "5 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "\t C10\n",
+ " cluster strength examples \\\n",
+ "3 4 2.639251 [money] \n",
+ "4 25 1.759501 [job] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "2 2 0.000000 [obtain] \n",
+ "\n",
+ " location \n",
+ "3 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n",
+ "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [-0.079589844, -0.15234375, -0.42382812, -0.13... \n",
+ "\t C6.2\n",
+ " cluster strength examples \\\n",
+ "2 1 1.759501 [care] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "3 25 0.000000 [job] \n",
+ "4 27 0.000000 [unemployment] \n",
+ "\n",
+ " location \n",
+ "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "4 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "group 5:\n",
+ "\t P1\n",
+ " cluster strength examples \\\n",
+ "1 27 1.759501 [unemployment] \n",
+ "\n",
+ " location \n",
+ "1 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n",
+ "group 6:\n",
+ "\t C8\n",
+ " cluster strength examples \\\n",
+ "1 0 1.759501 [opportunities] \n",
+ "2 25 1.759501 [job] \n",
+ "\n",
+ " location \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "\t C9\n",
+ " cluster strength examples \\\n",
+ "0 25 1.759501 [job] \n",
+ "\n",
+ " location \n",
+ "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "\t P2.2\n",
+ " cluster strength examples \\\n",
+ "1 25 3.519001 [job] \n",
+ "\n",
+ " location \n",
+ "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "\t C7\n",
+ " cluster strength examples \\\n",
+ "0 6 0.0 [disability] \n",
+ "1 25 0.0 [job] \n",
+ "\n",
+ " location \n",
+ "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n",
+ "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "\t 5.2\n",
+ " cluster strength examples \\\n",
+ "0 25 2.639251 [job] \n",
+ "\n",
+ " location \n",
+ "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "\t P5\n",
+ " cluster strength examples \\\n",
+ "0 0 1.759501 [opportunities] \n",
+ "1 25 1.759501 [job] \n",
+ "\n",
+ " location \n",
+ "0 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "\t C9.2\n",
+ " cluster strength examples \\\n",
+ "1 6 0.0 [disability] \n",
+ "\n",
+ " location \n",
+ "1 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n",
+ "\t P2\n",
+ " cluster strength examples \\\n",
+ "1 1 2.639251 [health] \n",
+ "3 25 1.759501 [job] \n",
+ "2 2 0.000000 [receiving] \n",
+ "\n",
+ " location \n",
+ "1 [-0.07421875, 0.11279297, 0.09472656, 0.071777... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "2 [0.095703125, -0.015991211, -0.18359375, -0.09... \n",
+ "\t C6\n",
+ " cluster strength examples \\\n",
+ "0 6 0.906088 [disabled, disability] \n",
+ "\n",
+ " location \n",
+ "0 [0.15576172, -0.06085205, -0.15185547, 0.11047... \n",
+ "group 7:\n",
+ "\t P1.2\n",
+ " cluster strength examples \\\n",
+ "3 25 1.759501 [job] \n",
+ "1 0 1.084699 [opportunities, opportunity] \n",
+ "2 18 0.000000 [agricultural] \n",
+ "\n",
+ " location \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.1348877, 0.24072266, -0.017700195, -0.0401... \n",
+ "2 [-0.07421875, 0.107421875, -0.016357422, 0.024... \n",
+ "\t C7.2\n",
+ " cluster strength examples \\\n",
+ "4 30 1.759501 [political] \n",
+ "3 25 1.759501 [job] \n",
+ "1 0 0.000000 [opportunities] \n",
+ "2 20 0.000000 [disaster] \n",
+ "\n",
+ " location \n",
+ "4 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n",
+ "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n",
+ "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n",
+ "2 [0.23339844, -0.0006713867, -0.050048828, 0.18... \n",
+ "group 8:\n",
+ "\t C8.2\n",
+ " cluster strength examples \\\n",
+ "2 26 1.759501 [jobless] \n",
+ "1 7 0.000000 [geography] \n",
+ "\n",
+ " location \n",
+ "2 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n",
+ "1 [0.13671875, -0.0019989014, 0.033935547, 0.296... \n",
+ "group 9:\n",
+ "\t P3\n",
+ " cluster strength examples \\\n",
+ "1 10 0.0 [salary] \n",
+ "2 29 0.0 [markets] \n",
+ "\n",
+ " location \n",
+ "1 [0.3984375, -0.056152344, -0.16894531, 0.18359... \n",
+ "2 [-0.052001953, -0.08544922, -0.13574219, 0.106... \n"
+ ]
+ }
+ ],
+ "source": [
+ "for i, group in enumerate(groups):\n",
+ " print(f'group {i}:')\n",
+ " for item in group:\n",
+ " if len(mental_models[item].query('cluster>-1').index)>0:\n",
+ " print(f'\\t {item}')\n",
+ " print(mental_models[item].query('cluster>-1'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.41913477"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "compare_mental_models(mental_models['C1.2'],mental_models['C10'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'agriculture',\n",
+ " 'cost',\n",
+ " 'economic',\n",
+ " 'family planning',\n",
+ " 'food availability',\n",
+ " 'food management',\n",
+ " 'geographic ',\n",
+ " 'government',\n",
+ " 'health',\n",
+ " 'natural causes',\n",
+ " 'personal character',\n",
+ " 'poverty',\n",
+ " 'social reasons',\n",
+ " 'unemployment',\n",
+ " 'unfair society'}"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "qual_codes = pd.read_csv('Unprompted Causes Codes.csv').loc[:,'C1':'P4.2']\n",
+ "topicsLU = {}\n",
+ "for participant in qual_codes.columns:\n",
+ " topic_list = qual_codes[participant].dropna().to_list()\n",
+ " topicsLU[participant] = topic_list\n",
+ "unique_topics = set()\n",
+ "for topics in topicsLU.values():\n",
+ " for topic in topics:\n",
+ " unique_topics.add(topic.strip().lower().replace('.','').replace('isolatin','isolation').replace('isolation','').replace('geogrpahic','geographic').\n",
+ " replace('unemployement','unemployment').replace('/poverty','').replace('neglecting ','').replace('high cost of food','cost').replace(' conditions',''))\n",
+ "unique_topics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unprompted_clusters = topic_extractorer.get_current_vecs()[topic_extractorer.get_current_vecs().pillar.isna()]\n",
+ "# for label in sorted(unprompted_clusters.labels.unique()):\n",
+ "# words = unprompted_clusters.query(f'labels=={label}').word.unique()[:3]\n",
+ "# topic_location = unprompted_clusters.loc[:,\"v0\":\"v299\"].mean(axis=0).to_numpy()\n",
+ "# for topic in unique_topics:\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "latent_vars_map = {\n",
+ " 'protein supply':['meat','animals','chicken','beef','pork','poultry','cow','pig','protein','duck'],\n",
+ " 'energy supply' :['corn','wheat','barley','grain','oats','harvest','crops','agriculture'],\n",
+ " 'food supply': ['meat','animals','chicken','beef','pork','poultry','cow','pig','duck','corn','wheat','barley','grain','oats','harvest','crops','agriculture'],\n",
+ " 'political stability':['riots', 'protests', 'war', 'crisis', 'instability', 'unrest','disruption','coup','assasination','politics','government'],\n",
+ " 'armed conflict': ['war','riots','militia','terrorist','conflict','armed'],\n",
+ " 'floods': ['flood','floods','rain','weather'],\n",
+ " 'droughts':['dry','drought','rain','droughts','arid'],\n",
+ " 'earthquakes':['earthquakes','tremors'],\n",
+ " 'cyclones': ['hurricane','typhoon','cyclone'],\n",
+ " 'disease' : ['illness','sick','flu','vaccine','malaria','aids','disease','medications','meds','medicine'],\n",
+ " 'income': ['income','job','joblessness','unemployment','jobs','money','wage'],\n",
+ " 'prices': ['prices','expensive','cost','costly'],\n",
+ " 'climate' :['rain','weather','season','dry','arid'],\n",
+ " 'inclusivity' : ['disabled','illness','disability'],\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " word | \n",
+ " pillar | \n",
+ " v0 | \n",
+ " v1 | \n",
+ " v2 | \n",
+ " v3 | \n",
+ " v4 | \n",
+ " v5 | \n",
+ " v6 | \n",
+ " v7 | \n",
+ " ... | \n",
+ " v292 | \n",
+ " v293 | \n",
+ " v294 | \n",
+ " v295 | \n",
+ " v296 | \n",
+ " v297 | \n",
+ " v298 | \n",
+ " v299 | \n",
+ " mag | \n",
+ " labels | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 323 | \n",
+ " money | \n",
+ " NaN | \n",
+ " 0.158203 | \n",
+ " 0.051270 | \n",
+ " 0.066406 | \n",
+ " 0.210938 | \n",
+ " 0.035156 | \n",
+ " -0.004669 | \n",
+ " -0.004456 | \n",
+ " -0.082031 | \n",
+ " ... | \n",
+ " -0.060303 | \n",
+ " -0.125977 | \n",
+ " 0.062988 | \n",
+ " 0.049072 | \n",
+ " 0.124023 | \n",
+ " -0.080566 | \n",
+ " -0.056396 | \n",
+ " -0.078613 | \n",
+ " 6.031805 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 324 | \n",
+ " money | \n",
+ " NaN | \n",
+ " 0.158203 | \n",
+ " 0.051270 | \n",
+ " 0.066406 | \n",
+ " 0.210938 | \n",
+ " 0.035156 | \n",
+ " -0.004669 | \n",
+ " -0.004456 | \n",
+ " -0.082031 | \n",
+ " ... | \n",
+ " -0.060303 | \n",
+ " -0.125977 | \n",
+ " 0.062988 | \n",
+ " 0.049072 | \n",
+ " 0.124023 | \n",
+ " -0.080566 | \n",
+ " -0.056396 | \n",
+ " -0.078613 | \n",
+ " 6.031805 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 325 | \n",
+ " money | \n",
+ " NaN | \n",
+ " 0.158203 | \n",
+ " 0.051270 | \n",
+ " 0.066406 | \n",
+ " 0.210938 | \n",
+ " 0.035156 | \n",
+ " -0.004669 | \n",
+ " -0.004456 | \n",
+ " -0.082031 | \n",
+ " ... | \n",
+ " -0.060303 | \n",
+ " -0.125977 | \n",
+ " 0.062988 | \n",
+ " 0.049072 | \n",
+ " 0.124023 | \n",
+ " -0.080566 | \n",
+ " -0.056396 | \n",
+ " -0.078613 | \n",
+ " 6.031805 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 326 | \n",
+ " money | \n",
+ " NaN | \n",
+ " 0.158203 | \n",
+ " 0.051270 | \n",
+ " 0.066406 | \n",
+ " 0.210938 | \n",
+ " 0.035156 | \n",
+ " -0.004669 | \n",
+ " -0.004456 | \n",
+ " -0.082031 | \n",
+ " ... | \n",
+ " -0.060303 | \n",
+ " -0.125977 | \n",
+ " 0.062988 | \n",
+ " 0.049072 | \n",
+ " 0.124023 | \n",
+ " -0.080566 | \n",
+ " -0.056396 | \n",
+ " -0.078613 | \n",
+ " 6.031805 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 327 | \n",
+ " money | \n",
+ " NaN | \n",
+ " 0.158203 | \n",
+ " 0.051270 | \n",
+ " 0.066406 | \n",
+ " 0.210938 | \n",
+ " 0.035156 | \n",
+ " -0.004669 | \n",
+ " -0.004456 | \n",
+ " -0.082031 | \n",
+ " ... | \n",
+ " -0.060303 | \n",
+ " -0.125977 | \n",
+ " 0.062988 | \n",
+ " 0.049072 | \n",
+ " 0.124023 | \n",
+ " -0.080566 | \n",
+ " -0.056396 | \n",
+ " -0.078613 | \n",
+ " 6.031805 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 676 | \n",
+ " unemployment | \n",
+ " NaN | \n",
+ " 0.398438 | \n",
+ " -0.045166 | \n",
+ " -0.265625 | \n",
+ " 0.198242 | \n",
+ " 0.398438 | \n",
+ " -0.009338 | \n",
+ " -0.166992 | \n",
+ " -0.082031 | \n",
+ " ... | \n",
+ " -0.102539 | \n",
+ " -0.015381 | \n",
+ " 0.039551 | \n",
+ " 0.314453 | \n",
+ " -0.179688 | \n",
+ " 0.164062 | \n",
+ " 0.058838 | \n",
+ " 0.012573 | \n",
+ " 15.340950 | \n",
+ " 27 | \n",
+ "
\n",
+ " \n",
+ " | 677 | \n",
+ " jobless | \n",
+ " NaN | \n",
+ " 0.496094 | \n",
+ " 0.026001 | \n",
+ " -0.104004 | \n",
+ " 0.215820 | \n",
+ " 0.228516 | \n",
+ " 0.213867 | \n",
+ " -0.227539 | \n",
+ " -0.028320 | \n",
+ " ... | \n",
+ " -0.021729 | \n",
+ " -0.251953 | \n",
+ " 0.062012 | \n",
+ " 0.078125 | \n",
+ " -0.157227 | \n",
+ " 0.251953 | \n",
+ " -0.040527 | \n",
+ " 0.025391 | \n",
+ " 15.455056 | \n",
+ " 26 | \n",
+ "
\n",
+ " \n",
+ " | 678 | \n",
+ " jobless | \n",
+ " NaN | \n",
+ " 0.496094 | \n",
+ " 0.026001 | \n",
+ " -0.104004 | \n",
+ " 0.215820 | \n",
+ " 0.228516 | \n",
+ " 0.213867 | \n",
+ " -0.227539 | \n",
+ " -0.028320 | \n",
+ " ... | \n",
+ " -0.021729 | \n",
+ " -0.251953 | \n",
+ " 0.062012 | \n",
+ " 0.078125 | \n",
+ " -0.157227 | \n",
+ " 0.251953 | \n",
+ " -0.040527 | \n",
+ " 0.025391 | \n",
+ " 15.455056 | \n",
+ " 26 | \n",
+ "
\n",
+ " \n",
+ " | 679 | \n",
+ " jobless | \n",
+ " NaN | \n",
+ " 0.496094 | \n",
+ " 0.026001 | \n",
+ " -0.104004 | \n",
+ " 0.215820 | \n",
+ " 0.228516 | \n",
+ " 0.213867 | \n",
+ " -0.227539 | \n",
+ " -0.028320 | \n",
+ " ... | \n",
+ " -0.021729 | \n",
+ " -0.251953 | \n",
+ " 0.062012 | \n",
+ " 0.078125 | \n",
+ " -0.157227 | \n",
+ " 0.251953 | \n",
+ " -0.040527 | \n",
+ " 0.025391 | \n",
+ " 15.455056 | \n",
+ " 26 | \n",
+ "
\n",
+ " \n",
+ " | 680 | \n",
+ " jobless | \n",
+ " NaN | \n",
+ " 0.496094 | \n",
+ " 0.026001 | \n",
+ " -0.104004 | \n",
+ " 0.215820 | \n",
+ " 0.228516 | \n",
+ " 0.213867 | \n",
+ " -0.227539 | \n",
+ " -0.028320 | \n",
+ " ... | \n",
+ " -0.021729 | \n",
+ " -0.251953 | \n",
+ " 0.062012 | \n",
+ " 0.078125 | \n",
+ " -0.157227 | \n",
+ " 0.251953 | \n",
+ " -0.040527 | \n",
+ " 0.025391 | \n",
+ " 15.455056 | \n",
+ " 26 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
180 rows × 304 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " word pillar v0 v1 v2 v3 v4 \\\n",
+ "323 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n",
+ "324 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n",
+ "325 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n",
+ "326 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n",
+ "327 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n",
+ ".. ... ... ... ... ... ... ... \n",
+ "676 unemployment NaN 0.398438 -0.045166 -0.265625 0.198242 0.398438 \n",
+ "677 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n",
+ "678 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n",
+ "679 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n",
+ "680 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n",
+ "\n",
+ " v5 v6 v7 ... v292 v293 v294 \\\n",
+ "323 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n",
+ "324 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n",
+ "325 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n",
+ "326 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n",
+ "327 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n",
+ ".. ... ... ... ... ... ... ... \n",
+ "676 -0.009338 -0.166992 -0.082031 ... -0.102539 -0.015381 0.039551 \n",
+ "677 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n",
+ "678 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n",
+ "679 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n",
+ "680 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n",
+ "\n",
+ " v295 v296 v297 v298 v299 mag labels \n",
+ "323 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n",
+ "324 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n",
+ "325 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n",
+ "326 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n",
+ "327 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n",
+ ".. ... ... ... ... ... ... ... \n",
+ "676 0.314453 -0.179688 0.164062 0.058838 0.012573 15.340950 27 \n",
+ "677 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n",
+ "678 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n",
+ "679 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n",
+ "680 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n",
+ "\n",
+ "[180 rows x 304 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "unprompted_clusters.query('labels > -1')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " cluster | \n",
+ " strength | \n",
+ " examples | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 18 | \n",
+ " 25 | \n",
+ " 31.796455 | \n",
+ " [job] | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 12.258999 | \n",
+ " [opportunity, opportunities, experience] | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 27 | \n",
+ " 9.715584 | \n",
+ " [unemployment] | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 4 | \n",
+ " 7.949114 | \n",
+ " [money] | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 32 | \n",
+ " 7.065879 | \n",
+ " [economic] | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 18 | \n",
+ " 6.520283 | \n",
+ " [agricultural, agriculture] | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 33 | \n",
+ " 6.182644 | \n",
+ " [financial] | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 19 | \n",
+ " 5.619564 | \n",
+ " [crop, crops, wheat] | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2 | \n",
+ " 5.514252 | \n",
+ " [receive, receiving, obtain] | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 10 | \n",
+ " 4.750502 | \n",
+ " [salaries, salary] | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 6 | \n",
+ " 4.445997 | \n",
+ " [disabled, disability] | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 9 | \n",
+ " 4.416175 | \n",
+ " [children] | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 31 | \n",
+ " 4.416175 | \n",
+ " [social] | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 7 | \n",
+ " 4.202181 | \n",
+ " [geographically, geography, geographic] | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 4.049020 | \n",
+ " [health, care] | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 11 | \n",
+ " 3.532940 | \n",
+ " [buy] | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 30 | \n",
+ " 3.532940 | \n",
+ " [political] | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 26 | \n",
+ " 3.532940 | \n",
+ " [jobless] | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 29 | \n",
+ " 3.066149 | \n",
+ " [markets, market] | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 8 | \n",
+ " 2.485127 | \n",
+ " [chickens, turkeys, birds] | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 17 | \n",
+ " 1.787024 | \n",
+ " [rain, weather] | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 14 | \n",
+ " 0.000000 | \n",
+ " [water] | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 20 | \n",
+ " 0.000000 | \n",
+ " [disaster] | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 12 | \n",
+ " 0.000000 | \n",
+ " [crisis] | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3 | \n",
+ " 0.000000 | \n",
+ " [medications] | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " -1 | \n",
+ " -5.975976 | \n",
+ " [grow, economics, efficacious] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " cluster strength examples\n",
+ "18 25 31.796455 [job]\n",
+ "1 0 12.258999 [opportunity, opportunities, experience]\n",
+ "20 27 9.715584 [unemployment]\n",
+ "5 4 7.949114 [money]\n",
+ "24 32 7.065879 [economic]\n",
+ "15 18 6.520283 [agricultural, agriculture]\n",
+ "25 33 6.182644 [financial]\n",
+ "16 19 5.619564 [crop, crops, wheat]\n",
+ "3 2 5.514252 [receive, receiving, obtain]\n",
+ "10 10 4.750502 [salaries, salary]\n",
+ "6 6 4.445997 [disabled, disability]\n",
+ "9 9 4.416175 [children]\n",
+ "23 31 4.416175 [social]\n",
+ "7 7 4.202181 [geographically, geography, geographic]\n",
+ "2 1 4.049020 [health, care]\n",
+ "11 11 3.532940 [buy]\n",
+ "22 30 3.532940 [political]\n",
+ "19 26 3.532940 [jobless]\n",
+ "21 29 3.066149 [markets, market]\n",
+ "8 8 2.485127 [chickens, turkeys, birds]\n",
+ "14 17 1.787024 [rain, weather]\n",
+ "13 14 0.000000 [water]\n",
+ "17 20 0.000000 [disaster]\n",
+ "12 12 0.000000 [crisis]\n",
+ "4 3 0.000000 [medications]\n",
+ "0 -1 -5.975976 [grow, economics, efficacious]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sample = topic_extractorer.pre_trained.word.sample(1000)\n",
+ "sims = np.array([wv.similarity(word1,word2) for i,word1 in enumerate(sample[:-1]) for word2 in sample[i+1:]])\n",
+ "sims.sort()\n",
+ "median_sim = sims[sims.shape[0]//2]\n",
+ "coalescence = []\n",
+ "for label in sorted(unprompted_clusters.labels.unique()):\n",
+ " words = unprompted_clusters[unprompted_clusters.labels == label].word.to_numpy()\n",
+ " tally = 0\n",
+ " num_entries = 0\n",
+ " for i,word1 in enumerate(words[:-1]):\n",
+ " for word2 in words[i+1:]:\n",
+ " #print(word1,word2)\n",
+ " tally += wv.similarity(word1,word2) - median_sim # adjusting for the median similarity between randomly picked words\n",
+ " num_entries += 1\n",
+ " if num_entries == 0: num_entries += 1\n",
+ " coalescence.append({'cluster':label,'strength':tally/num_entries*len(words), 'examples': list(set(words))[:3]})\n",
+ "coalescence_df = pd.DataFrame(coalescence).sort_values(by='strength',ascending=False)\n",
+ "coalescence_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " cluster | \n",
+ " strength | \n",
+ " examples | \n",
+ " protein supply | \n",
+ " energy supply | \n",
+ " food supply | \n",
+ " political stability | \n",
+ " armed conflict | \n",
+ " floods | \n",
+ " droughts | \n",
+ " earthquakes | \n",
+ " cyclones | \n",
+ " disease | \n",
+ " income | \n",
+ " prices | \n",
+ " climate | \n",
+ " inclusivity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 18 | \n",
+ " 25 | \n",
+ " 31.796455 | \n",
+ " [job] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.338995 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.032694 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 12.258999 | \n",
+ " [opportunity, opportunities, experience] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.018104 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 27 | \n",
+ " 9.715584 | \n",
+ " [unemployment] | \n",
+ " 0.000000 | \n",
+ " 0.034321 | \n",
+ " 0.000000 | \n",
+ " 0.072793 | \n",
+ " 0.038521 | \n",
+ " 0.097461 | \n",
+ " 0.082664 | \n",
+ " 0.077937 | \n",
+ " 0.043921 | \n",
+ " 0.026523 | \n",
+ " 0.436022 | \n",
+ " 0.011958 | \n",
+ " 0.034313 | \n",
+ " 0.134235 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 4 | \n",
+ " 7.949114 | \n",
+ " [money] | \n",
+ " 0.009790 | \n",
+ " 0.000000 | \n",
+ " 0.004593 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.231398 | \n",
+ " 0.126873 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 32 | \n",
+ " 7.065879 | \n",
+ " [economic] | \n",
+ " 0.000000 | \n",
+ " 0.074815 | \n",
+ " 0.001991 | \n",
+ " 0.116053 | \n",
+ " 0.008262 | \n",
+ " 0.084514 | \n",
+ " 0.051772 | \n",
+ " 0.034559 | \n",
+ " 0.041371 | \n",
+ " 0.000000 | \n",
+ " 0.242611 | \n",
+ " 0.014831 | \n",
+ " 0.031305 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 18 | \n",
+ " 6.520283 | \n",
+ " [agricultural, agriculture] | \n",
+ " 0.168442 | \n",
+ " 0.403144 | \n",
+ " 0.286338 | \n",
+ " 0.012277 | \n",
+ " 0.000000 | \n",
+ " 0.090378 | \n",
+ " 0.164445 | \n",
+ " 0.000000 | \n",
+ " 0.003401 | \n",
+ " 0.000000 | \n",
+ " 0.088432 | \n",
+ " 0.000000 | \n",
+ " 0.070659 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 33 | \n",
+ " 6.182644 | \n",
+ " [financial] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.034291 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.031186 | \n",
+ " 0.000000 | \n",
+ " 0.116196 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 19 | \n",
+ " 5.619564 | \n",
+ " [crop, crops, wheat] | \n",
+ " 0.137234 | \n",
+ " 0.533502 | \n",
+ " 0.325388 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.184479 | \n",
+ " 0.229797 | \n",
+ " 0.000000 | \n",
+ " 0.090899 | \n",
+ " 0.027929 | \n",
+ " 0.008572 | \n",
+ " 0.000000 | \n",
+ " 0.148771 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2 | \n",
+ " 5.514252 | \n",
+ " [receive, receiving, obtain] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.006181 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 10 | \n",
+ " 4.750502 | \n",
+ " [salaries, salary] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.262970 | \n",
+ " 0.052700 | \n",
+ " 0.000000 | \n",
+ " 0.020607 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 6 | \n",
+ " 4.445997 | \n",
+ " [disabled, disability] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.078602 | \n",
+ " 0.067276 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.586652 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 9 | \n",
+ " 4.416175 | \n",
+ " [children] | \n",
+ " 0.024409 | \n",
+ " 0.000000 | \n",
+ " 0.008060 | \n",
+ " 0.000000 | \n",
+ " 0.004505 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.070780 | \n",
+ " 0.039174 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.171606 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 31 | \n",
+ " 4.416175 | \n",
+ " [social] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.043513 | \n",
+ " 0.003150 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.068558 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.081211 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 7 | \n",
+ " 4.202181 | \n",
+ " [geographically, geography, geographic] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.010706 | \n",
+ " 0.032551 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 4.049020 | \n",
+ " [health, care] | \n",
+ " 0.000711 | \n",
+ " 0.006612 | \n",
+ " 0.006318 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.189484 | \n",
+ " 0.060231 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.204632 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 11 | \n",
+ " 3.532940 | \n",
+ " [buy] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.183064 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 30 | \n",
+ " 3.532940 | \n",
+ " [political] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.196803 | \n",
+ " 0.153736 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.044111 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 26 | \n",
+ " 3.532940 | \n",
+ " [jobless] | \n",
+ " 0.000000 | \n",
+ " 0.027506 | \n",
+ " 0.000000 | \n",
+ " 0.032873 | \n",
+ " 0.052705 | \n",
+ " 0.091863 | \n",
+ " 0.037706 | \n",
+ " 0.044431 | \n",
+ " 0.101680 | \n",
+ " 0.007323 | \n",
+ " 0.355548 | \n",
+ " 0.000000 | \n",
+ " 0.011519 | \n",
+ " 0.146293 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 29 | \n",
+ " 3.066149 | \n",
+ " [markets, market] | \n",
+ " 0.000000 | \n",
+ " 0.031985 | \n",
+ " 0.000000 | \n",
+ " 0.032482 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.019352 | \n",
+ " 0.110605 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 8 | \n",
+ " 2.485127 | \n",
+ " [chickens, turkeys, birds] | \n",
+ " 0.342107 | \n",
+ " 0.123217 | \n",
+ " 0.257776 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.010813 | \n",
+ " 0.009382 | \n",
+ " 0.001699 | \n",
+ " 0.015904 | \n",
+ " 0.043576 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.004465 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 17 | \n",
+ " 1.787024 | \n",
+ " [rain, weather] | \n",
+ " 0.000000 | \n",
+ " 0.096077 | \n",
+ " 0.015908 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.507542 | \n",
+ " 0.357732 | \n",
+ " 0.092732 | \n",
+ " 0.202258 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.409019 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 14 | \n",
+ " 0.000000 | \n",
+ " [water] | \n",
+ " 0.050871 | \n",
+ " 0.100549 | \n",
+ " 0.075975 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.240113 | \n",
+ " 0.250010 | \n",
+ " 0.011750 | \n",
+ " 0.024795 | \n",
+ " 0.030857 | \n",
+ " 0.002714 | \n",
+ " 0.000937 | \n",
+ " 0.171574 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 20 | \n",
+ " 0.000000 | \n",
+ " [disaster] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.127237 | \n",
+ " 0.089180 | \n",
+ " 0.263581 | \n",
+ " 0.101491 | \n",
+ " 0.219786 | \n",
+ " 0.373670 | \n",
+ " 0.001986 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.027711 | \n",
+ " 0.010487 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 12 | \n",
+ " 0.000000 | \n",
+ " [crisis] | \n",
+ " 0.000000 | \n",
+ " 0.002268 | \n",
+ " 0.000000 | \n",
+ " 0.306537 | \n",
+ " 0.161924 | \n",
+ " 0.096206 | \n",
+ " 0.080741 | \n",
+ " 0.121456 | \n",
+ " 0.104278 | \n",
+ " 0.027391 | \n",
+ " 0.045787 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3 | \n",
+ " 0.000000 | \n",
+ " [medications] | \n",
+ " 0.000000 | \n",
+ " 0.009789 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000216 | \n",
+ " 0.000000 | \n",
+ " 0.374936 | \n",
+ " 0.009604 | \n",
+ " 0.001413 | \n",
+ " 0.000000 | \n",
+ " 0.129098 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " -1 | \n",
+ " -5.975976 | \n",
+ " [grow, economics, efficacious] | \n",
+ " 0.000000 | \n",
+ " 0.011873 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.030379 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " cluster strength examples \\\n",
+ "18 25 31.796455 [job] \n",
+ "1 0 12.258999 [opportunity, opportunities, experience] \n",
+ "20 27 9.715584 [unemployment] \n",
+ "5 4 7.949114 [money] \n",
+ "24 32 7.065879 [economic] \n",
+ "15 18 6.520283 [agricultural, agriculture] \n",
+ "25 33 6.182644 [financial] \n",
+ "16 19 5.619564 [crop, crops, wheat] \n",
+ "3 2 5.514252 [receive, receiving, obtain] \n",
+ "10 10 4.750502 [salaries, salary] \n",
+ "6 6 4.445997 [disabled, disability] \n",
+ "9 9 4.416175 [children] \n",
+ "23 31 4.416175 [social] \n",
+ "7 7 4.202181 [geographically, geography, geographic] \n",
+ "2 1 4.049020 [health, care] \n",
+ "11 11 3.532940 [buy] \n",
+ "22 30 3.532940 [political] \n",
+ "19 26 3.532940 [jobless] \n",
+ "21 29 3.066149 [markets, market] \n",
+ "8 8 2.485127 [chickens, turkeys, birds] \n",
+ "14 17 1.787024 [rain, weather] \n",
+ "13 14 0.000000 [water] \n",
+ "17 20 0.000000 [disaster] \n",
+ "12 12 0.000000 [crisis] \n",
+ "4 3 0.000000 [medications] \n",
+ "0 -1 -5.975976 [grow, economics, efficacious] \n",
+ "\n",
+ " protein supply energy supply food supply political stability \\\n",
+ "18 0.000000 0.000000 0.000000 0.000000 \n",
+ "1 0.000000 0.000000 0.000000 0.000000 \n",
+ "20 0.000000 0.034321 0.000000 0.072793 \n",
+ "5 0.009790 0.000000 0.004593 0.000000 \n",
+ "24 0.000000 0.074815 0.001991 0.116053 \n",
+ "15 0.168442 0.403144 0.286338 0.012277 \n",
+ "25 0.000000 0.000000 0.000000 0.034291 \n",
+ "16 0.137234 0.533502 0.325388 0.000000 \n",
+ "3 0.000000 0.000000 0.000000 0.000000 \n",
+ "10 0.000000 0.000000 0.000000 0.000000 \n",
+ "6 0.000000 0.000000 0.000000 0.000000 \n",
+ "9 0.024409 0.000000 0.008060 0.000000 \n",
+ "23 0.000000 0.000000 0.000000 0.043513 \n",
+ "7 0.000000 0.000000 0.000000 0.000000 \n",
+ "2 0.000711 0.006612 0.006318 0.000000 \n",
+ "11 0.000000 0.000000 0.000000 0.000000 \n",
+ "22 0.000000 0.000000 0.000000 0.196803 \n",
+ "19 0.000000 0.027506 0.000000 0.032873 \n",
+ "21 0.000000 0.031985 0.000000 0.032482 \n",
+ "8 0.342107 0.123217 0.257776 0.000000 \n",
+ "14 0.000000 0.096077 0.015908 0.000000 \n",
+ "13 0.050871 0.100549 0.075975 0.000000 \n",
+ "17 0.000000 0.000000 0.000000 0.127237 \n",
+ "12 0.000000 0.002268 0.000000 0.306537 \n",
+ "4 0.000000 0.009789 0.000000 0.000000 \n",
+ "0 0.000000 0.011873 0.000000 0.000000 \n",
+ "\n",
+ " armed conflict floods droughts earthquakes cyclones disease \\\n",
+ "18 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "20 0.038521 0.097461 0.082664 0.077937 0.043921 0.026523 \n",
+ "5 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "24 0.008262 0.084514 0.051772 0.034559 0.041371 0.000000 \n",
+ "15 0.000000 0.090378 0.164445 0.000000 0.003401 0.000000 \n",
+ "25 0.000000 0.000000 0.000000 0.000000 0.031186 0.000000 \n",
+ "16 0.000000 0.184479 0.229797 0.000000 0.090899 0.027929 \n",
+ "3 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "10 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "6 0.000000 0.000000 0.000000 0.000000 0.000000 0.078602 \n",
+ "9 0.004505 0.000000 0.000000 0.000000 0.000000 0.070780 \n",
+ "23 0.003150 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "7 0.000000 0.000000 0.010706 0.032551 0.000000 0.000000 \n",
+ "2 0.000000 0.000000 0.000000 0.000000 0.000000 0.189484 \n",
+ "11 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "22 0.153736 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "19 0.052705 0.091863 0.037706 0.044431 0.101680 0.007323 \n",
+ "21 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "8 0.000000 0.010813 0.009382 0.001699 0.015904 0.043576 \n",
+ "14 0.000000 0.507542 0.357732 0.092732 0.202258 0.000000 \n",
+ "13 0.000000 0.240113 0.250010 0.011750 0.024795 0.030857 \n",
+ "17 0.089180 0.263581 0.101491 0.219786 0.373670 0.001986 \n",
+ "12 0.161924 0.096206 0.080741 0.121456 0.104278 0.027391 \n",
+ "4 0.000000 0.000000 0.000000 0.000216 0.000000 0.374936 \n",
+ "0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
+ "\n",
+ " income prices climate inclusivity \n",
+ "18 0.338995 0.000000 0.000000 0.032694 \n",
+ "1 0.018104 0.000000 0.000000 0.000000 \n",
+ "20 0.436022 0.011958 0.034313 0.134235 \n",
+ "5 0.231398 0.126873 0.000000 0.000000 \n",
+ "24 0.242611 0.014831 0.031305 0.000000 \n",
+ "15 0.088432 0.000000 0.070659 0.000000 \n",
+ "25 0.116196 0.000000 0.000000 0.000000 \n",
+ "16 0.008572 0.000000 0.148771 0.000000 \n",
+ "3 0.000000 0.000000 0.000000 0.006181 \n",
+ "10 0.262970 0.052700 0.000000 0.020607 \n",
+ "6 0.067276 0.000000 0.000000 0.586652 \n",
+ "9 0.039174 0.000000 0.000000 0.171606 \n",
+ "23 0.068558 0.000000 0.000000 0.081211 \n",
+ "7 0.000000 0.000000 0.000000 0.000000 \n",
+ "2 0.060231 0.000000 0.000000 0.204632 \n",
+ "11 0.000000 0.183064 0.000000 0.000000 \n",
+ "22 0.044111 0.000000 0.000000 0.000000 \n",
+ "19 0.355548 0.000000 0.011519 0.146293 \n",
+ "21 0.019352 0.110605 0.000000 0.000000 \n",
+ "8 0.000000 0.000000 0.004465 0.000000 \n",
+ "14 0.000000 0.000000 0.409019 0.000000 \n",
+ "13 0.002714 0.000937 0.171574 0.000000 \n",
+ "17 0.000000 0.000000 0.027711 0.010487 \n",
+ "12 0.045787 0.000000 0.000000 0.000000 \n",
+ "4 0.009604 0.001413 0.000000 0.129098 \n",
+ "0 0.000000 0.030379 0.000000 0.000000 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "for key in latent_vars_map:\n",
+ " coalescence_df[key] = [max(0,np.mean([((wv.similarity(word1,word2) - median_sim)/(1-median_sim)) for word2 in latent_vars_map[key] for word1 in words1])) for words1 in coalescence_df.examples]\n",
+ "coalescence_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " cluster | \n",
+ " strength | \n",
+ " examples | \n",
+ " protein supply | \n",
+ " energy supply | \n",
+ " food supply | \n",
+ " political stability | \n",
+ " armed conflict | \n",
+ " floods | \n",
+ " droughts | \n",
+ " ... | \n",
+ " cyclones | \n",
+ " disease | \n",
+ " income | \n",
+ " prices | \n",
+ " climate | \n",
+ " inclusivity | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 31 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 18 | \n",
+ " 25 | \n",
+ " 31.796455 | \n",
+ " [job] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.338995 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.032694 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 12.258999 | \n",
+ " [opportunity, opportunities, experience] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.018104 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 27 | \n",
+ " 9.715584 | \n",
+ " [unemployment] | \n",
+ " 0.000000 | \n",
+ " 0.034321 | \n",
+ " 0.000000 | \n",
+ " 0.072793 | \n",
+ " 0.038521 | \n",
+ " 0.097461 | \n",
+ " 0.082664 | \n",
+ " ... | \n",
+ " 0.043921 | \n",
+ " 0.026523 | \n",
+ " 0.436022 | \n",
+ " 0.011958 | \n",
+ " 0.034313 | \n",
+ " 0.134235 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 4 | \n",
+ " 7.949114 | \n",
+ " [money] | \n",
+ " 0.009790 | \n",
+ " 0.000000 | \n",
+ " 0.004593 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.231398 | \n",
+ " 0.126873 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 32 | \n",
+ " 7.065879 | \n",
+ " [economic] | \n",
+ " 0.000000 | \n",
+ " 0.074815 | \n",
+ " 0.001991 | \n",
+ " 0.116053 | \n",
+ " 0.008262 | \n",
+ " 0.084514 | \n",
+ " 0.051772 | \n",
+ " ... | \n",
+ " 0.041371 | \n",
+ " 0.000000 | \n",
+ " 0.242611 | \n",
+ " 0.014831 | \n",
+ " 0.031305 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 18 | \n",
+ " 6.520283 | \n",
+ " [agricultural, agriculture] | \n",
+ " 0.168442 | \n",
+ " 0.403144 | \n",
+ " 0.286338 | \n",
+ " 0.012277 | \n",
+ " 0.000000 | \n",
+ " 0.090378 | \n",
+ " 0.164445 | \n",
+ " ... | \n",
+ " 0.003401 | \n",
+ " 0.000000 | \n",
+ " 0.088432 | \n",
+ " 0.000000 | \n",
+ " 0.070659 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 33 | \n",
+ " 6.182644 | \n",
+ " [financial] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.034291 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.031186 | \n",
+ " 0.000000 | \n",
+ " 0.116196 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 19 | \n",
+ " 5.619564 | \n",
+ " [crop, crops, wheat] | \n",
+ " 0.137234 | \n",
+ " 0.533502 | \n",
+ " 0.325388 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.184479 | \n",
+ " 0.229797 | \n",
+ " ... | \n",
+ " 0.090899 | \n",
+ " 0.027929 | \n",
+ " 0.008572 | \n",
+ " 0.000000 | \n",
+ " 0.148771 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2 | \n",
+ " 5.514252 | \n",
+ " [receive, receiving, obtain] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.006181 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 10 | \n",
+ " 4.750502 | \n",
+ " [salaries, salary] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.262970 | \n",
+ " 0.052700 | \n",
+ " 0.000000 | \n",
+ " 0.020607 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 6 | \n",
+ " 4.445997 | \n",
+ " [disabled, disability] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.078602 | \n",
+ " 0.067276 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.586652 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 9 | \n",
+ " 4.416175 | \n",
+ " [children] | \n",
+ " 0.024409 | \n",
+ " 0.000000 | \n",
+ " 0.008060 | \n",
+ " 0.000000 | \n",
+ " 0.004505 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.070780 | \n",
+ " 0.039174 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.171606 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 31 | \n",
+ " 4.416175 | \n",
+ " [social] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.043513 | \n",
+ " 0.003150 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.068558 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.081211 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 7 | \n",
+ " 4.202181 | \n",
+ " [geographically, geography, geographic] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.010706 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 4.049020 | \n",
+ " [health, care] | \n",
+ " 0.000711 | \n",
+ " 0.006612 | \n",
+ " 0.006318 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.189484 | \n",
+ " 0.060231 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.204632 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 11 | \n",
+ " 3.532940 | \n",
+ " [buy] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.183064 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 30 | \n",
+ " 3.532940 | \n",
+ " [political] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.196803 | \n",
+ " 0.153736 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.044111 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 26 | \n",
+ " 3.532940 | \n",
+ " [jobless] | \n",
+ " 0.000000 | \n",
+ " 0.027506 | \n",
+ " 0.000000 | \n",
+ " 0.032873 | \n",
+ " 0.052705 | \n",
+ " 0.091863 | \n",
+ " 0.037706 | \n",
+ " ... | \n",
+ " 0.101680 | \n",
+ " 0.007323 | \n",
+ " 0.355548 | \n",
+ " 0.000000 | \n",
+ " 0.011519 | \n",
+ " 0.146293 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 29 | \n",
+ " 3.066149 | \n",
+ " [markets, market] | \n",
+ " 0.000000 | \n",
+ " 0.031985 | \n",
+ " 0.000000 | \n",
+ " 0.032482 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.019352 | \n",
+ " 0.110605 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 8 | \n",
+ " 2.485127 | \n",
+ " [chickens, turkeys, birds] | \n",
+ " 0.342107 | \n",
+ " 0.123217 | \n",
+ " 0.257776 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.010813 | \n",
+ " 0.009382 | \n",
+ " ... | \n",
+ " 0.015904 | \n",
+ " 0.043576 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.004465 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 17 | \n",
+ " 1.787024 | \n",
+ " [rain, weather] | \n",
+ " 0.000000 | \n",
+ " 0.096077 | \n",
+ " 0.015908 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.507542 | \n",
+ " 0.357732 | \n",
+ " ... | \n",
+ " 0.202258 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.409019 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 14 | \n",
+ " 0.000000 | \n",
+ " [water] | \n",
+ " 0.050871 | \n",
+ " 0.100549 | \n",
+ " 0.075975 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.240113 | \n",
+ " 0.250010 | \n",
+ " ... | \n",
+ " 0.024795 | \n",
+ " 0.030857 | \n",
+ " 0.002714 | \n",
+ " 0.000937 | \n",
+ " 0.171574 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 20 | \n",
+ " 0.000000 | \n",
+ " [disaster] | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.127237 | \n",
+ " 0.089180 | \n",
+ " 0.263581 | \n",
+ " 0.101491 | \n",
+ " ... | \n",
+ " 0.373670 | \n",
+ " 0.001986 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.027711 | \n",
+ " 0.010487 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 12 | \n",
+ " 0.000000 | \n",
+ " [crisis] | \n",
+ " 0.000000 | \n",
+ " 0.002268 | \n",
+ " 0.000000 | \n",
+ " 0.306537 | \n",
+ " 0.161924 | \n",
+ " 0.096206 | \n",
+ " 0.080741 | \n",
+ " ... | \n",
+ " 0.104278 | \n",
+ " 0.027391 | \n",
+ " 0.045787 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3 | \n",
+ " 0.000000 | \n",
+ " [medications] | \n",
+ " 0.000000 | \n",
+ " 0.009789 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.374936 | \n",
+ " 0.009604 | \n",
+ " 0.001413 | \n",
+ " 0.000000 | \n",
+ " 0.129098 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " -1 | \n",
+ " -5.975976 | \n",
+ " [grow, economics, efficacious] | \n",
+ " 0.000000 | \n",
+ " 0.011873 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.030379 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
26 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " cluster strength examples \\\n",
+ "18 25 31.796455 [job] \n",
+ "1 0 12.258999 [opportunity, opportunities, experience] \n",
+ "20 27 9.715584 [unemployment] \n",
+ "5 4 7.949114 [money] \n",
+ "24 32 7.065879 [economic] \n",
+ "15 18 6.520283 [agricultural, agriculture] \n",
+ "25 33 6.182644 [financial] \n",
+ "16 19 5.619564 [crop, crops, wheat] \n",
+ "3 2 5.514252 [receive, receiving, obtain] \n",
+ "10 10 4.750502 [salaries, salary] \n",
+ "6 6 4.445997 [disabled, disability] \n",
+ "9 9 4.416175 [children] \n",
+ "23 31 4.416175 [social] \n",
+ "7 7 4.202181 [geographically, geography, geographic] \n",
+ "2 1 4.049020 [health, care] \n",
+ "11 11 3.532940 [buy] \n",
+ "22 30 3.532940 [political] \n",
+ "19 26 3.532940 [jobless] \n",
+ "21 29 3.066149 [markets, market] \n",
+ "8 8 2.485127 [chickens, turkeys, birds] \n",
+ "14 17 1.787024 [rain, weather] \n",
+ "13 14 0.000000 [water] \n",
+ "17 20 0.000000 [disaster] \n",
+ "12 12 0.000000 [crisis] \n",
+ "4 3 0.000000 [medications] \n",
+ "0 -1 -5.975976 [grow, economics, efficacious] \n",
+ "\n",
+ " protein supply energy supply food supply political stability \\\n",
+ "18 0.000000 0.000000 0.000000 0.000000 \n",
+ "1 0.000000 0.000000 0.000000 0.000000 \n",
+ "20 0.000000 0.034321 0.000000 0.072793 \n",
+ "5 0.009790 0.000000 0.004593 0.000000 \n",
+ "24 0.000000 0.074815 0.001991 0.116053 \n",
+ "15 0.168442 0.403144 0.286338 0.012277 \n",
+ "25 0.000000 0.000000 0.000000 0.034291 \n",
+ "16 0.137234 0.533502 0.325388 0.000000 \n",
+ "3 0.000000 0.000000 0.000000 0.000000 \n",
+ "10 0.000000 0.000000 0.000000 0.000000 \n",
+ "6 0.000000 0.000000 0.000000 0.000000 \n",
+ "9 0.024409 0.000000 0.008060 0.000000 \n",
+ "23 0.000000 0.000000 0.000000 0.043513 \n",
+ "7 0.000000 0.000000 0.000000 0.000000 \n",
+ "2 0.000711 0.006612 0.006318 0.000000 \n",
+ "11 0.000000 0.000000 0.000000 0.000000 \n",
+ "22 0.000000 0.000000 0.000000 0.196803 \n",
+ "19 0.000000 0.027506 0.000000 0.032873 \n",
+ "21 0.000000 0.031985 0.000000 0.032482 \n",
+ "8 0.342107 0.123217 0.257776 0.000000 \n",
+ "14 0.000000 0.096077 0.015908 0.000000 \n",
+ "13 0.050871 0.100549 0.075975 0.000000 \n",
+ "17 0.000000 0.000000 0.000000 0.127237 \n",
+ "12 0.000000 0.002268 0.000000 0.306537 \n",
+ "4 0.000000 0.009789 0.000000 0.000000 \n",
+ "0 0.000000 0.011873 0.000000 0.000000 \n",
+ "\n",
+ " armed conflict floods droughts ... cyclones disease income \\\n",
+ "18 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.338995 \n",
+ "1 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.018104 \n",
+ "20 0.038521 0.097461 0.082664 ... 0.043921 0.026523 0.436022 \n",
+ "5 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.231398 \n",
+ "24 0.008262 0.084514 0.051772 ... 0.041371 0.000000 0.242611 \n",
+ "15 0.000000 0.090378 0.164445 ... 0.003401 0.000000 0.088432 \n",
+ "25 0.000000 0.000000 0.000000 ... 0.031186 0.000000 0.116196 \n",
+ "16 0.000000 0.184479 0.229797 ... 0.090899 0.027929 0.008572 \n",
+ "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 \n",
+ "10 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.262970 \n",
+ "6 0.000000 0.000000 0.000000 ... 0.000000 0.078602 0.067276 \n",
+ "9 0.004505 0.000000 0.000000 ... 0.000000 0.070780 0.039174 \n",
+ "23 0.003150 0.000000 0.000000 ... 0.000000 0.000000 0.068558 \n",
+ "7 0.000000 0.000000 0.010706 ... 0.000000 0.000000 0.000000 \n",
+ "2 0.000000 0.000000 0.000000 ... 0.000000 0.189484 0.060231 \n",
+ "11 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 \n",
+ "22 0.153736 0.000000 0.000000 ... 0.000000 0.000000 0.044111 \n",
+ "19 0.052705 0.091863 0.037706 ... 0.101680 0.007323 0.355548 \n",
+ "21 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.019352 \n",
+ "8 0.000000 0.010813 0.009382 ... 0.015904 0.043576 0.000000 \n",
+ "14 0.000000 0.507542 0.357732 ... 0.202258 0.000000 0.000000 \n",
+ "13 0.000000 0.240113 0.250010 ... 0.024795 0.030857 0.002714 \n",
+ "17 0.089180 0.263581 0.101491 ... 0.373670 0.001986 0.000000 \n",
+ "12 0.161924 0.096206 0.080741 ... 0.104278 0.027391 0.045787 \n",
+ "4 0.000000 0.000000 0.000000 ... 0.000000 0.374936 0.009604 \n",
+ "0 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 \n",
+ "\n",
+ " prices climate inclusivity 0 2 31 7 \n",
+ "18 0.000000 0.000000 0.032694 0 0 0 0 \n",
+ "1 0.000000 0.000000 0.000000 1 0 0 0 \n",
+ "20 0.011958 0.034313 0.134235 0 0 0 0 \n",
+ "5 0.126873 0.000000 0.000000 0 0 0 0 \n",
+ "24 0.014831 0.031305 0.000000 0 0 0 0 \n",
+ "15 0.000000 0.070659 0.000000 0 0 0 0 \n",
+ "25 0.000000 0.000000 0.000000 0 0 0 0 \n",
+ "16 0.000000 0.148771 0.000000 0 0 0 0 \n",
+ "3 0.000000 0.000000 0.006181 0 1 0 0 \n",
+ "10 0.052700 0.000000 0.020607 0 0 0 0 \n",
+ "6 0.000000 0.000000 0.586652 0 0 0 0 \n",
+ "9 0.000000 0.000000 0.171606 0 0 0 0 \n",
+ "23 0.000000 0.000000 0.081211 0 0 1 0 \n",
+ "7 0.000000 0.000000 0.000000 0 0 0 1 \n",
+ "2 0.000000 0.000000 0.204632 0 0 0 0 \n",
+ "11 0.183064 0.000000 0.000000 0 0 0 0 \n",
+ "22 0.000000 0.000000 0.000000 0 0 0 0 \n",
+ "19 0.000000 0.011519 0.146293 0 0 0 0 \n",
+ "21 0.110605 0.000000 0.000000 0 0 0 0 \n",
+ "8 0.000000 0.004465 0.000000 0 0 0 0 \n",
+ "14 0.000000 0.409019 0.000000 0 0 0 0 \n",
+ "13 0.000937 0.171574 0.000000 0 0 0 0 \n",
+ "17 0.000000 0.027711 0.010487 0 0 0 0 \n",
+ "12 0.000000 0.000000 0.000000 0 0 0 0 \n",
+ "4 0.001413 0.000000 0.129098 0 0 0 0 \n",
+ "0 0.030379 0.000000 0.000000 0 0 0 0 \n",
+ "\n",
+ "[26 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "final_matrix = coalescence_df.copy()\n",
+ "for i,cluster in enumerate(final_matrix.cluster):\n",
+ " row = final_matrix[final_matrix.cluster == cluster].iloc[0]\n",
+ " if all(row.loc['protein supply':'inclusivity']<.1) and row.strength > 1:\n",
+ " new_col = [0]*len(final_matrix)\n",
+ " new_col[i] = 1\n",
+ " final_matrix[cluster] = new_col\n",
+ "final_matrix"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.8.12 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "5b3ded1ccb95c1d9bd405e7b823d9e85424cde40fbb5985eb47e999ef50e15b4"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}