diff --git a/qual_analysis.ipynb b/qual_analysis.ipynb new file mode 100644 index 0000000..6181fc9 --- /dev/null +++ b/qual_analysis.ipynb @@ -0,0 +1,6167 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Massive change testing" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import plotly.express as px\n", + "import numpy as np\n", + "import pandas as pd\n", + "import hdbscan\n", + "import umap\n", + "import re\n", + "from gensim.test.utils import datapath\n", + "import gensim.downloader as api\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from topic_extractor import TopicExtractor\n", + "from sklearn.feature_extraction.text import TfidfVectorizer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "wv = api.load('word2vec-google-news-300')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "vul_list = ['temperature', 'weather', 'soil', 'employment', 'vaccine', 'education', 'jobs', 'income', 'discrimination', \n", + "'racism', 'ethnicity', 'minorities', 'orientation', 'immigrants', 'disabled', 'women', 'attitudes', 'imports', 'production', 'irrigation',\n", + "'economic', 'rain', 'water']\n", + "acc_list = ['price', 'cost', 'expensive', 'gdp', 'food', 'distance', 'rural']\n", + "sho_list = ['earthquake', 'disaster', 'conflict', 'war', 'politics', 'social', 'storms', 'volcanoes']\n", + "ava_list = ['enough', 'aid', 'share', 'amount', 'donations', 'grants', 'market']\n", + "\n", + "vul_df = pd.DataFrame({'pillar': 'vulnerability', 'target_words': vul_list})\n", + "acc_df = pd.DataFrame({'pillar': 'access', 'target_words': acc_list})\n", + "sho_df = pd.DataFrame({'pillar': 'shocks', 'target_words': sho_list})\n", + "ava_df = pd.DataFrame({'pillar': 'availability', 'target_words': ava_list})\n", + "\n", + "pdList = [vul_df, acc_df, sho_df, ava_df]\n", + "nom_cluster_words = pd.concat(pdList)\n", + "nom_cluster_words['word'] = [np.array(wv.most_similar(row , topn = 10)).T[0] for row in nom_cluster_words['target_words']]\n", + "\n", + "nom_cluster_words = nom_cluster_words.explode('word').reset_index(drop = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Prompttext
102.4\\tWhat would you say are the main reasons w...Having no income, when those families do not h...
102.4\\tWhat would you say are the main reasons w...As I said the poor people, they are poor becau...
102.4\\tWhat would you say are the main reasons w...Economic state i think; Sometimes social probl...
102.4\\tWhat would you say are the main reasons w...I think the leading cause is that, the governm...
102.4\\tWhat would you say are the main reasons w...? I‘d say this goes back the city not having j...
.........
132.5 Cause 3:Having an illness
132.5 Cause 3:Cause 3: Social reasons\\nCause 4: Educational ...
132.5 Cause 3:And lastly not having a job
132.5 Cause 3:Cause 3: Not having an active economy\\nCause 4...
132.5 Cause 3:Under-performing of the community. Yes. And th...
\n", + "

95 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Prompt \\\n", + "10 2.4\\tWhat would you say are the main reasons w... \n", + "10 2.4\\tWhat would you say are the main reasons w... \n", + "10 2.4\\tWhat would you say are the main reasons w... \n", + "10 2.4\\tWhat would you say are the main reasons w... \n", + "10 2.4\\tWhat would you say are the main reasons w... \n", + ".. ... \n", + "13 2.5 Cause 3: \n", + "13 2.5 Cause 3: \n", + "13 2.5 Cause 3: \n", + "13 2.5 Cause 3: \n", + "13 2.5 Cause 3: \n", + "\n", + " text \n", + "10 Having no income, when those families do not h... \n", + "10 As I said the poor people, they are poor becau... \n", + "10 Economic state i think; Sometimes social probl... \n", + "10 I think the leading cause is that, the governm... \n", + "10 ? I‘d say this goes back the city not having j... \n", + ".. ... \n", + "13 Having an illness \n", + "13 Cause 3: Social reasons\\nCause 4: Educational ... \n", + "13 And lastly not having a job \n", + "13 Cause 3: Not having an active economy\\nCause 4... \n", + "13 Under-performing of the community. Yes. And th... \n", + "\n", + "[95 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = pd.read_csv('Iraq Qual Analyses.csv').loc[:,:'P4.2']\n", + "raw_data['text'] = [row for row in raw_data[raw_data.columns[1:]].values]\n", + "raw_data = raw_data[10:14]\n", + "raw_data.drop(raw_data.columns[1:-1],axis = 1,inplace=True)\n", + "raw_data = raw_data.explode('text').dropna()\n", + "raw_data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.584315002489348" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.linalg.norm(wv['opportunities'])**2" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/4220633183.py:2: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + " word_list = word_list.str.replace(',','').str.replace('.','').str.lower()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "word_list = raw_data.text.str.split(' ').explode()\n", + "word_list = word_list.str.replace(',','').str.replace('.','').str.lower()\n", + "stop_words = [(\"don't\",.34),('reasons',.34),('foods',.6),('unable',.3),('citizens',.4),('my',.3),('hunger',.3),('cause',.2),('factories',.3),('leads',.3),('expired',.3),('living',.4),('low',.2)]\n", + "for stop_word, similarity in stop_words:\n", + " word_list = word_list[[wv.similarity(word.lower(), stop_word)< similarity if word in wv else True for word in word_list]]\n", + "print('children' in word_list.to_list())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "topic_extractorer = TopicExtractor(wv,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "topic_extractorer.load_seed_clusters(seed_words=nom_cluster_words['word'],known_labels=nom_cluster_words['pillar'],threshold=7)\n", + "topic_extractorer.load_words(word_list,6)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "w:\\DARPA_Habitus\\CONSULTING\\Analytics\\HABITUS\\Max\\Habitus\\MICRO_model\\qual_analyses\\topic_extractor.py:63: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " temp['pillar'] = temp.pillar.replace(np.NaN,'None')\n" + ] + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "%{hovertext}

pillar=vulnerability
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}", + "hovertext": [ + "females", + "irrigational", + "handicapped", + "girls", + "mothers", + "wintry_conditions", + "employment", + "import", + "Floridan_aquifer", + "economy", + "invidious_discrimination", + "husbands", + "irrigations", + "imported", + "Latino_immigrants", + "wheelchair_bound", + "jobs", + "importing", + "export", + "exporting", + "dewpoint", + "blacks", + "institutionalized_discrimination", + "freshwater", + "migrant", + "gender", + "racial_bigotry", + "immigrant", + "racist", + "unemployed", + "discriminatory", + "Disabled", + "societal_attitudes", + "ethnic_minorities", + "bigotry", + "Economic", + "downpour", + "stormy_weather", + "inclement_weather", + "institutionalized_racism", + "discriminations", + "sprinkler_irrigation", + "soils", + "wet_weather", + "homophobia", + "°_C", + "visually_impaired", + "disabilities", + "macroeconomic", + "sewage", + "°_F", + "Rain", + "macro_economic", + "racial_intolerance", + "irrigated", + "discriminated", + "potable_water", + "racial_minorities", + "physically_challenged", + "exported", + "wastewater", + "discriminated_against", + "degrees_Fahrenheit", + "gender_bias", + "racial_discrimination", + "torrential_rain", + "groundwater", + "potable", + "wintery_weather", + "mentally_disabled", + "irrigating", + "migrants", + "gender_discrimination", + "rains", + "temperatures", + "vaccines", + "exports", + "sandy_soil", + "unemployment", + "racial_prejudice", + "heavy_rain", + "heavy_rains", + "Immigrants", + "physically_handicapped", + "ambient_temperature", + "Racism", + "wintry_weather", + "Discrimination", + "irrigated_agriculture", + "downpours", + "illegals", + "illegal_immigrants", + "undocumented_workers", + "undocumented_immigrants", + "rain_showers", + "ambient_temperatures", + "gender_ethnicity", + "drip_irrigation", + "flu_vaccines", + "daytime_temperatures", + "rainfall", + "H#N#_influenza_vaccine", + "influenza_vaccines", + "H#N#_vaccines", + "smallpox_vaccine", + "Exports", + "influenza_vaccine", + "flu_vaccine", + "H#N#_vaccine", + "swine_flu_vaccine" + ], + "legendgroup": "vulnerability", + "marker": { + "color": [ + 9, + 14, + 6, + 9, + 9, + 17, + 25, + 28, + 14, + 32, + 24, + 9, + 16, + 28, + 23, + 6, + 25, + 28, + 28, + 28, + 15, + 24, + 24, + 14, + 23, + 24, + 24, + 23, + 24, + 26, + 24, + 6, + 24, + 24, + 24, + 32, + 17, + 17, + 17, + 24, + 24, + 16, + 14, + 17, + 24, + 15, + 6, + 6, + 32, + 14, + 15, + 17, + 32, + 24, + 16, + 24, + 14, + 24, + 6, + 28, + 14, + 24, + 15, + 24, + 24, + 17, + 14, + 14, + 17, + 6, + 16, + 23, + 24, + 17, + 15, + 3, + 28, + 14, + 27, + 24, + 17, + 17, + 23, + 6, + 15, + 24, + 17, + 24, + 16, + 17, + 23, + 23, + 23, + 23, + 17, + 15, + 24, + 16, + 3, + 15, + 17, + 3, + 3, + 3, + 3, + 28, + 3, + 3, + 3, + 3 + ], + "coloraxis": "coloraxis", + "symbol": "circle" + }, + "mode": "markers", + "name": "vulnerability", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 9.16235637664795, + 12.767671585083008, + 9.896053314208984, + 9.240710258483887, + 9.290543556213379, + 15.923059463500977, + 10.54406452178955, + 0.19626043736934662, + 12.633519172668457, + 24.75111198425293, + 6.978339672088623, + 9.35623836517334, + 12.579814910888672, + 0.2165854126214981, + 8.579086303710938, + 9.92792797088623, + 10.26561450958252, + 0.2808363735675812, + 0.3294012248516083, + 0.3225804567337036, + 15.000429153442383, + 7.427166938781738, + 6.9527812004089355, + 12.820655822753906, + 8.830239295959473, + 7.550509452819824, + 6.663220405578613, + 8.71061897277832, + 6.589083194732666, + -12.985361099243164, + 7.014978885650635, + 10.140372276306152, + 7.797515869140625, + 7.644662380218506, + 6.5979766845703125, + 24.93074607849121, + 16.063474655151367, + 16.031957626342773, + 15.953804969787598, + 6.765969753265381, + 7.031929016113281, + 12.582038879394531, + 12.48460578918457, + 15.775157928466797, + 6.689704418182373, + 15.040907859802246, + 9.99012279510498, + 10.115378379821777, + 24.975109100341797, + 12.808707237243652, + 15.066388130187988, + 15.964011192321777, + 25.05222511291504, + 6.5349225997924805, + 12.527203559875488, + 7.084386348724365, + 12.771050453186035, + 7.503295421600342, + 9.904162406921387, + 0.27075278759002686, + 12.786842346191406, + 6.9771599769592285, + 15.13479232788086, + 7.086657524108887, + 7.041834831237793, + 16.11159896850586, + 12.757672309875488, + 12.691655158996582, + 15.938840866088867, + 9.794981956481934, + 12.484210968017578, + 8.815749168395996, + 7.036043643951416, + 16.088163375854492, + 15.223651885986328, + 7.790129661560059, + 0.28267717361450195, + 12.45325756072998, + -13.279583930969238, + 6.629983901977539, + 16.08289337158203, + 16.163053512573242, + 8.72900390625, + 9.8550386428833, + 15.07657527923584, + 6.595930576324463, + 15.868152618408203, + 6.896994113922119, + 12.628324508666992, + 16.199296951293945, + 8.62634563446045, + 8.719016075134277, + 8.639336585998535, + 8.64566707611084, + 16.028329849243164, + 15.079449653625488, + 7.499907970428467, + 12.532830238342285, + 7.790839672088623, + 15.261857986450195, + 16.089712142944336, + 7.75033712387085, + 7.71716833114624, + 7.733931064605713, + 7.802220821380615, + 0.2456071823835373, + 7.667672157287598, + 7.674630165100098, + 7.77892541885376, + 7.838179111480713 + ], + "xaxis": "x", + "y": [ + 10.112516403198242, + -5.146145820617676, + 11.058342933654785, + 10.490525245666504, + 10.366625785827637, + -0.464988112449646, + 31.472095489501953, + -0.9640982151031494, + -4.6123480796813965, + -1.4137349128723145, + 9.959176063537598, + 10.348305702209473, + -5.20917272567749, + -0.9652140736579895, + 10.339306831359863, + 11.163494110107422, + 31.355772018432617, + -1.0373811721801758, + -1.0338189601898193, + -0.9910680055618286, + -1.5478788614273071, + 9.836847305297852, + 9.878887176513672, + -4.773446083068848, + 10.140910148620605, + 9.163508415222168, + 9.847591400146484, + 10.160124778747559, + 9.885452270507812, + 19.87122917175293, + 9.854803085327148, + 11.271401405334473, + 9.030710220336914, + 9.786548614501953, + 9.713510513305664, + -1.3317567110061646, + -0.8841704726219177, + -0.4798450469970703, + -0.5381402969360352, + 9.801292419433594, + 9.792922973632812, + -5.284242153167725, + -5.0293498039245605, + -0.7814459800720215, + 9.88368034362793, + -1.4927150011062622, + 11.138364791870117, + 11.193595886230469, + -1.2993342876434326, + -4.733755588531494, + -1.5095746517181396, + -0.8039936423301697, + -1.204244613647461, + 9.878316879272461, + -5.346510410308838, + 9.674796104431152, + -4.668487071990967, + 9.848109245300293, + 11.130931854248047, + -0.9126441478729248, + -4.695742130279541, + 9.772257804870605, + -1.4167968034744263, + 9.531572341918945, + 9.645428657531738, + -0.5683789849281311, + -4.690822124481201, + -4.621603488922119, + -0.44945886731147766, + 10.966743469238281, + -5.313103199005127, + 10.161568641662598, + 9.559785842895508, + -0.9048455357551575, + -1.3202414512634277, + -5.146547317504883, + -0.9798607230186462, + -5.107876300811768, + 20.058378219604492, + 9.778946876525879, + -0.7409117817878723, + -0.5987833738327026, + 10.21772289276123, + 11.271647453308105, + -1.4616918563842773, + 9.737491607666016, + -0.48458540439605713, + 9.76830005645752, + -5.537435054779053, + -0.7557460069656372, + 10.260096549987793, + 10.268465995788574, + 10.198100090026855, + 10.28207015991211, + -0.7947467565536499, + -1.4168654680252075, + 8.679947853088379, + -5.318938732147217, + -5.1545796394348145, + -1.3082618713378906, + -0.9046620726585388, + -5.186274528503418, + -5.219858169555664, + -5.20323371887207, + -5.135258674621582, + -0.9587400555610657, + -5.270003795623779, + -5.261002540588379, + -5.158375263214111, + -5.098591327667236 + ], + "yaxis": "y" + }, + { + "hovertemplate": "%{hovertext}

pillar=availability
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}", + "hovertext": [ + "industry", + "amout", + "marketplace", + "markets", + "amounts", + "funding", + "sums", + "contributions", + "donators", + "donated", + "donors", + "charitable_donations", + "donation", + "Donation", + "Donations", + "Monetary_donations" + ], + "legendgroup": "availability", + "marker": { + "color": [ + 29, + 4, + 29, + 29, + 4, + 4, + 4, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5 + ], + "coloraxis": "coloraxis", + "symbol": "diamond" + }, + "mode": "markers", + "name": "availability", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 0.18043620884418488, + -4.6981987953186035, + 0.21258105337619781, + 0.13186922669410706, + -4.652432441711426, + -4.505962371826172, + -4.539834976196289, + -4.1242194175720215, + -4.087623119354248, + -4.140650272369385, + -4.089099884033203, + -4.198858737945557, + -4.072911739349365, + -4.0027756690979, + -4.094690799713135, + -4.215901851654053 + ], + "xaxis": "x", + "y": [ + -1.8738614320755005, + -2.429811716079712, + -1.8311617374420166, + -1.8468598127365112, + -2.321540594100952, + -2.174583911895752, + -2.2341742515563965, + -2.505549669265747, + -2.4823150634765625, + -2.439889907836914, + -2.5200045108795166, + -2.392725706100464, + -2.5460383892059326, + -2.6063902378082275, + -2.5252842903137207, + -2.5158116817474365 + ], + "yaxis": "y" + }, + { + "hovertemplate": "%{hovertext}

pillar=access
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}", + "hovertext": [ + "costly", + "pricey", + "price", + "outrageously_expensive", + "cheaper", + "pricy", + "pricing", + "prices", + "prohibitively_expensive", + "pricier", + "GDP", + "Prices" + ], + "legendgroup": "access", + "marker": { + "color": [ + 22, + 22, + 29, + 22, + 22, + 22, + 29, + 29, + 22, + 22, + 32, + 29 + ], + "coloraxis": "coloraxis", + "symbol": "square" + }, + "mode": "markers", + "name": "access", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + -1.1036288738250732, + -0.9784208536148071, + -0.37404417991638184, + -1.067224144935608, + -0.8902236223220825, + -1.0903464555740356, + -0.11353441327810287, + -0.14094533026218414, + -1.045573353767395, + -0.9335117340087891, + 24.91689109802246, + -0.18147997558116913 + ], + "xaxis": "x", + "y": [ + -1.630856990814209, + -1.627647876739502, + -1.6491870880126953, + -1.5814464092254639, + -1.6146478652954102, + -1.5424124002456665, + -1.7577154636383057, + -1.7066618204116821, + -1.6164547204971313, + -1.679885983467102, + -1.156358003616333, + -1.6531304121017456 + ], + "yaxis": "y" + }, + { + "hovertemplate": "%{hovertext}

pillar=shocks
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}", + "hovertext": [ + "war", + "societal", + "conflict", + "internecine_conflict", + "strife", + "undersea_volcanoes", + "politician", + "catastrophe", + "calamity", + "conflicts", + "Social", + "disasters", + "hostilities", + "natural_disasters", + "catastrophes", + "War", + "Conflict", + "civil_strife", + "volcanic", + "Persian_Gulf_War", + "partisan_politics", + "hurricanes", + "rainstorms", + "devastating_earthquake", + "tornados", + "volcano", + "earthquakes", + "Earthquake", + "quake", + "volcanic_eruptions", + "aftershock", + "lava_flows", + "volcanic_activity", + "thunderstorms", + "tornadoes", + "temblor", + "magnitude_earthquake", + "severe_thunderstorms", + "#.#_magnitude_earthquake", + "#.#_magnitude_quake", + "quakes", + "active_volcanoes", + "volcanos" + ], + "legendgroup": "shocks", + "marker": { + "color": [ + 12, + 31, + 12, + 12, + 12, + 13, + 30, + 20, + 20, + 12, + 31, + 20, + 12, + 20, + 20, + 12, + 12, + 12, + 13, + 12, + 30, + 17, + 17, + 21, + 17, + 13, + 21, + 21, + 21, + 13, + 21, + 13, + 13, + 17, + 17, + 21, + 21, + 17, + 21, + 21, + 21, + 13, + 13 + ], + "coloraxis": "coloraxis", + "symbol": "x" + }, + "mode": "markers", + "name": "shocks", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 23.775033950805664, + 7.8632049560546875, + 23.73737907409668, + 23.739322662353516, + 23.762481689453125, + 12.668766975402832, + 24.632911682128906, + 21.732084274291992, + 21.78840446472168, + 23.85695457458496, + 7.8252129554748535, + 21.57056427001953, + 23.72748565673828, + 21.626258850097656, + 21.661327362060547, + 23.777233123779297, + 23.88597297668457, + 23.608903884887695, + 12.627827644348145, + 23.643224716186523, + 24.67068862915039, + 21.654014587402344, + 16.230154037475586, + 20.910120010375977, + 16.63974380493164, + 12.589360237121582, + 20.940082550048828, + 20.907955169677734, + 20.704904556274414, + 12.658271789550781, + 20.754716873168945, + 12.632294654846191, + 12.61149787902832, + 16.41225242614746, + 16.693267822265625, + 20.801971435546875, + 20.7946720123291, + 16.38044548034668, + 20.90024185180664, + 20.796903610229492, + 20.873451232910156, + 12.596431732177734, + 12.615105628967285 + ], + "xaxis": "x", + "y": [ + -0.5166358947753906, + 8.860580444335938, + -0.6228737235069275, + -0.6622812151908875, + -0.6967945694923401, + -2.5611214637756348, + -0.8868563175201416, + -0.571590781211853, + -0.6115420460700989, + -0.7369848489761353, + 8.780749320983887, + -0.4452560544013977, + -0.5958192944526672, + -0.4690714478492737, + -0.5017377138137817, + -0.5338945388793945, + -0.7218263745307922, + -0.5466699004173279, + -2.572164297103882, + -0.5221036076545715, + -0.8606185913085938, + -0.5041481852531433, + -0.7453354597091675, + -0.6226603984832764, + -0.6450382471084595, + -2.5973639488220215, + -0.5227365493774414, + -0.5900933742523193, + -0.6559988260269165, + -2.5386602878570557, + -0.5897109508514404, + -2.617889642715454, + -2.5297799110412598, + -0.7741866111755371, + -0.6501337289810181, + -0.5965899229049683, + -0.6158409714698792, + -0.7104356288909912, + -0.7185001373291016, + -0.6174815893173218, + -0.5477422475814819, + -2.517280340194702, + -2.5789031982421875 + ], + "yaxis": "y" + }, + { + "hovertemplate": "%{hovertext}

pillar=None
principal component of text embeddings=%{x}
secondary component of text embeddings=%{y}
labels=%{marker.color}", + "hovertext": [ + "money", + "money", + "money", + "money", + "money", + "money", + "money", + "money", + "money", + "health", + "health", + "health", + "children", + "children", + "children", + "children", + "children", + "experience", + "experience", + "financial", + "financial", + "financial", + "financial", + "financial", + "financial", + "financial", + "market", + "market", + "market", + "receiving", + "social", + "social", + "social", + "social", + "social", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "job", + "care", + "care", + "care", + "care", + "water", + "buy", + "buy", + "buy", + "buy", + "economic", + "economic", + "economic", + "economic", + "economic", + "economic", + "economic", + "economic", + "political", + "political", + "political", + "political", + "receive", + "receive", + "received", + "received", + "received", + "received", + "received", + "opportunity", + "opportunity", + "opportunity", + "opportunity", + "obtain", + "markets", + "disabled", + "crisis", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "opportunities", + "agricultural", + "agricultural", + "agricultural", + "geographic", + "geographic", + "ducks", + "geography", + "geography", + "geographical", + "geographical", + "disaster", + "agriculture", + "agriculture", + "agriculture", + "agriculture", + "agriculture", + "weather", + "crop", + "geographically", + "disability", + "disability", + "disability", + "disability", + "disability", + "chickens", + "birds", + "crops", + "crops", + "crops", + "crops", + "salaries", + "salaries", + "salaries", + "salaries", + "medications", + "turkeys", + "wheat", + "wheat", + "wheat", + "wheat", + "rain", + "rain", + "geese", + "salary", + "salary", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "unemployment", + "jobless", + "jobless", + "jobless", + "jobless" + ], + "legendgroup": "None", + "marker": { + "color": [ + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 1, + 1, + 1, + 9, + 9, + 9, + 9, + 9, + 0, + 0, + 33, + 33, + 33, + 33, + 33, + 33, + 33, + 29, + 29, + 29, + 2, + 31, + 31, + 31, + 31, + 31, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 25, + 1, + 1, + 1, + 1, + 14, + 11, + 11, + 11, + 11, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 30, + 30, + 30, + 30, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 2, + 29, + 6, + 12, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 18, + 18, + 18, + 7, + 7, + 8, + 7, + 7, + 7, + 7, + 20, + 18, + 18, + 18, + 18, + 18, + 17, + 19, + 7, + 6, + 6, + 6, + 6, + 6, + 8, + 8, + 19, + 19, + 19, + 19, + 10, + 10, + 10, + 10, + 3, + 8, + 19, + 19, + 19, + 19, + 17, + 17, + 8, + 10, + 10, + 27, + 27, + 27, + 27, + 27, + 27, + 27, + 27, + 27, + 27, + 27, + 26, + 26, + 26, + 26 + ], + "coloraxis": "coloraxis", + "symbol": "cross" + }, + "mode": "markers", + "name": "None", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + -4.507212162017822, + -4.4262800216674805, + -4.376980781555176, + -4.472561359405518, + -4.478466033935547, + -4.406641960144043, + -4.5273919105529785, + -4.566632270812988, + -4.462923526763916, + 8.432709693908691, + 8.542923927307129, + 8.438213348388672, + 9.243868827819824, + 9.323732376098633, + 9.353984832763672, + 9.28091812133789, + 9.46688175201416, + 33.436790466308594, + 33.42896270751953, + 24.834928512573242, + 24.603927612304688, + 24.81510353088379, + 24.914033889770508, + 24.714937210083008, + 24.85848617553711, + 24.783843994140625, + 0.13158822059631348, + 0.2060222327709198, + 0.07041709870100021, + -3.8944013118743896, + 7.815195560455322, + 7.9211320877075195, + 7.829451084136963, + 7.823032379150391, + 7.851646900177002, + 10.382811546325684, + 10.526248931884766, + 10.266364097595215, + 10.403772354125977, + 10.074569702148438, + 10.3519868850708, + 10.058606147766113, + 10.351016998291016, + 9.907017707824707, + 10.072925567626953, + 10.131407737731934, + 10.245551109313965, + 10.093221664428711, + 10.352201461791992, + 9.824226379394531, + 10.205928802490234, + 10.347371101379395, + 9.96969223022461, + 10.00643539428711, + 10.362354278564453, + 9.932232856750488, + 10.579630851745605, + 10.070685386657715, + 10.280411720275879, + 9.908019065856934, + 10.055655479431152, + 10.26353931427002, + 10.193639755249023, + 10.36439037322998, + 10.125931739807129, + 9.86143684387207, + 10.180801391601562, + 9.962239265441895, + 10.22084903717041, + 10.006352424621582, + 10.262260437011719, + 8.452844619750977, + 8.473775863647461, + 8.484114646911621, + 8.491806030273438, + 12.699315071105957, + -0.5915045142173767, + -0.5873561501502991, + -0.5466613173484802, + -0.6931774020195007, + 24.807939529418945, + 24.832786560058594, + 24.81983757019043, + 24.947004318237305, + 24.587072372436523, + 24.792675018310547, + 24.696651458740234, + 24.700870513916016, + 24.622825622558594, + 24.710262298583984, + 24.635618209838867, + 24.629207611083984, + -3.8478801250457764, + -3.9143288135528564, + -3.906141519546509, + -3.936976909637451, + -3.858661413192749, + -3.932265520095825, + -3.8614954948425293, + 33.24429702758789, + 33.2432746887207, + 33.252357482910156, + 33.05552291870117, + -3.7636804580688477, + 0.21506711840629578, + 10.071990966796875, + 23.506977081298828, + 33.159996032714844, + 33.235191345214844, + 33.110774993896484, + 33.03934860229492, + 33.20603561401367, + 33.24473571777344, + 33.170082092285156, + 33.01203918457031, + 33.145511627197266, + 33.04566955566406, + 33.01129913330078, + 33.11540603637695, + 33.05546188354492, + 12.637762069702148, + 12.703325271606445, + 12.683088302612305, + 7.131596565246582, + 7.1713032722473145, + 9.540665626525879, + 7.1630048751831055, + 7.180014610290527, + 7.161986351013184, + 7.191754341125488, + 21.568710327148438, + 12.767961502075195, + 12.709783554077148, + 12.733067512512207, + 12.74810791015625, + 12.725667953491211, + 15.861473083496094, + 11.574446678161621, + 7.154047966003418, + 10.197831153869629, + 10.304410934448242, + 10.185552597045898, + 10.260804176330566, + 10.31605339050293, + 9.557987213134766, + 9.517667770385742, + 11.821714401245117, + 11.849270820617676, + 11.85051155090332, + 11.834342002868652, + 0.5366057753562927, + 0.5434686541557312, + 0.493112713098526, + 0.5365191102027893, + 8.457159996032715, + 9.551088333129883, + 11.37149429321289, + 11.353878021240234, + 11.38868522644043, + 11.386590003967285, + 15.934645652770996, + 16.065654754638672, + 9.50425910949707, + 0.5356476902961731, + 0.5355372428894043, + -13.246667861938477, + -12.950582504272461, + -13.320332527160645, + -13.17508602142334, + -13.222073554992676, + -13.138623237609863, + -13.177305221557617, + -13.256484031677246, + -13.399938583374023, + -13.151272773742676, + -13.078429222106934, + -13.153911590576172, + -13.203241348266602, + -13.046936988830566, + -13.121779441833496 + ], + "xaxis": "x", + "y": [ + -2.2750625610351562, + -2.1617109775543213, + -2.093186140060425, + -2.203427791595459, + -2.178464412689209, + -2.307161331176758, + -2.301379680633545, + -2.287383556365967, + -2.2355751991271973, + -4.484686851501465, + -4.379030704498291, + -4.488071918487549, + 10.510049819946289, + 10.52122974395752, + 10.431391716003418, + 10.450465202331543, + 10.560538291931152, + 3.770805597305298, + 3.8158607482910156, + -1.5887807607650757, + -1.4921249151229858, + -1.4737590551376343, + -1.5524905920028687, + -1.5470755100250244, + -1.5884498357772827, + -1.5273369550704956, + -1.806786060333252, + -1.8529914617538452, + -1.8813849687576294, + -0.04487733170390129, + 8.809576034545898, + 8.828706741333008, + 8.794428825378418, + 8.889230728149414, + 8.825891494750977, + 31.368797302246094, + 31.796459197998047, + 32.00880432128906, + 31.591533660888672, + 31.790260314941406, + 31.943866729736328, + 31.922914505004883, + 31.77692222595215, + 31.576095581054688, + 31.818601608276367, + 31.540224075317383, + 31.738788604736328, + 31.65993309020996, + 31.488407135009766, + 31.782691955566406, + 31.734996795654297, + 31.52668571472168, + 31.755765914916992, + 31.368860244750977, + 31.66207504272461, + 31.645532608032227, + 31.674280166625977, + 31.610933303833008, + 31.75507926940918, + 31.54774284362793, + 31.65562629699707, + 31.70794677734375, + 32.01081848144531, + 31.8942813873291, + 31.771577835083008, + 31.659448623657227, + 31.668880462646484, + 31.751672744750977, + 31.550310134887695, + 31.7370548248291, + 31.567859649658203, + -4.4550862312316895, + -4.4339470863342285, + -4.422357559204102, + -4.416126728057861, + -4.625192642211914, + -1.5523978471755981, + -1.5859959125518799, + -1.5279690027236938, + -1.5441169738769531, + -1.1945008039474487, + -1.232282280921936, + -1.1883515119552612, + -1.232890248298645, + -1.417033314704895, + -1.2373076677322388, + -1.3513730764389038, + -1.3807910680770874, + -1.008764624595642, + -0.9554536938667297, + -1.0258835554122925, + -1.0276527404785156, + -0.08584870398044586, + -0.07352712005376816, + -0.08116650581359863, + -0.11972293257713318, + -0.08955511450767517, + -0.10064183175563812, + -0.13496297597885132, + 3.7353227138519287, + 3.6970198154449463, + 3.764241933822632, + 3.722795248031616, + -0.08996137976646423, + -1.7895642518997192, + 11.262763023376465, + -0.888943076133728, + 3.5394551753997803, + 3.785940408706665, + 3.5987133979797363, + 3.652507781982422, + 3.6828837394714355, + 3.749213933944702, + 3.5954811573028564, + 3.589816093444824, + 3.562051296234131, + 3.556161642074585, + 3.498551368713379, + 3.641343593597412, + 3.5875024795532227, + -5.698921203613281, + -5.7093281745910645, + -5.761826515197754, + 7.662810325622559, + 7.6909356117248535, + 10.086607933044434, + 7.696770668029785, + 7.719559669494629, + 7.686873435974121, + 7.705811977386475, + -0.5955846905708313, + -5.893669128417969, + -5.8075737953186035, + -5.848629951477051, + -5.869546890258789, + -5.901583671569824, + -0.45020514726638794, + -4.952081203460693, + 7.6730875968933105, + 11.240777015686035, + 11.395076751708984, + 11.296164512634277, + 11.360045433044434, + 11.377558708190918, + 10.074575424194336, + 10.084754943847656, + -5.053019046783447, + -5.046474933624268, + -5.074513912200928, + -5.067023754119873, + -8.25075626373291, + -8.257633209228516, + -8.207293510437012, + -8.250665664672852, + -4.401479244232178, + 10.055825233459473, + -4.8598313331604, + -4.82229471206665, + -4.862067699432373, + -4.853354454040527, + -0.8348350524902344, + -0.9088714122772217, + 10.160237312316895, + -8.249314308166504, + -8.249642372131348, + 20.04449462890625, + 20.085220336914062, + 19.971458435058594, + 20.079345703125, + 20.09954071044922, + 20.16891098022461, + 20.200408935546875, + 20.2004337310791, + 20.07594871520996, + 20.181053161621094, + 20.227428436279297, + 19.99015998840332, + 20.114471435546875, + 19.939321517944336, + 20.05548667907715 + ], + "yaxis": "y" + } + ], + "layout": { + "coloraxis": { + "colorbar": { + "title": { + "text": "labels" + } + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "legend": { + "orientation": "h", + "title": { + "text": "pillar" + }, + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "principal component of text embeddings" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "secondary component of text embeddings" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "topic_extractorer.view_clusters()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 As I said the poor people they are poor becaus...\n", + "11 Unemployment\n", + "12 Poverty \n", + "13 Neglecting Agriculture fields \n", + "Name: C2, dtype: object\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/3078728154.py:2: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n" + ] + } + ], + "source": [ + "raw_data = pd.read_csv('Iraq Qual Analyses.csv').loc[:,:'P4.2']\n", + "responses = raw_data.C2.str.replace(',','').str.replace('.','')\n", + "print(responses[10:14])\n", + "word_list = responses[10:14].str.split(\" \").explode()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "sample = topic_extractorer.pre_trained.word.sample(200)\n", + "sims = np.array([wv.similarity(word1,word2) for i,word1 in enumerate(sample[:-1]) for word2 in sample[i+1:]])\n", + "sims.sort()\n", + "median_sim = sims[sims.shape[0]//2]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Prompt', 'C1', 'C2', 'C3', 'P5.2', 'P3.2', 'P1', 'C7', 'C8', 'C9',\n", + " 'C1.2', 'C2.2', 'C3.2', 'C4', 'C4.2', 'C5', '5.2', 'C6', 'C6.2', 'C7.2',\n", + " 'C8.2', 'C9.2', 'C10', 'C10.2', 'P2', 'P3', 'P4', 'P5', 'P1.2', 'P2.2',\n", + " 'P4.2'],\n", + " dtype='object')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C1's mental model:\n", + " cluster strength examples \\\n", + "0 9 4.398752 [children] \n", + "\n", + " location \n", + "0 [0.012939453, 0.0016098022, -0.04321289, 0.177... \n", + "C2's mental model:\n", + " cluster strength examples \\\n", + "1 4 0.000000 [money] \n", + "2 18 0.000000 [agriculture] \n", + "3 27 0.000000 [unemployment] \n", + "0 -1 -0.093495 [grow, fields, neglecting] \n", + "\n", + " location \n", + "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n", + "3 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "0 [0.0133463545, 0.23860677, 0.060221355, -0.062... \n", + "C3's mental model:\n", + " cluster strength examples \\\n", + "1 32 1.759501 [economic] \n", + "0 31 0.000000 [social] \n", + "\n", + " location \n", + "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "0 [0.099121094, -0.09765625, -0.123535156, 0.163... \n", + "P5.2's mental model:\n", + " cluster strength examples \\\n", + "11 19 5.588203 [crop, crops, wheat] \n", + "3 2 4.638495 [received, receive] \n", + "6 8 1.838235 [chickens, ducks, geese] \n", + "1 0 1.759501 [experience] \n", + "2 1 1.759501 [care] \n", + "7 11 1.759501 [buy] \n", + "9 17 1.759501 [rain] \n", + "13 33 1.759501 [financial] \n", + "10 18 1.759501 [agriculture] \n", + "4 3 0.000000 [medications] \n", + "5 4 0.000000 [money] \n", + "8 14 0.000000 [water] \n", + "12 25 0.000000 [job] \n", + "0 -1 -2.185442 [mine, surprise, servants] \n", + "\n", + " location \n", + "11 [0.13769531, 0.24978298, -0.03390842, 0.280870... \n", + "3 [0.056274414, -0.08089193, -0.13297527, -0.182... \n", + "6 [0.020507812, 0.18432617, -0.30004883, 0.13378... \n", + "1 [0.037841797, -0.060058594, -0.05810547, -0.15... \n", + "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n", + "7 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n", + "9 [-0.05102539, 0.045898438, -0.2734375, -0.2597... \n", + "13 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n", + "10 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n", + "4 [-0.18066406, 0.16796875, -0.16992188, 0.22363... \n", + "5 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "8 [-0.15136719, 0.13671875, 0.11669922, -0.17871... \n", + "12 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "0 [0.0067859357, 0.074561484, 0.043162655, 0.031... \n", + "P3.2's mental model:\n", + " cluster strength examples \\\n", + "1 0 1.759501 [opportunities] \n", + "4 25 1.759501 [job] \n", + "6 29 1.759501 [market] \n", + "2 7 1.014648 [geography, geographic] \n", + "3 11 0.000000 [buy] \n", + "5 27 0.000000 [unemployment] \n", + "0 -1 -0.027927 [connected, situation, surroundings] \n", + "\n", + " location \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "6 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n", + "2 [0.06036377, -0.033958435, -0.033325195, 0.248... \n", + "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n", + "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "0 [0.037760418, 0.038523357, -0.09503852, 0.0713... \n", + "P1's mental model:\n", + " cluster strength examples \\\n", + "1 27 1.759501 [unemployment] \n", + "0 -1 1.148143 [society, unfair] \n", + "\n", + " location \n", + "1 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "0 [-0.13769531, 0.036376953, -0.11740112, -0.020... \n", + "C7's mental model:\n", + " cluster strength examples \\\n", + "0 6 0.0 [disability] \n", + "1 25 0.0 [job] \n", + "\n", + " location \n", + "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n", + "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "C8's mental model:\n", + " cluster strength examples \\\n", + "1 0 1.759501 [opportunities] \n", + "2 25 1.759501 [job] \n", + "0 -1 0.153353 [stay, secure, fate] \n", + "\n", + " location \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "0 [0.038635254, 0.014607747, -0.10839844, 0.0579... \n", + "C9's mental model:\n", + " cluster strength examples \\\n", + "0 25 1.759501 [job] \n", + "\n", + " location \n", + "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C1.2's mental model:\n", + " cluster strength examples \\\n", + "1 0 2.639251 [opportunity] \n", + "3 25 2.639251 [job] \n", + "2 18 2.384640 [agricultural, agriculture] \n", + "5 32 1.759501 [economic] \n", + "0 -1 0.191559 [preparing, economics, sectors] \n", + "4 29 0.000000 [market] \n", + "6 33 0.000000 [financial] \n", + "\n", + " location \n", + "1 [-0.025634766, 0.20410156, 0.044189453, -0.007... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "2 [-0.060872395, 0.06437174, -0.025227865, 0.007... \n", + "5 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "0 [-0.0045700073, 0.09932709, 0.11424255, 0.0610... \n", + "4 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n", + "6 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n", + "C2.2's mental model:\n", + " cluster strength examples \\\n", + "3 33 3.519001 [financial] \n", + "1 12 0.000000 [crisis] \n", + "2 18 0.000000 [agriculture] \n", + "0 -1 -0.107507 [states, eats, available] \n", + "\n", + " location \n", + "3 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n", + "1 [0.11669922, 0.03466797, -0.11816406, 0.4375, ... \n", + "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n", + "0 [-0.032714844, 0.068359375, -0.06858317, 0.103... \n", + "C3.2's mental model:\n", + " cluster strength examples \\\n", + "3 25 3.519001 [job] \n", + "1 0 0.000000 [opportunities] \n", + "2 7 0.000000 [geographically] \n", + "0 -1 -0.049945 [shares, profits, resources] \n", + "\n", + " location \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [-0.107910156, -0.21875, -0.13085938, 0.089355... \n", + "0 [0.03427476, 0.031355638, 0.033590462, 0.10567... \n", + "C4's mental model:\n", + " cluster strength examples \\\n", + "1 4 1.759501 [money] \n", + "6 26 1.759501 [jobless] \n", + "2 7 1.501686 [geographic, geographical] \n", + "0 -1 1.020174 [location, environment, achieve] \n", + "3 11 0.000000 [buy] \n", + "4 17 0.000000 [weather] \n", + "5 25 0.000000 [job] \n", + "\n", + " location \n", + "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "6 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n", + "2 [0.0032958984, 0.0063476562, -0.042663574, 0.1... \n", + "0 [-0.0859375, 0.083789065, -0.0803711, 0.026171... \n", + "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n", + "4 [-0.18652344, 0.16796875, -0.30273438, -0.0559... \n", + "5 [-0.014587402, -0.048339844, -0.13671875, -0.1... " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "C4.2's mental model:\n", + " cluster strength examples \\\n", + "1 31 0.000000 [social] \n", + "0 -1 -0.001679 [cultivation, aspect] \n", + "\n", + " location \n", + "1 [0.099121094, -0.09765625, -0.123535156, 0.163... \n", + "0 [0.22607422, 0.1387024, 0.028686523, 0.0827331... \n", + "C5's mental model:\n", + " cluster strength examples \\\n", + "1 0 1.759501 [opportunities] \n", + "3 25 1.759501 [job] \n", + "2 7 0.000000 [geographical] \n", + "0 -1 -0.437343 [himself, border, location] \n", + "\n", + " location \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "2 [0.022583008, 0.07861328, 0.015258789, 0.15039... \n", + "0 [0.013224284, 0.08122762, 0.061604816, 0.05407... \n", + "5.2's mental model:\n", + " cluster strength examples \\\n", + "0 25 2.639251 [job] \n", + "\n", + " location \n", + "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "C6's mental model:\n", + " cluster strength examples \\\n", + "0 6 0.906088 [disabled, disability] \n", + "\n", + " location \n", + "0 [0.15576172, -0.06085205, -0.15185547, 0.11047... \n", + "C6.2's mental model:\n", + " cluster strength examples \\\n", + "2 1 1.759501 [care] \n", + "1 0 0.000000 [opportunities] \n", + "3 25 0.000000 [job] \n", + "4 27 0.000000 [unemployment] \n", + "0 -1 -0.285983 [designated, body, cases] \n", + "\n", + " location \n", + "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "4 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "0 [-0.025497437, 0.045959473, 0.14416504, 0.1986... \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C7.2's mental model:\n", + " cluster strength examples \\\n", + "4 30 1.759501 [political] \n", + "3 25 1.759501 [job] \n", + "1 0 0.000000 [opportunities] \n", + "2 20 0.000000 [disaster] \n", + "0 -1 -0.396100 [provider, manpower, active] \n", + "\n", + " location \n", + "4 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [0.23339844, -0.0006713867, -0.050048828, 0.18... \n", + "0 [-0.06995985, 0.06215922, 0.015842438, 0.06583... \n", + "C8.2's mental model:\n", + " cluster strength examples \\\n", + "2 26 1.759501 [jobless] \n", + "0 -1 0.000000 [closed] \n", + "1 7 0.000000 [geography] \n", + "\n", + " location \n", + "2 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n", + "0 [-0.041015625, 0.016723633, 0.21484375, 0.1298... \n", + "1 [0.13671875, -0.0019989014, 0.033935547, 0.296... \n", + "C9.2's mental model:\n", + " cluster strength examples \\\n", + "1 6 0.000000 [disability] \n", + "0 -1 -0.079731 [achieve, suitable, laziness] \n", + "\n", + " location \n", + "1 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n", + "0 [-0.11816406, -0.007975261, -0.07828776, 0.149... \n", + "C10's mental model:\n", + " cluster strength examples \\\n", + "3 4 2.639251 [money] \n", + "4 25 1.759501 [job] \n", + "1 0 0.000000 [opportunities] \n", + "2 2 0.000000 [obtain] \n", + "0 -1 -0.224280 [vacant, efficacious, filled] \n", + "\n", + " location \n", + "3 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [-0.079589844, -0.15234375, -0.42382812, -0.13... \n", + "0 [0.0040893555, 0.08319092, 0.07827759, 0.03198... \n", + "C10.2's mental model:\n", + " cluster strength examples \\\n", + "2 31 2.639251 [social] \n", + "1 30 1.759501 [political] \n", + "3 32 1.759501 [economic] \n", + "0 -1 0.902344 [connections, educational] \n", + "\n", + " location \n", + "2 [0.099121094, -0.09765625, -0.123535156, 0.163... \n", + "1 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n", + "3 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "0 [-0.119140625, 0.0053710938, -0.0052083335, 0.... \n", + "P2's mental model:\n", + " cluster strength examples \\\n", + "1 1 2.639251 [health] \n", + "3 25 1.759501 [job] \n", + "0 -1 0.000000 [conditions] \n", + "2 2 0.000000 [receiving] \n", + "\n", + " location \n", + "1 [-0.07421875, 0.11279297, 0.09472656, 0.071777... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "0 [-0.27539062, 0.21386719, 0.0390625, 0.1088867... \n", + "2 [0.095703125, -0.015991211, -0.18359375, -0.09... \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "P3's mental model:\n", + " cluster strength examples \\\n", + "0 -1 0.307454 [unavailability, response, items] \n", + "1 10 0.000000 [salary] \n", + "2 29 0.000000 [markets] \n", + "\n", + " location \n", + "0 [-0.04616928, 0.2263794, -0.121520996, 0.08703... \n", + "1 [0.3984375, -0.056152344, -0.16894531, 0.18359... \n", + "2 [-0.052001953, -0.08544922, -0.13574219, 0.106... \n", + "P4's mental model:\n", + " cluster strength examples \\\n", + "0 6 1.759501 [disability] \n", + "1 32 1.759501 [economic] \n", + "\n", + " location \n", + "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n", + "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "P5's mental model:\n", + " cluster strength examples \\\n", + "0 0 1.759501 [opportunities] \n", + "1 25 1.759501 [job] \n", + "\n", + " location \n", + "0 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "P1.2's mental model:\n", + " cluster strength examples \\\n", + "3 25 1.759501 [job] \n", + "1 0 1.084699 [opportunities, opportunity] \n", + "0 -1 0.232175 [active, borders, opened] \n", + "2 18 0.000000 [agricultural] \n", + "\n", + " location \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.1348877, 0.24072266, -0.017700195, -0.0401... \n", + "0 [0.01599884, 0.118759155, 0.024902344, -0.0052... \n", + "2 [-0.07421875, 0.107421875, -0.016357422, 0.024... \n", + "P2.2's mental model:\n", + " cluster strength examples \\\n", + "1 25 3.519001 [job] \n", + "0 -1 0.399209 [mediation, organizations, sustenance] \n", + "\n", + " location \n", + "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "0 [-0.002090454, 0.02607727, -0.022460938, 0.057... \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n", + "C:\\Users\\MAX_VA~1\\AppData\\Local\\Temp/ipykernel_3036/237962038.py:3: FutureWarning:\n", + "\n", + "The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "P4.2's mental model:\n", + " cluster strength examples \\\n", + "5 27 5.278502 [unemployment] \n", + "4 10 4.055685 [salaries, salary] \n", + "2 4 1.759501 [money] \n", + "1 2 0.000000 [receive] \n", + "3 8 0.000000 [birds] \n", + "0 -1 -0.143925 [society, skills, seriously] \n", + "\n", + " location \n", + "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "4 [0.24140625, 0.009472656, -0.020996094, 0.2265... \n", + "2 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "1 [0.052001953, -0.22167969, -0.21191406, 0.0996... \n", + "3 [0.07324219, 0.18261719, -0.33984375, -0.02404... \n", + "0 [-0.05913086, 0.08886719, -0.105773926, 0.0845... \n" + ] + } + ], + "source": [ + "mental_models = {}\n", + "for participant in raw_data.columns[1:]:\n", + " responses = raw_data[participant].str.replace(',','').str.replace('.','').str.lower()\n", + " word_list = responses[10:14].str.split(\" \").explode()\n", + " stop_words = [(\"don't\",.34),('reasons',.34),('foods',.6),('unable',.3),('citizens',.4),('my',.3),('hunger',.3),('cause',.2),('factories',.3),('leads',.3),('expired',.3),('living',.4),('low',.2)]\n", + " for stop_word, similarity in stop_words:\n", + " word_list = word_list[[wv.similarity(word.lower(), stop_word)< similarity if word in wv else True for word in word_list]]\n", + " \n", + " overall_clusters = topic_extractorer.get_current_vecs()\n", + " overall_clusters = overall_clusters[overall_clusters.pillar.isna()]\n", + " mental_model = []\n", + " for word in word_list:\n", + " if word in overall_clusters.word.to_list():\n", + " mental_model.append(overall_clusters[overall_clusters.word == word].iloc[0])\n", + " mental_model = pd.DataFrame(mental_model)\n", + " \n", + " coalescence = []\n", + " for label in sorted(mental_model.labels.unique()):\n", + " words = mental_model[mental_model.labels == label].word.to_numpy()\n", + " location = mental_model[mental_model.labels == label].loc[:,\"v0\":\"v299\"].mean(axis=0).to_numpy()\n", + " tally = 0\n", + " num_entries = 0\n", + " for i,word1 in enumerate(words[:-1]):\n", + " for word2 in words[i+1:]:\n", + " #print(word1,word2)\n", + " tally += wv.similarity(word1,word2) - median_sim # adjusting for the median similarity between randomly picked words\n", + " num_entries += 1\n", + " if num_entries == 0: num_entries += 1\n", + " coalescence.append({'cluster':label,'strength':tally/num_entries*len(words), 'examples': list(set(words))[:3],'location':location})\n", + " try:\n", + " coalescence_df = pd.DataFrame(coalescence).sort_values(by='strength',ascending=False)\n", + " print(f\"{participant}'s mental model:\")\n", + " print(coalescence_df)\n", + " mental_models[participant] = coalescence_df\n", + " except Exception:\n", + " print(f\"{participant}'s mental model was not detectable\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7698261\n" + ] + }, + { + "data": { + "text/plain": [ + "[[(\"['money'] -> ['crop', 'crops', 'wheat']\", 0.13513015),\n", + " (\"['money'] -> ['received', 'receive']\", 0.0568099),\n", + " (\"['money'] -> ['chickens', 'ducks', 'geese']\", 0.15346186),\n", + " (\"['money'] -> ['experience']\", 0.11808307),\n", + " (\"['money'] -> ['care']\", 0.18036431),\n", + " (\"['money'] -> ['buy']\", 0.31760776),\n", + " (\"['money'] -> ['rain']\", 0.11101911),\n", + " (\"['money'] -> ['financial']\", 0.27252272),\n", + " (\"['money'] -> ['agriculture']\", 0.110064715),\n", + " (\"['money'] -> ['medications']\", 0.17193604),\n", + " (\"['money'] -> ['money']\", 1.0),\n", + " (\"['money'] -> ['water']\", 0.24552587),\n", + " (\"['money'] -> ['job']\", 0.24213025)],\n", + " [(\"['agriculture'] -> ['crop', 'crops', 'wheat']\", 0.5422642),\n", + " (\"['agriculture'] -> ['received', 'receive']\", 0.044078436),\n", + " (\"['agriculture'] -> ['chickens', 'ducks', 'geese']\", 0.20737456),\n", + " (\"['agriculture'] -> ['experience']\", 0.0825472),\n", + " (\"['agriculture'] -> ['care']\", 0.120561525),\n", + " (\"['agriculture'] -> ['buy']\", -0.025091732),\n", + " (\"['agriculture'] -> ['rain']\", 0.13316438),\n", + " (\"['agriculture'] -> ['financial']\", 0.18856841),\n", + " (\"['agriculture'] -> ['agriculture']\", 1.0000001),\n", + " (\"['agriculture'] -> ['medications']\", 0.09028715),\n", + " (\"['agriculture'] -> ['money']\", 0.110064715),\n", + " (\"['agriculture'] -> ['water']\", 0.2595156),\n", + " (\"['agriculture'] -> ['job']\", 0.16164827)],\n", + " [(\"['unemployment'] -> ['crop', 'crops', 'wheat']\", 0.17200448),\n", + " (\"['unemployment'] -> ['received', 'receive']\", 0.044345416),\n", + " (\"['unemployment'] -> ['chickens', 'ducks', 'geese']\", 0.09846855),\n", + " (\"['unemployment'] -> ['experience']\", 0.038453504),\n", + " (\"['unemployment'] -> ['care']\", 0.10824404),\n", + " (\"['unemployment'] -> ['buy']\", 0.00931779),\n", + " (\"['unemployment'] -> ['rain']\", 0.13327138),\n", + " (\"['unemployment'] -> ['financial']\", 0.22514835),\n", + " (\"['unemployment'] -> ['agriculture']\", 0.26194343),\n", + " (\"['unemployment'] -> ['medications']\", 0.14619951),\n", + " (\"['unemployment'] -> ['money']\", 0.14758553),\n", + " (\"['unemployment'] -> ['water']\", 0.14522398),\n", + " (\"['unemployment'] -> ['job']\", 0.30947816)]]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mm2 = mental_models['P5.2'].query('cluster>-1')\n", + "mm1 = mental_models['C2'].query('cluster>-1')\n", + "\n", + "def compare_mental_models(mm1,mm2):\n", + " mm1_filtered = mm1.query('cluster>-1')\n", + " mm2_filtered = mm2.query('cluster>-1')\n", + " if mm1_filtered.empty or mm2_filtered.empty:\n", + " return 0\n", + " return np.mean([max([np.dot(row1.location,row2.location)/np.linalg.norm(row1.location)/np.linalg.norm(row2.location) for _,row2 in mm2_filtered.iterrows()]) for _,row1 in mm1_filtered.iterrows()])\n", + "\n", + "print(compare_mental_models(mm1,mm2))\n", + "[[(f\"{row1.examples} -> {row2.examples}\",np.dot(row1.location,row2.location)/np.linalg.norm(row1.location)/np.linalg.norm(row2.location)) for _,row2 in mm2.iterrows()] for _,row1 in mm1.iterrows()]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "results = []\n", + "for name1, mm1 in mental_models.items():\n", + " row = {'name':name1}\n", + " for name2, mm2 in mental_models.items():\n", + " row[name2] = min(compare_mental_models(mm1, mm2),compare_mental_models(mm2, mm1))\n", + " results.append(row)\n", + " \n", + "similarity_df = pd.DataFrame(results).set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C2 and C2.2 agree\n", + "C2 and P4.2 agree\n", + "C3 and C4.2 agree\n", + "C3 and C10.2 agree\n", + "C3 and P4 agree\n", + "P3.2 and C1.2 agree\n", + "P3.2 and C3.2 agree\n", + "P3.2 and C4 agree\n", + "P3.2 and C5 agree\n", + "P3.2 and C6.2 agree\n", + "P3.2 and C10 agree\n", + "C7 and C8 agree\n", + "C7 and C9 agree\n", + "C7 and 5.2 agree\n", + "C7 and C6 agree\n", + "C7 and C9.2 agree\n", + "C7 and P2 agree\n", + "C7 and P5 agree\n", + "C7 and P2.2 agree\n", + "C7.2 and P1.2 agree\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'C1'},\n", + " {'C2', 'C2.2', 'P4.2'},\n", + " {'C10.2', 'C3', 'C4.2', 'P4'},\n", + " {'P5.2'},\n", + " {'C1.2', 'C10', 'C3.2', 'C4', 'C5', 'C6.2', 'P3.2'},\n", + " {'P1'},\n", + " {'5.2', 'C6', 'C7', 'C8', 'C9', 'C9.2', 'P2', 'P2.2', 'P5'},\n", + " {'C7.2', 'P1.2'},\n", + " {'C8.2'},\n", + " {'P3'}]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "name_set = set(pd.DataFrame(results).name)\n", + "groups = []\n", + "for name1, row in similarity_df.iterrows():\n", + " if name1 in name_set:\n", + " group = {name1}\n", + " for name2 in row.index:\n", + " if row[name2]>.5 and name1 != name2 and name2 in name_set:\n", + " print(f'{name1} and {name2} agree')\n", + " group.add(name2)\n", + " name_set.remove(name2)\n", + " name_set.remove(name1)\n", + " groups.append(group)\n", + "groups" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "group 0:\n", + "\t C1\n", + " cluster strength examples \\\n", + "0 9 4.398752 [children] \n", + "\n", + " location \n", + "0 [0.012939453, 0.0016098022, -0.04321289, 0.177... \n", + "group 1:\n", + "\t C2.2\n", + " cluster strength examples \\\n", + "3 33 3.519001 [financial] \n", + "1 12 0.000000 [crisis] \n", + "2 18 0.000000 [agriculture] \n", + "\n", + " location \n", + "3 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n", + "1 [0.11669922, 0.03466797, -0.11816406, 0.4375, ... \n", + "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n", + "\t P4.2\n", + " cluster strength examples \\\n", + "5 27 5.278502 [unemployment] \n", + "4 10 4.055685 [salaries, salary] \n", + "2 4 1.759501 [money] \n", + "1 2 0.000000 [receive] \n", + "3 8 0.000000 [birds] \n", + "\n", + " location \n", + "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "4 [0.24140625, 0.009472656, -0.020996094, 0.2265... \n", + "2 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "1 [0.052001953, -0.22167969, -0.21191406, 0.0996... \n", + "3 [0.07324219, 0.18261719, -0.33984375, -0.02404... \n", + "\t C2\n", + " cluster strength examples \\\n", + "1 4 0.0 [money] \n", + "2 18 0.0 [agriculture] \n", + "3 27 0.0 [unemployment] \n", + "\n", + " location \n", + "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "2 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n", + "3 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "group 2:\n", + "\t C4.2\n", + " cluster strength examples \\\n", + "1 31 0.0 [social] \n", + "\n", + " location \n", + "1 [0.099121094, -0.09765625, -0.123535156, 0.163... \n", + "\t C3\n", + " cluster strength examples \\\n", + "1 32 1.759501 [economic] \n", + "0 31 0.000000 [social] \n", + "\n", + " location \n", + "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "0 [0.099121094, -0.09765625, -0.123535156, 0.163... \n", + "\t C10.2\n", + " cluster strength examples \\\n", + "2 31 2.639251 [social] \n", + "1 30 1.759501 [political] \n", + "3 32 1.759501 [economic] \n", + "\n", + " location \n", + "2 [0.099121094, -0.09765625, -0.123535156, 0.163... \n", + "1 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n", + "3 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "\t P4\n", + " cluster strength examples \\\n", + "0 6 1.759501 [disability] \n", + "1 32 1.759501 [economic] \n", + "\n", + " location \n", + "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n", + "1 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "group 3:\n", + "\t P5.2\n", + " cluster strength examples \\\n", + "11 19 5.588203 [crop, crops, wheat] \n", + "3 2 4.638495 [received, receive] \n", + "6 8 1.838235 [chickens, ducks, geese] \n", + "1 0 1.759501 [experience] \n", + "2 1 1.759501 [care] \n", + "7 11 1.759501 [buy] \n", + "9 17 1.759501 [rain] \n", + "13 33 1.759501 [financial] \n", + "10 18 1.759501 [agriculture] \n", + "4 3 0.000000 [medications] \n", + "5 4 0.000000 [money] \n", + "8 14 0.000000 [water] \n", + "12 25 0.000000 [job] \n", + "\n", + " location \n", + "11 [0.13769531, 0.24978298, -0.03390842, 0.280870... \n", + "3 [0.056274414, -0.08089193, -0.13297527, -0.182... \n", + "6 [0.020507812, 0.18432617, -0.30004883, 0.13378... \n", + "1 [0.037841797, -0.060058594, -0.05810547, -0.15... \n", + "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n", + "7 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n", + "9 [-0.05102539, 0.045898438, -0.2734375, -0.2597... \n", + "13 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n", + "10 [-0.034179688, -0.021728516, -0.04296875, -0.0... \n", + "4 [-0.18066406, 0.16796875, -0.16992188, 0.22363... \n", + "5 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "8 [-0.15136719, 0.13671875, 0.11669922, -0.17871... \n", + "12 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "group 4:\n", + "\t C5\n", + " cluster strength examples \\\n", + "1 0 1.759501 [opportunities] \n", + "3 25 1.759501 [job] \n", + "2 7 0.000000 [geographical] \n", + "\n", + " location \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "2 [0.022583008, 0.07861328, 0.015258789, 0.15039... \n", + "\t C1.2\n", + " cluster strength examples \\\n", + "1 0 2.639251 [opportunity] \n", + "3 25 2.639251 [job] \n", + "2 18 2.384640 [agricultural, agriculture] \n", + "5 32 1.759501 [economic] \n", + "4 29 0.000000 [market] \n", + "6 33 0.000000 [financial] \n", + "\n", + " location \n", + "1 [-0.025634766, 0.20410156, 0.044189453, -0.007... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "2 [-0.060872395, 0.06437174, -0.025227865, 0.007... \n", + "5 [0.051757812, 0.003753662, -0.125, 0.032226562... \n", + "4 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n", + "6 [-0.09277344, -0.024414062, -0.14550781, -0.01... \n", + "\t P3.2\n", + " cluster strength examples \\\n", + "1 0 1.759501 [opportunities] \n", + "4 25 1.759501 [job] \n", + "6 29 1.759501 [market] \n", + "2 7 1.014648 [geography, geographic] \n", + "3 11 0.000000 [buy] \n", + "5 27 0.000000 [unemployment] \n", + "\n", + " location \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "6 [-0.15625, -0.087890625, -0.22949219, -0.23144... \n", + "2 [0.06036377, -0.033958435, -0.033325195, 0.248... \n", + "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n", + "5 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "\t C3.2\n", + " cluster strength examples \\\n", + "3 25 3.519001 [job] \n", + "1 0 0.000000 [opportunities] \n", + "2 7 0.000000 [geographically] \n", + "\n", + " location \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [-0.107910156, -0.21875, -0.13085938, 0.089355... \n", + "\t C4\n", + " cluster strength examples \\\n", + "1 4 1.759501 [money] \n", + "6 26 1.759501 [jobless] \n", + "2 7 1.501686 [geographic, geographical] \n", + "3 11 0.000000 [buy] \n", + "4 17 0.000000 [weather] \n", + "5 25 0.000000 [job] \n", + "\n", + " location \n", + "1 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "6 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n", + "2 [0.0032958984, 0.0063476562, -0.042663574, 0.1... \n", + "3 [0.060302734, -0.17871094, -0.09716797, 0.2753... \n", + "4 [-0.18652344, 0.16796875, -0.30273438, -0.0559... \n", + "5 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "\t C10\n", + " cluster strength examples \\\n", + "3 4 2.639251 [money] \n", + "4 25 1.759501 [job] \n", + "1 0 0.000000 [opportunities] \n", + "2 2 0.000000 [obtain] \n", + "\n", + " location \n", + "3 [0.15820312, 0.05126953, 0.06640625, 0.2109375... \n", + "4 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [-0.079589844, -0.15234375, -0.42382812, -0.13... \n", + "\t C6.2\n", + " cluster strength examples \\\n", + "2 1 1.759501 [care] \n", + "1 0 0.000000 [opportunities] \n", + "3 25 0.000000 [job] \n", + "4 27 0.000000 [unemployment] \n", + "\n", + " location \n", + "2 [-0.19726562, 0.25, 0.052246094, 0.029418945, ... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "4 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "group 5:\n", + "\t P1\n", + " cluster strength examples \\\n", + "1 27 1.759501 [unemployment] \n", + "\n", + " location \n", + "1 [0.3984375, -0.045166016, -0.265625, 0.1982421... \n", + "group 6:\n", + "\t C8\n", + " cluster strength examples \\\n", + "1 0 1.759501 [opportunities] \n", + "2 25 1.759501 [job] \n", + "\n", + " location \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "\t C9\n", + " cluster strength examples \\\n", + "0 25 1.759501 [job] \n", + "\n", + " location \n", + "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "\t P2.2\n", + " cluster strength examples \\\n", + "1 25 3.519001 [job] \n", + "\n", + " location \n", + "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "\t C7\n", + " cluster strength examples \\\n", + "0 6 0.0 [disability] \n", + "1 25 0.0 [job] \n", + "\n", + " location \n", + "0 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n", + "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "\t 5.2\n", + " cluster strength examples \\\n", + "0 25 2.639251 [job] \n", + "\n", + " location \n", + "0 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "\t P5\n", + " cluster strength examples \\\n", + "0 0 1.759501 [opportunities] \n", + "1 25 1.759501 [job] \n", + "\n", + " location \n", + "0 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "1 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "\t C9.2\n", + " cluster strength examples \\\n", + "1 6 0.0 [disability] \n", + "\n", + " location \n", + "1 [0.19726562, -0.1484375, -0.15820312, 0.1875, ... \n", + "\t P2\n", + " cluster strength examples \\\n", + "1 1 2.639251 [health] \n", + "3 25 1.759501 [job] \n", + "2 2 0.000000 [receiving] \n", + "\n", + " location \n", + "1 [-0.07421875, 0.11279297, 0.09472656, 0.071777... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "2 [0.095703125, -0.015991211, -0.18359375, -0.09... \n", + "\t C6\n", + " cluster strength examples \\\n", + "0 6 0.906088 [disabled, disability] \n", + "\n", + " location \n", + "0 [0.15576172, -0.06085205, -0.15185547, 0.11047... \n", + "group 7:\n", + "\t P1.2\n", + " cluster strength examples \\\n", + "3 25 1.759501 [job] \n", + "1 0 1.084699 [opportunities, opportunity] \n", + "2 18 0.000000 [agricultural] \n", + "\n", + " location \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.1348877, 0.24072266, -0.017700195, -0.0401... \n", + "2 [-0.07421875, 0.107421875, -0.016357422, 0.024... \n", + "\t C7.2\n", + " cluster strength examples \\\n", + "4 30 1.759501 [political] \n", + "3 25 1.759501 [job] \n", + "1 0 0.000000 [opportunities] \n", + "2 20 0.000000 [disaster] \n", + "\n", + " location \n", + "4 [-0.028686523, 0.029296875, -0.0625, 0.3535156... \n", + "3 [-0.014587402, -0.048339844, -0.13671875, -0.1... \n", + "1 [-0.24414062, 0.27734375, -0.079589844, -0.073... \n", + "2 [0.23339844, -0.0006713867, -0.050048828, 0.18... \n", + "group 8:\n", + "\t C8.2\n", + " cluster strength examples \\\n", + "2 26 1.759501 [jobless] \n", + "1 7 0.000000 [geography] \n", + "\n", + " location \n", + "2 [0.49609375, 0.026000977, -0.104003906, 0.2158... \n", + "1 [0.13671875, -0.0019989014, 0.033935547, 0.296... \n", + "group 9:\n", + "\t P3\n", + " cluster strength examples \\\n", + "1 10 0.0 [salary] \n", + "2 29 0.0 [markets] \n", + "\n", + " location \n", + "1 [0.3984375, -0.056152344, -0.16894531, 0.18359... \n", + "2 [-0.052001953, -0.08544922, -0.13574219, 0.106... \n" + ] + } + ], + "source": [ + "for i, group in enumerate(groups):\n", + " print(f'group {i}:')\n", + " for item in group:\n", + " if len(mental_models[item].query('cluster>-1').index)>0:\n", + " print(f'\\t {item}')\n", + " print(mental_models[item].query('cluster>-1'))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.41913477" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compare_mental_models(mental_models['C1.2'],mental_models['C10'])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'agriculture',\n", + " 'cost',\n", + " 'economic',\n", + " 'family planning',\n", + " 'food availability',\n", + " 'food management',\n", + " 'geographic ',\n", + " 'government',\n", + " 'health',\n", + " 'natural causes',\n", + " 'personal character',\n", + " 'poverty',\n", + " 'social reasons',\n", + " 'unemployment',\n", + " 'unfair society'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qual_codes = pd.read_csv('Unprompted Causes Codes.csv').loc[:,'C1':'P4.2']\n", + "topicsLU = {}\n", + "for participant in qual_codes.columns:\n", + " topic_list = qual_codes[participant].dropna().to_list()\n", + " topicsLU[participant] = topic_list\n", + "unique_topics = set()\n", + "for topics in topicsLU.values():\n", + " for topic in topics:\n", + " unique_topics.add(topic.strip().lower().replace('.','').replace('isolatin','isolation').replace('isolation','').replace('geogrpahic','geographic').\n", + " replace('unemployement','unemployment').replace('/poverty','').replace('neglecting ','').replace('high cost of food','cost').replace(' conditions',''))\n", + "unique_topics" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "unprompted_clusters = topic_extractorer.get_current_vecs()[topic_extractorer.get_current_vecs().pillar.isna()]\n", + "# for label in sorted(unprompted_clusters.labels.unique()):\n", + "# words = unprompted_clusters.query(f'labels=={label}').word.unique()[:3]\n", + "# topic_location = unprompted_clusters.loc[:,\"v0\":\"v299\"].mean(axis=0).to_numpy()\n", + "# for topic in unique_topics:\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "latent_vars_map = {\n", + " 'protein supply':['meat','animals','chicken','beef','pork','poultry','cow','pig','protein','duck'],\n", + " 'energy supply' :['corn','wheat','barley','grain','oats','harvest','crops','agriculture'],\n", + " 'food supply': ['meat','animals','chicken','beef','pork','poultry','cow','pig','duck','corn','wheat','barley','grain','oats','harvest','crops','agriculture'],\n", + " 'political stability':['riots', 'protests', 'war', 'crisis', 'instability', 'unrest','disruption','coup','assasination','politics','government'],\n", + " 'armed conflict': ['war','riots','militia','terrorist','conflict','armed'],\n", + " 'floods': ['flood','floods','rain','weather'],\n", + " 'droughts':['dry','drought','rain','droughts','arid'],\n", + " 'earthquakes':['earthquakes','tremors'],\n", + " 'cyclones': ['hurricane','typhoon','cyclone'],\n", + " 'disease' : ['illness','sick','flu','vaccine','malaria','aids','disease','medications','meds','medicine'],\n", + " 'income': ['income','job','joblessness','unemployment','jobs','money','wage'],\n", + " 'prices': ['prices','expensive','cost','costly'],\n", + " 'climate' :['rain','weather','season','dry','arid'],\n", + " 'inclusivity' : ['disabled','illness','disability'],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wordpillarv0v1v2v3v4v5v6v7...v292v293v294v295v296v297v298v299maglabels
323moneyNaN0.1582030.0512700.0664060.2109380.035156-0.004669-0.004456-0.082031...-0.060303-0.1259770.0629880.0490720.124023-0.080566-0.056396-0.0786136.0318054
324moneyNaN0.1582030.0512700.0664060.2109380.035156-0.004669-0.004456-0.082031...-0.060303-0.1259770.0629880.0490720.124023-0.080566-0.056396-0.0786136.0318054
325moneyNaN0.1582030.0512700.0664060.2109380.035156-0.004669-0.004456-0.082031...-0.060303-0.1259770.0629880.0490720.124023-0.080566-0.056396-0.0786136.0318054
326moneyNaN0.1582030.0512700.0664060.2109380.035156-0.004669-0.004456-0.082031...-0.060303-0.1259770.0629880.0490720.124023-0.080566-0.056396-0.0786136.0318054
327moneyNaN0.1582030.0512700.0664060.2109380.035156-0.004669-0.004456-0.082031...-0.060303-0.1259770.0629880.0490720.124023-0.080566-0.056396-0.0786136.0318054
..................................................................
676unemploymentNaN0.398438-0.045166-0.2656250.1982420.398438-0.009338-0.166992-0.082031...-0.102539-0.0153810.0395510.314453-0.1796880.1640620.0588380.01257315.34095027
677joblessNaN0.4960940.026001-0.1040040.2158200.2285160.213867-0.227539-0.028320...-0.021729-0.2519530.0620120.078125-0.1572270.251953-0.0405270.02539115.45505626
678joblessNaN0.4960940.026001-0.1040040.2158200.2285160.213867-0.227539-0.028320...-0.021729-0.2519530.0620120.078125-0.1572270.251953-0.0405270.02539115.45505626
679joblessNaN0.4960940.026001-0.1040040.2158200.2285160.213867-0.227539-0.028320...-0.021729-0.2519530.0620120.078125-0.1572270.251953-0.0405270.02539115.45505626
680joblessNaN0.4960940.026001-0.1040040.2158200.2285160.213867-0.227539-0.028320...-0.021729-0.2519530.0620120.078125-0.1572270.251953-0.0405270.02539115.45505626
\n", + "

180 rows × 304 columns

\n", + "
" + ], + "text/plain": [ + " word pillar v0 v1 v2 v3 v4 \\\n", + "323 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n", + "324 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n", + "325 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n", + "326 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n", + "327 money NaN 0.158203 0.051270 0.066406 0.210938 0.035156 \n", + ".. ... ... ... ... ... ... ... \n", + "676 unemployment NaN 0.398438 -0.045166 -0.265625 0.198242 0.398438 \n", + "677 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n", + "678 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n", + "679 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n", + "680 jobless NaN 0.496094 0.026001 -0.104004 0.215820 0.228516 \n", + "\n", + " v5 v6 v7 ... v292 v293 v294 \\\n", + "323 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n", + "324 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n", + "325 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n", + "326 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n", + "327 -0.004669 -0.004456 -0.082031 ... -0.060303 -0.125977 0.062988 \n", + ".. ... ... ... ... ... ... ... \n", + "676 -0.009338 -0.166992 -0.082031 ... -0.102539 -0.015381 0.039551 \n", + "677 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n", + "678 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n", + "679 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n", + "680 0.213867 -0.227539 -0.028320 ... -0.021729 -0.251953 0.062012 \n", + "\n", + " v295 v296 v297 v298 v299 mag labels \n", + "323 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n", + "324 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n", + "325 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n", + "326 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n", + "327 0.049072 0.124023 -0.080566 -0.056396 -0.078613 6.031805 4 \n", + ".. ... ... ... ... ... ... ... \n", + "676 0.314453 -0.179688 0.164062 0.058838 0.012573 15.340950 27 \n", + "677 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n", + "678 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n", + "679 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n", + "680 0.078125 -0.157227 0.251953 -0.040527 0.025391 15.455056 26 \n", + "\n", + "[180 rows x 304 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unprompted_clusters.query('labels > -1')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
clusterstrengthexamples
182531.796455[job]
1012.258999[opportunity, opportunities, experience]
20279.715584[unemployment]
547.949114[money]
24327.065879[economic]
15186.520283[agricultural, agriculture]
25336.182644[financial]
16195.619564[crop, crops, wheat]
325.514252[receive, receiving, obtain]
10104.750502[salaries, salary]
664.445997[disabled, disability]
994.416175[children]
23314.416175[social]
774.202181[geographically, geography, geographic]
214.049020[health, care]
11113.532940[buy]
22303.532940[political]
19263.532940[jobless]
21293.066149[markets, market]
882.485127[chickens, turkeys, birds]
14171.787024[rain, weather]
13140.000000[water]
17200.000000[disaster]
12120.000000[crisis]
430.000000[medications]
0-1-5.975976[grow, economics, efficacious]
\n", + "
" + ], + "text/plain": [ + " cluster strength examples\n", + "18 25 31.796455 [job]\n", + "1 0 12.258999 [opportunity, opportunities, experience]\n", + "20 27 9.715584 [unemployment]\n", + "5 4 7.949114 [money]\n", + "24 32 7.065879 [economic]\n", + "15 18 6.520283 [agricultural, agriculture]\n", + "25 33 6.182644 [financial]\n", + "16 19 5.619564 [crop, crops, wheat]\n", + "3 2 5.514252 [receive, receiving, obtain]\n", + "10 10 4.750502 [salaries, salary]\n", + "6 6 4.445997 [disabled, disability]\n", + "9 9 4.416175 [children]\n", + "23 31 4.416175 [social]\n", + "7 7 4.202181 [geographically, geography, geographic]\n", + "2 1 4.049020 [health, care]\n", + "11 11 3.532940 [buy]\n", + "22 30 3.532940 [political]\n", + "19 26 3.532940 [jobless]\n", + "21 29 3.066149 [markets, market]\n", + "8 8 2.485127 [chickens, turkeys, birds]\n", + "14 17 1.787024 [rain, weather]\n", + "13 14 0.000000 [water]\n", + "17 20 0.000000 [disaster]\n", + "12 12 0.000000 [crisis]\n", + "4 3 0.000000 [medications]\n", + "0 -1 -5.975976 [grow, economics, efficacious]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample = topic_extractorer.pre_trained.word.sample(1000)\n", + "sims = np.array([wv.similarity(word1,word2) for i,word1 in enumerate(sample[:-1]) for word2 in sample[i+1:]])\n", + "sims.sort()\n", + "median_sim = sims[sims.shape[0]//2]\n", + "coalescence = []\n", + "for label in sorted(unprompted_clusters.labels.unique()):\n", + " words = unprompted_clusters[unprompted_clusters.labels == label].word.to_numpy()\n", + " tally = 0\n", + " num_entries = 0\n", + " for i,word1 in enumerate(words[:-1]):\n", + " for word2 in words[i+1:]:\n", + " #print(word1,word2)\n", + " tally += wv.similarity(word1,word2) - median_sim # adjusting for the median similarity between randomly picked words\n", + " num_entries += 1\n", + " if num_entries == 0: num_entries += 1\n", + " coalescence.append({'cluster':label,'strength':tally/num_entries*len(words), 'examples': list(set(words))[:3]})\n", + "coalescence_df = pd.DataFrame(coalescence).sort_values(by='strength',ascending=False)\n", + "coalescence_df" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
clusterstrengthexamplesprotein supplyenergy supplyfood supplypolitical stabilityarmed conflictfloodsdroughtsearthquakescyclonesdiseaseincomepricesclimateinclusivity
182531.796455[job]0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.3389950.0000000.0000000.032694
1012.258999[opportunity, opportunities, experience]0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0181040.0000000.0000000.000000
20279.715584[unemployment]0.0000000.0343210.0000000.0727930.0385210.0974610.0826640.0779370.0439210.0265230.4360220.0119580.0343130.134235
547.949114[money]0.0097900.0000000.0045930.0000000.0000000.0000000.0000000.0000000.0000000.0000000.2313980.1268730.0000000.000000
24327.065879[economic]0.0000000.0748150.0019910.1160530.0082620.0845140.0517720.0345590.0413710.0000000.2426110.0148310.0313050.000000
15186.520283[agricultural, agriculture]0.1684420.4031440.2863380.0122770.0000000.0903780.1644450.0000000.0034010.0000000.0884320.0000000.0706590.000000
25336.182644[financial]0.0000000.0000000.0000000.0342910.0000000.0000000.0000000.0000000.0311860.0000000.1161960.0000000.0000000.000000
16195.619564[crop, crops, wheat]0.1372340.5335020.3253880.0000000.0000000.1844790.2297970.0000000.0908990.0279290.0085720.0000000.1487710.000000
325.514252[receive, receiving, obtain]0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.006181
10104.750502[salaries, salary]0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.2629700.0527000.0000000.020607
664.445997[disabled, disability]0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0786020.0672760.0000000.0000000.586652
994.416175[children]0.0244090.0000000.0080600.0000000.0045050.0000000.0000000.0000000.0000000.0707800.0391740.0000000.0000000.171606
23314.416175[social]0.0000000.0000000.0000000.0435130.0031500.0000000.0000000.0000000.0000000.0000000.0685580.0000000.0000000.081211
774.202181[geographically, geography, geographic]0.0000000.0000000.0000000.0000000.0000000.0000000.0107060.0325510.0000000.0000000.0000000.0000000.0000000.000000
214.049020[health, care]0.0007110.0066120.0063180.0000000.0000000.0000000.0000000.0000000.0000000.1894840.0602310.0000000.0000000.204632
11113.532940[buy]0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.1830640.0000000.000000
22303.532940[political]0.0000000.0000000.0000000.1968030.1537360.0000000.0000000.0000000.0000000.0000000.0441110.0000000.0000000.000000
19263.532940[jobless]0.0000000.0275060.0000000.0328730.0527050.0918630.0377060.0444310.1016800.0073230.3555480.0000000.0115190.146293
21293.066149[markets, market]0.0000000.0319850.0000000.0324820.0000000.0000000.0000000.0000000.0000000.0000000.0193520.1106050.0000000.000000
882.485127[chickens, turkeys, birds]0.3421070.1232170.2577760.0000000.0000000.0108130.0093820.0016990.0159040.0435760.0000000.0000000.0044650.000000
14171.787024[rain, weather]0.0000000.0960770.0159080.0000000.0000000.5075420.3577320.0927320.2022580.0000000.0000000.0000000.4090190.000000
13140.000000[water]0.0508710.1005490.0759750.0000000.0000000.2401130.2500100.0117500.0247950.0308570.0027140.0009370.1715740.000000
17200.000000[disaster]0.0000000.0000000.0000000.1272370.0891800.2635810.1014910.2197860.3736700.0019860.0000000.0000000.0277110.010487
12120.000000[crisis]0.0000000.0022680.0000000.3065370.1619240.0962060.0807410.1214560.1042780.0273910.0457870.0000000.0000000.000000
430.000000[medications]0.0000000.0097890.0000000.0000000.0000000.0000000.0000000.0002160.0000000.3749360.0096040.0014130.0000000.129098
0-1-5.975976[grow, economics, efficacious]0.0000000.0118730.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0303790.0000000.000000
\n", + "
" + ], + "text/plain": [ + " cluster strength examples \\\n", + "18 25 31.796455 [job] \n", + "1 0 12.258999 [opportunity, opportunities, experience] \n", + "20 27 9.715584 [unemployment] \n", + "5 4 7.949114 [money] \n", + "24 32 7.065879 [economic] \n", + "15 18 6.520283 [agricultural, agriculture] \n", + "25 33 6.182644 [financial] \n", + "16 19 5.619564 [crop, crops, wheat] \n", + "3 2 5.514252 [receive, receiving, obtain] \n", + "10 10 4.750502 [salaries, salary] \n", + "6 6 4.445997 [disabled, disability] \n", + "9 9 4.416175 [children] \n", + "23 31 4.416175 [social] \n", + "7 7 4.202181 [geographically, geography, geographic] \n", + "2 1 4.049020 [health, care] \n", + "11 11 3.532940 [buy] \n", + "22 30 3.532940 [political] \n", + "19 26 3.532940 [jobless] \n", + "21 29 3.066149 [markets, market] \n", + "8 8 2.485127 [chickens, turkeys, birds] \n", + "14 17 1.787024 [rain, weather] \n", + "13 14 0.000000 [water] \n", + "17 20 0.000000 [disaster] \n", + "12 12 0.000000 [crisis] \n", + "4 3 0.000000 [medications] \n", + "0 -1 -5.975976 [grow, economics, efficacious] \n", + "\n", + " protein supply energy supply food supply political stability \\\n", + "18 0.000000 0.000000 0.000000 0.000000 \n", + "1 0.000000 0.000000 0.000000 0.000000 \n", + "20 0.000000 0.034321 0.000000 0.072793 \n", + "5 0.009790 0.000000 0.004593 0.000000 \n", + "24 0.000000 0.074815 0.001991 0.116053 \n", + "15 0.168442 0.403144 0.286338 0.012277 \n", + "25 0.000000 0.000000 0.000000 0.034291 \n", + "16 0.137234 0.533502 0.325388 0.000000 \n", + "3 0.000000 0.000000 0.000000 0.000000 \n", + "10 0.000000 0.000000 0.000000 0.000000 \n", + "6 0.000000 0.000000 0.000000 0.000000 \n", + "9 0.024409 0.000000 0.008060 0.000000 \n", + "23 0.000000 0.000000 0.000000 0.043513 \n", + "7 0.000000 0.000000 0.000000 0.000000 \n", + "2 0.000711 0.006612 0.006318 0.000000 \n", + "11 0.000000 0.000000 0.000000 0.000000 \n", + "22 0.000000 0.000000 0.000000 0.196803 \n", + "19 0.000000 0.027506 0.000000 0.032873 \n", + "21 0.000000 0.031985 0.000000 0.032482 \n", + "8 0.342107 0.123217 0.257776 0.000000 \n", + "14 0.000000 0.096077 0.015908 0.000000 \n", + "13 0.050871 0.100549 0.075975 0.000000 \n", + "17 0.000000 0.000000 0.000000 0.127237 \n", + "12 0.000000 0.002268 0.000000 0.306537 \n", + "4 0.000000 0.009789 0.000000 0.000000 \n", + "0 0.000000 0.011873 0.000000 0.000000 \n", + "\n", + " armed conflict floods droughts earthquakes cyclones disease \\\n", + "18 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "20 0.038521 0.097461 0.082664 0.077937 0.043921 0.026523 \n", + "5 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "24 0.008262 0.084514 0.051772 0.034559 0.041371 0.000000 \n", + "15 0.000000 0.090378 0.164445 0.000000 0.003401 0.000000 \n", + "25 0.000000 0.000000 0.000000 0.000000 0.031186 0.000000 \n", + "16 0.000000 0.184479 0.229797 0.000000 0.090899 0.027929 \n", + "3 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "10 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "6 0.000000 0.000000 0.000000 0.000000 0.000000 0.078602 \n", + "9 0.004505 0.000000 0.000000 0.000000 0.000000 0.070780 \n", + "23 0.003150 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "7 0.000000 0.000000 0.010706 0.032551 0.000000 0.000000 \n", + "2 0.000000 0.000000 0.000000 0.000000 0.000000 0.189484 \n", + "11 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "22 0.153736 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "19 0.052705 0.091863 0.037706 0.044431 0.101680 0.007323 \n", + "21 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "8 0.000000 0.010813 0.009382 0.001699 0.015904 0.043576 \n", + "14 0.000000 0.507542 0.357732 0.092732 0.202258 0.000000 \n", + "13 0.000000 0.240113 0.250010 0.011750 0.024795 0.030857 \n", + "17 0.089180 0.263581 0.101491 0.219786 0.373670 0.001986 \n", + "12 0.161924 0.096206 0.080741 0.121456 0.104278 0.027391 \n", + "4 0.000000 0.000000 0.000000 0.000216 0.000000 0.374936 \n", + "0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "\n", + " income prices climate inclusivity \n", + "18 0.338995 0.000000 0.000000 0.032694 \n", + "1 0.018104 0.000000 0.000000 0.000000 \n", + "20 0.436022 0.011958 0.034313 0.134235 \n", + "5 0.231398 0.126873 0.000000 0.000000 \n", + "24 0.242611 0.014831 0.031305 0.000000 \n", + "15 0.088432 0.000000 0.070659 0.000000 \n", + "25 0.116196 0.000000 0.000000 0.000000 \n", + "16 0.008572 0.000000 0.148771 0.000000 \n", + "3 0.000000 0.000000 0.000000 0.006181 \n", + "10 0.262970 0.052700 0.000000 0.020607 \n", + "6 0.067276 0.000000 0.000000 0.586652 \n", + "9 0.039174 0.000000 0.000000 0.171606 \n", + "23 0.068558 0.000000 0.000000 0.081211 \n", + "7 0.000000 0.000000 0.000000 0.000000 \n", + "2 0.060231 0.000000 0.000000 0.204632 \n", + "11 0.000000 0.183064 0.000000 0.000000 \n", + "22 0.044111 0.000000 0.000000 0.000000 \n", + "19 0.355548 0.000000 0.011519 0.146293 \n", + "21 0.019352 0.110605 0.000000 0.000000 \n", + "8 0.000000 0.000000 0.004465 0.000000 \n", + "14 0.000000 0.000000 0.409019 0.000000 \n", + "13 0.002714 0.000937 0.171574 0.000000 \n", + "17 0.000000 0.000000 0.027711 0.010487 \n", + "12 0.045787 0.000000 0.000000 0.000000 \n", + "4 0.009604 0.001413 0.000000 0.129098 \n", + "0 0.000000 0.030379 0.000000 0.000000 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for key in latent_vars_map:\n", + " coalescence_df[key] = [max(0,np.mean([((wv.similarity(word1,word2) - median_sim)/(1-median_sim)) for word2 in latent_vars_map[key] for word1 in words1])) for words1 in coalescence_df.examples]\n", + "coalescence_df" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
clusterstrengthexamplesprotein supplyenergy supplyfood supplypolitical stabilityarmed conflictfloodsdroughts...cyclonesdiseaseincomepricesclimateinclusivity02317
182531.796455[job]0.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.3389950.0000000.0000000.0326940000
1012.258999[opportunity, opportunities, experience]0.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0181040.0000000.0000000.0000001000
20279.715584[unemployment]0.0000000.0343210.0000000.0727930.0385210.0974610.082664...0.0439210.0265230.4360220.0119580.0343130.1342350000
547.949114[money]0.0097900.0000000.0045930.0000000.0000000.0000000.000000...0.0000000.0000000.2313980.1268730.0000000.0000000000
24327.065879[economic]0.0000000.0748150.0019910.1160530.0082620.0845140.051772...0.0413710.0000000.2426110.0148310.0313050.0000000000
15186.520283[agricultural, agriculture]0.1684420.4031440.2863380.0122770.0000000.0903780.164445...0.0034010.0000000.0884320.0000000.0706590.0000000000
25336.182644[financial]0.0000000.0000000.0000000.0342910.0000000.0000000.000000...0.0311860.0000000.1161960.0000000.0000000.0000000000
16195.619564[crop, crops, wheat]0.1372340.5335020.3253880.0000000.0000000.1844790.229797...0.0908990.0279290.0085720.0000000.1487710.0000000000
325.514252[receive, receiving, obtain]0.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0061810100
10104.750502[salaries, salary]0.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.2629700.0527000.0000000.0206070000
664.445997[disabled, disability]0.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0786020.0672760.0000000.0000000.5866520000
994.416175[children]0.0244090.0000000.0080600.0000000.0045050.0000000.000000...0.0000000.0707800.0391740.0000000.0000000.1716060000
23314.416175[social]0.0000000.0000000.0000000.0435130.0031500.0000000.000000...0.0000000.0000000.0685580.0000000.0000000.0812110010
774.202181[geographically, geography, geographic]0.0000000.0000000.0000000.0000000.0000000.0000000.010706...0.0000000.0000000.0000000.0000000.0000000.0000000001
214.049020[health, care]0.0007110.0066120.0063180.0000000.0000000.0000000.000000...0.0000000.1894840.0602310.0000000.0000000.2046320000
11113.532940[buy]0.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.1830640.0000000.0000000000
22303.532940[political]0.0000000.0000000.0000000.1968030.1537360.0000000.000000...0.0000000.0000000.0441110.0000000.0000000.0000000000
19263.532940[jobless]0.0000000.0275060.0000000.0328730.0527050.0918630.037706...0.1016800.0073230.3555480.0000000.0115190.1462930000
21293.066149[markets, market]0.0000000.0319850.0000000.0324820.0000000.0000000.000000...0.0000000.0000000.0193520.1106050.0000000.0000000000
882.485127[chickens, turkeys, birds]0.3421070.1232170.2577760.0000000.0000000.0108130.009382...0.0159040.0435760.0000000.0000000.0044650.0000000000
14171.787024[rain, weather]0.0000000.0960770.0159080.0000000.0000000.5075420.357732...0.2022580.0000000.0000000.0000000.4090190.0000000000
13140.000000[water]0.0508710.1005490.0759750.0000000.0000000.2401130.250010...0.0247950.0308570.0027140.0009370.1715740.0000000000
17200.000000[disaster]0.0000000.0000000.0000000.1272370.0891800.2635810.101491...0.3736700.0019860.0000000.0000000.0277110.0104870000
12120.000000[crisis]0.0000000.0022680.0000000.3065370.1619240.0962060.080741...0.1042780.0273910.0457870.0000000.0000000.0000000000
430.000000[medications]0.0000000.0097890.0000000.0000000.0000000.0000000.000000...0.0000000.3749360.0096040.0014130.0000000.1290980000
0-1-5.975976[grow, economics, efficacious]0.0000000.0118730.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0303790.0000000.0000000000
\n", + "

26 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " cluster strength examples \\\n", + "18 25 31.796455 [job] \n", + "1 0 12.258999 [opportunity, opportunities, experience] \n", + "20 27 9.715584 [unemployment] \n", + "5 4 7.949114 [money] \n", + "24 32 7.065879 [economic] \n", + "15 18 6.520283 [agricultural, agriculture] \n", + "25 33 6.182644 [financial] \n", + "16 19 5.619564 [crop, crops, wheat] \n", + "3 2 5.514252 [receive, receiving, obtain] \n", + "10 10 4.750502 [salaries, salary] \n", + "6 6 4.445997 [disabled, disability] \n", + "9 9 4.416175 [children] \n", + "23 31 4.416175 [social] \n", + "7 7 4.202181 [geographically, geography, geographic] \n", + "2 1 4.049020 [health, care] \n", + "11 11 3.532940 [buy] \n", + "22 30 3.532940 [political] \n", + "19 26 3.532940 [jobless] \n", + "21 29 3.066149 [markets, market] \n", + "8 8 2.485127 [chickens, turkeys, birds] \n", + "14 17 1.787024 [rain, weather] \n", + "13 14 0.000000 [water] \n", + "17 20 0.000000 [disaster] \n", + "12 12 0.000000 [crisis] \n", + "4 3 0.000000 [medications] \n", + "0 -1 -5.975976 [grow, economics, efficacious] \n", + "\n", + " protein supply energy supply food supply political stability \\\n", + "18 0.000000 0.000000 0.000000 0.000000 \n", + "1 0.000000 0.000000 0.000000 0.000000 \n", + "20 0.000000 0.034321 0.000000 0.072793 \n", + "5 0.009790 0.000000 0.004593 0.000000 \n", + "24 0.000000 0.074815 0.001991 0.116053 \n", + "15 0.168442 0.403144 0.286338 0.012277 \n", + "25 0.000000 0.000000 0.000000 0.034291 \n", + "16 0.137234 0.533502 0.325388 0.000000 \n", + "3 0.000000 0.000000 0.000000 0.000000 \n", + "10 0.000000 0.000000 0.000000 0.000000 \n", + "6 0.000000 0.000000 0.000000 0.000000 \n", + "9 0.024409 0.000000 0.008060 0.000000 \n", + "23 0.000000 0.000000 0.000000 0.043513 \n", + "7 0.000000 0.000000 0.000000 0.000000 \n", + "2 0.000711 0.006612 0.006318 0.000000 \n", + "11 0.000000 0.000000 0.000000 0.000000 \n", + "22 0.000000 0.000000 0.000000 0.196803 \n", + "19 0.000000 0.027506 0.000000 0.032873 \n", + "21 0.000000 0.031985 0.000000 0.032482 \n", + "8 0.342107 0.123217 0.257776 0.000000 \n", + "14 0.000000 0.096077 0.015908 0.000000 \n", + "13 0.050871 0.100549 0.075975 0.000000 \n", + "17 0.000000 0.000000 0.000000 0.127237 \n", + "12 0.000000 0.002268 0.000000 0.306537 \n", + "4 0.000000 0.009789 0.000000 0.000000 \n", + "0 0.000000 0.011873 0.000000 0.000000 \n", + "\n", + " armed conflict floods droughts ... cyclones disease income \\\n", + "18 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.338995 \n", + "1 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.018104 \n", + "20 0.038521 0.097461 0.082664 ... 0.043921 0.026523 0.436022 \n", + "5 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.231398 \n", + "24 0.008262 0.084514 0.051772 ... 0.041371 0.000000 0.242611 \n", + "15 0.000000 0.090378 0.164445 ... 0.003401 0.000000 0.088432 \n", + "25 0.000000 0.000000 0.000000 ... 0.031186 0.000000 0.116196 \n", + "16 0.000000 0.184479 0.229797 ... 0.090899 0.027929 0.008572 \n", + "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 \n", + "10 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.262970 \n", + "6 0.000000 0.000000 0.000000 ... 0.000000 0.078602 0.067276 \n", + "9 0.004505 0.000000 0.000000 ... 0.000000 0.070780 0.039174 \n", + "23 0.003150 0.000000 0.000000 ... 0.000000 0.000000 0.068558 \n", + "7 0.000000 0.000000 0.010706 ... 0.000000 0.000000 0.000000 \n", + "2 0.000000 0.000000 0.000000 ... 0.000000 0.189484 0.060231 \n", + "11 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 \n", + "22 0.153736 0.000000 0.000000 ... 0.000000 0.000000 0.044111 \n", + "19 0.052705 0.091863 0.037706 ... 0.101680 0.007323 0.355548 \n", + "21 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.019352 \n", + "8 0.000000 0.010813 0.009382 ... 0.015904 0.043576 0.000000 \n", + "14 0.000000 0.507542 0.357732 ... 0.202258 0.000000 0.000000 \n", + "13 0.000000 0.240113 0.250010 ... 0.024795 0.030857 0.002714 \n", + "17 0.089180 0.263581 0.101491 ... 0.373670 0.001986 0.000000 \n", + "12 0.161924 0.096206 0.080741 ... 0.104278 0.027391 0.045787 \n", + "4 0.000000 0.000000 0.000000 ... 0.000000 0.374936 0.009604 \n", + "0 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 \n", + "\n", + " prices climate inclusivity 0 2 31 7 \n", + "18 0.000000 0.000000 0.032694 0 0 0 0 \n", + "1 0.000000 0.000000 0.000000 1 0 0 0 \n", + "20 0.011958 0.034313 0.134235 0 0 0 0 \n", + "5 0.126873 0.000000 0.000000 0 0 0 0 \n", + "24 0.014831 0.031305 0.000000 0 0 0 0 \n", + "15 0.000000 0.070659 0.000000 0 0 0 0 \n", + "25 0.000000 0.000000 0.000000 0 0 0 0 \n", + "16 0.000000 0.148771 0.000000 0 0 0 0 \n", + "3 0.000000 0.000000 0.006181 0 1 0 0 \n", + "10 0.052700 0.000000 0.020607 0 0 0 0 \n", + "6 0.000000 0.000000 0.586652 0 0 0 0 \n", + "9 0.000000 0.000000 0.171606 0 0 0 0 \n", + "23 0.000000 0.000000 0.081211 0 0 1 0 \n", + "7 0.000000 0.000000 0.000000 0 0 0 1 \n", + "2 0.000000 0.000000 0.204632 0 0 0 0 \n", + "11 0.183064 0.000000 0.000000 0 0 0 0 \n", + "22 0.000000 0.000000 0.000000 0 0 0 0 \n", + "19 0.000000 0.011519 0.146293 0 0 0 0 \n", + "21 0.110605 0.000000 0.000000 0 0 0 0 \n", + "8 0.000000 0.004465 0.000000 0 0 0 0 \n", + "14 0.000000 0.409019 0.000000 0 0 0 0 \n", + "13 0.000937 0.171574 0.000000 0 0 0 0 \n", + "17 0.000000 0.027711 0.010487 0 0 0 0 \n", + "12 0.000000 0.000000 0.000000 0 0 0 0 \n", + "4 0.001413 0.000000 0.129098 0 0 0 0 \n", + "0 0.030379 0.000000 0.000000 0 0 0 0 \n", + "\n", + "[26 rows x 21 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_matrix = coalescence_df.copy()\n", + "for i,cluster in enumerate(final_matrix.cluster):\n", + " row = final_matrix[final_matrix.cluster == cluster].iloc[0]\n", + " if all(row.loc['protein supply':'inclusivity']<.1) and row.strength > 1:\n", + " new_col = [0]*len(final_matrix)\n", + " new_col[i] = 1\n", + " final_matrix[cluster] = new_col\n", + "final_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.12 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "5b3ded1ccb95c1d9bd405e7b823d9e85424cde40fbb5985eb47e999ef50e15b4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}