Skip to content

Commit 95e7096

Browse files
authored
remove per capita stop words (#4456)
Same change as #4415, but rebased off a clean master. svindex diff: https://storage.mtls.cloud.google.com/datcom-embedding-diffs/chejennifer_base_uae_mem_2024_07_09_21_55_14.html (base does not remove per capita stop words; test removes per capita stop words).
1 parent 10a081d commit 95e7096

File tree

22 files changed

+124
-134
lines changed

22 files changed

+124
-134
lines changed

deploy/nl/catalog.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ indexes:
9090
base_uae_mem:
9191
store_type: MEMORY
9292
source_path: ../../tools/nl/embeddings/input/base
93-
embeddings_path: gs://datcom-nl-models/base_uae_mem_2024_07_11_08_35_57/embeddings.csv
93+
embeddings_path: gs://datcom-nl-models/base_uae_mem_2024_07_12_09_03_25/embeddings.csv
9494
model: uae-large-v1-model
9595
healthcheck_query: "Life expectancy"
9696
base_mistral_mem:

run_test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ function run_lint_fix {
6464
pip3 install isort -q
6565
fi
6666
yapf -r -i -p --style='{based_on_style: google, indent_width: 2}' server/ nl_server/ shared/ tools/ -e=*pb2.py -e=**/.env/**
67-
isort server/ nl_server/ shared/ tools/ --skip-glob *pb2.py --skip-glob **/.env/** --profile google
67+
isort server/ nl_server/ shared/ tools/ --skip-glob=*pb2.py --skip-glob=**/.env/** --profile=google
6868
deactivate
6969
}
7070

server/integration_tests/test_data/demo_fallback/query_2/chart_config.json

+10-10
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,16 @@
7878
"geoId/12"
7979
],
8080
"statVarKey": [
81-
"Count_CriminalIncidents_IsHateCrime_multiple_place_bar_block"
81+
"Count_CriminalActivities_MurderAndNonNegligentManslaughter_multiple_place_bar_block"
8282
],
83-
"title": "Hate Crime Incidents (${date})",
83+
"title": "Murder and Non Negligent Manslaughter Cases (${date})",
8484
"type": "BAR"
8585
}
8686
]
8787
}
8888
],
8989
"denom": "Count_Person",
90-
"title": "Hate Crime Incidents"
90+
"title": "Murder and Non Negligent Manslaughter Cases"
9191
},
9292
{
9393
"columns": [
@@ -104,17 +104,16 @@
104104
"geoId/12"
105105
],
106106
"statVarKey": [
107-
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime_multiple_place_bar_block",
108-
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime_multiple_place_bar_block"
107+
"Count_CriminalIncidents_IsHateCrime_multiple_place_bar_block"
109108
],
110-
"title": "Hate Crime Cases by Type (${date})",
109+
"title": "Hate Crime Incidents (${date})",
111110
"type": "BAR"
112111
}
113112
]
114113
}
115114
],
116115
"denom": "Count_Person",
117-
"title": "Hate Crime Cases by Type"
116+
"title": "Hate Crime Incidents"
118117
},
119118
{
120119
"columns": [
@@ -131,16 +130,17 @@
131130
"geoId/12"
132131
],
133132
"statVarKey": [
134-
"Count_CriminalActivities_MurderAndNonNegligentManslaughter_multiple_place_bar_block"
133+
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime_multiple_place_bar_block",
134+
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime_multiple_place_bar_block"
135135
],
136-
"title": "Murder and Non Negligent Manslaughter Cases (${date})",
136+
"title": "Hate Crime Cases by Type (${date})",
137137
"type": "BAR"
138138
}
139139
]
140140
}
141141
],
142142
"denom": "Count_Person",
143-
"title": "Murder and Non Negligent Manslaughter Cases"
143+
"title": "Hate Crime Cases by Type"
144144
},
145145
{
146146
"columns": [

server/integration_tests/test_data/demo_fallback/query_3/chart_config.json

+26-26
Original file line numberDiff line numberDiff line change
@@ -71,76 +71,76 @@
7171
"tiles": [
7272
{
7373
"statVarKey": [
74-
"Count_CriminalIncidents_IsHateCrime"
74+
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
7575
],
76-
"title": "Hate Crime Incidents in California",
76+
"title": "Murder and Non Negligent Manslaughter Cases in California",
7777
"type": "LINE"
7878
}
7979
]
8080
},
8181
{
8282
"tiles": [
8383
{
84-
"description": "Hate Crime Incidents in California",
84+
"description": "Murder and Non Negligent Manslaughter Cases in California",
8585
"statVarKey": [
86-
"Count_CriminalIncidents_IsHateCrime"
86+
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
8787
],
88-
"title": "Hate Crime Incidents in California",
88+
"title": "Murder and Non Negligent Manslaughter Cases in California",
8989
"type": "HIGHLIGHT"
9090
}
9191
]
9292
}
9393
],
9494
"denom": "Count_Person",
95-
"title": "Hate Crime Incidents"
95+
"title": "Murder and Non Negligent Manslaughter Cases"
9696
},
9797
{
9898
"columns": [
9999
{
100100
"tiles": [
101101
{
102102
"statVarKey": [
103-
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime",
104-
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Intimidation_IsHateCrime",
105-
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime"
103+
"Count_CriminalIncidents_IsHateCrime"
106104
],
107-
"title": "Hate Crime Cases by Type in California",
105+
"title": "Hate Crime Incidents in California",
108106
"type": "LINE"
109107
}
110108
]
111-
}
112-
],
113-
"denom": "Count_Person",
114-
"title": "Hate Crime Cases by Type"
115-
},
116-
{
117-
"columns": [
109+
},
118110
{
119111
"tiles": [
120112
{
113+
"description": "Hate Crime Incidents in California",
121114
"statVarKey": [
122-
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
115+
"Count_CriminalIncidents_IsHateCrime"
123116
],
124-
"title": "Murder and Non Negligent Manslaughter Cases in California",
125-
"type": "LINE"
117+
"title": "Hate Crime Incidents in California",
118+
"type": "HIGHLIGHT"
126119
}
127120
]
128-
},
121+
}
122+
],
123+
"denom": "Count_Person",
124+
"title": "Hate Crime Incidents"
125+
},
126+
{
127+
"columns": [
129128
{
130129
"tiles": [
131130
{
132-
"description": "Murder and Non Negligent Manslaughter Cases in California",
133131
"statVarKey": [
134-
"Count_CriminalActivities_MurderAndNonNegligentManslaughter"
132+
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_AggravatedAssault_IsHateCrime",
133+
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Intimidation_IsHateCrime",
134+
"Count_CriminalIncidents_BiasMotivationDisabilityStatus_Robbery_IsHateCrime"
135135
],
136-
"title": "Murder and Non Negligent Manslaughter Cases in California",
137-
"type": "HIGHLIGHT"
136+
"title": "Hate Crime Cases by Type in California",
137+
"type": "LINE"
138138
}
139139
]
140140
}
141141
],
142142
"denom": "Count_Person",
143-
"title": "Murder and Non Negligent Manslaughter Cases"
143+
"title": "Hate Crime Cases by Type"
144144
},
145145
{
146146
"columns": [

server/integration_tests/test_data/detection_api_multivar/comparemalepopulationwithfemalepopulation/debug_info.json

+9-9
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
"Parts": [
5555
{
5656
"CosineScore": [
57-
0.9257857799530029
57+
0.9257858991622925
5858
],
5959
"QueryPart": "male population",
6060
"SV": [
@@ -63,8 +63,8 @@
6363
},
6464
{
6565
"CosineScore": [
66-
0.9296980500221252,
67-
0.8847433924674988
66+
0.9296978712081909,
67+
0.884743332862854
6868
],
6969
"QueryPart": "female population",
7070
"SV": [
@@ -80,7 +80,7 @@
8080
"Parts": [
8181
{
8282
"CosineScore": [
83-
0.9117770791053772
83+
0.911777138710022
8484
],
8585
"QueryPart": "male population female",
8686
"SV": [
@@ -89,8 +89,8 @@
8989
},
9090
{
9191
"CosineScore": [
92-
0.8982378840446472,
93-
0.8723467588424683
92+
0.8982377648353577,
93+
0.8723466396331787
9494
],
9595
"QueryPart": "population",
9696
"SV": [
@@ -106,9 +106,9 @@
106106
"Parts": [
107107
{
108108
"CosineScore": [
109-
0.8034241199493408,
110-
0.8017654418945312,
111-
0.775155782699585,
109+
0.8034241795539856,
110+
0.8017653822898865,
111+
0.7751558423042297,
112112
0.7698014974594116,
113113
0.7586972713470459
114114
],

server/integration_tests/test_data/detection_api_multivar/howarefactorslikeobesity,bloodpressureandasthmaimpactedbyclimatechange/debug_info.json

+5-5
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,10 @@
113113
"Parts": [
114114
{
115115
"CosineScore": [
116-
0.7768431901931763,
117-
0.7495046257972717,
118-
0.7342654466629028,
119-
0.7325114011764526
116+
0.7768430709838867,
117+
0.749504566192627,
118+
0.7342653274536133,
119+
0.7325113415718079
120120
],
121121
"QueryPart": "factors like obesity blood pressure asthma",
122122
"SV": [
@@ -128,7 +128,7 @@
128128
},
129129
{
130130
"CosineScore": [
131-
0.9056920409202576
131+
0.9056921005249023
132132
],
133133
"QueryPart": "impacted climate change",
134134
"SV": [

server/integration_tests/test_data/detection_translate_chinese/chart_config.json

-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111
"contained_in_place_type": "City",
1212
"had_default_type": false,
1313
"type": 4
14-
},
15-
{
16-
"type": 14
1714
}
1815
],
1916
"client": "test_detect",

server/integration_tests/test_data/e2e_edge_cases2/povertyvs.unemploymentrateindistrictsoftamilnadu/chart_config.json

-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
}
3232
],
3333
"denom": "Count_Person",
34-
"startWithDenom": true,
3534
"title": "Population Below Poverty Line"
3635
},
3736
{

server/integration_tests/test_data/e2e_high_sv_threshold/chart_config.json

-4
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
}
6363
],
6464
"denom": "Count_Person",
65-
"startWithDenom": true,
6665
"title": "Count of Mortality Event: 1 Years or Less in Counties of Massachusetts"
6766
},
6867
{
@@ -83,7 +82,6 @@
8382
}
8483
],
8584
"denom": "Count_Person",
86-
"startWithDenom": true,
8785
"title": "Causes of Infant Mortality"
8886
},
8987
{
@@ -116,7 +114,6 @@
116114
}
117115
],
118116
"denom": "Count_Person",
119-
"startWithDenom": true,
120117
"title": "Mortality Events (1 Years or Less): Symptoms, Signs and Abnormal Clinical and Laboratory Findings, Not Elsewhere Classified) in Counties of Massachusetts"
121118
},
122119
{
@@ -149,7 +146,6 @@
149146
}
150147
],
151148
"denom": "Count_Person",
152-
"startWithDenom": true,
153149
"title": "Mortality Events (1 Years or Less): Certain Conditions Originating in the Perinatal Period) in Counties of Massachusetts"
154150
},
155151
{

server/integration_tests/test_data/e2e_india_demo/howdoesliteracyratecomparetopovertyinindia/chart_config.json

-6
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
}
2424
],
2525
"denom": "Count_Person",
26-
"startWithDenom": true,
2726
"title": "Population: Literate vs. Population Below poverty line"
2827
},
2928
{
@@ -56,7 +55,6 @@
5655
}
5756
],
5857
"denom": "Count_Person",
59-
"startWithDenom": true,
6058
"title": "Population: Literate in Administrative Area 1 Places of India"
6159
},
6260
{
@@ -89,7 +87,6 @@
8987
}
9088
],
9189
"denom": "Count_Person",
92-
"startWithDenom": true,
9390
"title": "Population Below Poverty Line in Administrative Area 1 Places of India"
9491
},
9592
{
@@ -111,7 +108,6 @@
111108
}
112109
],
113110
"denom": "Count_Person",
114-
"startWithDenom": true,
115111
"title": "Population: Illiterate vs. Population Below poverty line (Per Capita)"
116112
},
117113
{
@@ -195,7 +191,6 @@
195191
}
196192
],
197193
"denom": "Count_Person",
198-
"startWithDenom": true,
199194
"title": "Population: Literate vs. Population Below poverty line (Per Capita)"
200195
},
201196
{
@@ -217,7 +212,6 @@
217212
}
218213
],
219214
"denom": "Count_Person",
220-
"startWithDenom": true,
221215
"title": "Population: Illiterate vs. Population Below poverty line"
222216
}
223217
],

0 commit comments

Comments
 (0)