-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpy_commands.py
94 lines (80 loc) · 2.1 KB
/
py_commands.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json
import uptasticsearch
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
from pandas.io.json import json_normalize
# To see which fields the "utexas" index exposes, dump its mapping from a
# shell first:
#   curl http://localhost:9200/utexas/_mapping > fields.json
#   cat fields.json

# One match-all query in the two forms used below: a dict (passed as `body`
# to elasticsearch-py) and its JSON text (passed as `query_body` to
# uptasticsearch). The text is derived from the dict so the two cannot drift.
query_dict = {"query": {"match_all": {}}}
query_string = json.dumps(query_dict)
# Uptasticsearch: run the JSON-text form of the query against the "utexas"
# index on the local host, with max_hits set to 10.
uptasticsearch.es_search(
    es_host="http://localhost:9200",
    es_index="utexas",
    query_body=query_string,
    max_hits=10,
)
# Elasticsearch-py: create a client for the local host and run the dict
# form of the query against the "utexas" index, asking for 10 hits.
es = Elasticsearch(['http://localhost:9200'])
res = es.search(index="utexas", body=query_dict, size=10)
# The matching documents are the list at res['hits']['hits']; flatten that
# list of nested dicts into a table.
json_normalize(res['hits']['hits'])
# Elasticsearch-dsl: build the search through the DSL, reusing the `es`
# client. The index is given to Search() directly so the request targets
# only "utexas" (as the uptasticsearch and elasticsearch-py examples do),
# instead of searching every index and matching on the `_index` metadata
# field. With no query attached, the search matches all documents.
res2 = Search(using=es, index='utexas').execute()
# The matching documents are the list at res2.to_dict()['hits']['hits'];
# flatten that list of nested dicts into a table.
json_normalize(res2.to_dict()['hits']['hits'])
# Additional Queries
# Each query below is a JSON request body kept as a Python string.

# match_one: a single `match` clause on the "ciptitle.raw" field for
# "COMPUTER SCIENCE".
match_one = """{
"query": { "match": { "ciptitle.raw": "COMPUTER SCIENCE"} }
}"""
# match_one_greater_one: a `bool` query with two `must` clauses -- a `match`
# on "ciptitle.raw" for "COMPUTER SCIENCE" and a `range` requiring
# "cellcount" to be greater than 0.
match_one_greater_one = """{
"query":
{ "bool" : {
"must" : [ { "match": { "ciptitle.raw": "COMPUTER SCIENCE" } }
, { "range" : { "cellcount" : { "gt" : 0 } } } ]
} }
}"""
# match_two_greater_one: a `bool` query with three `must` clauses -- a
# `match` on "ciptitle.raw" for "COMPUTER SCIENCE", a `match` on
# "institution_id" for "3599", and a `range` requiring "cellcount" to be
# greater than 0.
match_two_greater_one = """{
"query":
{ "bool" : {
"must" : [ { "match": { "ciptitle.raw": "COMPUTER SCIENCE" } }
, { "match": { "institution_id": "3599" } }
, { "range" : { "cellcount" : { "gt" : 0 } } } ]
} }
}"""
# query_and_filter: a `bool` query that combines `must` and `filter`.
# The `must` clauses match "ciptitle.raw" against "AREA STUDIES" and
# "deglevel" against "Baccalaureate"; the `filter` clause restricts results
# to "institution_id" "3599". (Per the Elasticsearch bool-query docs, filter
# clauses must match but do not contribute to the relevance score.)
query_and_filter = """{ "query":
{ "bool": {
"must": [
{ "match": { "ciptitle.raw": "AREA STUDIES" } }
, { "match": { "deglevel": "Baccalaureate" } }
] ,
"filter": [
{ "match": {"institution_id": "3599" } }
]
} }
}"""
# query_and_sort: a `bool` query with three `must` clauses -- "p50_earnings"
# greater than or equal to 75000, "institution_id" matching "3658", and
# "cellcount" greater than 0 -- with results sorted ascending by
# "ciptitle.raw".
query_and_sort = """{ "query":
{ "bool" : {
"must" : [ { "range": { "p50_earnings": { "gte" : 75000 } } }
, { "match": { "institution_id": "3658" } }
, { "range": { "cellcount" : { "gt" : 0 } } } ]
}
},
"sort": {
"ciptitle.raw": "asc"
}
}"""
# summarize_one: a request body with a single `terms` aggregation, named
# "common_majors", over the "ciptitle.raw" field.
summarize_one = """{
"aggs": {
"common_majors": {
"terms": {
"field": "ciptitle.raw"
}
}
}
}"""
# Actually run the aggregation before reading its buckets: `res` holds the
# response to the match-all query, which carries no "aggregations" key, so
# indexing it here would raise KeyError. The body is parsed into a dict so
# it is passed to es.search() the same way the match-all query is.
res_agg = es.search(index="utexas", body=json.loads(summarize_one))
# Flatten the "common_majors" buckets into a table.
json_normalize(res_agg['aggregations']['common_majors']['buckets'])