From 610f2631c0296127bba1dc6066acec82b0748107 Mon Sep 17 00:00:00 2001 From: onesuper Date: Wed, 16 Nov 2016 17:24:42 +0800 Subject: [PATCH] support stats/extended_stats result extraction --- CHANGELOG.md | 1 + README.md | 6 +++++- pandasticsearch/operators.py | 4 ++-- pandasticsearch/queries.py | 7 +++++-- pandasticsearch/types.py | 8 ++++++++ 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 558b862..da0e63f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ### 0.2.0 +* support metric agg: `stats`, `extended_stats` * support boolean filter: `like`, `rlike`, `startswith`, `notnull` * display time in `df.show()` diff --git a/README.md b/README.md index 9f7152a..161cbb2 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ df.sort(ScriptSorter('doc["age"].value * 2')).collect() df[df.gender == 'male'].agg(df.age.avg).collect() # [Row(avg(age)=12)] -# Groupby +# Groupby only (will give the `doc_count`) df.groupby('gender').collect() # [Row(doc_count=1), Row(doc_count=2)] @@ -120,6 +120,10 @@ df[df.gender == 'male'].agg(df.age.avg).to_pandas() # avg(age) # 0 12 +# Advanced ES functionality +df.groupby(df.gender).agg(df.age.stats).to_pandas() +df.agg(df.age.extended_stats).to_pandas() +df.agg(df.age.percentiles).to_pandas() ``` diff --git a/pandasticsearch/operators.py b/pandasticsearch/operators.py index 0bfcb04..f2d5706 100644 --- a/pandasticsearch/operators.py +++ b/pandasticsearch/operators.py @@ -1,7 +1,7 @@ # -*- coding: UTF-8 -*- -_metric_aggs = ('avg', 'min', 'max', 'cardinality', 'value_count', 'sum' - 'percentiles', 'percentile_ranks') +_metric_aggs = ('avg', 'min', 'max', 'cardinality', 'value_count', 'sum', + 'percentiles', 'percentile_ranks', 'stats', 'extended_stats') _sort_mode = ('min', 'max', 'sum', 'avg', 'median') diff --git a/pandasticsearch/queries.py b/pandasticsearch/queries.py index ff6a506..0bd4ff6 100644 --- a/pandasticsearch/queries.py +++ b/pandasticsearch/queries.py @@ -192,8 
+192,11 @@ def _process_agg(cls, bucket, indexes=(), names=()): row[k] = v['value'] elif 'values' in v: # percentiles row = v['values'] - if k == 'doc_count': # count docs - row['doc_count'] = v + else: + row.update(v) # stats + else: + if k == 'doc_count': # count docs + row['doc_count'] = v if len(row) > 0: yield (names, indexes, row) diff --git a/pandasticsearch/types.py b/pandasticsearch/types.py index 44fbf48..44530bd 100644 --- a/pandasticsearch/types.py +++ b/pandasticsearch/types.py @@ -96,6 +96,14 @@ def percentiles(self): def percentile_ranks(self): return MetricAggregator(self._field, 'percentile_ranks') + @property + def stats(self): + return MetricAggregator(self._field, 'stats') + + @property + def extended_stats(self): + return MetricAggregator(self._field, 'extended_stats') + class Row(tuple): """