Skip to content

[FEA] Support groupby aggregations on list of series #1096

@mrocklin

Description

@mrocklin
import cudf
df = cudf.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 1]})
df.groupby([df.x]).y.sum()

Traceback

TypeError                                 Traceback (most recent call last)
<ipython-input-1-6404360106bb> in <module>
      1 import cudf
      2 df = cudf.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 1]})
----> 3 df.groupby([df.x]).y.sum()

~/cudf/python/cudf/dataframe/dataframe.py in groupby(self, by, sort, as_index, method, level)
   1807             # __apply_agg
   1808             result = Groupby(self, by=by, method=method, as_index=as_index,
-> 1809                              level=level)
   1810             return result
   1811

~/cudf/python/cudf/groupby/groupby.py in __init__(self, df, by, method, as_index, level)
    107         else:
    108             self._by = [by] if isinstance(by, (str, Number)) else list(by)
--> 109         self._val_columns = [idx for idx in self._df.columns
    110                              if idx not in self._by]
    111         self._as_index = as_index

~/cudf/python/cudf/groupby/groupby.py in <listcomp>(.0)
    108             self._by = [by] if isinstance(by, (str, Number)) else list(by)
    109         self._val_columns = [idx for idx in self._df.columns
--> 110                              if idx not in self._by]
    111         self._as_index = as_index
    112         if (method == "hash"):

~/cudf/python/cudf/dataframe/series.py in __eq__(self, other)
    467
    468     def __eq__(self, other):
--> 469         return self._unordered_compare(other, 'eq')
    470
    471     def __ne__(self, other):

~/cudf/python/cudf/dataframe/series.py in _unordered_compare(self, other, cmpops)
    450     def _unordered_compare(self, other, cmpops):
    451         nvtx_range_push("CUDF_UNORDERED_COMP", "orange")
--> 452         other = self._normalize_binop_value(other)
    453         outcol = self._column.unordered_compare(cmpops, other._column)
    454         result = self._copy_construct(data=outcol)

~/cudf/python/cudf/dataframe/series.py in _normalize_binop_value(self, other)
    445             return Series(other)
    446         else:
--> 447             col = self._column.normalize_binop_value(other)
    448             return self._copy_construct(data=col)
    449

~/cudf/python/cudf/dataframe/numerical.py in normalize_binop_value(self, other)
    122             return self.replace(data=Buffer(ary), dtype=ary.dtype)
    123         else:
--> 124             raise TypeError('cannot broadcast {}'.format(type(other)))
    125
    126     def astype(self, dtype):

TypeError: cannot broadcast <class 'str'>

Metadata

Metadata

Assignees

Labels

Python (Affects Python cuDF API.) · dask (Dask issue) · feature request (New feature or request)

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions