diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py index d250a13d5..2c4bf831f 100644 --- a/python/ray/dataframe/dataframe.py +++ b/python/ray/dataframe/dataframe.py @@ -634,6 +634,9 @@ class DataFrame(object): elif isinstance(by, compat.string_types): by = self.__getitem__(by).values.tolist() elif is_list_like(by): + if isinstance(by, pd.Series): + by = by.values.tolist() + mismatch = len(by) != len(self) if axis == 0 \ else len(by) != len(self.columns) diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index 94c8c4747..733943fc9 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -3,7 +3,6 @@ from __future__ import division from __future__ import print_function import pandas.core.groupby -import numpy as np import pandas as pd from pandas.core.dtypes.common import is_list_like import ray @@ -34,7 +33,7 @@ class DataFrameGroupBy(object): self._index_grouped = pd.Series(self._columns, index=self._index)\ .groupby(by=by, sort=sort) - self._keys_and_values = [(k, np.array(v)) + self._keys_and_values = [(k, v) for k, v in self._index_grouped] self._grouped_partitions = \ @@ -44,7 +43,7 @@ class DataFrameGroupBy(object): as_index, sort, group_keys, - squeeze) + part, + squeeze) + tuple(part.tolist()), num_return_vals=len(self)) for part in partitions)))