Dataframe deprecation (#2353)

2026-06-28 01:00:10 +08:00 · 2018-07-06 00:16:22 -07:00
parent 4f1d14e176
commit 4185aaed10
22 changed files with 6 additions and 16270 deletions
@@ -1,60 +1,2 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pandas
-# TODO: In the future `set_option` or similar needs to run on every node
-# in order to keep all pandas instances across nodes consistent
-from pandas import (eval, unique, value_counts, cut, to_numeric, factorize,
-                    test, qcut, match, Panel, date_range, Index, MultiIndex,
-                    CategoricalIndex, Series, bdate_range, DatetimeIndex,
-                    Timedelta, Timestamp, to_timedelta, set_eng_float_format,
-                    set_option, NaT, PeriodIndex, Categorical)
-import threading
-
-pandas_version = pandas.__version__
-pandas_major = int(pandas_version.split(".")[0])
-pandas_minor = int(pandas_version.split(".")[1])
-
-if pandas_major == 0 and pandas_minor != 22:
-    raise Exception("In order to use Pandas on Ray, your pandas version must "
-                    "be 0.22. You can run 'pip install pandas==0.22'")
-
-DEFAULT_NPARTITIONS = 8
-
-
-def set_npartition_default(n):
-    global DEFAULT_NPARTITIONS
-    DEFAULT_NPARTITIONS = n
-
-
-def get_npartitions():
-    return DEFAULT_NPARTITIONS
-
-
-# We import these files after the above two functions
-# because they depend on npartitions.
-from .dataframe import DataFrame  # noqa: 402
-from .io import (read_csv, read_parquet, read_json, read_html,  # noqa: 402
-                 read_clipboard, read_excel, read_hdf, read_feather,  # noqa: 402
-                 read_msgpack, read_stata, read_sas, read_pickle,  # noqa: 402
-                 read_sql)  # noqa: 402
-from .concat import concat  # noqa: 402
-from .datetimes import to_datetime  # noqa: 402
-from .reshape import get_dummies  # noqa: 402
-
-__all__ = [
-    "DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval",
-    "unique", "value_counts", "cut", "to_numeric", "factorize", "test", "qcut",
-    "match", "to_datetime", "get_dummies", "Panel", "date_range", "Index",
-    "MultiIndex", "Series", "bdate_range", "DatetimeIndex", "to_timedelta",
-    "set_eng_float_format", "set_option", "CategoricalIndex", "Timedelta",
-    "Timestamp", "NaT", "PeriodIndex", "Categorical"
-]
-
-try:
-    if threading.current_thread().name == "MainThread":
-        import ray
-        ray.init()
-except AssertionError:
-    pass
+raise DeprecationWarning("Pandas on Ray has moved to Modin: "
+                         "github.com/modin-project/modin")
@@ -1,133 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pandas
-import numpy as np
-from .dataframe import DataFrame
-from .utils import _reindex_helper
-
-
-def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
-           keys=None, levels=None, names=None, verify_integrity=False,
-           copy=True):
-
-    if keys is not None:
-        objs = [objs[k] for k in keys]
-    else:
-        objs = list(objs)
-
-    if len(objs) == 0:
-        raise ValueError("No objects to concatenate")
-
-    objs = [obj for obj in objs if obj is not None]
-
-    if len(objs) == 0:
-        raise ValueError("All objects passed were None")
-
-    try:
-        type_check = next(obj for obj in objs
-                          if not isinstance(obj, (pandas.Series,
-                                                  pandas.DataFrame,
-                                                  DataFrame)))
-    except StopIteration:
-        type_check = None
-    if type_check is not None:
-        raise ValueError("cannot concatenate object of type \"{0}\"; only "
-                         "pandas.Series, pandas.DataFrame, "
-                         "and ray.dataframe.DataFrame objs are "
-                         "valid", type(type_check))
-
-    all_series = all(isinstance(obj, pandas.Series)
-                     for obj in objs)
-    if all_series:
-        return DataFrame(pandas.concat(objs, axis, join, join_axes,
-                                       ignore_index, keys, levels, names,
-                                       verify_integrity, copy))
-
-    if isinstance(objs, dict):
-        raise NotImplementedError(
-            "Obj as dicts not implemented. To contribute to "
-            "Pandas on Ray, please visit github.com/ray-project/ray.")
-
-    axis = pandas.DataFrame()._get_axis_number(axis)
-
-    if join not in ['inner', 'outer']:
-        raise ValueError("Only can inner (intersect) or outer (union) join the"
-                         " other axis")
-
-    # We need this in a list because we use it later.
-    all_index, all_columns = list(zip(*[(obj.index, obj.columns)
-                                        for obj in objs]))
-
-    def series_to_df(series, columns):
-        df = pandas.DataFrame(series)
-        df.columns = columns
-        return DataFrame(df)
-
-    # Pandas puts all of the Series in a single column named 0. This is
-    # true regardless of the existence of another column named 0 in the
-    # concat.
-    if axis == 0:
-        objs = [series_to_df(obj, [0])
-                if isinstance(obj, pandas.Series) else obj for obj in objs]
-    else:
-        # Pandas starts the count at 0 so this will increment the names as
-        # long as there's a new nameless Series being added.
-        def name_incrementer(i):
-            val = i[0]
-            i[0] += 1
-            return val
-
-        i = [0]
-        objs = [series_to_df(obj, obj.name if obj.name is not None
-                             else name_incrementer(i))
-                if isinstance(obj, pandas.Series) else obj for obj in objs]
-
-    # Using concat on the columns and index is fast because they're empty,
-    # and it forces the error checking. It also puts the columns in the
-    # correct order for us.
-    final_index = \
-        pandas.concat([pandas.DataFrame(index=idx) for idx in all_index],
-                      axis=axis, join=join, join_axes=join_axes,
-                      ignore_index=ignore_index, keys=keys, levels=levels,
-                      names=names, verify_integrity=verify_integrity,
-                      copy=False).index
-    final_columns = \
-        pandas.concat([pandas.DataFrame(columns=col)
-                       for col in all_columns],
-                      axis=axis, join=join, join_axes=join_axes,
-                      ignore_index=ignore_index, keys=keys, levels=levels,
-                      names=names, verify_integrity=verify_integrity,
-                      copy=False).columns
-
-    # Put all of the DataFrames into Ray format
-    # TODO just partition the DataFrames instead of building a new Ray DF.
-    objs = [DataFrame(obj) if isinstance(obj, (pandas.DataFrame,
-                                               pandas.Series)) else obj
-            for obj in objs]
-
-    # Here we reuse all_columns/index so we don't have to materialize objects
-    # from remote memory built in the previous line. In the future, we won't be
-    # building new DataFrames, rather just partitioning the DataFrames.
-    if axis == 0:
-        new_blocks = np.array([_reindex_helper._submit(
-            args=tuple([all_columns[i], final_columns, axis,
-                       len(objs[0]._block_partitions)] + part.tolist()),
-            num_return_vals=len(objs[0]._block_partitions))
-            for i in range(len(objs))
-            for part in objs[i]._block_partitions])
-    else:
-        # Transposing the columns is necessary because the remote task treats
-        # everything like rows and returns in row-major format. Luckily, this
-        # operation is cheap in numpy.
-        new_blocks = np.array([_reindex_helper._submit(
-            args=tuple([all_index[i], final_index, axis,
-                       len(objs[0]._block_partitions.T)] + part.tolist()),
-            num_return_vals=len(objs[0]._block_partitions.T))
-            for i in range(len(objs))
-            for part in objs[i]._block_partitions.T]).T
-
-    return DataFrame(block_partitions=new_blocks,
-                     columns=final_columns,
-                     index=final_index)
@@ -1,64 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pandas
-import ray
-
-from .dataframe import DataFrame
-from .utils import _map_partitions
-
-
-def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None,
-                box=True, format=None, exact=True, unit=None,
-                infer_datetime_format=False, origin='unix'):
-    """Convert the arg to datetime format. If not Ray DataFrame, this falls
-       back on pandas.
-
-    Args:
-        errors ('raise' or 'ignore'): If 'ignore', errors are silenced.
-        dayfirst (bool): Date format is passed in as day first.
-        yearfirst (bool): Date format is passed in as year first.
-        utc (bool): returns a UTC DatetimeIndex if True.
-        box (bool): If True, returns a DatetimeIndex.
-        format (string): strftime to parse time, eg "%d/%m/%Y".
-        exact (bool): If True, require an exact format match.
-        unit (string, default 'ns'): unit of the arg.
-        infer_datetime_format (bool): Whether or not to infer the format.
-        origin (string): Define the reference date.
-
-    Returns:
-        Type depends on input:
-
-        - list-like: DatetimeIndex
-        - Series: Series of datetime64 dtype
-        - scalar: Timestamp
-    """
-    if not isinstance(arg, DataFrame):
-        return pandas.to_datetime(arg, errors=errors, dayfirst=dayfirst,
-                                  yearfirst=yearfirst, utc=utc, box=box,
-                                  format=format, exact=exact, unit=unit,
-                                  infer_datetime_format=infer_datetime_format,
-                                  origin=origin)
-    if errors == 'raise':
-        pandas.to_datetime(pandas.DataFrame(columns=arg.columns),
-                           errors=errors, dayfirst=dayfirst,
-                           yearfirst=yearfirst, utc=utc, box=box,
-                           format=format, exact=exact, unit=unit,
-                           infer_datetime_format=infer_datetime_format,
-                           origin=origin)
-
-    def datetime_helper(df, cols):
-        df.columns = cols
-        return pandas.to_datetime(df, errors=errors, dayfirst=dayfirst,
-                                  yearfirst=yearfirst, utc=utc, box=box,
-                                  format=format, exact=exact, unit=unit,
-                                  infer_datetime_format=infer_datetime_format,
-                                  origin=origin)
-
-    datetime_series = _map_partitions(datetime_helper, arg._row_partitions,
-                                      arg.columns)
-    result = pandas.concat(ray.get(datetime_series), copy=False)
-    result.index = arg.index
-
-    return result
@@ -1,581 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pandas
-import numpy as np
-import pandas.core.groupby
-from pandas.core.dtypes.common import is_list_like
-import pandas.core.common as com
-
-import ray
-
-from .utils import _inherit_docstrings, _reindex_helper
-from .concat import concat
-from .index_metadata import _IndexMetadata
-
-
-@_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy,
-                     excluded=[pandas.core.groupby.DataFrameGroupBy,
-                               pandas.core.groupby.DataFrameGroupBy.__init__])
-class DataFrameGroupBy(object):
-
-    def __init__(self, df, by, axis, level, as_index, sort, group_keys,
-                 squeeze, **kwargs):
-
-        self._columns = df.columns
-        self._index = df.index
-        self._axis = axis
-
-        self._row_metadata = df._row_metadata
-        self._col_metadata = df._col_metadata
-
-        if axis == 0:
-            partitions = [column for column in df._block_partitions.T]
-            self._index_grouped = \
-                pandas.Series(self._index, index=self._index) \
-                .groupby(by=by, sort=sort)
-        else:
-            partitions = [row for row in df._block_partitions]
-            self._index_grouped = \
-                pandas.Series(self._columns, index=self._columns) \
-                .groupby(by=by, sort=sort)
-
-        self._keys_and_values = [(k, v)
-                                 for k, v in self._index_grouped]
-
-        if len(self) > 1:
-            self._grouped_partitions = \
-                list(zip(*(groupby._submit(args=(by,
-                                                 axis,
-                                                 level,
-                                                 as_index,
-                                                 sort,
-                                                 group_keys,
-                                                 squeeze)
-                                           + tuple(part.tolist()),
-                                           num_return_vals=len(self))
-                           for part in partitions)))
-        else:
-            if axis == 0:
-                self._grouped_partitions = [df._col_partitions]
-            else:
-                self._grouped_partitions = [df._row_partitions]
-
-    def __getattr__(self, key):
-        """After regular attribute access, looks up the name in the columns
-
-        Args:
-            key (str): Attribute name.
-
-        Returns:
-            The value of the attribute.
-        """
-        try:
-            return object.__getattribute__(self, key)
-        except AttributeError as e:
-            if key in self._columns:
-                raise NotImplementedError(
-                    "SeriesGroupBy is not implemented."
-                    "To contribute to Pandas on Ray, please visit "
-                    "github.com/ray-project/ray.")
-            raise e
-
-    @property
-    def _iter(self):
-        from .dataframe import DataFrame
-
-        if self._axis == 0:
-            return [(self._keys_and_values[i][0],
-                     DataFrame(col_partitions=part,
-                               columns=self._columns,
-                               index=self._keys_and_values[i][1].index,
-                               col_metadata=self._col_metadata))
-                    for i, part in enumerate(self._grouped_partitions)]
-        else:
-            return [(self._keys_and_values[i][0],
-                     DataFrame(row_partitions=part,
-                               columns=self._keys_and_values[i][1].index,
-                               index=self._index,
-                               row_metadata=self._row_metadata))
-                    for i, part in enumerate(self._grouped_partitions)]
-
-    @property
-    def ngroups(self):
-        return len(self)
-
-    def skew(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.skew(axis=self._axis,
-                                                           **kwargs))
-
-    def ffill(self, limit=None):
-        return self._apply_df_function(lambda df: df.ffill(axis=self._axis,
-                                                           limit=limit))
-
-    def sem(self, ddof=1):
-        return self._apply_agg_function(lambda df: df.sem(axis=self._axis,
-                                                          ddof=ddof))
-
-    def mean(self, *args, **kwargs):
-        return self._apply_agg_function(lambda df: df.mean(axis=self._axis,
-                                                           *args,
-                                                           **kwargs))
-
-    def any(self):
-        return self._apply_agg_function(lambda df: df.any(axis=self._axis))
-
-    @property
-    def plot(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def ohlc(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def __bytes__(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    @property
-    def tshift(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    @property
-    def groups(self):
-        return {k: pandas.Index(v) for k, v in self._keys_and_values}
-
-    def min(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.min(axis=self._axis,
-                                                          **kwargs))
-
-    def idxmax(self):
-        def idxmax_helper(df, index):
-            result = df.idxmax(axis=self._axis)
-            result = result.apply(lambda v: index[v])
-            return result
-
-        results = [idxmax_helper(g[1], i[1])
-                   for g, i in zip(self._iter, self._index_grouped)]
-
-        new_df = concat(results, axis=1)
-        if self._axis == 0:
-            new_df = new_df.T
-            new_df.columns = self._columns
-            new_df.index = [k for k, v in self._iter]
-        else:
-            new_df.columns = [k for k, v in self._iter]
-            new_df.index = self._index
-        return new_df
-
-    @property
-    def ndim(self):
-        return 2  # ndim is always 2 for DataFrames
-
-    def shift(self, periods=1, freq=None, axis=0):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def nth(self, n, dropna=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def cumsum(self, axis=0, *args, **kwargs):
-        return self._apply_df_function(lambda df: df.cumsum(axis,
-                                                            *args,
-                                                            **kwargs))
-
-    @property
-    def indices(self):
-        return dict(self._keys_and_values)
-
-    def pct_change(self):
-        return self._apply_agg_function(
-            lambda df: df.pct_change(axis=self._axis))
-
-    def filter(self, func, dropna=True, *args, **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def cummax(self, axis=0, **kwargs):
-        return self._apply_df_function(lambda df: df.cummax(axis,
-                                                            **kwargs))
-
-    def apply(self, func, *args, **kwargs):
-        def apply_helper(df):
-            return df.apply(func, axis=self._axis, *args, **kwargs)
-
-        result = [func(v) for k, v in self._iter]
-        if self._axis == 0:
-            if isinstance(result[0], pandas.Series):
-                # Applied an aggregation function
-                new_df = concat(result, axis=1).T
-                new_df.columns = self._columns
-                new_df.index = [k for k, v in self._iter]
-            else:
-                new_df = concat(result, axis=self._axis)
-                new_df._block_partitions = np.array([_reindex_helper._submit(
-                    args=tuple([new_df.index, self._index, self._axis ^ 1,
-                                len(new_df._block_partitions)]
-                               + block.tolist()),
-                    num_return_vals=len(new_df._block_partitions))
-                    for block in new_df._block_partitions.T]).T
-                new_df.index = self._index
-                new_df._row_metadata = \
-                    _IndexMetadata(new_df._block_partitions[:, 0],
-                                   index=new_df.index, axis=0)
-        else:
-            if isinstance(result[0], pandas.Series):
-                # Applied an aggregation function
-                new_df = concat(result, axis=1)
-                new_df.columns = [k for k, v in self._iter]
-                new_df.index = self._index
-            else:
-                new_df = concat(result, axis=self._axis)
-                new_df._block_partitions = np.array([_reindex_helper._submit(
-                    args=tuple([new_df.columns, self._columns, self._axis ^ 1,
-                                new_df._block_partitions.shape[1]]
-                               + block.tolist()),
-                    num_return_vals=new_df._block_partitions.shape[1])
-                    for block in new_df._block_partitions])
-                new_df.columns = self._columns
-                new_df._col_metadata = \
-                    _IndexMetadata(new_df._block_partitions[0, :],
-                                   index=new_df.columns, axis=1)
-        return new_df
-
-    @property
-    def dtypes(self):
-        if self._axis == 1:
-            raise ValueError("Cannot call dtypes on groupby with axis=1")
-        return self._apply_agg_function(lambda df: df.dtypes)
-
-    def first(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.first(offset=0,
-                                                            **kwargs))
-
-    def backfill(self, limit=None):
-        return self.bfill(limit)
-
-    def __getitem__(self, key):
-        # This operation requires a SeriesGroupBy Object
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def cummin(self, axis=0, **kwargs):
-        return self._apply_df_function(lambda df: df.cummin(axis=axis,
-                                                            **kwargs))
-
-    def bfill(self, limit=None):
-        return self._apply_df_function(lambda df: df.bfill(axis=self._axis,
-                                                           limit=limit))
-
-    def idxmin(self):
-        def idxmin_helper(df, index):
-            result = df.idxmin(axis=self._axis)
-            result = result.apply(lambda v: index[v])
-            return result
-
-        results = [idxmin_helper(g[1], i[1])
-                   for g, i in zip(self._iter, self._index_grouped)]
-
-        new_df = concat(results, axis=1)
-        if self._axis == 0:
-            new_df = new_df.T
-            new_df.columns = self._columns
-            new_df.index = [k for k, v in self._iter]
-        else:
-            new_df.columns = [k for k, v in self._iter]
-            new_df.index = self._index
-        return new_df
-
-    def prod(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.prod(axis=self._axis,
-                                                           **kwargs))
-
-    def std(self, ddof=1, *args, **kwargs):
-        return self._apply_agg_function(lambda df: df.std(axis=self._axis,
-                                                          ddof=ddof,
-                                                          *args,
-                                                          **kwargs))
-
-    def aggregate(self, arg, *args, **kwargs):
-        if self._axis != 0:
-            # This is not implemented in pandas,
-            # so we throw a different message
-            raise NotImplementedError("axis other than 0 is not supported")
-
-        if is_list_like(arg):
-            raise NotImplementedError(
-                "This requires Multi-level index to be implemented. "
-                "To contribute to Pandas on Ray, please visit "
-                "github.com/ray-project/ray.")
-        return self._apply_agg_function(lambda df: df.agg(arg,
-                                                          axis=self._axis,
-                                                          *args,
-                                                          **kwargs))
-
-    def last(self, **kwargs):
-        return self._apply_df_function(lambda df: df.last(offset=0,
-                                                          **kwargs))
-
-    def mad(self):
-        return self._apply_agg_function(lambda df: df.mad())
-
-    def rank(self):
-        return self._apply_df_function(lambda df: df.rank(axis=self._axis))
-
-    @property
-    def corrwith(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def pad(self, limit=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def max(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.max(axis=self._axis,
-                                                          **kwargs))
-
-    def var(self, ddof=1, *args, **kwargs):
-        return self._apply_agg_function(lambda df: df.var(ddof=ddof,
-                                                          axis=self._axis,
-                                                          *args,
-                                                          **kwargs))
-
-    def get_group(self, name, obj=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def __len__(self):
-        return len(self._keys_and_values)
-
-    def all(self):
-        return self._apply_agg_function(lambda df: df.all())
-
-    def size(self):
-        return self._apply_agg_function(lambda df: df.size)
-
-    def sum(self, **kwargs):
-        return self._apply_agg_function(lambda df:
-                                        df.sum(axis=self._axis, **kwargs))
-
-    def __unicode__(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def describe(self, **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def boxplot(self, grouped, subplots=True, column=None, fontsize=None,
-                rot=0, grid=True, ax=None, figsize=None, layout=None, **kwds):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def ngroup(self, ascending=True):
-        return self._index_grouped.ngroup(ascending)
-
-    def nunique(self, dropna=True):
-        return self._apply_agg_function(lambda df: df.nunique(dropna=dropna,
-                                                              axis=self._axis))
-
-    def resample(self, rule, *args, **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def median(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.median(axis=self._axis,
-                                                             **kwargs))
-
-    def head(self, n=5):
-        result = [v.head(n) for k, v in self._iter]
-        new_df = concat(result, axis=self._axis)
-
-        if self._axis == 0:
-            index_head = [v[:n] for k, v in self._keys_and_values]
-            flattened_index = {i for j in index_head for i in j}
-            sorted_index = [i for i in self._index if i in flattened_index]
-            new_df._block_partitions = np.array([_reindex_helper._submit(
-                args=tuple([new_df.index, sorted_index, 1,
-                            len(new_df._block_partitions)] + block.tolist()),
-                num_return_vals=len(new_df._block_partitions))
-                for block in new_df._block_partitions.T]).T
-            new_df.index = sorted_index
-            new_df._row_metadata = \
-                _IndexMetadata(new_df._block_partitions[:, 0],
-                               index=new_df.index, axis=0)
-
-        return new_df
-
-    def cumprod(self, axis=0, *args, **kwargs):
-        return self._apply_df_function(lambda df: df.cumprod(axis,
-                                                             *args,
-                                                             **kwargs))
-
-    def __iter__(self):
-        return self._iter.__iter__()
-
-    def agg(self, arg, *args, **kwargs):
-        return self.aggregate(arg, *args, **kwargs)
-
-    def cov(self):
-        return self._apply_agg_function(lambda df: df.cov())
-
-    def transform(self, func, *args, **kwargs):
-        return self._apply_df_function(lambda df: df.transform(func,
-                                                               *args,
-                                                               **kwargs))
-
-    def corr(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.corr(**kwargs))
-
-    def fillna(self, **kwargs):
-        return self._apply_df_function(lambda df: df.fillna(axis=self._axis,
-                                                            **kwargs))
-
-    def count(self, **kwargs):
-        return self._apply_agg_function(lambda df: df.count(self._axis,
-                                                            **kwargs))
-
-    def pipe(self, func, *args, **kwargs):
-        return com._pipe(self, func, *args, **kwargs)
-
-    def cumcount(self, ascending=True):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def tail(self, n=5):
-        result = [v.tail(n) for k, v in self._iter]
-        new_df = concat(result, axis=self._axis)
-
-        if self._axis == 0:
-            index_tail = [v[-n:] for k, v in self._keys_and_values]
-            flattened_index = {i for j in index_tail for i in j}
-            sorted_index = [i for i in self._index if i in flattened_index]
-            new_df._block_partitions = np.array([_reindex_helper._submit(
-                args=tuple([new_df.index, sorted_index, 1,
-                            len(new_df._block_partitions)] + block.tolist()),
-                num_return_vals=len(new_df._block_partitions))
-                for block in new_df._block_partitions.T]).T
-            new_df.index = sorted_index
-            new_df._row_metadata = \
-                _IndexMetadata(new_df._block_partitions[:, 0],
-                               index=new_df.index, axis=0)
-
-        return new_df
-
-    # expanding and rolling are unique cases and need to likely be handled
-    # separately. They do not appear to be commonly used.
-    def expanding(self, *args, **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def rolling(self, *args, **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def hist(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def quantile(self, q=0.5, **kwargs):
-        if is_list_like(q):
-            raise NotImplementedError(
-                "This requires Multi-level index to be implemented. "
-                "To contribute to Pandas on Ray, please visit "
-                "github.com/ray-project/ray.")
-
-        return self._apply_agg_function(lambda df: df.quantile(q=q,
-                                                               axis=self._axis,
-                                                               **kwargs))
-
-    def diff(self):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def take(self, **kwargs):
-        return self._apply_df_function(lambda df: df.take(**kwargs))
-
-    def _apply_agg_function(self, f):
-        assert callable(f), "\'{0}\' object is not callable".format(type(f))
-
-        result = [f(v) for k, v in self._iter]
-        new_df = concat(result, axis=1)
-
-        if self._axis == 0:
-            new_df = new_df.T
-            new_df.columns = self._columns
-            new_df.index = [k for k, v in self._iter]
-        else:
-            new_df.columns = [k for k, v in self._iter]
-            new_df.index = self._index
-        return new_df
-
-    def _apply_df_function(self, f, concat_axis=None):
-        assert callable(f), "\'{0}\' object is not callable".format(type(f))
-
-        result = [f(v) for k, v in self._iter]
-        concat_axis = self._axis if concat_axis is None else concat_axis
-
-        new_df = concat(result, axis=concat_axis)
-
-        if self._axis == 0:
-            new_df._block_partitions = np.array([_reindex_helper._submit(
-                args=tuple([new_df.index, self._index, 1,
-                            len(new_df._block_partitions)] + block.tolist()),
-                num_return_vals=len(new_df._block_partitions))
-                for block in new_df._block_partitions.T]).T
-            new_df.index = self._index
-            new_df._row_metadata = \
-                _IndexMetadata(new_df._block_partitions[:, 0],
-                               index=new_df.index, axis=0)
-        else:
-            new_df._block_partitions = np.array([_reindex_helper._submit(
-                args=tuple([new_df.columns, self._columns, 0,
-                            new_df._block_partitions.shape[1]]
-                           + block.tolist()),
-                num_return_vals=new_df._block_partitions.shape[1])
-                for block in new_df._block_partitions])
-            new_df.columns = self._columns
-            new_df._col_metadata = \
-                _IndexMetadata(new_df._block_partitions[0, :],
-                               index=new_df.columns, axis=1)
-
-        return new_df
-
-
-@ray.remote
-def groupby(by, axis, level, as_index, sort, group_keys, squeeze, *df):
-
-    df = pandas.concat(df, axis=axis)
-
-    return [v for k, v in df.groupby(by=by,
-                                     axis=axis,
-                                     level=level,
-                                     as_index=as_index,
-                                     sort=sort,
-                                     group_keys=group_keys,
-                                     squeeze=squeeze)]
@@ -1,415 +0,0 @@
-import pandas
-import numpy as np
-import ray
-
-from .utils import (
-    _build_row_lengths,
-    _build_col_widths,
-    _build_coord_df)
-
-from pandas.core.indexing import convert_to_index_sliceable
-
-
-class _IndexMetadata(object):
-    """Wrapper for Pandas indexes in Ray DataFrames. Handles all of the
-    metadata specific to the axis of partition (setting indexes,
-    calculating the index within partition of a value, etc.). This
-    implementation assumes the underlying index lies across multiple
-    partitions.
-
-    IMPORTANT NOTE: Currently all operations, as implemented, are inplace.
-
-    WARNING: Currently, the `_lengths` item is the source of truth for an
-    _IndexMetadata object, since it is easy to manage, and that the coord_df
-    item may be deprecated in the future. As such, it is _very_ important that
-    any functions that mutate the coord_df splits in anyway first modify the
-    lengths. Otherwise bad things might happen!
-    """
-
-    def __init__(self, dfs=None, index=None, axis=0, lengths_oid=None,
-                 coord_df_oid=None):
-        """Inits a IndexMetadata from Ray DataFrame partitions
-
-        Args:
-            dfs ([ObjectID]): ObjectIDs of dataframe partitions
-            index (pandas.Index): Index of the Ray DataFrame.
-            axis: Axis of partition (0=row partitions, 1=column partitions)
-
-        Returns:
-            A IndexMetadata backed by the specified pandas.Index, partitioned
-            off specified partitions
-        """
-        assert (lengths_oid is None) == (coord_df_oid is None), \
-            "Must pass both or neither of lengths_oid and coord_df_oid"
-
-        if dfs is not None and lengths_oid is None:
-            if axis == 0:
-                lengths_oid = _build_row_lengths.remote(dfs)
-            else:
-                lengths_oid = _build_col_widths.remote(dfs)
-            coord_df_oid = _build_coord_df.remote(lengths_oid, index)
-
-        self._lengths = lengths_oid
-        self._coord_df = coord_df_oid
-        self._index_cache = index
-        self._cached_index = False
-
-    def _get__lengths(self):
-        if isinstance(self._lengths_cache, ray.ObjectID) or \
-            (isinstance(self._lengths_cache, list) and
-             isinstance(self._lengths_cache[0], ray.ObjectID)):
-            self._lengths_cache = ray.get(self._lengths_cache)
-        return self._lengths_cache
-
-    def _set__lengths(self, lengths):
-        self._lengths_cache = lengths
-
-    _lengths = property(_get__lengths, _set__lengths)
-
-    def _get__coord_df(self):
-        """Get the coordinate dataframe wrapped by this _IndexMetadata.
-
-        Since we may have had an index set before our coord_df was
-        materialized, we'll have to apply it to the newly materialized df
-        """
-        if isinstance(self._coord_df_cache, ray.ObjectID):
-            self._coord_df_cache = ray.get(self._coord_df_cache)
-        if self._cached_index:
-            self._coord_df_cache.index = self._index_cache
-            self._cached_index = False
-        return self._coord_df_cache
-
-    def _set__coord_df(self, coord_df):
-        """Set the coordinate dataframe wrapped by this _IndexMetadata.
-
-        Sometimes we set the _IndexMetadata's coord_df outside of the
-        constructor, generally using fxns like drop(). This produces a modified
-        index, so we need to reflect the change on the index cache.
-
-        If the set _IndexMetadata is an OID instead (due to a copy or whatever
-        reason), we fall back relying on `_index_cache`.
-        """
-        if not isinstance(coord_df, ray.ObjectID):
-            self._index_cache = coord_df.index
-        self._coord_df_cache = coord_df
-
-    _coord_df = property(_get__coord_df, _set__coord_df)
-
-    def _get_index(self):
-        """Get the index wrapped by this _IndexMetadata.
-
-        The only time `self._index_cache` would be None is in a newly created
-        _IndexMetadata object without a specified `index` parameter (See the
-        _IndexMetadata constructor for more details)
-        """
-        if isinstance(self._coord_df_cache, ray.ObjectID):
-            return self._index_cache
-        else:
-            return self._coord_df_cache.index
-
-    def _set_index(self, new_index):
-        """Set the index wrapped by this _IndexMetadata.
-
-        It is important to always set `_index_cache` even if the coord_df is
-        materialized due to the possibility that it is set to an OID later on.
-        This design is more straightforward than caching indexes on setting the
-        coord_df to an OID due to the possibility of an OID-to-OID change.
-        """
-        new_index = pandas.DataFrame(index=new_index).index
-        assert len(new_index) == len(self)
-
-        self._index_cache = new_index
-        if isinstance(self._coord_df_cache, ray.ObjectID):
-            self._cached_index = True
-        else:
-            self._coord_df_cache.index = new_index
-
-    index = property(_get_index, _set_index)
-
-    def _get_index_cache(self):
-        """Get the cached Index object, which may sometimes be an OID.
-
-        This will ray.get the Index object out of the Ray store lazily, such
-        that it is not grabbed until it is needed in the driver. This layer of
-        abstraction is important for allowing this object to be instantiated
-        with a remote Index object.
-
-        Returns:
-            The Index object in _index_cache.
-        """
-        if self._index_cache_validator is None:
-            self._index_cache_validator = pandas.RangeIndex(len(self))
-        elif isinstance(self._index_cache_validator,
-                        ray.ObjectID):
-            self._index_cache_validator = ray.get(self._index_cache_validator)
-
-        return self._index_cache_validator
-
-    def _set_index_cache(self, new_index):
-        """Sets the new index cache.
-
-        Args:
-            new_index: The Index to set the _index_cache to.
-        """
-        self._index_cache_validator = new_index
-
-    # _index_cache_validator is an extra layer of abstraction to allow the
-    # cache to accept ObjectIDs and ray.get them when needed.
-    _index_cache = property(_get_index_cache, _set_index_cache)
-
-    def coords_of(self, key):
-        """Returns the coordinates (partition, index_within_partition) of the
-        provided key in the index. Can be called on its own or implicitly
-        through __getitem__
-
-        Args:
-            key:
-                item to get coordinates of. Can also be a tuple of item
-                and {"partition", "index_within_partition"} if caller only
-                needs one of the coordinates
-
-        Returns:
-            Pandas object with the keys specified. If key is a single object
-            it will be a pandas.Series with items `partition` and
-            `index_within_partition`, and if key is a slice or if the key is
-            duplicate it will be a pandas.DataFrame with said items as columns.
-        """
-        return self._coord_df.loc[key]
-
-    def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
-                group_keys=True, squeeze=False, **kwargs):
-        # TODO: Find out what this does, and write a docstring
-        assignments_df = self._coord_df.groupby(by=by, axis=axis, level=level,
-                                                as_index=as_index, sort=sort,
-                                                group_keys=group_keys,
-                                                squeeze=squeeze, **kwargs)\
-            .apply(lambda x: x[:])
-        return assignments_df
-
-    def partition_series(self, partition):
-        return self[self._coord_df['partition'] == partition,
-                    'index_within_partition']
-
-    def __len__(self):
-        return int(sum(self._lengths))
-
-    def reset_partition_coords(self, partitions=None):
-        partitions = np.array(partitions)
-
-        for partition in partitions:
-            partition_mask = (self._coord_df['partition'] == partition)
-            # Since we are replacing columns with RangeIndex inside the
-            # partition, we have to make sure that our reference to it is
-            # upandasated as well.
-            try:
-                self._coord_df.loc[partition_mask,
-                                   'index_within_partition'] = np.arange(
-                                       sum(partition_mask)).astype(int)
-            except ValueError:
-                # Copy the arrow sealed dataframe so we can mutate it.
-                # We only do this the first time we try to mutate the sealed.
-                self._coord_df = self._coord_df.copy()
-                self._coord_df.loc[partition_mask,
-                                   'index_within_partition'] = np.arange(
-                                       sum(partition_mask)).astype(int)
-
-    def insert(self, key, loc=None, partition=None,
-               index_within_partition=None):
-        """Inserts a key at a certain location in the index, or a certain coord
-        in a partition. Called with either `loc` or `partition` and
-        `index_within_partition`. If called with both, `loc` will be used.
-
-        Args:
-            key: item to insert into index
-            loc: location to insert into index
-            partition: partition to insert into
-            index_within_partition: index within partition to insert into
-
-        Returns:
-            DataFrame with coordinates of insert
-        """
-        # Perform insert on a specific partition
-        # Determine which partition to place it in, and where in that partition
-        if loc is not None:
-            cum_lens = np.cumsum(self._lengths)
-            if len(cum_lens) > 1:
-                partition = np.digitize(loc, cum_lens[:-1], right=True)
-            else:
-                partition = 0
-            if partition >= len(cum_lens):
-                if loc > cum_lens[-1]:
-                    raise IndexError("index {0} is out of bounds".format(loc))
-                else:
-                    index_within_partition = self._lengths[-1]
-            else:
-                first_in_partition = \
-                        np.asscalar(np.concatenate(([0], cum_lens))[partition])
-                index_within_partition = loc - first_in_partition
-
-        # TODO: Stop-gap solution until we begin passing IndexMetadatas
-        return partition, index_within_partition
-
-        # Generate new index
-        new_index = self.index.insert(loc, key)
-
-        # Shift indices in partition where we inserted column
-        idx_locs = (self._coord_df.partition == partition) & \
-                   (self._coord_df.index_within_partition ==
-                    index_within_partition)
-        # TODO: Determine why self._coord_df{,_cache} are read-only
-        _coord_df_copy = self._coord_df.copy()
-        _coord_df_copy.loc[idx_locs, 'index_within_partition'] += 1
-
-        # TODO: Determine if there's a better way to do a row-index insert in
-        # pandas, because this is very annoying/unsure of efficiency
-        # Create new coord entry to insert
-        coord_to_insert = pandas.DataFrame(
-                {'partition': partition,
-                 'index_within_partition': index_within_partition},
-                index=[key])
-
-        # Insert into cached RangeIndex, and order by new column index
-        self._coord_df = _coord_df_copy.append(coord_to_insert).loc[new_index]
-
-        # Return inserted coordinate for callee
-        return coord_to_insert
-
-    def get_global_indices(self, partition, index_within_partition_list):
-        total = 0
-        for i in range(partition):
-            total += self._lengths[i]
-
-        return [total + i for i in index_within_partition_list]
-
-    def squeeze(self, partition, index_within_partition):
-        """Prepare a single coordinate for removal by "squeezing" the
-        subsequent coordinates "up" one index within that partition. To be used
-        with "_IndexMetadata.drop" for when all the "squeezed" coordinates are
-        dropped in batch. Note that this function doesn't actually mutate the
-        coord_df.
-        """
-        self._coord_df = self._coord_df.copy()
-
-        partition_mask = self._coord_df.partition == partition
-        index_within_partition_mask = \
-            self._coord_df.index_within_partition > index_within_partition
-        self._coord_df.loc[partition_mask & index_within_partition_mask,
-                           'index_within_partition'] -= 1
-
-    def copy(self):
-        # TODO: Investigate copy-on-write wrapper for metadata objects
-        coord_df_copy = self._coord_df_cache
-        if not isinstance(self._coord_df_cache, ray.ObjectID):
-            coord_df_copy = self._coord_df_cache.copy()
-
-        lengths_copy = self._lengths_cache
-        if not isinstance(self._lengths_cache, ray.ObjectID):
-            lengths_copy = self._lengths_cache.copy()
-
-        index_copy = self._index_cache
-        if self._index_cache is not None:
-            index_copy = self._index_cache.copy()
-
-        return _IndexMetadata(index=index_copy,
-                              coord_df_oid=coord_df_copy,
-                              lengths_oid=lengths_copy)
-
-    def __getitem__(self, key):
-        """Returns the coordinates (partition, index_within_partition) of the
-        provided key in the index. Essentially just an alias for
-        `_IndexMetadata.coords_of` that allows for slice passing, since
-        slices cannot be passed with slice notation other than through
-        `__getitem__` calls.
-
-        Args:
-            key:
-                item to get coordinates of. Can also be a tuple of item
-                and {"partition", "index_within_partition"} if caller only
-                needs one of the coordinates
-
-        Returns:
-            Pandas object with the keys specified. If key is a single object
-            it will be a pandas.Series with items `partition` and
-            `index_within_partition`, and if key is a slice or if the key is
-            duplicate it will be a pandas.DataFrame with said items as columns.
-        """
-        return self.coords_of(key)
-
-    def first_valid_index(self):
-        return self._coord_df.first_valid_index()
-
-    def last_valid_index(self):
-        return self._coord_df.last_valid_index()
-
-    def drop(self, labels, errors='raise'):
-        """Drop the specified labels from the IndexMetadata
-
-        Args:
-            labels (scalar or list-like):
-                The labels to drop
-            errors ('raise' or 'ignore'):
-                If 'ignore', suppress errors for when labels don't exist
-
-        Returns:
-            DataFrame with coordinates of dropped labels
-        """
-        dropped = self.coords_of(labels)
-
-        # Upandasate first lengths to prevent possible length inconsistencies
-        if isinstance(dropped, pandas.DataFrame):
-            try:
-                drop_per_part = dropped.groupby(["partition"]).size()\
-                        .reindex(index=pandas.RangeIndex(len(self._lengths)),
-                                 fill_value=0)
-            except ValueError:
-                # Copy the arrow sealed dataframe so we can mutate it.
-                dropped = dropped.copy()
-                drop_per_part = dropped.groupby(["partition"]).size()\
-                    .reindex(index=pandas.RangeIndex(len(self._lengths)),
-                             fill_value=0)
-        elif isinstance(dropped, pandas.Series):
-            drop_per_part = np.zeros_like(self._lengths)
-            drop_per_part[dropped["partition"]] = 1
-        else:
-            raise AssertionError("Unrecognized result from `coords_of`")
-
-        self._lengths = self._lengths - np.array(drop_per_part)
-
-        new_coord_df = self._coord_df.drop(labels, errors=errors)
-
-        num_dropped = 0
-        for i, length in enumerate(self._lengths):
-            if length == 0:
-                num_dropped += 1
-            if num_dropped > 0:
-                new_coord_df['partition'][new_coord_df['partition'] == i] \
-                    -= num_dropped
-
-        self._coord_df = new_coord_df
-        return dropped
-
-    def rename_index(self, mapper):
-        """Rename the index.
-
-        Args:
-            mapper: name to rename the index as
-        """
-        self._coord_df = self._coord_df.rename_axis(mapper, axis=0)
-
-    def convert_to_index_sliceable(self, key):
-        """Converts and performs error checking on the passed slice
-
-        Args:
-            key: slice to convert and check
-        """
-        return convert_to_index_sliceable(self._coord_df, key)
-
-    def get_partition(self, partition_id):
-        """Return a view of coord_df where partition = partition_id
-        """
-        return self._coord_df[self._coord_df.partition == partition_id]
-
-    def sorted_index(self):
-        return (self._coord_df
-                    .sort_values(['partition', 'index_within_partition'])
-                    .index)
@@ -1,469 +0,0 @@
-"""Indexing Helper Class works as follows:
-
-_Location_Indexer_Base provide methods framework for __getitem__
-  and __setitem__ that work with Ray DataFrame's internal index. Base
-  class's __{get,set}item__ takes in partitions & idx_in_partition data
-  and perform lookup/item write.
-
-_LocIndexer and _iLocIndexer is responsible for indexer specific logic and
-  lookup computation. Loc will take care of enlarge dataframe. Both indexer
-  will take care of translating pandas's lookup to Ray DataFrame's internal
-  lookup.
-
-An illustration is available at
-https://github.com/ray-project/ray/pull/1955#issuecomment-386781826
-"""
-import pandas
-import numpy as np
-import ray
-from warnings import warn
-
-from pandas.api.types import (is_scalar, is_list_like, is_bool)
-from pandas.core.dtypes.common import is_integer
-from pandas.core.indexing import IndexingError
-
-from .utils import (_blocks_to_col, _get_nan_block_id, extractor,
-                    _mask_block_partitions, writer)
-from .index_metadata import _IndexMetadata
-from .dataframe import DataFrame
-
-
-def is_slice(x): return isinstance(x, slice)
-
-
-def is_2d(x): return is_list_like(x) or is_slice(x)
-
-
-def is_tuple(x): return isinstance(x, tuple)
-
-
-def is_boolean_array(x): return is_list_like(x) and all(map(is_bool, x))
-
-
-def is_integer_slice(x):
-    if not is_slice(x):
-        return False
-    for pos in [x.start, x.stop, x.step]:
-        if not ((pos is None) or is_integer(pos)):
-            return False  # one position is neither None nor int
-    return True
-
-
-_ENLARGEMENT_WARNING = """
-Passing list-likes to .loc or [] with any missing label will raise
-KeyError in the future, you can use .reindex() as an alternative.
-
-See the documentation here:
-http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
-"""
-
-_ILOC_INT_ONLY_ERROR = """
-Location based indexing can only have [integer, integer slice (START point is
-INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types.
-"""
-
-
-def _parse_tuple(tup):
-    """Unpack the user input for getitem and setitem and compute ndim
-
-    loc[a] -> ([a], :), 1D
-    loc[[a,b],] -> ([a,b], :),
-    loc[a,b] -> ([a], [b]), 0D
-    """
-    row_loc, col_loc = slice(None), slice(None)
-
-    if is_tuple(tup):
-        row_loc = tup[0]
-        if len(tup) == 2:
-            col_loc = tup[1]
-        if len(tup) > 2:
-            raise IndexingError('Too many indexers')
-    else:
-        row_loc = tup
-
-    ndim = _compute_ndim(row_loc, col_loc)
-    row_loc = [row_loc] if is_scalar(row_loc) else row_loc
-    col_loc = [col_loc] if is_scalar(col_loc) else col_loc
-
-    return row_loc, col_loc, ndim
-
-
-def _is_enlargement(locator, coord_df):
-    """Determine if a locator will enlarge the corrd_df.
-
-    Enlargement happens when you trying to locate using labels isn't in the
-    original index. In other words, enlargement == adding NaNs !
-    """
-    if is_list_like(locator) and not is_slice(
-            locator) and len(locator) > 0 and not is_boolean_array(locator):
-        n_diff_elems = len(pandas.Index(locator).difference(coord_df.index))
-        is_enlargement_boolean = n_diff_elems > 0
-        return is_enlargement_boolean
-    return False
-
-
-def _warn_enlargement():
-    warn(FutureWarning(_ENLARGEMENT_WARNING))
-
-
-def _compute_ndim(row_loc, col_loc):
-    """Compute the ndim of result from locators
-    """
-    row_scaler = is_scalar(row_loc)
-    col_scaler = is_scalar(col_loc)
-
-    if row_scaler and col_scaler:
-        ndim = 0
-    elif row_scaler ^ col_scaler:
-        ndim = 1
-    else:
-        ndim = 2
-
-    return ndim
-
-
-class _Location_Indexer_Base():
-    """Base class for location indexer like loc and iloc
-    """
-
-    def __init__(self, ray_df):
-        self.df = ray_df
-        self.col_coord_df = ray_df._col_metadata._coord_df
-        self.row_coord_df = ray_df._row_metadata._coord_df
-        self.block_oids = ray_df._block_partitions
-
-        self.is_view = False
-        if isinstance(ray_df, DataFrameView):
-            self.block_oids = ray_df._block_partitions_data
-            self.is_view = True
-
-    def __getitem__(self, row_lookup, col_lookup, ndim):
-        """
-        Args:
-            row_lookup: A pandas dataframe, a partial view from row_coord_df
-            col_lookup: A pandas dataframe, a partial view from col_coord_df
-            ndim: the dimension of returned data
-        """
-        if ndim == 2:
-            return self._generate_view(row_lookup, col_lookup)
-
-        extracted = self._retrive_items(row_lookup, col_lookup)
-        if ndim == 1:
-            result = ray.get(_blocks_to_col.remote(*extracted)).squeeze()
-
-            if is_scalar(result):
-                result = pandas.Series(result)
-
-            scaler_axis = row_lookup if len(row_lookup) == 1 else col_lookup
-            series_name = scaler_axis.iloc[0].name
-            result.name = series_name
-
-            index_axis = row_lookup if len(col_lookup) == 1 else col_lookup
-            result.index = index_axis.index
-
-        if ndim == 0:
-            result = ray.get(extracted[0]).squeeze()
-
-        return result
-
-    def _retrive_items(self, row_lookup, col_lookup):
-        """Given lookup dataframes, return a list of result oids
-        """
-        result_oids = []
-
-        # We have to copy before we groupby because
-        # https://github.com/pandas-dev/pandas/issues/10043
-        row_groups = row_lookup.copy().groupby('partition')
-        col_groups = col_lookup.copy().groupby('partition')
-        for row_blk, row_data in row_groups:
-            for col_blk, col_data in col_groups:
-                block_oid = self.block_oids[row_blk, col_blk]
-                row_idx = row_data['index_within_partition']
-                col_idx = col_data['index_within_partition']
-
-                result_oid = extractor.remote(block_oid, row_idx, col_idx)
-                result_oids.append(result_oid)
-        return result_oids
-
-    def _generate_view(self, row_lookup, col_lookup):
-        """Generate a DataFrameView from lookup
-        """
-        row_lengths = [0] * len(self.df._row_metadata._lengths)
-        for i in row_lookup["partition"]:
-            row_lengths[i] += 1
-        col_lengths = [0] * len(self.df._col_metadata._lengths)
-        for i in col_lookup["partition"]:
-            col_lengths[i] += 1
-
-        row_metadata_view = _IndexMetadata(
-            coord_df_oid=row_lookup, lengths_oid=row_lengths)
-
-        col_metadata_view = _IndexMetadata(
-            coord_df_oid=col_lookup, lengths_oid=col_lengths)
-
-        df_view = DataFrameView(
-            block_partitions=self.block_oids,
-            row_metadata=row_metadata_view,
-            col_metadata=col_metadata_view,
-            index=row_metadata_view.index,
-            columns=col_metadata_view.index)
-
-        return df_view
-
-    def __setitem__(self, row_lookup, col_lookup, item):
-        """
-        Args:
-            row_lookup: A pandas dataframe, a partial view from row_coord_df
-            col_lookup: A pandas dataframe, a partial view from col_coord_df
-            item: The new item needs to be set. It can be any shape that's
-                broadcastable to the product of the lookup tables.
-        """
-        to_shape = (len(row_lookup), len(col_lookup))
-        item = self._broadcast_item(item, to_shape)
-        self._write_items(row_lookup, col_lookup, item)
-
-    def _broadcast_item(self, item, to_shape):
-        """Use numpy to broadcast or reshape item.
-
-        Notes:
-            - Numpy is memory efficent, there shouldn't be performance issue.
-        """
-        try:
-            item = np.array(item)
-            if np.prod(to_shape) == np.prod(item.shape):
-                return item.reshape(to_shape)
-            else:
-                return np.broadcast_to(item, to_shape)
-        except ValueError:
-            from_shape = np.array(item).shape
-            raise ValueError(
-                "could not broadcast input array from \
-                shape {from_shape} into shape {to_shape}".format(
-                    from_shape=from_shape, to_shape=to_shape))
-
-    def _write_items(self, row_lookup, col_lookup, item):
-        """Perform remote write and replace blocks.
-        """
-
-        # We have to copy before we groupby because
-        # https://github.com/pandas-dev/pandas/issues/10043
-        row_groups = row_lookup.copy().groupby('partition')
-        col_groups = col_lookup.copy().groupby('partition')
-
-        row_item_index = 0
-        for row_blk, row_data in row_groups:
-            row_len = len(row_data)
-
-            col_item_index = 0
-            for col_blk, col_data in col_groups:
-                col_len = len(col_data)
-
-                block_oid = self.block_oids[row_blk, col_blk]
-                row_idx = row_data['index_within_partition']
-                col_idx = col_data['index_within_partition']
-
-                item_to_write = item[row_item_index:row_item_index + row_len,
-                                     col_item_index:col_item_index + col_len]
-
-                result_oid = writer.remote(block_oid, row_idx, col_idx,
-                                           item_to_write)
-
-                if self.is_view:
-                    self.df._block_partitions_data[row_blk,
-                                                   col_blk] = result_oid
-                else:
-                    self.df._block_partitions[row_blk, col_blk] = result_oid
-
-                col_item_index += col_len
-            row_item_index += row_len
-
-
-class _Loc_Indexer(_Location_Indexer_Base):
-    """A indexer for ray_df.loc[] functionality"""
-
-    def __getitem__(self, key):
-        row_loc, col_loc, ndim = _parse_tuple(key)
-        self._handle_enlargement(row_loc, col_loc)
-        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
-        ndim = self._expand_dim(row_lookup, col_lookup, ndim)
-        result = super(_Loc_Indexer, self).__getitem__(row_lookup, col_lookup,
-                                                       ndim)
-        return result
-
-    def __setitem__(self, key, item):
-        row_loc, col_loc, _ = _parse_tuple(key)
-        self._handle_enlargement(row_loc, col_loc)
-        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
-        super(_Loc_Indexer, self).__setitem__(row_lookup, col_lookup,
-                                              item)
-
-    def _handle_enlargement(self, row_loc, col_loc):
-        """Handle Enlargement (if there is one).
-
-        Returns:
-            None
-        """
-        locators = [row_loc, col_loc]
-        coord_dfs = [self.row_coord_df, self.col_coord_df]
-        axis = ['row', 'col']
-        metadata = {'row': self.df._row_metadata, 'col': self.df._col_metadata}
-
-        for loc, coord, axis in zip(locators, coord_dfs, axis):
-            if _is_enlargement(loc, coord):
-                new_meta = self._enlarge_axis(loc, axis=axis)
-                _warn_enlargement()
-                metadata[axis] = new_meta
-
-        self.row_coord_df = metadata['row']._coord_df
-        self.col_coord_df = metadata['col']._coord_df
-
-    def _enlarge_axis(self, locator, axis):
-        """Add rows/columns to block partitions according to locator.
-
-        Returns:
-            metadata (_IndexMetadata)
-        """
-        # 1. Prepare variables
-        row_based_bool = axis == 'row'
-        # major == the axis of the locator
-        major_meta = self.df._row_metadata if row_based_bool \
-            else self.df._col_metadata
-        minor_meta = self.df._col_metadata if row_based_bool \
-            else self.df._row_metadata
-
-        # 2. Compute the nan labels and add blocks
-        nan_labels = self._compute_enlarge_labels(locator, major_meta.index)
-        num_nan_labels = len(nan_labels)
-        blk_part_n_row, blk_part_n_col = self.block_oids.shape
-
-        nan_blk_lens = minor_meta._lengths
-        nan_blks = np.array([[
-            _get_nan_block_id(
-                num_nan_labels, n_cols, transpose=not row_based_bool)
-            for n_cols in nan_blk_lens
-        ]])
-        nan_blks = nan_blks.T if not row_based_bool else nan_blks
-
-        self.block_oids = np.concatenate(
-            [self.block_oids, nan_blks], axis=0 if row_based_bool else 1)
-
-        # 3. Prepare metadata to return
-        nan_coord_df = pandas.DataFrame(data=[{
-            '': name,
-            'partition': blk_part_n_row if row_based_bool else blk_part_n_col,
-            'index_within_partition': i
-        } for name, i in zip(nan_labels, np.arange(num_nan_labels))
-        ]).set_index('')
-
-        coord_df = pandas.concat([major_meta._coord_df, nan_coord_df])
-        coord_df = coord_df.loc[locator]  # Re-index that allows duplicates
-
-        lens = major_meta._lengths
-        lens = np.concatenate([lens, np.array([num_nan_labels])])
-
-        metadata_view = _IndexMetadata(coord_df_oid=coord_df, lengths_oid=lens)
-        return metadata_view
-
-    def _compute_enlarge_labels(self, locator, base_index):
-        """Helper for _enlarge_axis, compute common labels and extra labels.
-
-        Returns:
-             nan_labels: The labels needs to be added
-        """
-        # base_index_type can be pd.Index or pd.DatetimeIndex
-        # depending on user input and pandas behavior
-        # See issue #2264
-        base_index_type = type(base_index)
-        locator_as_index = base_index_type(locator)
-
-        nan_labels = locator_as_index.difference(base_index)
-        common_labels = locator_as_index.intersection(base_index)
-
-        if len(common_labels) == 0:
-            raise KeyError(
-                'None of [{labels}] are in the [{base_index_name}]'.format(
-                    labels=list(locator_as_index), base_index_name=base_index))
-
-        return nan_labels
-
-    def _expand_dim(self, row_lookup, col_lookup, ndim):
-        """Expand the dimension if necessary.
-        This method is for cases like duplicate labels.
-        """
-        many_rows = len(row_lookup) > 1
-        many_cols = len(col_lookup) > 1
-
-        if ndim == 0 and (many_rows or many_cols):
-            ndim = 1
-        if ndim == 1 and (many_rows and many_cols):
-            ndim = 2
-
-        return ndim
-
-    def _compute_lookup(self, row_loc, col_loc):
-        # We use reindex for list to avoid duplicates.
-        row_lookup = self.row_coord_df.loc[row_loc]
-        col_lookup = self.col_coord_df.loc[col_loc]
-        return row_lookup, col_lookup
-
-
-class _iLoc_Indexer(_Location_Indexer_Base):
-    """A indexer for ray_df.iloc[] functionality"""
-
-    def __getitem__(self, key):
-        row_loc, col_loc, ndim = _parse_tuple(key)
-
-        self._check_dtypes(row_loc)
-        self._check_dtypes(col_loc)
-
-        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
-        result = super(_iLoc_Indexer, self).__getitem__(
-            row_lookup, col_lookup, ndim)
-        return result
-
-    def __setitem__(self, key, item):
-        row_loc, col_loc, _ = _parse_tuple(key)
-
-        self._check_dtypes(row_loc)
-        self._check_dtypes(col_loc)
-
-        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
-        super(_iLoc_Indexer, self).__setitem__(
-            row_lookup, col_lookup, item)
-
-    def _compute_lookup(self, row_loc, col_loc):
-        # We use reindex for list to avoid duplicates.
-        return self.row_coord_df.iloc[row_loc], self.col_coord_df.iloc[col_loc]
-
-    def _check_dtypes(self, locator):
-        is_int = is_integer(locator)
-        is_int_slice = is_integer_slice(locator)
-        is_int_list = is_list_like(locator) and all(map(is_integer, locator))
-        is_bool_arr = is_boolean_array(locator)
-
-        if not any([is_int, is_int_slice, is_int_list, is_bool_arr]):
-            raise ValueError(_ILOC_INT_ONLY_ERROR)
-
-
-class DataFrameView(DataFrame):
-    """A subclass of DataFrame where the index can be smaller than blocks.
-    """
-
-    def __init__(self, block_partitions, row_metadata, col_metadata, index,
-                 columns):
-        self._block_partitions = block_partitions
-        self._row_metadata = row_metadata
-        self._col_metadata = col_metadata
-        self.index = index
-        self.columns = columns
-
-    def _get_block_partitions(self):
-        oid_arr = _mask_block_partitions(self._block_partitions_data,
-                                         self._row_metadata,
-                                         self._col_metadata)
-        return oid_arr
-
-    def _set_block_partitions(self, new_block_partitions):
-        self._block_partitions_data = new_block_partitions
-
-    _block_partitions = property(_get_block_partitions, _set_block_partitions)
@@ -1,524 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from itertools import chain
-from io import BytesIO
-import os
-import re
-import warnings
-import pandas
-
-from pyarrow.parquet import ParquetFile
-from pandas.io.common import _infer_compression  # don't depend on internal API
-
-
-from .dataframe import ray, DataFrame
-from . import get_npartitions
-from .utils import from_pandas
-
-
-# Parquet
-def read_parquet(path, engine='auto', columns=None, **kwargs):
-    """Load a parquet object from the file path, returning a DataFrame.
-    Ray DataFrame only supports pyarrow engine for now.
-
-    Args:
-        path: The filepath of the parquet file.
-              We only support local files for now.
-        engine: Ray only support pyarrow reader.
-                This argument doesn't do anything for now.
-        columns: Column names to read. When None, every schema name that
-                 does not look like ``__index_level_<n>__`` is read.
-        kwargs: Pass into parquet's read_row_group function.
-
-    Returns:
-        A DataFrame built from the row partitions of all row groups.
-    """
-    pf = ParquetFile(path)
-
-    n_rows = pf.metadata.num_rows
-    # Keep at least one row per chunk: with fewer rows than partitions the
-    # integer division gives 0, and a zero chunk size never shrinks the
-    # frame being split.
-    chunksize = max(1, n_rows // get_npartitions())
-    n_row_groups = pf.metadata.num_row_groups
-
-    if columns is None:
-        idx_regex = re.compile(r'__index_level_\d+__')
-        columns = [
-            name for name in pf.metadata.schema.names
-            if not idx_regex.match(name)
-        ]
-    else:
-        # Honour the caller's selection instead of overwriting it.
-        columns = list(columns)
-
-    df_from_row_groups = [
-        _read_parquet_row_group.remote(path, columns, i, kwargs)
-        for i in range(n_row_groups)
-    ]
-    splited_dfs = ray.get(
-        [_split_df.remote(df, chunksize) for df in df_from_row_groups])
-    df_remotes = list(chain.from_iterable(splited_dfs))
-
-    return DataFrame(row_partitions=df_remotes, columns=columns)
-
-
-@ray.remote
-def _read_parquet_row_group(path, columns, row_group_id, kwargs=None):
-    """Read one row group of a parquet file into a pandas DataFrame.
-
-    Args:
-        path: Path handed to ``ParquetFile``.
-        columns: Column names forwarded to ``read_row_group``.
-        row_group_id: Index of the row group to read.
-        kwargs: Optional dict of extra keyword arguments for
-            ``read_row_group``; None means no extra arguments.
-
-    Returns:
-        The row group converted with ``to_pandas()``.
-    """
-    # A fresh dict per call instead of a shared mutable default.
-    extra = {} if kwargs is None else kwargs
-    pf = ParquetFile(path)
-    table = pf.read_row_group(row_group_id, columns=columns, **extra)
-    return table.to_pandas()
-
-
-@ray.remote
-def _split_df(pandas_df, chunksize):
-    """Split a pandas_df into partitions of at most ``chunksize`` rows.
-
-    Every partition, not just the last one, is given a fresh index via
-    ``reset_index(drop=True)`` before it is stored with ``ray.put``.
-
-    Args:
-        pandas_df: The frame to split.
-        chunksize: Maximum number of rows per partition. Values below 1 are
-            treated as 1 so the loop always makes progress.
-
-    Returns:
-        remote_df_ids ([ObjectID])
-    """
-    chunksize = max(1, chunksize)
-    dataframes = []
-
-    while len(pandas_df) > chunksize:
-        # reset_index returns a new frame; the result must be kept.
-        head = pandas_df[:chunksize].reset_index(drop=True)
-        dataframes.append(ray.put(head))
-        pandas_df = pandas_df[chunksize:]
-
-    # Whatever is left (possibly an empty frame) becomes the last partition.
-    dataframes.append(ray.put(pandas_df.reset_index(drop=True)))
-
-    return dataframes
-
-
-# CSV
-def _compute_offset(fn, npartitions, ignore_first_line=False):
-    """Calculate byte ranges that split a csv file on line boundaries.
-
-    Args:
-        fn: Path of the file to split.
-        npartitions: Number of partitions the byte count is divided by to
-            get the target chunk size (at least one byte).
-        ignore_first_line: When True the first line is excluded from the
-            ranges, so the first range starts right after it.
-
-    Returns:
-        A list of (start, end) tuples. ``end`` is the position of the
-        newline that closes the range, or the file size when the range runs
-        to the end of the file, so a final line without a trailing newline
-        is covered completely by ``read(end - start)``.
-    """
-    total_bytes = os.path.getsize(fn)
-    offsets = []
-
-    # The context manager closes the handle even if a read fails.
-    with open(fn, 'rb') as bio:
-        if ignore_first_line:
-            start = len(bio.readline())
-            chunksize = (total_bytes - start) // npartitions
-        else:
-            start = 0
-            chunksize = total_bytes // npartitions
-        if chunksize == 0:
-            chunksize = 1
-
-        while start < total_bytes:
-            bio.seek(chunksize, 1)  # Move forward {chunksize} bytes
-            extend_line = bio.readline()  # Move after the next newline
-            if extend_line.endswith(b'\n'):
-                # The position of the newline we just crossed.
-                end = start + chunksize + len(extend_line) - 1
-                next_start = end + 1
-            else:
-                # Reached EOF without a closing newline: take every
-                # remaining byte so the last character is not cut off.
-                end = total_bytes
-                next_start = total_bytes
-            offsets.append((start, end))
-            start = next_start
-
-    return offsets
-
-
-def _get_firstline(file_path):
-    """Return the first line of ``file_path`` as bytes.
-
-    The file is opened in binary mode; the returned value includes the line
-    terminator when the file has one, and is empty for an empty file.
-    """
-    # The context manager closes the handle even if readline raises.
-    with open(file_path, 'rb') as bio:
-        return bio.readline()
-
-
-def _infer_column(first_line, kwargs=None):
-    """Return the column index pandas derives from a single csv line.
-
-    Args:
-        first_line: Bytes of the line to parse.
-        kwargs: Optional dict of keyword arguments for ``pandas.read_csv``;
-            None means no extra arguments.
-
-    Returns:
-        The ``columns`` attribute of the parsed frame.
-    """
-    # A fresh dict per call instead of a shared mutable default.
-    options = {} if kwargs is None else kwargs
-    return pandas.read_csv(BytesIO(first_line), **options).columns
-
-
-@ray.remote
-def _read_csv_with_offset(fn, start, end, kwargs=None, header=b''):
-    """Parse the byte range [start, end) of a csv file with pandas.
-
-    Args:
-        fn: Path of the csv file.
-        start: Byte position where reading begins.
-        end: Byte position where reading stops (exclusive).
-        kwargs: Optional dict of keyword arguments for ``pandas.read_csv``.
-            It is copied, so the caller's dict is left untouched.
-        header: Bytes placed in front of the range before parsing.
-
-    Returns:
-        A ``(pandas_df, index)`` pair: the parsed frame with its index
-        replaced by a RangeIndex, and the index it had after parsing.
-    """
-    # Work on a copy so neither the caller's dict nor a shared default is
-    # mutated by the quoting conversion below.
-    kwargs = dict(kwargs) if kwargs else {}
-    if "quoting" in kwargs:
-        kwargs["quoting"] = int(kwargs["quoting"])  # See issue #2078
-
-    # The context manager closes the handle even if seek/read raises.
-    with open(fn, 'rb') as bio:
-        bio.seek(start)
-        to_read = header + bio.read(end - start)
-
-    pandas_df = pandas.read_csv(BytesIO(to_read), **kwargs)
-    index = pandas_df.index
-    # Partitions must have RangeIndex
-    pandas_df.index = pandas.RangeIndex(0, len(pandas_df))
-    return pandas_df, index
-
-
-@ray.remote
-def get_index(*partition_indices):
-    """Append all remaining partition indices onto the first one.
-
-    Args:
-        partition_indices: The per-partition index objects, in order.
-
-    Returns:
-        The result of calling ``append`` on the first index with the tuple
-        of all following ones.
-    """
-    first = partition_indices[0]
-    remaining = partition_indices[1:]
-    return first.append(remaining)
-
-
-def read_csv(filepath_or_buffer,
-             sep=',',
-             delimiter=None,
-             header='infer',
-             names=None,
-             index_col=None,
-             usecols=None,
-             squeeze=False,
-             prefix=None,
-             mangle_dupe_cols=True,
-             dtype=None,
-             engine=None,
-             converters=None,
-             true_values=None,
-             false_values=None,
-             skipinitialspace=False,
-             skiprows=None,
-             nrows=None,
-             na_values=None,
-             keep_default_na=True,
-             na_filter=True,
-             verbose=False,
-             skip_blank_lines=True,
-             parse_dates=False,
-             infer_datetime_format=False,
-             keep_date_col=False,
-             date_parser=None,
-             dayfirst=False,
-             iterator=False,
-             chunksize=None,
-             compression='infer',
-             thousands=None,
-             decimal=b'.',
-             lineterminator=None,
-             quotechar='"',
-             quoting=0,
-             escapechar=None,
-             comment=None,
-             encoding=None,
-             dialect=None,
-             tupleize_cols=None,
-             error_bad_lines=True,
-             warn_bad_lines=True,
-             skipfooter=0,
-             skip_footer=0,
-             doublequote=True,
-             delim_whitespace=False,
-             as_recarray=None,
-             compact_ints=None,
-             use_unsigned=None,
-             low_memory=True,
-             buffer_lines=None,
-             memory_map=False,
-             float_precision=None):
-    """Read a csv file, splitting an uncompressed local path into partitions.
-
-    Args:
-        filepath_or_buffer: A ``str`` path or any other object accepted by
-            ``pandas.read_csv``. Only an uncompressed ``str`` path is read
-            in parallel; everything else is delegated to pandas.
-        All remaining parameters are collected into one dict and forwarded
-        unchanged to ``pandas.read_csv``.
-
-    Returns:
-        A DataFrame. On the pandas fallback path, a result that is not a
-        ``pandas.DataFrame`` is returned as pandas produced it.
-    """
-
-    # Gather every keyword into a dict so it can be forwarded as a unit,
-    # both to pandas directly and to the remote per-partition readers.
-    kwargs = {
-        'sep': sep,
-        'delimiter': delimiter,
-        'header': header,
-        'names': names,
-        'index_col': index_col,
-        'usecols': usecols,
-        'squeeze': squeeze,
-        'prefix': prefix,
-        'mangle_dupe_cols': mangle_dupe_cols,
-        'dtype': dtype,
-        'engine': engine,
-        'converters': converters,
-        'true_values': true_values,
-        'false_values': false_values,
-        'skipinitialspace': skipinitialspace,
-        'skiprows': skiprows,
-        'nrows': nrows,
-        'na_values': na_values,
-        'keep_default_na': keep_default_na,
-        'na_filter': na_filter,
-        'verbose': verbose,
-        'skip_blank_lines': skip_blank_lines,
-        'parse_dates': parse_dates,
-        'infer_datetime_format': infer_datetime_format,
-        'keep_date_col': keep_date_col,
-        'date_parser': date_parser,
-        'dayfirst': dayfirst,
-        'iterator': iterator,
-        'chunksize': chunksize,
-        'compression': compression,
-        'thousands': thousands,
-        'decimal': decimal,
-        'lineterminator': lineterminator,
-        'quotechar': quotechar,
-        'quoting': quoting,
-        'escapechar': escapechar,
-        'comment': comment,
-        'encoding': encoding,
-        'dialect': dialect,
-        'tupleize_cols': tupleize_cols,
-        'error_bad_lines': error_bad_lines,
-        'warn_bad_lines': warn_bad_lines,
-        'skipfooter': skipfooter,
-        'skip_footer': skip_footer,
-        'doublequote': doublequote,
-        'delim_whitespace': delim_whitespace,
-        'as_recarray': as_recarray,
-        'compact_ints': compact_ints,
-        'use_unsigned': use_unsigned,
-        'low_memory': low_memory,
-        'buffer_lines': buffer_lines,
-        'memory_map': memory_map,
-        'float_precision': float_precision,
-    }
-
-    # Default to Pandas read_csv for non-serializable objects
-    # (anything that is not a str) and for inputs where a compression
-    # scheme is inferred.
-    if not isinstance(filepath_or_buffer, str) or \
-            _infer_compression(filepath_or_buffer, compression) is not None:
-
-        warnings.warn("Defaulting to Pandas implementation",
-                      PendingDeprecationWarning)
-
-        pandas_obj = pandas.read_csv(filepath_or_buffer, **kwargs)
-        if isinstance(pandas_obj, pandas.DataFrame):
-            return from_pandas(pandas_obj, get_npartitions())
-
-        # Not a DataFrame: hand back the pandas result untouched.
-        return pandas_obj
-
-    filepath = filepath_or_buffer
-
-    # TODO: handle case where header is a list of lines
-    # Column labels come from parsing only the first line with the same
-    # keyword arguments.
-    first_line = _get_firstline(filepath)
-    columns = _infer_column(first_line, kwargs=kwargs)
-    if header is None or (header == "infer" and names is not None):
-        # The first line is treated as data here: keep it inside the byte
-        # ranges and prepend nothing to the partitions.
-        first_line = b""
-        ignore_first_line = False
-    else:
-        ignore_first_line = True
-
-    offsets = _compute_offset(filepath, get_npartitions(),
-                              ignore_first_line=ignore_first_line)
-
-    # Serialize objects to speed up later use in remote tasks
-    first_line_id = ray.put(first_line)
-    kwargs_id = ray.put(kwargs)
-
-    # NOTE(review): the same kwargs dict is applied to every byte range, so
-    # whole-file options such as nrows, skiprows or skipfooter take effect
-    # once per partition -- confirm whether that is intended.
-    df_obj_ids = []
-    index_obj_ids = []
-    for start, end in offsets:
-        if start != 0:
-            # Range does not begin at byte 0: the first line is passed
-            # along as the `header` argument of the remote reader.
-            df, index = _read_csv_with_offset._submit(
-                args=(filepath, start, end, kwargs_id, first_line_id),
-                num_return_vals=2)
-        else:
-            # Range begins at byte 0: the remote reader's default header
-            # is used.
-            df, index = _read_csv_with_offset._submit(
-                args=(filepath, start, end, kwargs_id),
-                num_return_vals=2)
-        df_obj_ids.append(df)
-        index_obj_ids.append(index)
-
-    # The combined index is only built when an index column was requested.
-    index = get_index.remote(*index_obj_ids) if index_col is not None else None
-
-    return DataFrame(row_partitions=df_obj_ids, columns=columns, index=index)
-
-
-def read_json(path_or_buf=None,
-              orient=None,
-              typ='frame',
-              dtype=True,
-              convert_axes=True,
-              convert_dates=True,
-              keep_default_dates=True,
-              numpy=False,
-              precise_float=False,
-              date_unit=None,
-              encoding=None,
-              lines=False,
-              chunksize=None,
-              compression='infer'):
-    """Read JSON with pandas and convert the result with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning, forwards every argument
-    positionally to ``pandas.read_json`` and partitions the result using
-    ``get_npartitions()``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    pandas_args = (path_or_buf, orient, typ, dtype, convert_axes,
-                   convert_dates, keep_default_dates, numpy, precise_float,
-                   date_unit, encoding, lines, chunksize, compression)
-    return from_pandas(pandas.read_json(*pandas_args), get_npartitions())
-
-
-def read_html(io,
-              match='.+',
-              flavor=None,
-              header=None,
-              index_col=None,
-              skiprows=None,
-              attrs=None,
-              parse_dates=False,
-              tupleize_cols=None,
-              thousands=',',
-              encoding=None,
-              decimal='.',
-              converters=None,
-              na_values=None,
-              keep_default_na=True):
-    """Read HTML with pandas and convert the first result.
-
-    Emits a PendingDeprecationWarning and forwards every argument
-    positionally to ``pandas.read_html``. Only element 0 of what pandas
-    returns is converted with ``from_pandas``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    pandas_args = (io, match, flavor, header, index_col, skiprows, attrs,
-                   parse_dates, tupleize_cols, thousands, encoding, decimal,
-                   converters, na_values, keep_default_na)
-    parsed = pandas.read_html(*pandas_args)
-    return from_pandas(parsed[0], get_npartitions())
-
-
-def read_clipboard(sep=r'\s+'):
-    """Read the clipboard with pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning before delegating.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    return from_pandas(pandas.read_clipboard(sep), get_npartitions())
-
-
-def read_excel(io,
-               sheet_name=0,
-               header=0,
-               skiprows=None,
-               skip_footer=0,
-               index_col=None,
-               names=None,
-               usecols=None,
-               parse_dates=False,
-               date_parser=None,
-               na_values=None,
-               thousands=None,
-               convert_float=True,
-               converters=None,
-               dtype=None,
-               true_values=None,
-               false_values=None,
-               engine=None,
-               squeeze=False):
-    """Read an Excel source with pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning, forwards every argument
-    positionally to ``pandas.read_excel`` and partitions the result using
-    ``get_npartitions()``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    pandas_args = (io, sheet_name, header, skiprows, skip_footer, index_col,
-                   names, usecols, parse_dates, date_parser, na_values,
-                   thousands, convert_float, converters, dtype, true_values,
-                   false_values, engine, squeeze)
-    return from_pandas(pandas.read_excel(*pandas_args), get_npartitions())
-
-
-def read_hdf(path_or_buf,
-             key=None,
-             mode='r'):
-    """Read an HDF source with pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning and forwards ``path_or_buf``,
-    ``key`` and ``mode`` positionally to ``pandas.read_hdf``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    loaded = pandas.read_hdf(path_or_buf, key, mode)
-    return from_pandas(loaded, get_npartitions())
-
-
-def read_feather(path,
-                 nthreads=1):
-    """Read a feather file with pandas and convert with ``from_pandas``.
-
-    Args:
-        path: Location handed to ``pandas.read_feather``.
-        nthreads: Forwarded to ``pandas.read_feather`` (it was accepted but
-            silently dropped before).
-
-    Returns:
-        The pandas result converted using ``get_npartitions()``.
-    """
-
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    port_frame = pandas.read_feather(path, nthreads=nthreads)
-    ray_frame = from_pandas(port_frame, get_npartitions())
-
-    return ray_frame
-
-
-def read_msgpack(path_or_buf,
-                 encoding='utf-8',
-                 iterator=False):
-    """Read msgpack data with pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning and forwards all three arguments
-    positionally to ``pandas.read_msgpack``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    loaded = pandas.read_msgpack(path_or_buf, encoding, iterator)
-    return from_pandas(loaded, get_npartitions())
-
-
-def read_stata(filepath_or_buffer,
-               convert_dates=True,
-               convert_categoricals=True,
-               encoding=None,
-               index_col=None,
-               convert_missing=False,
-               preserve_dtypes=True,
-               columns=None,
-               order_categoricals=True,
-               chunksize=None,
-               iterator=False):
-    """Read a Stata source with pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning, forwards every argument
-    positionally to ``pandas.read_stata`` and partitions the result using
-    ``get_npartitions()``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    pandas_args = (filepath_or_buffer, convert_dates, convert_categoricals,
-                   encoding, index_col, convert_missing, preserve_dtypes,
-                   columns, order_categoricals, chunksize, iterator)
-    return from_pandas(pandas.read_stata(*pandas_args), get_npartitions())
-
-
-def read_sas(filepath_or_buffer,
-             format=None,
-             index=None,
-             encoding=None,
-             chunksize=None,
-             iterator=False):
-    """Read a SAS source with pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning and forwards every argument
-    positionally to ``pandas.read_sas``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    pandas_args = (filepath_or_buffer, format, index, encoding, chunksize,
-                   iterator)
-    return from_pandas(pandas.read_sas(*pandas_args), get_npartitions())
-
-
-def read_pickle(path,
-                compression='infer'):
-    """Read a pickle with pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning and forwards ``path`` and
-    ``compression`` positionally to ``pandas.read_pickle``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    loaded = pandas.read_pickle(path, compression)
-    return from_pandas(loaded, get_npartitions())
-
-
-def read_sql(sql,
-             con,
-             index_col=None,
-             coerce_float=True,
-             params=None,
-             parse_dates=None,
-             columns=None,
-             chunksize=None):
-    """Run a SQL read through pandas and convert with ``from_pandas``.
-
-    Emits a PendingDeprecationWarning and forwards every argument
-    positionally to ``pandas.read_sql``.
-    """
-    warnings.warn("Defaulting to Pandas implementation",
-                  PendingDeprecationWarning)
-
-    pandas_args = (sql, con, index_col, coerce_float, params, parse_dates,
-                   columns, chunksize)
-    return from_pandas(pandas.read_sql(*pandas_args), get_npartitions())
@@ -1,29 +0,0 @@
-from collections import Iterator
-
-
-class PartitionIterator(Iterator):
-    """Iterate over the items of several partitions as one flat stream."""
-
-    def __init__(self, partitions, func):
-        """Set up iteration state; nothing is consumed yet.
-
-        Args:
-            partitions ([ObjectID]): Partitions to iterate over
-            func (callable): The function to get inner iterables from
-                each partition
-        """
-        self.partitions = iter(partitions)
-        self.func = func
-        # Start with an exhausted cache so the first call to next() pulls
-        # the first partition.
-        self.iter_cache = iter([])
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        return self.next()
-
-    def next(self):
-        """Return the next item, moving on to later partitions as needed.
-
-        StopIteration from the partition iterator itself ends the stream.
-        """
-        while True:
-            try:
-                return next(self.iter_cache)
-            except StopIteration:
-                # Current partition is used up; load the following one.
-                upcoming = next(self.partitions)
-                self.iter_cache = self.func(upcoming)
@@ -1,94 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import inspect
-
-
-def code_gen(pandas_obj, ray_obj, path):
-    """Write stub definitions for members pandas has and Ray lacks.
-
-    Each stub raises NotImplementedError. Names that start with exactly one
-    underscore, or that contain "attr", are left out. Members whose
-    signature cannot be inspected because of a TypeError are emitted as
-    properties; those failing with a ValueError are skipped.
-
-    Args:
-        pandas_obj: The pandas object to generate code from.
-        ray_obj: The ray object to diff against.
-        path: Path to output the file to.
-    """
-    stub_body = "    raise NotImplementedError(\"Not Yet implemented.\")\n"
-
-    with open(path, "w") as outfile:
-        for name in pandas_ray_diff(pandas_obj, ray_obj):
-            is_private = name[0] == "_" and name[1] != "_"
-            if is_private or "attr" in name:
-                # let's not mess with these
-                continue
-
-            try:
-                signature = str(inspect.signature(getattr(pandas_obj, name)))
-                outfile.write("\ndef " + name + signature + ":\n")
-            except TypeError:
-                outfile.write("\n@property")
-                outfile.write("\ndef " + name + "(self):\n")
-            except ValueError:
-                continue
-
-            outfile.write(stub_body)
-
-
-def code_gen_test(ray_obj, path, name):
-    """Append one generated test per member of ``ray_obj`` to ``path``.
-
-    Every generated test expects NotImplementedError. Callable members are
-    invoked with ``None`` for each parameter (one fewer when there is more
-    than one parameter); members whose signature cannot be inspected are
-    only referenced, not called. Names starting with exactly one underscore
-    are skipped.
-
-    Args:
-        ray_obj: The Ray object whose members are listed with ``dir``.
-        path: File opened in append mode.
-        name: Suffix used in the generated variable and factory names.
-    """
-    with open(path, "a") as outfile:
-        for member in dir(ray_obj):
-            if member[0] == "_" and member[1] != "_":
-                continue
-
-            outfile.write("\n\ndef test_" + member + "():\n")
-            outfile.write(
-                "    ray_" + name + " = create_test_" + name + "()\n\n" +
-                "    with pytest.raises(NotImplementedError):\n" +
-                "        ray_" + name + "." + member)
-
-            try:
-                signature = inspect.signature(getattr(ray_obj, member))
-            except (TypeError, ValueError, NotImplementedError):
-                # Not inspectable: leave a bare attribute access.
-                outfile.write("\n")
-                continue
-
-            arg_count = len(signature.parameters)
-            if arg_count > 1:
-                arg_count -= 1
-
-            outfile.write("(" + ", ".join(["None"] * arg_count) + ")\n")
-
-
-def pandas_ray_diff(pandas_obj, ray_obj):
-    """Gets the diff of the methods in the Pandas and Ray objects.
-
-    Names that start with a single underscore are ignored; names without a
-    leading underscore and double-underscore names are kept.
-
-    Args:
-        pandas_obj: The Pandas object to diff.
-        ray_obj: The Ray object to diff.
-
-    Returns:
-        A list of names present on the Pandas object but not on the Ray
-        object, in the order ``dir(pandas_obj)`` yields them.
-    """
-    # Build the lookup set once instead of once per candidate name.
-    ray_funcs = set(dir(ray_obj))
-
-    # startswith() is safe for one-character names such as "_", where
-    # indexing the second character would raise IndexError.
-    return [
-        func for func in dir(pandas_obj)
-        if (func.startswith("__") or not func.startswith("_"))
-        and func not in ray_funcs
-    ]
@@ -1,125 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ray
-import pandas
-import numpy as np
-
-from pandas import compat
-from pandas.core.dtypes.common import is_list_like
-from itertools import cycle
-
-from .dataframe import DataFrame
-from .utils import _deploy_func
-
-
-def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
-                columns=None, sparse=False, drop_first=False):
-    """Convert categorical variable into indicator variables.
-
-    Inputs that are not a Ray DataFrame are passed straight to
-    ``pandas.get_dummies``. For a Ray DataFrame the encoding runs once per
-    column partition and the results are assembled into a new DataFrame.
-
-    Args:
-        data (array-like, Series, or DataFrame): data to encode.
-        prefix (string, [string]): Prefix to apply to each encoded column
-                                   label.
-        prefix_sep (string, [string]): Separator between prefix and value.
-        dummy_na (bool): Add a column to indicate NaNs.
-        columns: Which columns to encode.
-        sparse (bool): Not Implemented: If True, returns SparseDataFrame.
-        drop_first (bool): Whether to remove the first level of encoded data.
-
-    Returns:
-        DataFrame or one-hot encoded data.
-
-    Raises:
-        NotImplementedError: If ``sparse`` is true for a Ray DataFrame.
-        ValueError: If a list-like ``prefix`` or ``prefix_sep`` does not
-            have one entry per encoded column.
-    """
-    if not isinstance(data, DataFrame):
-        return pandas.get_dummies(data, prefix=prefix, prefix_sep=prefix_sep,
-                                  dummy_na=dummy_na, columns=columns,
-                                  sparse=sparse, drop_first=drop_first)
-
-    if sparse:
-        raise NotImplementedError(
-            "SparseDataFrame is not implemented. "
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    if columns is None:
-        # Default selection: object and categorical columns.
-        columns_to_encode = data.dtypes.isin([np.dtype("O"), 'category'])
-        columns_to_encode = data.columns[columns_to_encode]
-    else:
-        columns_to_encode = columns
-
-    def check_len(item, name):
-        len_msg = ("Length of '{name}' ({len_item}) did not match the "
-                   "length of the columns being encoded ({len_enc}).")
-
-        if is_list_like(item):
-            if not len(item) == len(columns_to_encode):
-                len_msg = len_msg.format(name=name, len_item=len(item),
-                                         len_enc=len(columns_to_encode))
-                raise ValueError(len_msg)
-
-    check_len(prefix, 'prefix')
-    check_len(prefix_sep, 'prefix_sep')
-    # Normalise prefix to one entry per encoded column.
-    if isinstance(prefix, compat.string_types):
-        prefix = cycle([prefix])
-        prefix = [next(prefix) for i in range(len(columns_to_encode))]
-    if isinstance(prefix, dict):
-        prefix = [prefix[col] for col in columns_to_encode]
-
-    if prefix is None:
-        prefix = columns_to_encode
-
-    # validate separators
-    if isinstance(prefix_sep, compat.string_types):
-        prefix_sep = cycle([prefix_sep])
-        prefix_sep = [next(prefix_sep) for i in range(len(columns_to_encode))]
-    elif isinstance(prefix_sep, dict):
-        prefix_sep = [prefix_sep[col] for col in columns_to_encode]
-
-    if set(columns_to_encode) == set(data.columns):
-        with_dummies = []
-        # Index() without data is rejected by the pinned pandas release;
-        # an explicit empty list gives the intended empty index.
-        dropped_columns = pandas.Index([])
-    else:
-        with_dummies = data.drop(columns_to_encode, axis=1)._col_partitions
-        dropped_columns = data.columns.drop(columns_to_encode)
-
-    def get_dummies_remote(df, to_drop, prefix, prefix_sep):
-        df = df.drop(to_drop, axis=1)
-
-        if df.size == 0:
-            return df, df.columns
-
-        df = pandas.get_dummies(df, prefix=prefix, prefix_sep=prefix_sep,
-                                dummy_na=dummy_na, columns=None, sparse=sparse,
-                                drop_first=drop_first)
-        columns = df.columns
-        df.columns = pandas.RangeIndex(0, len(df.columns))
-        return df, columns
-
-    # `total` counts the encoded columns handled so far, so each partition
-    # receives its own slice of prefix / prefix_sep.
-    total = 0
-    columns = []
-    for i, part in enumerate(data._col_partitions):
-        col_index = data._col_metadata.partition_series(i)
-
-        # TODO(kunalgosar): Handle the case of duplicate columns here
-        to_encode = col_index.index.isin(columns_to_encode)
-
-        to_encode = col_index[to_encode]
-        to_drop = col_index.drop(to_encode.index)
-
-        result = _deploy_func._submit(
-            args=(get_dummies_remote, part, to_drop,
-                  prefix[total:total + len(to_encode)],
-                  prefix_sep[total:total + len(to_encode)]),
-            num_return_vals=2)
-
-        with_dummies.append(result[0])
-        columns.append(result[1])
-        total += len(to_encode)
-
-    columns = ray.get(columns)
-    dropped_columns = dropped_columns.append(columns)
-
-    return DataFrame(col_partitions=with_dummies,
-                     columns=dropped_columns,
-                     index=data.index)
@@ -1,970 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import pandas
-
-from .utils import _inherit_docstrings
-
-
-def na_op():
-    """Placeholder for the NA-handling helper that pandas has.
-
-    Raises:
-        NotImplementedError: Always.
-    """
-    raise NotImplementedError("Not Yet implemented.")
-
-
-@_inherit_docstrings(pandas.Series, excluded=[pandas.Series,
-                                              pandas.Series.__init__])
-class Series(object):
-
-    def __init__(self, series_oids):
-        """Constructor for a Series object.
-
-        Args:
-            series_oids ([ObjectID]): The list of remote Series objects.
-        """
-        self.series_oids = series_oids
-
-    @property
-    def T(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __abs__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __add__(self, right, name='__add__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __and__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __array__(self, result=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __array_prepare__(self, result, context=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def __array_priority__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __array_wrap__(self, result, context=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __bool__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __bytes__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __class__(self, data=None, index=None, dtype=None, name=None,
-                  copy=False, fastpath=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __contains__(self, key):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __copy__(self, deep=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __deepcopy__(self, memo=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __delitem__(self, key):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __dir__(self):
-        return list(type(self).__dict__.keys())
-
-    def __div__(self, right, name='__truediv__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __divmod__(self, right, name='__divmod__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def __doc__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __eq__(self, other, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __finalize__(self, other, method=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __float__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __floordiv__(self, right, name='__floordiv__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __ge__(self, other, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __getitem__(self, key):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __getstate__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __gt__(self, other, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __iadd__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __imul__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __int__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __invert__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __ipow__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __isub__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __iter__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __itruediv__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __le__(self, other, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __len__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __long__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __lt__(self, other, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __mod__(self, right, name='__mod__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __mul__(self, right, name='__mul__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __ne__(self, other, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __neg__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __nonzero__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __or__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __pow__(self, right, name='__pow__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __repr__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __round__(self, decimals=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __setitem__(self, key, value):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __setstate__(self, state):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __sizeof__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __str__(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __sub__(self, right, name='__sub__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __truediv__(self, right, name='__truediv__', na_op=na_op):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def __xor__(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def abs(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def add(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def add_prefix(self, prefix):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def add_suffix(self, suffix):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def agg(self, func, axis=0, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def aggregate(self, func, axis=0, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def align(self, other, join='outer', axis=None, level=None, copy=True,
-              fill_value=None, method=None, limit=None, fill_axis=0,
-              broadcast_axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def all(self, axis=None, bool_only=None, skipna=None, level=None,
-            **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def any(self, axis=None, bool_only=None, skipna=None, level=None,
-            **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def append(self, to_append, ignore_index=False, verify_integrity=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def apply(self, func, convert_dtype=True, args=(), **kwds):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def argmax(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def argmin(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def argsort(self, axis=0, kind='quicksort', order=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def as_blocks(self, copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def as_matrix(self, columns=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def asfreq(self, freq, method=None, how=None, normalize=False,
-               fill_value=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def asof(self, where, subset=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def astype(self, dtype, copy=True, errors='raise', **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def at(self, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def at_time(self, time, asof=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def autocorr(self, lag=1):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def between(self, left, right, inclusive=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def between_time(self, start_time, end_time, include_start=True,
-                     include_end=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def bool(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def clip_lower(self, threshold, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def clip_upper(self, threshold, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def combine(self, other, func, fill_value=np.nan):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def combine_first(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def compound(self, axis=None, skipna=None, level=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def compress(self, condition, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def consolidate(self, inplace=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def convert_objects(self, convert_dates=True, convert_numeric=False,
-                        convert_timedeltas=True, copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def copy(self, deep=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def corr(self, other, method='pearson', min_periods=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def count(self, level=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def cov(self, other, min_periods=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def cummax(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def cummin(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def cumprod(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def cumsum(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def describe(self, percentiles=None, include=None, exclude=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def diff(self, periods=1):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def div(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def divide(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def dot(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def drop_duplicates(self, keep='first', inplace=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def dropna(self, axis=0, inplace=False, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def duplicated(self, keep='first'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def eq(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def equals(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def ewm(self, com=None, span=None, halflife=None, alpha=None,
-            min_periods=0, freq=None, adjust=True, ignore_na=False, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def expanding(self, min_periods=1, freq=None, center=False, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def factorize(self, sort=False, na_sentinel=-1):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def ffill(self, axis=None, inplace=False, limit=None, downcast=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def fillna(self, value=None, method=None, axis=None, inplace=False,
-               limit=None, downcast=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def filter(self, items=None, like=None, regex=None, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def first(self, offset):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def first_valid_index(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def floordiv(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def from_array(self, arr, index=None, name=None, dtype=None, copy=False,
-                   fastpath=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def from_csv(self, path, sep=',', parse_dates=True, header=None,
-                 index_col=0, encoding=None, infer_datetime_format=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def ge(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def get(self, key, default=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def get_dtype_counts(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def get_ftype_counts(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def get_value(self, label, takeable=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def get_values(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
-                group_keys=True, squeeze=False, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def gt(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def head(self, n=5):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def hist(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None,
-             ylabelsize=None, yrot=None, figsize=None, bins=10, **kwds):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def iat(self, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def idxmax(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def idxmin(self, axis=None, skipna=True, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def iloc(self, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
-                    limit_direction='forward', downcast=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def isin(self, values):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def isnull(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def item(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def items(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def iteritems(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def ix(self, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def keys(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def kurt(self, axis=None, skipna=None, level=None, numeric_only=None,
-             **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def kurtosis(self, axis=None, skipna=None, level=None, numeric_only=None,
-                 **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def last(self, offset):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def last_valid_index(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def le(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def loc(self, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def lt(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def mad(self, axis=None, skipna=None, level=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def map(self, arg, na_action=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None,
-             try_cast=False, raise_on_error=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def max(self, axis=None, skipna=None, level=None, numeric_only=None,
-            **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def mean(self, axis=None, skipna=None, level=None, numeric_only=None,
-             **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def median(self, axis=None, skipna=None, level=None, numeric_only=None,
-               **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def memory_usage(self, index=True, deep=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def min(self, axis=None, skipna=None, level=None, numeric_only=None,
-            **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def mod(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def mode(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def mul(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def multiply(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def ne(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def nlargest(self, n=5, keep='first'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def nonzero(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def notnull(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def nsmallest(self, n=5, keep='first'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def nunique(self, dropna=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
-                   **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def pipe(self, func, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def plot(self, kind='line', ax=None, figsize=None, use_index=True,
-             title=None, grid=None, legend=False, style=None, logx=False,
-             logy=False, loglog=False, xticks=None, yticks=None, xlim=None,
-             ylim=None, rot=None, fontsize=None, colormap=None, table=False,
-             yerr=None, xerr=None, label=None, secondary_y=False, **kwds):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def pop(self, item):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def pow(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def prod(self, axis=None, skipna=None, level=None, numeric_only=None,
-             **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def product(self, axis=None, skipna=None, level=None, numeric_only=None,
-                **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def ptp(self, axis=None, skipna=None, level=None, numeric_only=None,
-            **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def put(self, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def quantile(self, q=0.5, interpolation='linear'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def radd(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rank(self, axis=0, method='average', numeric_only=None,
-             na_option='keep', ascending=True, pct=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def ravel(self, order='C'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rdiv(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def reindex(self, index=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def reindex_axis(self, labels, axis=0, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def reindex_like(self, other, method=None, copy=True, limit=None,
-                     tolerance=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rename(self, index=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rename_axis(self, mapper, axis=0, copy=True, inplace=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def reorder_levels(self, order):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def repeat(self, repeats, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def replace(self, to_replace=None, value=None, inplace=False, limit=None,
-                regex=False, method='pad', axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
-                 label=None, convention='start', kind=None, loffset=None,
-                 limit=None, base=0, on=None, level=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def reset_index(self, level=None, drop=False, name=None, inplace=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def reshape(self, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rfloordiv(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rmod(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rmul(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rolling(self, window, min_periods=None, freq=None, center=False,
-                win_type=None, on=None, axis=0, closed=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def round(self, decimals=0, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rpow(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rsub(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def rtruediv(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def sample(self, n=None, frac=None, replace=False, weights=None,
-               random_state=None, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def searchsorted(self, value, side='left', sorter=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def select(self, crit, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def sem(self, axis=None, skipna=None, level=None, ddof=1,
-            numeric_only=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def set_axis(self, axis, labels):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def set_value(self, label, value, takeable=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def shift(self, periods=1, freq=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def skew(self, axis=None, skipna=None, level=None, numeric_only=None,
-             **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def slice_shift(self, periods=1, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
-                   kind='quicksort', na_position='last', sort_remaining=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def sort_values(self, axis=0, ascending=True, inplace=False,
-                    kind='quicksort', na_position='last'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def sortlevel(self, level=0, ascending=True, sort_remaining=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def squeeze(self, axis=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def std(self, axis=None, skipna=None, level=None, ddof=1,
-            numeric_only=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def sub(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def subtract(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def sum(self, axis=None, skipna=None, level=None, numeric_only=None,
-            **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def swapaxes(self, axis1, axis2, copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def swaplevel(self, i=-2, j=-1, copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def tail(self, n=5):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_clipboard(self, excel=None, sep=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_csv(self, path=None, index=True, sep=',', na_rep='',
-               float_format=None, header=False, index_label=None, mode='w',
-               encoding=None, date_format=None, decimal='.'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_dense(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_dict(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
-                 float_format=None, columns=None, header=True, index=True,
-                 index_label=None, startrow=0, startcol=0, engine=None,
-                 merge_cells=True, encoding=None, inf_rep='inf',
-                 verbose=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_frame(self, name=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_hdf(self, path_or_buf, key, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_json(self, path_or_buf=None, orient=None, date_format=None,
-                double_precision=10, force_ascii=True, date_unit='ms',
-                default_handler=None, lines=False):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_latex(self, buf=None, columns=None, col_space=None, header=True,
-                 index=True, na_rep='NaN', formatters=None, float_format=None,
-                 sparsify=None, index_names=True, bold_rows=False,
-                 column_format=None, longtable=None, escape=None,
-                 encoding=None, decimal='.', multicolumn=None,
-                 multicolumn_format=None, multirow=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_period(self, freq=None, copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_pickle(self, path, compression='infer'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_sparse(self, kind='block', fill_value=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
-               index=True, index_label=None, chunksize=None, dtype=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_string(self, buf=None, na_rep='NaN', float_format=None,
-                  header=True, index=True, length=False, dtype=False,
-                  name=False, max_rows=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_timestamp(self, freq=None, how='start', copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def to_xarray(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def tolist(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def transform(self, func, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def transpose(self, *args, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def truediv(self, other, level=None, fill_value=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def truncate(self, before=None, after=None, axis=None, copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def tshift(self, periods=1, freq=None, axis=0):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def tz_convert(self, tz, axis=0, level=None, copy=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def tz_localize(self, tz, axis=0, level=None, copy=True,
-                    ambiguous='raise'):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def unique(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def unstack(self, level=-1, fill_value=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def upandasate(self, other):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def valid(self, inplace=False, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def value_counts(self, normalize=False, sort=True, ascending=False,
-                     bins=None, dropna=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def var(self, axis=None, skipna=None, level=None, ddof=1,
-            numeric_only=None, **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def view(self, dtype=None):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
-              try_cast=False, raise_on_error=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    def xs(key, axis=0, level=None, drop_level=True):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def asobject(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def axes(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def base(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def blocks(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def data(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def dtype(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def dtypes(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def empty(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def flags(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def ftype(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def ftypes(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def hasnans(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def imag(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def index(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def is_copy(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def is_monotonic(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def is_monotonic_decreasing(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def is_monotonic_increasing(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def is_unique(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def itemsize(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def name(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def nbytes(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def ndim(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def real(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def shape(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def size(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def strides(self):
-        raise NotImplementedError("Not Yet implemented.")
-
-    @property
-    def values(self):
-        raise NotImplementedError("Not Yet implemented.")
@@ -1,127 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pytest
-import pandas
-import ray.dataframe as pd
-from ray.dataframe.utils import (
-    to_pandas,
-    from_pandas
-)
-
-
-@pytest.fixture
-def ray_df_equals_pandas(ray_df, pandas_df):
-    return to_pandas(ray_df).sort_index().equals(pandas_df.sort_index())
-
-
-@pytest.fixture
-def generate_dfs():
-    df = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                           'col2': [4, 5, 6, 7],
-                           'col3': [8, 9, 10, 11],
-                           'col4': [12, 13, 14, 15],
-                           'col5': [0, 0, 0, 0]})
-
-    df2 = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                            'col2': [4, 5, 6, 7],
-                            'col3': [8, 9, 10, 11],
-                            'col6': [12, 13, 14, 15],
-                            'col7': [0, 0, 0, 0]})
-    return df, df2
-
-
-@pytest.fixture
-def generate_none_dfs():
-    df = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                           'col2': [4, 5, None, 7],
-                           'col3': [8, 9, 10, 11],
-                           'col4': [12, 13, 14, 15],
-                           'col5': [None, None, None, None]})
-
-    df2 = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                            'col2': [4, 5, 6, 7],
-                            'col3': [8, 9, 10, 11],
-                            'col6': [12, 13, 14, 15],
-                            'col7': [0, 0, 0, 0]})
-    return df, df2
-
-
-@pytest.fixture
-def test_df_concat():
-    df, df2 = generate_dfs()
-
-    assert(ray_df_equals_pandas(pd.concat([df, df2]),
-                                pandas.concat([df, df2])))
-
-
-def test_ray_concat():
-    df, df2 = generate_dfs()
-    ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2)
-
-    assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2]),
-                                pandas.concat([df, df2]))
-
-
-def test_ray_concat_on_index():
-    df, df2 = generate_dfs()
-    ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2)
-
-    assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis='index'),
-                                pandas.concat([df, df2], axis='index'))
-
-    assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis='rows'),
-                                pandas.concat([df, df2], axis='rows'))
-
-    assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis=0),
-                                pandas.concat([df, df2], axis=0))
-
-
-def test_ray_concat_on_column():
-    df, df2 = generate_dfs()
-    ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2)
-
-    assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis=1),
-                                pandas.concat([df, df2], axis=1))
-
-    assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis="columns"),
-                                pandas.concat([df, df2], axis="columns"))
-
-
-def test_invalid_axis_errors():
-    df, df2 = generate_dfs()
-    ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2)
-
-    with pytest.raises(ValueError):
-        pd.concat([ray_df, ray_df2], axis=2)
-
-
-def test_mixed_concat():
-    df, df2 = generate_dfs()
-    df3 = df.copy()
-
-    mixed_dfs = [from_pandas(df, 2), from_pandas(df2, 2), df3]
-
-    assert(ray_df_equals_pandas(pd.concat(mixed_dfs),
-                                pandas.concat([df, df2, df3])))
-
-
-def test_mixed_inner_concat():
-    df, df2 = generate_dfs()
-    df3 = df.copy()
-
-    mixed_dfs = [from_pandas(df, 2), from_pandas(df2, 2), df3]
-
-    assert(ray_df_equals_pandas(pd.concat(mixed_dfs, join='inner'),
-                                pandas.concat([df, df2, df3], join='inner')))
-
-
-def test_mixed_none_concat():
-    df, df2 = generate_none_dfs()
-    df3 = df.copy()
-
-    mixed_dfs = [from_pandas(df, 2), from_pandas(df2, 2), df3]
-
-    assert(ray_df_equals_pandas(pd.concat(mixed_dfs),
-                                pandas.concat([df, df2, df3])))
@@ -1,589 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pytest
-import sys
-import pandas
-import numpy as np
-import ray.dataframe as pd
-from ray.dataframe.utils import (
-    from_pandas,
-    to_pandas)
-
-PY2 = False
-if sys.version_info.major < 3:
-    PY2 = True
-
-
-@pytest.fixture
-def ray_df_equals_pandas(ray_df, pandas_df):
-    assert isinstance(ray_df, pd.DataFrame)
-    assert to_pandas(ray_df).equals(pandas_df)
-
-
-@pytest.fixture
-def ray_df_almost_equals_pandas(ray_df, pandas_df):
-    assert isinstance(ray_df, pd.DataFrame)
-    difference = to_pandas(ray_df) - pandas_df
-    diff_max = difference.max().max()
-    assert to_pandas(ray_df).equals(pandas_df) or diff_max < 0.0001
-
-
-@pytest.fixture
-def ray_series_equals_pandas(ray_df, pandas_df):
-    assert ray_df.equals(pandas_df)
-
-
-@pytest.fixture
-def ray_df_equals(ray_df1, ray_df2):
-    assert to_pandas(ray_df1).equals(to_pandas(ray_df2))
-
-
-@pytest.fixture
-def ray_groupby_equals_pandas(ray_groupby, pandas_groupby):
-    for g1, g2 in zip(ray_groupby, pandas_groupby):
-        assert g1[0] == g2[0]
-        ray_df_equals_pandas(g1[1], g2[1])
-
-
-def test_simple_row_groupby():
-    pandas_df = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                                  'col2': [4, 5, 6, 7],
-                                  'col3': [3, 8, 12, 10],
-                                  'col4': [17, 13, 16, 15],
-                                  'col5': [-4, -5, -6, -7]})
-
-    ray_df = from_pandas(pandas_df, 2)
-
-    by = [1, 2, 1, 2]
-    n = 1
-
-    ray_groupby = ray_df.groupby(by=by)
-    pandas_groupby = pandas_df.groupby(by=by)
-
-    ray_groupby_equals_pandas(ray_groupby, pandas_groupby)
-    test_ngroups(ray_groupby, pandas_groupby)
-    test_skew(ray_groupby, pandas_groupby)
-    test_ffill(ray_groupby, pandas_groupby)
-    test_sem(ray_groupby, pandas_groupby)
-    test_mean(ray_groupby, pandas_groupby)
-    test_any(ray_groupby, pandas_groupby)
-    test_min(ray_groupby, pandas_groupby)
-    test_idxmax(ray_groupby, pandas_groupby)
-    test_ndim(ray_groupby, pandas_groupby)
-    test_cumsum(ray_groupby, pandas_groupby)
-    test_pct_change(ray_groupby, pandas_groupby)
-    test_cummax(ray_groupby, pandas_groupby)
-
-    apply_functions = [lambda df: df.sum(), lambda df: -df]
-    for func in apply_functions:
-        test_apply(ray_groupby, pandas_groupby, func)
-
-    test_dtypes(ray_groupby, pandas_groupby)
-    test_first(ray_groupby, pandas_groupby)
-    test_backfill(ray_groupby, pandas_groupby)
-    test_cummin(ray_groupby, pandas_groupby)
-    test_bfill(ray_groupby, pandas_groupby)
-    test_idxmin(ray_groupby, pandas_groupby)
-    test_prod(ray_groupby, pandas_groupby)
-    test_std(ray_groupby, pandas_groupby)
-
-    agg_functions = ['min', 'max']
-    for func in agg_functions:
-        test_agg(ray_groupby, pandas_groupby, func)
-        test_aggregate(ray_groupby, pandas_groupby, func)
-
-    test_last(ray_groupby, pandas_groupby)
-    test_mad(ray_groupby, pandas_groupby)
-    test_rank(ray_groupby, pandas_groupby)
-    test_max(ray_groupby, pandas_groupby)
-    test_var(ray_groupby, pandas_groupby)
-    test_len(ray_groupby, pandas_groupby)
-    test_sum(ray_groupby, pandas_groupby)
-    test_ngroup(ray_groupby, pandas_groupby)
-    test_nunique(ray_groupby, pandas_groupby)
-    test_median(ray_groupby, pandas_groupby)
-    test_head(ray_groupby, pandas_groupby, n)
-    test_cumprod(ray_groupby, pandas_groupby)
-    test_cov(ray_groupby, pandas_groupby)
-
-    transform_functions = [lambda df: df + 4, lambda df: -df - 10]
-    for func in transform_functions:
-        test_transform(ray_groupby, pandas_groupby, func)
-
-    pipe_functions = [lambda dfgb: dfgb.sum()]
-    for func in pipe_functions:
-        test_pipe(ray_groupby, pandas_groupby, func)
-
-    test_corr(ray_groupby, pandas_groupby)
-    test_fillna(ray_groupby, pandas_groupby)
-    test_count(ray_groupby, pandas_groupby)
-    test_tail(ray_groupby, pandas_groupby, n)
-    test_quantile(ray_groupby, pandas_groupby)
-    test_take(ray_groupby, pandas_groupby)
-
-
-def test_single_group_row_groupby():
-    pandas_df = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                                  'col2': [4, 5, 36, 7],
-                                  'col3': [3, 8, 12, 10],
-                                  'col4': [17, 3, 16, 15],
-                                  'col5': [-4, 5, -6, -7]})
-
-    ray_df = from_pandas(pandas_df, 2)
-
-    by = [1, 1, 1, 1]
-    n = 6
-
-    ray_groupby = ray_df.groupby(by=by)
-    pandas_groupby = pandas_df.groupby(by=by)
-
-    ray_groupby_equals_pandas(ray_groupby, pandas_groupby)
-    test_ngroups(ray_groupby, pandas_groupby)
-    test_skew(ray_groupby, pandas_groupby)
-    test_ffill(ray_groupby, pandas_groupby)
-    test_sem(ray_groupby, pandas_groupby)
-    test_mean(ray_groupby, pandas_groupby)
-    test_any(ray_groupby, pandas_groupby)
-    test_min(ray_groupby, pandas_groupby)
-    test_idxmax(ray_groupby, pandas_groupby)
-    test_ndim(ray_groupby, pandas_groupby)
-    test_cumsum(ray_groupby, pandas_groupby)
-    test_pct_change(ray_groupby, pandas_groupby)
-    test_cummax(ray_groupby, pandas_groupby)
-
-    apply_functions = [lambda df: df.sum(), lambda df: -df]
-    for func in apply_functions:
-        test_apply(ray_groupby, pandas_groupby, func)
-
-    test_dtypes(ray_groupby, pandas_groupby)
-    test_first(ray_groupby, pandas_groupby)
-    test_backfill(ray_groupby, pandas_groupby)
-    test_cummin(ray_groupby, pandas_groupby)
-    test_bfill(ray_groupby, pandas_groupby)
-    test_idxmin(ray_groupby, pandas_groupby)
-    test_prod(ray_groupby, pandas_groupby)
-    test_std(ray_groupby, pandas_groupby)
-
-    agg_functions = ['min', 'max']
-    for func in agg_functions:
-        test_agg(ray_groupby, pandas_groupby, func)
-        test_aggregate(ray_groupby, pandas_groupby, func)
-
-    test_last(ray_groupby, pandas_groupby)
-    test_mad(ray_groupby, pandas_groupby)
-    test_rank(ray_groupby, pandas_groupby)
-    test_max(ray_groupby, pandas_groupby)
-    test_var(ray_groupby, pandas_groupby)
-    test_len(ray_groupby, pandas_groupby)
-    test_sum(ray_groupby, pandas_groupby)
-    test_ngroup(ray_groupby, pandas_groupby)
-    test_nunique(ray_groupby, pandas_groupby)
-    test_median(ray_groupby, pandas_groupby)
-    test_head(ray_groupby, pandas_groupby, n)
-    test_cumprod(ray_groupby, pandas_groupby)
-    test_cov(ray_groupby, pandas_groupby)
-
-    transform_functions = [lambda df: df + 4, lambda df: -df - 10]
-    for func in transform_functions:
-        test_transform(ray_groupby, pandas_groupby, func)
-
-    pipe_functions = [lambda dfgb: dfgb.sum()]
-    for func in pipe_functions:
-        test_pipe(ray_groupby, pandas_groupby, func)
-
-    test_corr(ray_groupby, pandas_groupby)
-    test_fillna(ray_groupby, pandas_groupby)
-    test_count(ray_groupby, pandas_groupby)
-    test_tail(ray_groupby, pandas_groupby, n)
-    test_quantile(ray_groupby, pandas_groupby)
-    test_take(ray_groupby, pandas_groupby)
-
-
-def test_large_row_groupby():
-    pandas_df = pandas.DataFrame(np.random.randint(0, 8, size=(100, 4)),
-                                 columns=list('ABCD'))
-
-    ray_df = from_pandas(pandas_df, 2)
-
-    by = pandas_df['A'].tolist()
-    n = 4
-
-    ray_groupby = ray_df.groupby(by=by)
-    pandas_groupby = pandas_df.groupby(by=by)
-
-    ray_groupby_equals_pandas(ray_groupby, pandas_groupby)
-    test_ngroups(ray_groupby, pandas_groupby)
-    test_skew(ray_groupby, pandas_groupby)
-    test_ffill(ray_groupby, pandas_groupby)
-    test_sem(ray_groupby, pandas_groupby)
-    test_mean(ray_groupby, pandas_groupby)
-    test_any(ray_groupby, pandas_groupby)
-    test_min(ray_groupby, pandas_groupby)
-    test_idxmax(ray_groupby, pandas_groupby)
-    test_ndim(ray_groupby, pandas_groupby)
-    test_cumsum(ray_groupby, pandas_groupby)
-    test_pct_change(ray_groupby, pandas_groupby)
-    test_cummax(ray_groupby, pandas_groupby)
-
-    apply_functions = [lambda df: df.sum(), lambda df: -df]
-    for func in apply_functions:
-        test_apply(ray_groupby, pandas_groupby, func)
-
-    test_dtypes(ray_groupby, pandas_groupby)
-    test_first(ray_groupby, pandas_groupby)
-    test_backfill(ray_groupby, pandas_groupby)
-    test_cummin(ray_groupby, pandas_groupby)
-    test_bfill(ray_groupby, pandas_groupby)
-    test_idxmin(ray_groupby, pandas_groupby)
-    # test_prod(ray_groupby, pandas_groupby) causes overflows
-    test_std(ray_groupby, pandas_groupby)
-
-    agg_functions = ['min', 'max']
-    for func in agg_functions:
-        test_agg(ray_groupby, pandas_groupby, func)
-        test_aggregate(ray_groupby, pandas_groupby, func)
-
-    test_last(ray_groupby, pandas_groupby)
-    test_mad(ray_groupby, pandas_groupby)
-    test_rank(ray_groupby, pandas_groupby)
-    test_max(ray_groupby, pandas_groupby)
-    test_var(ray_groupby, pandas_groupby)
-    test_len(ray_groupby, pandas_groupby)
-    test_sum(ray_groupby, pandas_groupby)
-    test_ngroup(ray_groupby, pandas_groupby)
-    test_nunique(ray_groupby, pandas_groupby)
-    test_median(ray_groupby, pandas_groupby)
-    test_head(ray_groupby, pandas_groupby, n)
-    # test_cumprod(ray_groupby, pandas_groupby) causes overflows
-    test_cov(ray_groupby, pandas_groupby)
-
-    transform_functions = [lambda df: df + 4, lambda df: -df - 10]
-    for func in transform_functions:
-        test_transform(ray_groupby, pandas_groupby, func)
-
-    pipe_functions = [lambda dfgb: dfgb.sum()]
-    for func in pipe_functions:
-        test_pipe(ray_groupby, pandas_groupby, func)
-
-    test_corr(ray_groupby, pandas_groupby)
-    test_fillna(ray_groupby, pandas_groupby)
-    test_count(ray_groupby, pandas_groupby)
-    test_tail(ray_groupby, pandas_groupby, n)
-    test_quantile(ray_groupby, pandas_groupby)
-    test_take(ray_groupby, pandas_groupby)
-
-
-def test_simple_col_groupby():
-    pandas_df = pandas.DataFrame({'col1': [0, 3, 2, 3],
-                                  'col2': [4, 1, 6, 7],
-                                  'col3': [3, 8, 2, 10],
-                                  'col4': [1, 13, 6, 15],
-                                  'col5': [-4, 5, 6, -7]})
-
-    ray_df = from_pandas(pandas_df, 2)
-
-    by = [1, 2, 3, 2, 1]
-
-    ray_groupby = ray_df.groupby(axis=1, by=by)
-    pandas_groupby = pandas_df.groupby(axis=1, by=by)
-
-    ray_groupby_equals_pandas(ray_groupby, pandas_groupby)
-    test_ngroups(ray_groupby, pandas_groupby)
-    test_skew(ray_groupby, pandas_groupby)
-    test_ffill(ray_groupby, pandas_groupby)
-    test_sem(ray_groupby, pandas_groupby)
-    test_mean(ray_groupby, pandas_groupby)
-    test_any(ray_groupby, pandas_groupby)
-    test_min(ray_groupby, pandas_groupby)
-    test_ndim(ray_groupby, pandas_groupby)
-
-    if not PY2:
-        # idxmax and idxmin fail on column groupby in pandas with python2
-        test_idxmax(ray_groupby, pandas_groupby)
-        test_idxmin(ray_groupby, pandas_groupby)
-        test_rank(ray_groupby, pandas_groupby)
-        test_quantile(ray_groupby, pandas_groupby)
-
-    # https://github.com/pandas-dev/pandas/issues/21127
-    # test_cumsum(ray_groupby, pandas_groupby)
-    # test_cummax(ray_groupby, pandas_groupby)
-    # test_cummin(ray_groupby, pandas_groupby)
-    # test_cumprod(ray_groupby, pandas_groupby)
-
-    test_pct_change(ray_groupby, pandas_groupby)
-    apply_functions = [lambda df: -df, lambda df: df.sum(axis=1)]
-    for func in apply_functions:
-        test_apply(ray_groupby, pandas_groupby, func)
-
-    test_first(ray_groupby, pandas_groupby)
-    test_backfill(ray_groupby, pandas_groupby)
-    test_bfill(ray_groupby, pandas_groupby)
-    test_prod(ray_groupby, pandas_groupby)
-    test_std(ray_groupby, pandas_groupby)
-    test_last(ray_groupby, pandas_groupby)
-    test_mad(ray_groupby, pandas_groupby)
-    test_max(ray_groupby, pandas_groupby)
-    test_var(ray_groupby, pandas_groupby)
-    test_len(ray_groupby, pandas_groupby)
-    test_sum(ray_groupby, pandas_groupby)
-
-    # Pandas fails on this case with ValueError
-    # test_ngroup(ray_groupby, pandas_groupby)
-    # test_nunique(ray_groupby, pandas_groupby)
-    test_median(ray_groupby, pandas_groupby)
-    test_cov(ray_groupby, pandas_groupby)
-
-    transform_functions = [lambda df: df + 4, lambda df: -df - 10]
-    for func in transform_functions:
-        test_transform(ray_groupby, pandas_groupby, func)
-
-    pipe_functions = [lambda dfgb: dfgb.sum()]
-    for func in pipe_functions:
-        test_pipe(ray_groupby, pandas_groupby, func)
-
-    test_corr(ray_groupby, pandas_groupby)
-    test_fillna(ray_groupby, pandas_groupby)
-    test_count(ray_groupby, pandas_groupby)
-    test_take(ray_groupby, pandas_groupby)
-
-
-@pytest.fixture
-def test_ngroups(ray_groupby, pandas_groupby):
-    assert ray_groupby.ngroups == pandas_groupby.ngroups
-
-
-@pytest.fixture
-def test_skew(ray_groupby, pandas_groupby):
-    ray_df_almost_equals_pandas(ray_groupby.skew(), pandas_groupby.skew())
-
-
-@pytest.fixture
-def test_ffill(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.ffill(), pandas_groupby.ffill())
-
-
-@pytest.fixture
-def test_sem(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.sem()
-
-
-@pytest.fixture
-def test_mean(ray_groupby, pandas_groupby):
-    ray_df_almost_equals_pandas(ray_groupby.mean(), pandas_groupby.mean())
-
-
-@pytest.fixture
-def test_any(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.any(), pandas_groupby.any())
-
-
-@pytest.fixture
-def test_min(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.min(), pandas_groupby.min())
-
-
-@pytest.fixture
-def test_idxmax(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.idxmax(), pandas_groupby.idxmax())
-
-
-@pytest.fixture
-def test_ndim(ray_groupby, pandas_groupby):
-    assert ray_groupby.ndim == pandas_groupby.ndim
-
-
-@pytest.fixture
-def test_cumsum(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.cumsum(), pandas_groupby.cumsum())
-    ray_df_equals_pandas(ray_groupby.cumsum(axis=1),
-                         pandas_groupby.cumsum(axis=1))
-
-
-@pytest.fixture
-def test_pct_change(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.pct_change()
-
-
-@pytest.fixture
-def test_cummax(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.cummax(), pandas_groupby.cummax())
-    ray_df_equals_pandas(ray_groupby.cummax(axis=1),
-                         pandas_groupby.cummax(axis=1))
-
-
-@pytest.fixture
-def test_apply(ray_groupby, pandas_groupby, func):
-    ray_df_equals_pandas(ray_groupby.apply(func), pandas_groupby.apply(func))
-
-
-@pytest.fixture
-def test_dtypes(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.dtypes, pandas_groupby.dtypes)
-
-
-@pytest.fixture
-def test_first(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.first()
-
-
-@pytest.fixture
-def test_backfill(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.backfill(), pandas_groupby.backfill())
-
-
-@pytest.fixture
-def test_cummin(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.cummin(), pandas_groupby.cummin())
-    ray_df_equals_pandas(ray_groupby.cummin(axis=1),
-                         pandas_groupby.cummin(axis=1))
-
-
-@pytest.fixture
-def test_bfill(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.bfill(), pandas_groupby.bfill())
-
-
-@pytest.fixture
-def test_idxmin(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.idxmin(), pandas_groupby.idxmin())
-
-
-@pytest.fixture
-def test_prod(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.prod(), pandas_groupby.prod())
-
-
-@pytest.fixture
-def test_std(ray_groupby, pandas_groupby):
-    ray_df_almost_equals_pandas(ray_groupby.std(), pandas_groupby.std())
-
-
-@pytest.fixture
-def test_aggregate(ray_groupby, pandas_groupby, func):
-    ray_df_equals_pandas(ray_groupby.aggregate(func),
-                         pandas_groupby.aggregate(func))
-
-
-@pytest.fixture
-def test_agg(ray_groupby, pandas_groupby, func):
-    ray_df_equals_pandas(ray_groupby.agg(func), pandas_groupby.agg(func))
-
-
-@pytest.fixture
-def test_last(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.last()
-
-
-@pytest.fixture
-def test_mad(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.mad()
-
-
-@pytest.fixture
-def test_rank(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.rank(), pandas_groupby.rank())
-
-
-@pytest.fixture
-def test_max(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.max(), pandas_groupby.max())
-
-
-@pytest.fixture
-def test_var(ray_groupby, pandas_groupby):
-    ray_df_almost_equals_pandas(ray_groupby.var(), pandas_groupby.var())
-
-
-@pytest.fixture
-def test_len(ray_groupby, pandas_groupby):
-    assert len(ray_groupby) == len(pandas_groupby)
-
-
-@pytest.fixture
-def test_sum(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.sum(), pandas_groupby.sum())
-
-
-@pytest.fixture
-def test_ngroup(ray_groupby, pandas_groupby):
-    ray_series_equals_pandas(ray_groupby.ngroup(), pandas_groupby.ngroup())
-
-
-@pytest.fixture
-def test_nunique(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.nunique(), pandas_groupby.nunique())
-
-
-@pytest.fixture
-def test_median(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.median(), pandas_groupby.median())
-
-
-@pytest.fixture
-def test_head(ray_groupby, pandas_groupby, n):
-    ray_df_equals_pandas(ray_groupby.head(n=n), pandas_groupby.head(n=n))
-
-
-@pytest.fixture
-def test_cumprod(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.cumprod(), pandas_groupby.cumprod())
-    ray_df_equals_pandas(ray_groupby.cumprod(axis=1),
-                         pandas_groupby.cumprod(axis=1))
-
-
-@pytest.fixture
-def test_cov(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.cov()
-
-
-@pytest.fixture
-def test_transform(ray_groupby, pandas_groupby, func):
-    ray_df_equals_pandas(ray_groupby.transform(func),
-                         pandas_groupby.transform(func))
-
-
-@pytest.fixture
-def test_corr(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.corr()
-
-
-@pytest.fixture
-def test_fillna(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.fillna(method="ffill"),
-                         pandas_groupby.fillna(method="ffill"))
-
-
-@pytest.fixture
-def test_count(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.count(), pandas_groupby.count())
-
-
-@pytest.fixture
-def test_pipe(ray_groupby, pandas_groupby, func):
-    ray_df_equals_pandas(ray_groupby.pipe(func), pandas_groupby.pipe(func))
-
-
-@pytest.fixture
-def test_tail(ray_groupby, pandas_groupby, n):
-    ray_df_equals_pandas(ray_groupby.tail(n=n), pandas_groupby.tail(n=n))
-
-
-@pytest.fixture
-def test_quantile(ray_groupby, pandas_groupby):
-    ray_df_equals_pandas(ray_groupby.quantile(q=0.4),
-                         pandas_groupby.quantile(q=0.4))
-
-
-@pytest.fixture
-def test_take(ray_groupby, pandas_groupby):
-    with pytest.raises(NotImplementedError):
-        ray_groupby.take(indices=[1])
@@ -1,675 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pytest
-import numpy as np
-import pandas
-from ray.dataframe.utils import to_pandas
-import ray.dataframe as pd
-import os
-import sqlite3
-
-TEST_PARQUET_FILENAME = 'test.parquet'
-TEST_CSV_FILENAME = 'test.csv'
-TEST_JSON_FILENAME = 'test.json'
-TEST_HTML_FILENAME = 'test.html'
-TEST_EXCEL_FILENAME = 'test.xlsx'
-TEST_FEATHER_FILENAME = 'test.feather'
-TEST_HDF_FILENAME = 'test.hdf'
-TEST_MSGPACK_FILENAME = 'test.msg'
-TEST_STATA_FILENAME = 'test.dta'
-TEST_PICKLE_FILENAME = 'test.pkl'
-TEST_SAS_FILENAME = os.getcwd() + '/data/test1.sas7bdat'
-TEST_SQL_FILENAME = 'test.db'
-SMALL_ROW_SIZE = 2000
-LARGE_ROW_SIZE = 7e6
-
-
-@pytest.fixture
-def ray_df_equals_pandas(ray_df, pandas_df):
-    return to_pandas(ray_df).sort_index().equals(pandas_df.sort_index())
-
-
-@pytest.fixture
-def setup_parquet_file(row_size, force=False):
-    if os.path.exists(TEST_PARQUET_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_parquet(TEST_PARQUET_FILENAME)
-
-
-@pytest.fixture
-def create_test_ray_dataframe():
-    df = pd.DataFrame({'col1': [0, 1, 2, 3],
-                       'col2': [4, 5, 6, 7],
-                       'col3': [8, 9, 10, 11],
-                       'col4': [12, 13, 14, 15],
-                       'col5': [0, 0, 0, 0]})
-
-    return df
-
-
-@pytest.fixture
-def create_test_pandas_dataframe():
-    df = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                           'col2': [4, 5, 6, 7],
-                           'col3': [8, 9, 10, 11],
-                           'col4': [12, 13, 14, 15],
-                           'col5': [0, 0, 0, 0]})
-
-    return df
-
-
-@pytest.fixture
-def test_files_eq(path1, path2):
-    with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
-        file1_content = file1.read()
-        file2_content = file2.read()
-
-        if file1_content == file2_content:
-            return True
-        else:
-            return False
-
-
-@pytest.fixture
-def teardown_test_file(test_path):
-    if os.path.exists(test_path):
-        os.remove(test_path)
-
-
-@pytest.fixture
-def teardown_parquet_file():
-    if os.path.exists(TEST_PARQUET_FILENAME):
-        os.remove(TEST_PARQUET_FILENAME)
-
-
-@pytest.fixture
-def setup_csv_file(row_size, force=False, delimiter=','):
-    if os.path.exists(TEST_CSV_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_csv(TEST_CSV_FILENAME, sep=delimiter)
-
-
-@pytest.fixture
-def teardown_csv_file():
-    if os.path.exists(TEST_CSV_FILENAME):
-        os.remove(TEST_CSV_FILENAME)
-
-
-@pytest.fixture
-def setup_json_file(row_size, force=False):
-    if os.path.exists(TEST_JSON_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_json(TEST_JSON_FILENAME)
-
-
-@pytest.fixture
-def teardown_json_file():
-    if os.path.exists(TEST_JSON_FILENAME):
-        os.remove(TEST_JSON_FILENAME)
-
-
-@pytest.fixture
-def setup_html_file(row_size, force=False):
-    if os.path.exists(TEST_HTML_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_html(TEST_HTML_FILENAME)
-
-
-@pytest.fixture
-def teardown_html_file():
-    if os.path.exists(TEST_HTML_FILENAME):
-        os.remove(TEST_HTML_FILENAME)
-
-
-@pytest.fixture
-def setup_clipboard(row_size, force=False):
-    df = pandas.DataFrame({
-        'col1': np.arange(row_size),
-        'col2': np.arange(row_size)
-    })
-    df.to_clipboard()
-
-
-@pytest.fixture
-def setup_excel_file(row_size, force=False):
-    if os.path.exists(TEST_EXCEL_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_excel(TEST_EXCEL_FILENAME)
-
-
-@pytest.fixture
-def teardown_excel_file():
-    if os.path.exists(TEST_EXCEL_FILENAME):
-        os.remove(TEST_EXCEL_FILENAME)
-
-
-@pytest.fixture
-def setup_feather_file(row_size, force=False):
-    if os.path.exists(TEST_FEATHER_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_feather(TEST_FEATHER_FILENAME)
-
-
-@pytest.fixture
-def teardown_feather_file():
-    if os.path.exists(TEST_FEATHER_FILENAME):
-        os.remove(TEST_FEATHER_FILENAME)
-
-
-@pytest.fixture
-def setup_hdf_file(row_size, force=False):
-    if os.path.exists(TEST_HDF_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_hdf(TEST_HDF_FILENAME, 'test')
-
-
-@pytest.fixture
-def teardown_hdf_file():
-    if os.path.exists(TEST_HDF_FILENAME):
-        os.remove(TEST_HDF_FILENAME)
-
-
-@pytest.fixture
-def setup_msgpack_file(row_size, force=False):
-    if os.path.exists(TEST_MSGPACK_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_msgpack(TEST_MSGPACK_FILENAME)
-
-
-@pytest.fixture
-def teardown_msgpack_file():
-    if os.path.exists(TEST_MSGPACK_FILENAME):
-        os.remove(TEST_MSGPACK_FILENAME)
-
-
-@pytest.fixture
-def setup_stata_file(row_size, force=False):
-    if os.path.exists(TEST_STATA_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_stata(TEST_STATA_FILENAME)
-
-
-@pytest.fixture
-def teardown_stata_file():
-    if os.path.exists(TEST_STATA_FILENAME):
-        os.remove(TEST_STATA_FILENAME)
-
-
-@pytest.fixture
-def setup_pickle_file(row_size, force=False):
-    if os.path.exists(TEST_PICKLE_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({
-            'col1': np.arange(row_size),
-            'col2': np.arange(row_size)
-        })
-        df.to_pickle(TEST_PICKLE_FILENAME)
-
-
-@pytest.fixture
-def teardown_pickle_file():
-    if os.path.exists(TEST_PICKLE_FILENAME):
-        os.remove(TEST_PICKLE_FILENAME)
-
-
-@pytest.fixture
-def setup_sql_file(conn, force=False):
-    if os.path.exists(TEST_SQL_FILENAME) and not force:
-        pass
-    else:
-        df = pandas.DataFrame({'col1': [0, 1, 2, 3],
-                               'col2': [4, 5, 6, 7],
-                               'col3': [8, 9, 10, 11],
-                               'col4': [12, 13, 14, 15],
-                               'col5': [0, 0, 0, 0]})
-        df.to_sql(TEST_SQL_FILENAME.split(".")[0], conn)
-
-
-@pytest.fixture
-def teardown_sql_file():
-    if os.path.exists(TEST_SQL_FILENAME):
-        os.remove(TEST_SQL_FILENAME)
-
-
-def test_from_parquet_small():
-
-    setup_parquet_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
-    ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_parquet_file()
-
-
-def test_from_parquet_large():
-    setup_parquet_file(LARGE_ROW_SIZE)
-
-    pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
-    ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_parquet_file()
-
-
-def test_from_csv():
-    setup_csv_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_csv(TEST_CSV_FILENAME)
-    ray_df = pd.read_csv(TEST_CSV_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_csv_file()
-
-
-def test_from_json():
-    setup_json_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_json(TEST_JSON_FILENAME)
-    ray_df = pd.read_json(TEST_JSON_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_json_file()
-
-
-def test_from_html():
-    setup_html_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_html(TEST_HTML_FILENAME)[0]
-    ray_df = pd.read_html(TEST_HTML_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_html_file()
-
-
-@pytest.mark.skip(reason="No clipboard on Travis")
-def test_from_clipboard():
-    setup_clipboard(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_clipboard()
-    ray_df = pd.read_clipboard()
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-
-def test_from_excel():
-    setup_excel_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_excel(TEST_EXCEL_FILENAME)
-    ray_df = pd.read_excel(TEST_EXCEL_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_excel_file()
-
-
-def test_from_feather():
-    setup_feather_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_feather(TEST_FEATHER_FILENAME)
-    ray_df = pd.read_feather(TEST_FEATHER_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_feather_file()
-
-
-@pytest.mark.skip(reason="Memory overflow on Travis")
-def test_from_hdf():
-    setup_hdf_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_hdf(TEST_HDF_FILENAME, key='test')
-    ray_df = pd.read_hdf(TEST_HDF_FILENAME, key='test')
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_hdf_file()
-
-
-def test_from_msgpack():
-    setup_msgpack_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_msgpack(TEST_MSGPACK_FILENAME)
-    ray_df = pd.read_msgpack(TEST_MSGPACK_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_msgpack_file()
-
-
-def test_from_stata():
-    setup_stata_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_stata(TEST_STATA_FILENAME)
-    ray_df = pd.read_stata(TEST_STATA_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_stata_file()
-
-
-def test_from_pickle():
-    setup_pickle_file(SMALL_ROW_SIZE)
-
-    pandas_df = pandas.read_pickle(TEST_PICKLE_FILENAME)
-    ray_df = pd.read_pickle(TEST_PICKLE_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_pickle_file()
-
-
-def test_from_sql():
-    conn = sqlite3.connect(TEST_SQL_FILENAME)
-    setup_sql_file(conn, True)
-
-    pandas_df = pandas.read_sql("select * from test", conn)
-    ray_df = pd.read_sql("select * from test", conn)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_sql_file()
-
-
-@pytest.mark.skip(reason="No SAS write methods in Pandas")
-def test_from_sas():
-    pandas_df = pandas.read_sas(TEST_SAS_FILENAME)
-    ray_df = pd.read_sas(TEST_SAS_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-
-def test_from_csv_delimiter():
-    setup_csv_file(SMALL_ROW_SIZE, delimiter='|')
-
-    pandas_df = pandas.read_csv(TEST_CSV_FILENAME)
-    ray_df = pd.read_csv(TEST_CSV_FILENAME)
-
-    assert ray_df_equals_pandas(ray_df, pandas_df)
-
-    teardown_csv_file()
-
-
-@pytest.mark.skip(reason="No clipboard on Travis")
-def test_to_clipboard():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    ray_df.to_clipboard()
-    ray_as_clip = pandas.read_clipboard()
-
-    pandas_df.to_clipboard()
-    pandas_as_clip = pandas.read_clipboard()
-
-    assert(ray_as_clip.equals(pandas_as_clip))
-
-
-def test_to_csv():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_CSV_DF_FILENAME = "test_df.csv"
-    TEST_CSV_pandas_FILENAME = "test_pandas.csv"
-
-    ray_df.to_csv(TEST_CSV_DF_FILENAME)
-    pandas_df.to_csv(TEST_CSV_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_CSV_DF_FILENAME,
-                         TEST_CSV_pandas_FILENAME))
-
-    teardown_test_file(TEST_CSV_pandas_FILENAME)
-    teardown_test_file(TEST_CSV_DF_FILENAME)
-
-
-def test_to_dense():
-    ray_df = create_test_ray_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_dense()
-
-
-def test_to_dict():
-    ray_df = create_test_ray_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_dict()
-
-
-def test_to_excel():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_EXCEL_DF_FILENAME = "test_df.xlsx"
-    TEST_EXCEL_pandas_FILENAME = "test_pandas.xlsx"
-
-    ray_writer = pandas.ExcelWriter(TEST_EXCEL_DF_FILENAME)
-    pandas_writer = pandas.ExcelWriter(TEST_EXCEL_pandas_FILENAME)
-
-    ray_df.to_excel(ray_writer)
-    pandas_df.to_excel(pandas_writer)
-
-    ray_writer.save()
-    pandas_writer.save()
-
-    assert(test_files_eq(TEST_EXCEL_DF_FILENAME,
-                         TEST_EXCEL_pandas_FILENAME))
-
-    teardown_test_file(TEST_EXCEL_DF_FILENAME)
-    teardown_test_file(TEST_EXCEL_pandas_FILENAME)
-
-
-def test_to_feather():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_FEATHER_DF_FILENAME = "test_df.feather"
-    TEST_FEATHER_pandas_FILENAME = "test_pandas.feather"
-
-    ray_df.to_feather(TEST_FEATHER_DF_FILENAME)
-    pandas_df.to_feather(TEST_FEATHER_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_FEATHER_DF_FILENAME,
-                         TEST_FEATHER_pandas_FILENAME))
-
-    teardown_test_file(TEST_FEATHER_pandas_FILENAME)
-    teardown_test_file(TEST_FEATHER_DF_FILENAME)
-
-
-def test_to_gbq():
-    ray_df = create_test_ray_dataframe()
-
-    TEST_GBQ_DF_FILENAME = "test_df.gbq"
-    with pytest.raises(NotImplementedError):
-        ray_df.to_gbq(TEST_GBQ_DF_FILENAME, None)
-
-
-def test_to_html():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_HTML_DF_FILENAME = "test_df.html"
-    TEST_HTML_pandas_FILENAME = "test_pandas.html"
-
-    ray_df.to_html(TEST_HTML_DF_FILENAME)
-    pandas_df.to_html(TEST_HTML_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_HTML_DF_FILENAME,
-                         TEST_HTML_pandas_FILENAME))
-
-    teardown_test_file(TEST_HTML_pandas_FILENAME)
-    teardown_test_file(TEST_HTML_DF_FILENAME)
-
-
-def test_to_json():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_JSON_DF_FILENAME = "test_df.json"
-    TEST_JSON_pandas_FILENAME = "test_pandas.json"
-
-    ray_df.to_json(TEST_JSON_DF_FILENAME)
-    pandas_df.to_json(TEST_JSON_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_JSON_DF_FILENAME,
-                         TEST_JSON_pandas_FILENAME))
-
-    teardown_test_file(TEST_JSON_pandas_FILENAME)
-    teardown_test_file(TEST_JSON_DF_FILENAME)
-
-
-def test_to_latex():
-    ray_df = create_test_ray_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_latex()
-
-
-def test_to_msgpack():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_MSGPACK_DF_FILENAME = "test_df.msgpack"
-    TEST_MSGPACK_pandas_FILENAME = "test_pandas.msgpack"
-
-    ray_df.to_msgpack(TEST_MSGPACK_DF_FILENAME)
-    pandas_df.to_msgpack(TEST_MSGPACK_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_MSGPACK_DF_FILENAME,
-                         TEST_MSGPACK_pandas_FILENAME))
-
-    teardown_test_file(TEST_MSGPACK_pandas_FILENAME)
-    teardown_test_file(TEST_MSGPACK_DF_FILENAME)
-
-
-def test_to_panel():
-    ray_df = create_test_ray_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_panel()
-
-
-def test_to_parquet():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_PARQUET_DF_FILENAME = "test_df.parquet"
-    TEST_PARQUET_pandas_FILENAME = "test_pandas.parquet"
-
-    ray_df.to_parquet(TEST_PARQUET_DF_FILENAME)
-    pandas_df.to_parquet(TEST_PARQUET_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_PARQUET_DF_FILENAME,
-                         TEST_PARQUET_pandas_FILENAME))
-
-    teardown_test_file(TEST_PARQUET_pandas_FILENAME)
-    teardown_test_file(TEST_PARQUET_DF_FILENAME)
-
-
-def test_to_period():
-    ray_df = create_test_ray_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_period()
-
-
-def test_to_pickle():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_PICKLE_DF_FILENAME = "test_df.pkl"
-    TEST_PICKLE_pandas_FILENAME = "test_pandas.pkl"
-
-    ray_df.to_pickle(TEST_PICKLE_DF_FILENAME)
-    pandas_df.to_pickle(TEST_PICKLE_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_PICKLE_DF_FILENAME,
-                         TEST_PICKLE_pandas_FILENAME))
-
-    teardown_test_file(TEST_PICKLE_pandas_FILENAME)
-    teardown_test_file(TEST_PICKLE_DF_FILENAME)
-
-
-def test_to_sql():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_SQL_DF_FILENAME = "test_df.sql"
-    TEST_SQL_pandas_FILENAME = "test_pandas.sql"
-
-    ray_df.to_pickle(TEST_SQL_DF_FILENAME)
-    pandas_df.to_pickle(TEST_SQL_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_SQL_DF_FILENAME,
-                         TEST_SQL_pandas_FILENAME))
-
-    teardown_test_file(TEST_SQL_DF_FILENAME)
-    teardown_test_file(TEST_SQL_pandas_FILENAME)
-
-
-def test_to_stata():
-    ray_df = create_test_ray_dataframe()
-    pandas_df = create_test_pandas_dataframe()
-
-    TEST_STATA_DF_FILENAME = "test_df.stata"
-    TEST_STATA_pandas_FILENAME = "test_pandas.stata"
-
-    ray_df.to_stata(TEST_STATA_DF_FILENAME)
-    pandas_df.to_stata(TEST_STATA_pandas_FILENAME)
-
-    assert(test_files_eq(TEST_STATA_DF_FILENAME,
-                         TEST_STATA_pandas_FILENAME))
-
-    teardown_test_file(TEST_STATA_pandas_FILENAME)
-    teardown_test_file(TEST_STATA_DF_FILENAME)
@@ -1,577 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import pandas
-import numpy as np
-import ray
-
-from . import get_npartitions
-
-
-_NAN_BLOCKS = {}
-_MEMOIZER_CAPACITY = 1000  # Capacity per function
-
-
-class LRUCache:
-    """A LRUCache implemented with collections.OrderedDict
-
-    Notes:
-        - OrderedDict will record the order each item is inserted.
-        - The head of the queue will be LRU items.
-    """
-
-    def __init__(self, capacity):
-        self.capacity = capacity
-        self.cache = collections.OrderedDict()
-
-    def __contains__(self, key):
-        return key in self.cache
-
-    def __getitem__(self, key):
-        """Retrieve item from cache and re-insert it to the back of the queue
-        """
-        value = self.cache.pop(key)
-        self.cache[key] = value
-        return value
-
-    def __setitem__(self, key, value):
-        if key in self.cache:
-            self.cache.pop(key)
-
-        if len(self.cache) >= self.capacity:
-            # Pop oldest items at the beginning of the queue
-            self.cache.popitem(last=False)
-
-        self.cache[key] = value
-
-
-class memoize:
-    """A basic memoizer that cache the input and output of the remote function
-
-    Notes:
-        - How is this implemented?
-          This meoizer is implemented by adding a caching layer to the remote
-          function's remote attribute. When user call f.remote(*args), we will
-          first check against the cache, and then call the ray remote function
-          if we can't find the return value in the cache.
-        - When should this be used?
-          This should be used when we anticipate temporal locality for the
-          function. For example, we can reasonally assume users will perform
-          columnar operation repetitively over time (like sum() or loc[]).
-        - Caveat
-          Don't use this decorator if the any argument to the remote function
-          will mutate. Following snippet will fail
-          ```py
-              @memoize
-              @ray.remote
-              def f(obj):
-                ...
-
-              mutable_obj = [1]
-              oid_1 = f.remote(mutable_obj) # will be cached
-
-              mutable_obj.append(3)
-              oid_2 = f.remote(mutable_obj) # cache hit!
-
-              oid_1 == oid_2 # True!
-           ```
-           In short, use this function sparingly. The ideal case is that all
-           inputs are ray ObjectIDs because they are immutable objects.
-        - Future Development
-          - Fix the mutability bug
-          - Dynamic cache size (Fixed as 1000 for now)
-    """
-
-    def __init__(self, f):
-        # Save of remote function
-        self.old_remote_func = f.remote
-        self.cache = LRUCache(capacity=_MEMOIZER_CAPACITY)
-
-    def remote(self, *args):
-        """Return cached result if the arguments are cached
-        """
-        args = tuple(args)
-
-        if args in self.cache:
-            cached_result = self.cache[args]
-            return cached_result
-
-        result = self.old_remote_func(*args)
-        self.cache[args] = result
-        return result
-
-
-def _get_nan_block_id(n_row=1, n_col=1, transpose=False):
-    """A memory efficent way to get a block of NaNs.
-
-    Args:
-        n_rows(int): number of rows
-        n_col(int): number of columns
-        transpose(bool): if true, swap rows and columns
-    Returns:
-        ObjectID of the NaN block
-    """
-    global _NAN_BLOCKS
-    if transpose:
-        n_row, n_col = n_col, n_row
-    shape = (n_row, n_col)
-    if shape not in _NAN_BLOCKS:
-        arr = np.tile(np.array(np.NaN), shape)
-        _NAN_BLOCKS[shape] = ray.put(pandas.DataFrame(data=arr))
-    return _NAN_BLOCKS[shape]
-
-
-def _get_lengths(df):
-    """Gets the length of the dataframe.
-    Args:
-        df: A remote pandas.DataFrame object.
-    Returns:
-        Returns an integer length of the dataframe object. If the attempt
-            fails, returns 0 as the length.
-    """
-    try:
-        return len(df)
-    # Because we sometimes have cases where we have summary statistics in our
-    # DataFrames
-    except TypeError:
-        return 0
-
-
-def _get_widths(df):
-    """Gets the width (number of columns) of the dataframe.
-    Args:
-        df: A remote pandas.DataFrame object.
-    Returns:
-        Returns an integer width of the dataframe object. If the attempt
-            fails, returns 0 as the length.
-    """
-    try:
-        return len(df.columns)
-    # Because we sometimes have cases where we have summary statistics in our
-    # DataFrames
-    except TypeError:
-        return 0
-
-
-def _partition_pandas_dataframe(df, num_partitions=None, row_chunksize=None):
-    """Partitions a Pandas DataFrame object.
-    Args:
-        df (pandas.DataFrame): The pandas DataFrame to convert.
-        npartitions (int): The number of partitions to split the DataFrame
-            into. Has priority over chunksize.
-        row_chunksize (int): The number of rows to put in each partition.
-    Returns:
-        [ObjectID]: A list of object IDs corresponding to the dataframe
-        partitions
-    """
-    if num_partitions is not None:
-        row_chunksize = len(df) // num_partitions \
-            if len(df) % num_partitions == 0 \
-            else len(df) // num_partitions + 1
-    else:
-        assert row_chunksize is not None
-
-    temp_df = df
-
-    row_partitions = []
-    while len(temp_df) > row_chunksize:
-        t_df = temp_df[:row_chunksize]
-        # reset_index here because we want a pandas.RangeIndex
-        # within the partitions. It is smaller and sometimes faster.
-        t_df.reset_index(drop=True, inplace=True)
-        t_df.columns = pandas.RangeIndex(0, len(t_df.columns))
-        top = ray.put(t_df)
-        row_partitions.append(top)
-        temp_df = temp_df[row_chunksize:]
-    else:
-        # Handle the last chunk correctly.
-        # This call is necessary to prevent modifying original df
-        temp_df = temp_df[:]
-        temp_df.reset_index(drop=True, inplace=True)
-        temp_df.columns = pandas.RangeIndex(0, len(temp_df.columns))
-        row_partitions.append(ray.put(temp_df))
-
-    return row_partitions
-
-
-def from_pandas(df, num_partitions=None, chunksize=None):
-    """Converts a pandas DataFrame to a Ray DataFrame.
-    Args:
-        df (pandas.DataFrame): The pandas DataFrame to convert.
-        num_partitions (int): The number of partitions to split the DataFrame
-            into. Has priority over chunksize.
-        chunksize (int): The number of rows to put in each partition.
-    Returns:
-        A new Ray DataFrame object.
-    """
-    from .dataframe import DataFrame
-
-    row_partitions = \
-        _partition_pandas_dataframe(df, num_partitions, chunksize)
-
-    return DataFrame(row_partitions=row_partitions,
-                     columns=df.columns,
-                     index=df.index)
-
-
-def to_pandas(df):
-    """Converts a Ray DataFrame to a pandas DataFrame/Series.
-    Args:
-        df (ray.DataFrame): The Ray DataFrame to convert.
-    Returns:
-        A new pandas DataFrame.
-    """
-    pandas_df = pandas.concat(ray.get(df._row_partitions), copy=False)
-    pandas_df.index = df.index
-    pandas_df.columns = df.columns
-    return pandas_df
-
-
-@ray.remote
-def extractor(df_chunk, row_loc, col_loc):
-    """Retrieve an item from remote block
-    """
-    # We currently have to do the writable flag trick because a pandas bug
-    # https://github.com/pandas-dev/pandas/issues/17192
-    try:
-        row_loc.flags.writeable = True
-        col_loc.flags.writeable = True
-    except AttributeError:
-        # Locators might be scaler or python list
-        pass
-    return df_chunk.iloc[row_loc, col_loc]
-
-
-@ray.remote
-def writer(df_chunk, row_loc, col_loc, item):
-    """Make a copy of the block and write new item to it
-    """
-    df_chunk = df_chunk.copy()
-    df_chunk.iloc[row_loc, col_loc] = item
-    return df_chunk
-
-
-def _mask_block_partitions(blk_partitions, row_metadata, col_metadata):
-    """Return the squeezed/expanded block partitions as defined by
-    row_metadata and col_metadata.
-
-    Note:
-        Very naive implementation. Extract one scaler at a time in a double
-        for loop.
-    """
-    col_df = col_metadata._coord_df
-    row_df = row_metadata._coord_df
-
-    result_oids = []
-    shape = (len(row_df.index), len(col_df.index))
-
-    for _, row_partition_data in row_df.iterrows():
-        for _, col_partition_data in col_df.iterrows():
-            row_part = row_partition_data.partition
-            col_part = col_partition_data.partition
-            block_oid = blk_partitions[row_part, col_part]
-
-            row_idx = row_partition_data['index_within_partition']
-            col_idx = col_partition_data['index_within_partition']
-
-            result_oid = extractor.remote(block_oid, [row_idx], [col_idx])
-            result_oids.append(result_oid)
-    return np.array(result_oids).reshape(shape)
-
-
-@ray.remote
-def _deploy_func(func, dataframe, *args):
-    """Deploys a function for the _map_partitions call.
-    Args:
-        dataframe (pandas.DataFrame): The pandas DataFrame for this partition.
-    Returns:
-        A futures object representing the return value of the function
-        provided.
-    """
-    if len(args) == 0:
-        return func(dataframe)
-    else:
-        return func(dataframe, *args)
-
-
-def _map_partitions(func, partitions, *argslists):
-    """Apply a function across the specified axis
-
-    Args:
-        func (callable): The function to apply
-        partitions ([ObjectID]): The list of partitions to map func on.
-
-    Returns:
-        A list of partitions ([ObjectID]) with the result of the function
-    """
-    if partitions is None:
-        return None
-
-    assert(callable(func))
-    if len(argslists) == 0:
-        return [_deploy_func.remote(func, part) for part in partitions]
-    elif len(argslists) == 1:
-        return [_deploy_func.remote(func, part, argslists[0])
-                for part in partitions]
-    else:
-        assert(all(len(args) == len(partitions) for args in argslists))
-        return [_deploy_func.remote(func, *args)
-                for args in zip(partitions, *argslists)]
-
-
-@ray.remote
-def _build_col_widths(df_col):
-    """Compute widths (# of columns) for each partition."""
-    widths = np.array(ray.get([_deploy_func.remote(_get_widths, d)
-                      for d in df_col]))
-
-    return widths
-
-
-@ray.remote
-def _build_row_lengths(df_row):
-    """Compute lengths (# of rows) for each partition."""
-    lengths = np.array(ray.get([_deploy_func.remote(_get_lengths, d)
-                       for d in df_row]))
-
-    return lengths
-
-
-@ray.remote
-def _build_coord_df(lengths, index):
-    """Build the coordinate dataframe over all partitions."""
-    filtered_lengths = [x for x in lengths if x > 0]
-    coords = None
-    if len(filtered_lengths) > 0:
-        coords = np.vstack([np.column_stack((np.full(l, i), np.arange(l)))
-                            for i, l in enumerate(filtered_lengths)])
-    col_names = ("partition", "index_within_partition")
-    return pandas.DataFrame(coords, index=index, columns=col_names)
-
-
-def _create_block_partitions(partitions, axis=0, length=None):
-
-    if length is not None and length != 0 and get_npartitions() > length:
-        npartitions = length
-    elif length == 0:
-        npartitions = 1
-    else:
-        npartitions = get_npartitions()
-
-    x = [create_blocks._submit(args=(partition, npartitions, axis),
-                               num_return_vals=npartitions)
-         for partition in partitions]
-
-    # In the case that axis is 1 we have to transpose because we build the
-    # columns into rows. Fortunately numpy is efficient at this.
-    blocks = np.array(x) if axis == 0 else np.array(x).T
-
-    # Sometimes we only get a single column or row, which is
-    # problematic for building blocks from the partitions, so we
-    # add whatever dimension we're missing from the input.
-    return fix_blocks_dimensions(blocks, axis)
-
-
-@ray.remote
-def create_blocks(df, npartitions, axis):
-    return create_blocks_helper(df, npartitions, axis)
-
-
-def create_blocks_helper(df, npartitions, axis):
-    # Single partition dataframes don't need to be repartitioned
-    if npartitions == 1:
-        return df
-    # In the case that the size is not a multiple of the number of partitions,
-    # we need to add one to each partition to avoid losing data off the end
-    block_size = df.shape[axis ^ 1] // npartitions \
-        if df.shape[axis ^ 1] % npartitions == 0 \
-        else df.shape[axis ^ 1] // npartitions + 1
-
-    # if not isinstance(df.columns, pandas.RangeIndex):
-    #     df.columns = pandas.RangeIndex(0, len(df.columns))
-
-    blocks = [df.iloc[:, i * block_size: (i + 1) * block_size]
-              if axis == 0
-              else df.iloc[i * block_size: (i + 1) * block_size, :]
-              for i in range(npartitions)]
-
-    for block in blocks:
-        block.columns = pandas.RangeIndex(0, len(block.columns))
-        block.reset_index(inplace=True, drop=True)
-    return blocks
-
-
-@memoize
-@ray.remote
-def _blocks_to_col(*partition):
-    if len(partition):
-        return pandas.concat(partition, axis=0, copy=False)\
-            .reset_index(drop=True)
-    else:
-        return pandas.Series()
-
-
-@memoize
-@ray.remote
-def _blocks_to_row(*partition):
-    row_part = pandas.concat(partition, axis=1, copy=False)\
-        .reset_index(drop=True)
-    # Because our block partitions contain different indices (for the
-    # columns), this change is needed to ensure correctness.
-    row_part.columns = pandas.RangeIndex(0, len(row_part.columns))
-    return row_part
-
-
-def _inherit_docstrings(parent, excluded=[]):
-    """Creates a decorator which overwrites a decorated class' __doc__
-    attribute with parent's __doc__ attribute. Also overwrites __doc__ of
-    methods and properties defined in the class with the __doc__ of matching
-    methods and properties in parent.
-
-    Args:
-        parent (object): Class from which the decorated class inherits __doc__.
-        excluded (list): List of parent objects from which the class does not
-            inherit docstrings.
-
-    Returns:
-        function: decorator which replaces the decorated class' documentation
-            parent's documentation.
-    """
-    def decorator(cls):
-        if parent not in excluded:
-            cls.__doc__ = parent.__doc__
-        for attr, obj in cls.__dict__.items():
-            parent_obj = getattr(parent, attr, None)
-            if parent_obj in excluded or \
-                    (not callable(parent_obj) and
-                     not isinstance(parent_obj, property)):
-                continue
-            if callable(obj):
-                obj.__doc__ = parent_obj.__doc__
-            elif isinstance(obj, property) and obj.fget is not None:
-                p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
-                setattr(cls, attr, p)
-
-        return cls
-
-    return decorator
-
-
-@ray.remote
-def _reindex_helper(old_index, new_index, axis, npartitions, *df):
-    """Reindexes a dataframe to prepare for join/concat.
-
-    Args:
-        df: The DataFrame partition
-        old_index: The index/column for this partition.
-        new_index: The new index/column to assign.
-        axis: Which axis to reindex over.
-
-    Returns:
-        A new set of blocks made up of DataFrames.
-    """
-    df = pandas.concat(df, axis=axis ^ 1)
-    if axis == 1:
-        df.index = old_index
-    elif axis == 0:
-        df.columns = old_index
-
-    df = df.reindex(new_index, copy=False, axis=axis ^ 1)
-    return create_blocks_helper(df, npartitions, axis)
-
-
-@ray.remote
-def _co_op_helper(func, left_columns, right_columns, left_df_len, left_idx,
-                  *zipped):
-    """Copartition operation where two DataFrames must have aligned indexes.
-
-    NOTE: This function assumes things are already copartitioned. Requires that
-        row partitions are passed in as blocks.
-
-    Args:
-        func: The operation to conduct between two DataFrames.
-        left_columns: The column names for the left DataFrame.
-        right_columns: The column names for the right DataFrame.
-        left_df_len: The length of the left. This is used so we can split up
-            the zipped partitions.
-        zipped: The DataFrame partitions (in blocks).
-
-    Returns:
-         A new set of blocks for the partitioned DataFrame.
-    """
-    left = pandas.concat(zipped[:left_df_len], axis=1, copy=False).copy()
-    left.columns = left_columns
-    if left_idx is not None:
-        left.index = left_idx
-
-    right = pandas.concat(zipped[left_df_len:], axis=1, copy=False).copy()
-    right.columns = right_columns
-
-    new_rows = func(left, right)
-
-    new_blocks = create_blocks_helper(new_rows, left_df_len, 0)
-
-    if left_idx is not None:
-        new_blocks.append(new_rows.index)
-
-    return new_blocks
-
-
-@ray.remote
-def _match_partitioning(column_partition, lengths, index):
-    """Match the number of rows on each partition. Used in df.merge().
-
-    NOTE: This function can cause problems when there are empty column
-        partitions.
-
-        The way this function is intended to be used is as follows: Align the
-        right partitioning with the left. The left will remain unchanged. Then,
-        you are free to perform actions on a per-partition basis with the
-        partitioning.
-
-        The index objects must already be identical for this to work correctly.
-
-    Args:
-        column_partition: The column partition to change.
-        lengths: The lengths of each row partition to match to.
-        index: The index index of the column_partition. This is used to push
-            down to the inner frame for correctness in the merge.
-
-    Returns:
-         A list of blocks created from this column partition.
-    """
-    partitioned_list = []
-
-    columns = column_partition.columns
-    # We set this because this is the only place we can guarantee correct
-    # placement. We use it in the case the user wants to join on the index.
-    column_partition.index = index
-    for length in lengths:
-        if len(column_partition) == 0:
-            partitioned_list.append(pandas.DataFrame(columns=columns))
-            continue
-
-        partitioned_list.append(column_partition.iloc[:length, :])
-        column_partition = column_partition.iloc[length:, :]
-    return partitioned_list
-
-
-@ray.remote
-def _concat_index(*index_parts):
-    return index_parts[0].append(index_parts[1:])
-
-
-def fix_blocks_dimensions(blocks, axis):
-    """Checks that blocks is 2D, and adds a dimension if not.
-    """
-    if blocks.ndim < 2:
-        return np.expand_dims(blocks, axis=axis ^ 1)
-    return blocks
-
-
-@ray.remote
-def _compile_remote_dtypes(*column_of_blocks):
-    small_dfs = [df.loc[0:0] for df in column_of_blocks]
-    return pandas.concat(small_dfs, copy=False).dtypes