From 62680011ee1fd771e413dbd9650ef5e197a2cea9 Mon Sep 17 00:00:00 2001
From: Helen Che
Date: Thu, 8 Feb 2018 22:12:03 -0800
Subject: [PATCH] [DataFrame] Add implementation for get method (#1496)

* Add implementation for get method

Add tests for get method

Add implementation/tests for get_dtype_counts method

Add implementation/tests for get_ftype_counts method

* Add test fixtures

* Change method tests to fixtures

* Flake8
---
 python/ray/dataframe/dataframe.py           |  36 ++++++-
 python/ray/dataframe/test/test_dataframe.py | 108 ++++++++++++++++----
 2 files changed, 119 insertions(+), 25 deletions(-)

diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py
index a8f61a366..c39289069 100644
--- a/python/ray/dataframe/dataframe.py
+++ b/python/ray/dataframe/dataframe.py
@@ -596,13 +596,43 @@ class DataFrame(object):
         raise NotImplementedError("Not Yet implemented.")
 
     def get(self, key, default=None):
-        raise NotImplementedError("Not Yet implemented.")
+        """Get item from object for given key (DataFrame column, Panel
+        slice, etc.). Returns default value if not found.
+
+        Args:
+            key (DataFrame column, Panel slice) : the key for which value
+            to get
+
+        Returns:
+            value (type of items contained in object) : A value that is
+            stored at the key
+        """
+        temp_df = self._map_partitions(lambda df: df.get(key, default=default))
+        return to_pandas(temp_df)
 
     def get_dtype_counts(self):
-        raise NotImplementedError("Not Yet implemented.")
+        """Get the counts of dtypes in this object.
+
+        Returns:
+            The counts of dtypes in this object.
+        """
+        return ray.get(
+            _deploy_func.remote(
+                lambda df: df.get_dtype_counts(), self._df[0]
+            )
+        )
 
     def get_ftype_counts(self):
-        raise NotImplementedError("Not Yet implemented.")
+        """Get the counts of ftypes in this object.
+
+        Returns:
+            The counts of ftypes in this object.
+        """
+        return ray.get(
+            _deploy_func.remote(
+                lambda df: df.get_ftype_counts(), self._df[0]
+            )
+        )
 
     def get_value(self, index, col, takeable=False):
         raise NotImplementedError("Not Yet implemented.")
diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py
index e4ab722cb..d3d69168a 100644
--- a/python/ray/dataframe/test/test_dataframe.py
+++ b/python/ray/dataframe/test/test_dataframe.py
@@ -3,10 +3,10 @@ from __future__ import division
 from __future__ import print_function
 
 import pytest
-import ray.dataframe as rdf
 import numpy as np
 import pandas as pd
 import ray
+import ray.dataframe as rdf
 
 
 @pytest.fixture
@@ -109,6 +109,24 @@ def test_transpose(ray_df, pandas_df):
     assert(ray_df_equals_pandas(ray_df.transpose(), pandas_df.transpose()))
 
 
+@pytest.fixture
+def test_get(ray_df, pandas_df, key):
+    assert(ray_df.get(key).equals(pandas_df.get(key)))
+    assert ray_df.get(
+        key, default='default').equals(
+            pandas_df.get(key, default='default'))
+
+
+@pytest.fixture
+def test_get_dtype_counts(ray_df, pandas_df):
+    assert(ray_df.get_dtype_counts().equals(pandas_df.get_dtype_counts()))
+
+
+@pytest.fixture
+def test_get_ftype_counts(ray_df, pandas_df):
+    assert(ray_df.get_ftype_counts().equals(pandas_df.get_ftype_counts()))
+
+
 @pytest.fixture
 def create_test_dataframe():
     df = pd.DataFrame({'col1': [0, 1, 2, 3],
@@ -136,6 +154,11 @@ def test_int_dataframe():
                  lambda x: x,
                  lambda x: False]
 
+    keys = ['col1',
+            'col2',
+            'col3',
+            'col4']
+
     test_roundtrip(ray_df, pandas_df)
     test_index(ray_df, pandas_df)
     test_size(ray_df, pandas_df)
@@ -171,6 +194,12 @@ def test_int_dataframe():
     test_idxmin(ray_df, pandas_df)
     test_pop(ray_df, pandas_df)
 
+    for key in keys:
+        test_get(ray_df, pandas_df, key)
+
+    test_get_dtype_counts(ray_df, pandas_df)
+    test_get_ftype_counts(ray_df, pandas_df)
+
 
 def test_float_dataframe():
 
@@ -188,6 +217,11 @@ def test_float_dataframe():
                  lambda x: x,
                  lambda x: False]
 
+    keys = ['col1',
+            'col2',
+            'col3',
+            'col4']
+
     test_roundtrip(ray_df, pandas_df)
     test_index(ray_df, pandas_df)
     test_size(ray_df, pandas_df)
@@ -223,6 +257,57 @@ def test_float_dataframe():
     test_idxmin(ray_df, pandas_df)
     test_pop(ray_df, pandas_df)
 
+    for key in keys:
+        test_get(ray_df, pandas_df, key)
+
+    test_get_dtype_counts(ray_df, pandas_df)
+    test_get_ftype_counts(ray_df, pandas_df)
+
+
+def test_mixed_dtype_dataframe():
+    pandas_df = pd.DataFrame({
+        'col1': [1, 2, 3, 4],
+        'col2': [4, 5, 6, 7],
+        'col3': [8.0, 9.4, 10.1, 11.3],
+        'col4': ['a', 'b', 'c', 'd']})
+
+    ray_df = rdf.from_pandas(pandas_df, 2)
+
+    testfuncs = [lambda x: x + x,
+                 lambda x: str(x),
+                 lambda x: x,
+                 lambda x: False]
+
+    keys = ['col1',
+            'col2',
+            'col3',
+            'col4']
+
+    test_roundtrip(ray_df, pandas_df)
+    test_index(ray_df, pandas_df)
+    test_size(ray_df, pandas_df)
+    test_ndim(ray_df, pandas_df)
+    test_ftypes(ray_df, pandas_df)
+    test_values(ray_df, pandas_df)
+    test_axes(ray_df, pandas_df)
+    test_shape(ray_df, pandas_df)
+    test_add_prefix(ray_df, pandas_df)
+    test_add_suffix(ray_df, pandas_df)
+
+    for testfunc in testfuncs:
+        test_applymap(ray_df, pandas_df, testfunc)
+
+    test_copy(ray_df)
+    test_sum(ray_df, pandas_df)
+    test_keys(ray_df, pandas_df)
+    test_transpose(ray_df, pandas_df)
+
+    for key in keys:
+        test_get(ray_df, pandas_df, key)
+
+    test_get_dtype_counts(ray_df, pandas_df)
+    test_get_ftype_counts(ray_df, pandas_df)
+
 
 def test_add():
     ray_df = create_test_dataframe()
@@ -631,27 +716,6 @@ def test_ge():
         ray_df.ge(None)
 
 
-def test_get():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.get(None)
-
-
-def test_get_dtype_counts():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.get_dtype_counts()
-
-
-def test_get_ftype_counts():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.get_ftype_counts()
-
-
 def test_get_value():
     ray_df = create_test_dataframe()
 