From 8d1a0b0d043a4dad776be6f349e0f5f2d8c18eae Mon Sep 17 00:00:00 2001
From: Hari Subbaraj
Date: Fri, 16 Feb 2018 14:00:59 -0800
Subject: [PATCH] [DataFrame] Dataframe functions (max, min, notnull, notna) (#1500)

* Finished max, min, notna, notnull
* flake8 satisfied
* fixed pytest fixture error
* flake8 sufficed
* post-code review
* added methods to new mixed types test
---
 python/ray/dataframe/dataframe.py           | 56 +++++++++++++++++++--
 python/ray/dataframe/test/test_dataframe.py | 51 +++++++++++--------
 2 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py
index c39289069..f01f9f3dc 100644
--- a/python/ray/dataframe/dataframe.py
+++ b/python/ray/dataframe/dataframe.py
@@ -768,7 +768,22 @@ class DataFrame(object):
 
     def max(self, axis=None, skipna=None, level=None, numeric_only=None,
             **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
+        """Perform max across the DataFrame.
+
+        Args:
+            axis (int): The axis to take the max on.
+            skipna (bool): True to skip NA values, false otherwise.
+
+        Returns:
+            The max of the DataFrame.
+        """
+        if(axis == 1):
+            return self._map_partitions(
+                lambda df: df.max(axis=axis, skipna=skipna, level=level,
+                                  numeric_only=numeric_only, **kwargs))
+        else:
+            return self.T.max(axis=1, skipna=skipna, level=level,
+                              numeric_only=numeric_only, **kwargs)
 
     def mean(self, axis=None, skipna=None, level=None, numeric_only=None,
              **kwargs):
@@ -793,7 +808,22 @@ class DataFrame(object):
 
     def min(self, axis=None, skipna=None, level=None, numeric_only=None,
             **kwargs):
-        raise NotImplementedError("Not Yet implemented.")
+        """Perform min across the DataFrame.
+
+        Args:
+            axis (int): The axis to take the min on.
+            skipna (bool): True to skip NA values, false otherwise.
+
+        Returns:
+            The min of the DataFrame.
+        """
+        if(axis == 1):
+            return self._map_partitions(
+                lambda df: df.min(axis=axis, skipna=skipna, level=level,
+                                  numeric_only=numeric_only, **kwargs))
+        else:
+            return self.T.min(axis=1, skipna=skipna, level=level,
+                              numeric_only=numeric_only, **kwargs)
 
     def mod(self, other, axis='columns', level=None, fill_value=None):
         raise NotImplementedError("Not Yet implemented.")
@@ -814,10 +844,28 @@ class DataFrame(object):
         raise NotImplementedError("Not Yet implemented.")
 
     def notna(self):
-        raise NotImplementedError("Not Yet implemented.")
+        """Perform notna across the DataFrame.
+
+        Args:
+            None
+
+        Returns:
+            Boolean DataFrame where value is False if corresponding
+            value is NaN, True otherwise
+        """
+        return self._map_partitions(lambda df: df.notna())
 
     def notnull(self):
-        raise NotImplementedError("Not Yet implemented.")
+        """Perform notnull across the DataFrame.
+
+        Args:
+            None
+
+        Returns:
+            Boolean DataFrame where value is False if corresponding
+            value is NaN, True otherwise
+        """
+        return self._map_partitions(lambda df: df.notnull())
 
     def nsmallest(self, n, columns, keep='first'):
         raise NotImplementedError("Not Yet implemented.")
diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py
index d3d69168a..266a1954a 100644
--- a/python/ray/dataframe/test/test_dataframe.py
+++ b/python/ray/dataframe/test/test_dataframe.py
@@ -41,7 +41,10 @@ def test_ftypes(ray_df, pandas_df):
 
 @pytest.fixture
 def test_values(ray_df, pandas_df):
-    assert(np.array_equal(ray_df.values, pandas_df.values))
+    a = np.ndarray.flatten(ray_df.values)
+    b = np.ndarray.flatten(pandas_df.values)
+    for c, d in zip(a, b):
+        assert(c == d or (np.isnan(c) and np.isnan(d)))
 
 
 @pytest.fixture
@@ -200,6 +203,11 @@ def test_int_dataframe():
     test_get_dtype_counts(ray_df, pandas_df)
     test_get_ftype_counts(ray_df, pandas_df)
 
+    test_max(ray_df, pandas_df)
+    test_min(ray_df, pandas_df)
+    test_notna(ray_df, pandas_df)
+    test_notnull(ray_df, pandas_df)
+
 
 def test_float_dataframe():
 
@@ -256,6 +264,10 @@ def test_float_dataframe():
     test_idxmax(ray_df, pandas_df)
     test_idxmin(ray_df, pandas_df)
     test_pop(ray_df, pandas_df)
+    test_max(ray_df, pandas_df)
+    test_min(ray_df, pandas_df)
+    test_notna(ray_df, pandas_df)
+    test_notnull(ray_df, pandas_df)
 
     for key in keys:
         test_get(ray_df, pandas_df, key)
@@ -308,6 +320,11 @@ def test_mixed_dtype_dataframe():
     test_get_dtype_counts(ray_df, pandas_df)
     test_get_ftype_counts(ray_df, pandas_df)
 
+    test_max(ray_df, pandas_df)
+    test_min(ray_df, pandas_df)
+    test_notna(ray_df, pandas_df)
+    test_notnull(ray_df, pandas_df)
+
 
 def test_add():
     ray_df = create_test_dataframe()
@@ -887,11 +904,9 @@ def test_mask():
         ray_df.mask(None)
 
 
-def test_max():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.max()
+@pytest.fixture
+def test_max(ray_df, pandas_df):
+    assert(ray_df_equals_pandas(ray_df.max(), pandas_df.max()))
 
 
 def test_mean():
@@ -929,11 +944,9 @@ def test_merge():
         ray_df.merge(None)
 
 
-def test_min():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.min()
+@pytest.fixture
+def test_min(ray_df, pandas_df):
+    assert(ray_df_equals_pandas(ray_df.min(), pandas_df.min()))
 
 
 def test_mod():
@@ -978,18 +991,14 @@ def test_nlargest():
         ray_df.nlargest(None, None)
 
 
-def test_notna():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.notna()
+@pytest.fixture
+def test_notna(ray_df, pandas_df):
+    assert(ray_df_equals_pandas(ray_df.notna(), pandas_df.notna()))
 
 
-def test_notnull():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.notnull()
+@pytest.fixture
+def test_notnull(ray_df, pandas_df):
+    assert(ray_df_equals_pandas(ray_df.notnull(), pandas_df.notnull()))
 
 
 def test_nsmallest():