[DataFrame] Dataframe functions (max, min, notnull, notna) (#1500)

* Finished max, min, notna, notnull

* flake8 satisfied

* fixed pytest fixture error

* flake8 sufficed

* post-code review

* added methods to new mixed types test
This commit is contained in:
Hari Subbaraj
2018-02-16 14:00:59 -08:00
committed by Devin Petersohn
parent a24cc28773
commit 8d1a0b0d04
2 changed files with 82 additions and 25 deletions
+52 -4
View File
@@ -768,7 +768,22 @@ class DataFrame(object):
def max(self, axis=None, skipna=None, level=None, numeric_only=None,
        **kwargs):
    """Perform max across the DataFrame.

    Args:
        axis (int): The axis to take the max on.
        skipna (bool): True to skip NA values, false otherwise.
        level: Forwarded unchanged to the underlying ``max`` call.
        numeric_only: Forwarded unchanged to the underlying ``max`` call.
        **kwargs: Extra keyword arguments forwarded to the underlying
            ``max`` call.

    Returns:
        The max of the DataFrame.
    """
    if axis == 1:
        return self._map_partitions(
            lambda df: df.max(axis=axis, skipna=skipna, level=level,
                              numeric_only=numeric_only, **kwargs))

    # Every other axis value is handled by transposing and reducing along
    # axis=1 of the transpose. The caller's options must be forwarded here
    # as well; passing literal None would silently discard them.
    return self.T.max(axis=1, skipna=skipna, level=level,
                      numeric_only=numeric_only, **kwargs)
def mean(self, axis=None, skipna=None, level=None, numeric_only=None,
**kwargs):
@@ -793,7 +808,22 @@ class DataFrame(object):
def min(self, axis=None, skipna=None, level=None, numeric_only=None,
        **kwargs):
    """Perform min across the DataFrame.

    Args:
        axis (int): The axis to take the min on.
        skipna (bool): True to skip NA values, false otherwise.
        level: Forwarded unchanged to the underlying ``min`` call.
        numeric_only: Forwarded unchanged to the underlying ``min`` call.
        **kwargs: Extra keyword arguments forwarded to the underlying
            ``min`` call.

    Returns:
        The min of the DataFrame.
    """
    if axis == 1:
        return self._map_partitions(
            lambda df: df.min(axis=axis, skipna=skipna, level=level,
                              numeric_only=numeric_only, **kwargs))

    # Every other axis value is handled by transposing and reducing along
    # axis=1 of the transpose, with the caller's options passed through.
    return self.T.min(axis=1, skipna=skipna, level=level,
                      numeric_only=numeric_only, **kwargs)
# Placeholder: not implemented yet; any call raises NotImplementedError.
def mod(self, other, axis='columns', level=None, fill_value=None):
raise NotImplementedError("Not Yet implemented.")
@@ -814,10 +844,28 @@ class DataFrame(object):
raise NotImplementedError("Not Yet implemented.")
def notna(self):
    """Perform notna across the DataFrame.

    Returns:
        Boolean DataFrame where value is False if corresponding
        value is NaN, True otherwise
    """
    return self._map_partitions(lambda df: df.notna())
def notnull(self):
    """Perform notnull across the DataFrame.

    Returns:
        Boolean DataFrame where value is False if corresponding
        value is NaN, True otherwise
    """
    return self._map_partitions(lambda df: df.notnull())
# Placeholder: not implemented yet; any call raises NotImplementedError.
def nsmallest(self, n, columns, keep='first'):
raise NotImplementedError("Not Yet implemented.")
+30 -21
View File
@@ -41,7 +41,10 @@ def test_ftypes(ray_df, pandas_df):
@pytest.fixture
def test_values(ray_df, pandas_df):
    """Check that both frames hold the same values, with NaN equal to NaN.

    Args:
        ray_df: The ray DataFrame under test.
        pandas_df: The pandas DataFrame holding the expected values.
    """
    ray_values = ray_df.values
    pandas_values = pandas_df.values

    # zip() stops at the shorter input, so a size mismatch would otherwise
    # go unnoticed by the element-wise loop below.
    assert np.shape(ray_values) == np.shape(pandas_values)

    ray_flat = np.ndarray.flatten(ray_values)
    pandas_flat = np.ndarray.flatten(pandas_values)
    for ray_item, pandas_item in zip(ray_flat, pandas_flat):
        # NaN is the only value that compares unequal to itself. Unlike
        # np.isnan, this check does not raise on non-numeric entries.
        both_nan = ray_item != ray_item and pandas_item != pandas_item
        assert ray_item == pandas_item or both_nan
@pytest.fixture
@@ -200,6 +203,11 @@ def test_int_dataframe():
test_get_dtype_counts(ray_df, pandas_df)
test_get_ftype_counts(ray_df, pandas_df)
test_max(ray_df, pandas_df)
test_min(ray_df, pandas_df)
test_notna(ray_df, pandas_df)
test_notnull(ray_df, pandas_df)
def test_float_dataframe():
@@ -256,6 +264,10 @@ def test_float_dataframe():
test_idxmax(ray_df, pandas_df)
test_idxmin(ray_df, pandas_df)
test_pop(ray_df, pandas_df)
test_max(ray_df, pandas_df)
test_min(ray_df, pandas_df)
test_notna(ray_df, pandas_df)
test_notnull(ray_df, pandas_df)
for key in keys:
test_get(ray_df, pandas_df, key)
@@ -308,6 +320,11 @@ def test_mixed_dtype_dataframe():
test_get_dtype_counts(ray_df, pandas_df)
test_get_ftype_counts(ray_df, pandas_df)
test_max(ray_df, pandas_df)
test_min(ray_df, pandas_df)
test_notna(ray_df, pandas_df)
test_notnull(ray_df, pandas_df)
def test_add():
ray_df = create_test_dataframe()
@@ -887,11 +904,9 @@ def test_mask():
ray_df.mask(None)
@pytest.fixture
def test_max(ray_df, pandas_df):
    """Check that ``max()`` on the ray DataFrame matches pandas.

    Args:
        ray_df: The ray DataFrame under test.
        pandas_df: The pandas DataFrame producing the expected result.
    """
    assert ray_df_equals_pandas(ray_df.max(), pandas_df.max())
def test_mean():
@@ -929,11 +944,9 @@ def test_merge():
ray_df.merge(None)
@pytest.fixture
def test_min(ray_df, pandas_df):
    """Check that ``min()`` on the ray DataFrame matches pandas.

    Args:
        ray_df: The ray DataFrame under test.
        pandas_df: The pandas DataFrame producing the expected result.
    """
    assert ray_df_equals_pandas(ray_df.min(), pandas_df.min())
def test_mod():
@@ -978,18 +991,14 @@ def test_nlargest():
ray_df.nlargest(None, None)
@pytest.fixture
def test_notna(ray_df, pandas_df):
    """Check that ``notna()`` on the ray DataFrame matches pandas.

    Args:
        ray_df: The ray DataFrame under test.
        pandas_df: The pandas DataFrame producing the expected result.
    """
    assert ray_df_equals_pandas(ray_df.notna(), pandas_df.notna())
@pytest.fixture
def test_notnull(ray_df, pandas_df):
    """Check that ``notnull()`` on the ray DataFrame matches pandas.

    Args:
        ray_df: The ray DataFrame under test.
        pandas_df: The pandas DataFrame producing the expected result.
    """
    assert ray_df_equals_pandas(ray_df.notnull(), pandas_df.notnull())
def test_nsmallest():