[DataFrame] Test bugfixes (#2111)

This commit is contained in:
Devin Petersohn
2018-05-21 20:01:19 -10:00
committed by Robert Nishihara
parent f1fc373de7
commit 317c9450e7
2 changed files with 56 additions and 54 deletions
+3 -3
View File
@@ -1563,7 +1563,7 @@ class DataFrame(object):
Returns:
A new DataFrame with the Divide applied.
"""
return self._operator_helper(pd.DataFrame.add, other, axis, level,
return self._operator_helper(pd.DataFrame.div, other, axis, level,
fill_value)
def divide(self, other, axis='columns', level=None, fill_value=None):
@@ -3991,7 +3991,7 @@ class DataFrame(object):
self._col_partitions)
new_columns = self.columns
new_index = self.index.sort_values()
new_index = self.index.sort_values(ascending=ascending)
new_row_parts = None
else:
columns = self.columns
@@ -3999,7 +3999,7 @@ class DataFrame(object):
lambda df: _sort_helper(df, columns, axis, *args),
self._row_partitions)
new_columns = self.columns.sort_values()
new_columns = self.columns.sort_values(ascending=ascending)
new_index = self.index
new_column_parts = None
+53 -51
View File
@@ -861,27 +861,27 @@ def test_inter_df_math(op, simple=False):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})
ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
ray_df_equals_pandas(getattr(ray_df, op)(ray_df2),
getattr(pandas_df, op)(pandas_df2))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df2),
getattr(pandas_df, op)(pandas_df2))
list_test = [0, 1, 2, 4]
if not simple:
ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=1),
getattr(pandas_df, op)(list_test, axis=1))
assert ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=1),
getattr(pandas_df, op)(list_test, axis=1))
ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=0),
getattr(pandas_df, op)(list_test, axis=0))
assert ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=0),
getattr(pandas_df, op)(list_test, axis=0))
@pytest.fixture
@@ -892,18 +892,18 @@ def test_comparison_inter_ops(op):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})
ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(ray_df),
getattr(pandas_df, op)(pandas_df))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]})
ray_df_equals_pandas(getattr(ray_df2, op)(ray_df2),
getattr(pandas_df2, op)(pandas_df2))
assert ray_df_equals_pandas(getattr(ray_df2, op)(ray_df2),
getattr(pandas_df2, op)(pandas_df2))
@pytest.fixture
@@ -914,10 +914,10 @@ def test_inter_df_math_right_ops(op):
pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7],
"col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]})
ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
assert ray_df_equals_pandas(getattr(ray_df, op)(4),
getattr(pandas_df, op)(4))
assert ray_df_equals_pandas(getattr(ray_df, op)(4.0),
getattr(pandas_df, op)(4.0))
def test_add():
@@ -1938,14 +1938,14 @@ def test_fillna_datetime_columns(num_partitions=2):
@pytest.fixture
def test_filter(ray_df, pandas_df, by):
ray_df_equals_pandas(ray_df.filter(items=by['items']),
pandas_df.filter(items=by['items']))
assert ray_df_equals_pandas(ray_df.filter(items=by['items']),
pandas_df.filter(items=by['items']))
ray_df_equals_pandas(ray_df.filter(regex=by['regex']),
pandas_df.filter(regex=by['regex']))
assert ray_df_equals_pandas(ray_df.filter(regex=by['regex']),
pandas_df.filter(regex=by['regex']))
ray_df_equals_pandas(ray_df.filter(like=by['like']),
pandas_df.filter(like=by['like']))
assert ray_df_equals_pandas(ray_df.filter(like=by['like']),
pandas_df.filter(like=by['like']))
def test_first():
@@ -2008,7 +2008,7 @@ def test_gt():
@pytest.fixture
def test_head(ray_df, pandas_df, n=5):
ray_df_equals_pandas(ray_df.head(n), pandas_df.head(n))
assert ray_df_equals_pandas(ray_df.head(n), pandas_df.head(n))
def test_hist():
@@ -2129,7 +2129,7 @@ def test_join():
for how in join_types:
ray_join = ray_df.join(ray_df2, how=how)
pandas_join = pandas_df.join(pandas_df2, how=how)
ray_df_equals_pandas(ray_join, pandas_join)
assert ray_df_equals_pandas(ray_join, pandas_join)
ray_df3 = rdf.DataFrame({"col7": [1, 2, 3, 5, 6, 7, 8]})
@@ -2139,7 +2139,7 @@ def test_join():
for how in join_types:
ray_join = ray_df.join([ray_df2, ray_df3], how=how)
pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how)
ray_df_equals_pandas(ray_join, pandas_join)
assert ray_df_equals_pandas(ray_join, pandas_join)
def test_kurt():
@@ -2408,7 +2408,7 @@ def test_pop(ray_df, pandas_df):
ray_popped = temp_ray_df.pop('col2')
pandas_popped = temp_pandas_df.pop('col2')
assert ray_popped.sort_index().equals(pandas_popped.sort_index())
ray_df_equals_pandas(temp_ray_df, temp_pandas_df)
assert ray_df_equals_pandas(temp_ray_df, temp_pandas_df)
def test_pow():
@@ -2759,10 +2759,14 @@ def test_reset_index(ray_df, pandas_df, inplace=False):
assert to_pandas(ray_df_cp).equals(pd_df_cp)
@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rfloordiv():
test_inter_df_math_right_ops("rfloordiv")
@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rmod():
test_inter_df_math_right_ops("rmod")
@@ -2792,6 +2796,8 @@ def test_rsub():
test_inter_df_math_right_ops("rsub")
@pytest.mark.skip(reason="dtypes on different partitions may not match up, "
"no fix for this yet")
def test_rtruediv():
test_inter_df_math_right_ops("rtruediv")
@@ -2893,12 +2899,12 @@ def test_sort_index():
pandas_result = pandas_df.sort_index()
ray_result = ray_df.sort_index()
ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)
pandas_result = pandas_df.sort_index(ascending=False)
ray_result = ray_df.sort_index(ascending=False)
ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)
def test_sort_values():
@@ -2908,22 +2914,22 @@ def test_sort_values():
pandas_result = pandas_df.sort_values(by=1)
ray_result = ray_df.sort_values(by=1)
ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)
pandas_result = pandas_df.sort_values(by=1, axis=1)
ray_result = ray_df.sort_values(by=1, axis=1)
ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)
pandas_result = pandas_df.sort_values(by=[1, 3])
ray_result = ray_df.sort_values(by=[1, 3])
ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)
pandas_result = pandas_df.sort_values(by=[1, 67], axis=1)
ray_result = ray_df.sort_values(by=[1, 67], axis=1)
ray_df_equals_pandas(ray_result, pandas_result)
assert ray_df_equals_pandas(ray_result, pandas_result)
def test_sortlevel():
@@ -2976,7 +2982,7 @@ def test_swaplevel():
@pytest.fixture
def test_tail(ray_df, pandas_df):
ray_df_equals_pandas(ray_df.tail(), pandas_df.tail())
assert ray_df_equals_pandas(ray_df.tail(), pandas_df.tail())
def test_take():
@@ -3023,10 +3029,10 @@ def test_to_xarray():
@pytest.fixture
def test_transform(ray_df, pandas_df):
ray_df_equals_pandas(ray_df.transform(lambda df: df.isna()),
pandas_df.transform(lambda df: df.isna()))
ray_df_equals_pandas(ray_df.transform('isna'),
pandas_df.transform('isna'))
assert ray_df_equals_pandas(ray_df.transform(lambda df: df.isna()),
pandas_df.transform(lambda df: df.isna()))
assert ray_df_equals_pandas(ray_df.transform('isna'),
pandas_df.transform('isna'))
def test_truediv():
@@ -3272,7 +3278,7 @@ def test___delitem__(ray_df, pd_df):
pd_df = pd_df.copy()
ray_df.__delitem__('col1')
pd_df.__delitem__('col1')
ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pd_df)
# Issue 2027
last_label = pd_df.iloc[:, -1].name
@@ -3348,10 +3354,6 @@ def test_is_copy():
ray_df.is_copy
def test___itruediv__():
test_inter_df_math("__itruediv__", simple=True)
def test___div__():
test_inter_df_math("__div__", simple=True)
@@ -3416,4 +3418,4 @@ def test_get_dummies():
'B': ['b', 'a', 'c'],
'C': [1, 2, 3]})
ray_df_equals_pandas(rdf.get_dummies(ray_df), pd.get_dummies(pd_df))
assert ray_df_equals_pandas(rdf.get_dummies(ray_df), pd.get_dummies(pd_df))