[DataFrame] Fix transpose with nan values and add functionality needed for Index (#1545)

This commit is contained in:
Devin Petersohn
2018-02-21 08:46:37 -08:00
committed by Robert Nishihara
parent db4a920bdb
commit de6fa02c85
3 changed files with 174 additions and 38 deletions
+52 -4
View File
@@ -22,6 +22,12 @@ def test_roundtrip(ray_df, pandas_df):
@pytest.fixture
def test_index(ray_df, pandas_df):
    """Check that the Ray DataFrame's index matches the pandas DataFrame's.

    The indexes are first compared as-is. A stringified index is then
    assigned to a copy of each frame and the resulting indexes are compared
    again, which exercises the index setter.

    Args:
        ray_df: The Ray DataFrame under test.
        pandas_df: The pandas DataFrame providing the expected index.
    """
    assert(ray_df.index.equals(pandas_df.index))

    # Work on copies so the index assignment does not leak into the caller's
    # frames.
    ray_df_cp = ray_df.copy()
    pandas_df_cp = pandas_df.copy()

    ray_df_cp.index = [str(i) for i in ray_df_cp.index]
    pandas_df_cp.index = [str(i) for i in pandas_df_cp.index]

    # Sort BOTH sides. String labels sort lexicographically ('10' < '2'), so
    # sorting only one index makes the comparison fail for a correct frame
    # whose labels are not already in lexicographic order (e.g. a default
    # integer index with more than ten rows).
    assert(ray_df_cp.index.sort_values().equals(
        pandas_df_cp.index.sort_values()))
@pytest.fixture
@@ -41,10 +47,7 @@ def test_ftypes(ray_df, pandas_df):
@pytest.fixture
def test_values(ray_df, pandas_df):
    """Check that both DataFrames expose the same underlying values.

    Args:
        ray_df: The Ray DataFrame under test.
        pandas_df: The pandas DataFrame providing the expected values.
    """
    # np.testing.assert_equal compares shapes and treats NaNs in matching
    # positions as equal, so a manual flatten/zip/isnan loop is unnecessary.
    # That loop would also silently stop at the shorter array and raise
    # TypeError from np.isnan on unequal non-numeric cells.
    np.testing.assert_equal(ray_df.values, pandas_df.values)
@pytest.fixture
@@ -339,6 +342,51 @@ def test_mixed_dtype_dataframe():
test_notnull(ray_df, pandas_df)
def test_nan_dataframe():
    """Run the shared DataFrame checks on a frame with one NaN per column."""
    nan = np.nan
    column_data = {
        'col1': [1, 2, 3, nan],
        'col2': [4, 5, nan, 7],
        'col3': [8, nan, 10, 11],
        'col4': [nan, 13, 14, 15],
    }
    pandas_df = pd.DataFrame(column_data)
    # NOTE(review): the second argument is presumably the partition count;
    # confirm against rdf.from_pandas.
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise functions handed to test_applymap, one call per function.
    cell_funcs = [
        lambda value: value + value,
        lambda value: str(value),
        lambda value: value,
        lambda value: False,
    ]
    column_names = ['col1', 'col2', 'col3', 'col4']

    # Checks that take both frames, run in their original order.
    structural_checks = (
        test_roundtrip,
        test_index,
        test_size,
        test_ndim,
        test_ftypes,
        test_values,
        test_axes,
        test_shape,
        test_add_prefix,
        test_add_suffix,
    )
    for check in structural_checks:
        check(ray_df, pandas_df)

    for func in cell_funcs:
        test_applymap(ray_df, pandas_df, func)

    test_copy(ray_df)

    for check in (test_sum, test_keys, test_transpose):
        check(ray_df, pandas_df)

    for name in column_names:
        test_get(ray_df, pandas_df, name)

    for check in (test_get_dtype_counts, test_get_ftype_counts):
        check(ray_df, pandas_df)
def test_add():
ray_df = create_test_dataframe()