Adding support for concat (#1739)

adding tests

fixing flake8

adding init

flake8 on test

fixing tests, imports, and flake8

handling for index

adding tests for row, index

added more robust error handling for axis

fixing test failures

cleaning up errors for 2.7

updating travis

resolving import

fixing flake8

moved import order

Fixing to refactor and delaying implementing ray-pd inner concat

resolving ray-pd concat and from_pandas mutation

Revert "resolving ray-pd concat and from_pandas mutation"

This reverts commit 5db43e4e89e328286532f3ef98a4526575c5d08d.
This commit is contained in:
adgirish
2018-04-09 21:36:24 -07:00
committed by Devin Petersohn
parent 3039cca242
commit efeaacbedc
5 changed files with 203 additions and 3 deletions
+107
View File
@@ -0,0 +1,107 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import pandas as pd
import ray.dataframe as rdf
from ray.dataframe.utils import (
to_pandas,
from_pandas
)
def ray_df_equals_pandas(ray_df, pandas_df):
    """Compare a ray DataFrame against a pandas DataFrame.

    This is a plain helper that the tests call directly, so it must not be
    registered as a pytest fixture: pytest would otherwise try to resolve
    ``ray_df`` and ``pandas_df`` as fixtures, and recent pytest versions
    refuse direct calls to fixture functions.

    Args:
        ray_df: Object accepted by ``to_pandas``.
        pandas_df: The pandas DataFrame holding the expected contents.

    Returns:
        True if both frames are equal after sorting each by its index.
    """
    converted = to_pandas(ray_df)
    return converted.sort_index().equals(pandas_df.sort_index())
def ray_df_equals(ray_df1, ray_df2):
    """Compare two ray DataFrames by their pandas conversions.

    This is a plain helper taking explicit arguments, so it must not be
    registered as a pytest fixture (pytest would treat ``ray_df1`` and
    ``ray_df2`` as fixture requests).

    Args:
        ray_df1: First object accepted by ``to_pandas``.
        ray_df2: Second object accepted by ``to_pandas``.

    Returns:
        True if both converted frames are equal after sorting each by its
        index.
    """
    left = to_pandas(ray_df1).sort_index()
    right = to_pandas(ray_df2).sort_index()
    return left.equals(right)
def generate_dfs():
    """Build the two small pandas DataFrames shared by the concat tests.

    Both frames have four rows and share ``col1``-``col3``; the first adds
    ``col4``/``col5`` and the second adds ``col6``/``col7``, so their
    column sets only partially overlap.

    The tests call this function directly, so it is a plain helper rather
    than a pytest fixture (recent pytest versions refuse direct calls to
    fixture functions).

    Returns:
        A ``(df, df2)`` tuple of pandas DataFrames.
    """
    df = pd.DataFrame({'col1': [0, 1, 2, 3],
                       'col2': [4, 5, 6, 7],
                       'col3': [8, 9, 10, 11],
                       'col4': [12, 13, 14, 15],
                       'col5': [0, 0, 0, 0]})

    df2 = pd.DataFrame({'col1': [0, 1, 2, 3],
                        'col2': [4, 5, 6, 7],
                        'col3': [8, 9, 10, 11],
                        'col6': [12, 13, 14, 15],
                        'col7': [0, 0, 0, 0]})
    return df, df2
def test_df_concat():
    """rdf.concat on two plain pandas DataFrames should match pd.concat.

    This must be an ordinary test function: pytest does not collect
    functions marked as fixtures, so a fixture decorator here would keep
    the check from ever running.
    """
    df, df2 = generate_dfs()

    actual = rdf.concat([df, df2])
    expected = pd.concat([df, df2])
    assert ray_df_equals_pandas(actual, expected)
def test_ray_concat():
    """Default concat of two ray DataFrames should match pd.concat."""
    pandas_left, pandas_right = generate_dfs()
    ray_left = from_pandas(pandas_left, 2)
    ray_right = from_pandas(pandas_right, 2)

    actual = rdf.concat([ray_left, ray_right])
    expected = pd.concat([pandas_left, pandas_right])
    assert ray_df_equals_pandas(actual, expected)
def test_ray_concat_on_index():
    """Every spelling of the row axis should give the pd.concat result."""
    pandas_left, pandas_right = generate_dfs()
    ray_left = from_pandas(pandas_left, 2)
    ray_right = from_pandas(pandas_right, 2)

    # 'index', 'rows' and 0 are checked in the same order as before.
    for row_axis in ('index', 'rows', 0):
        actual = rdf.concat([ray_left, ray_right], axis=row_axis)
        expected = pd.concat([pandas_left, pandas_right], axis=row_axis)
        assert ray_df_equals_pandas(actual, expected)
def test_ray_concat_on_column():
    """Column-wise concat is expected to raise NotImplementedError."""
    pandas_left, pandas_right = generate_dfs()
    ray_frames = [from_pandas(pandas_left, 2), from_pandas(pandas_right, 2)]

    # Both the integer and the string spelling of the column axis must fail.
    for column_axis in (1, "columns"):
        with pytest.raises(NotImplementedError):
            rdf.concat(ray_frames, axis=column_axis)
def test_invalid_axis_errors():
    """An axis value of 2 is expected to raise ValueError."""
    pandas_left, pandas_right = generate_dfs()
    ray_frames = [from_pandas(pandas_left, 2), from_pandas(pandas_right, 2)]

    with pytest.raises(ValueError):
        rdf.concat(ray_frames, axis=2)
def test_mixed_concat():
    """Concat of ray and pandas DataFrames together should match pandas."""
    pandas_left, pandas_right = generate_dfs()
    pandas_extra = pandas_left.copy()

    # Two frames converted with from_pandas plus one plain pandas frame.
    mixed_dfs = [
        from_pandas(pandas_left, 2),
        from_pandas(pandas_right, 2),
        pandas_extra,
    ]

    actual = rdf.concat(mixed_dfs)
    expected = pd.concat([pandas_left, pandas_right, pandas_extra])
    assert ray_df_equals_pandas(actual, expected)
def test_mixed_inner_concat():
    """An inner join over mixed frames should raise NotImplementedError."""
    pandas_left, pandas_right = generate_dfs()
    pandas_extra = pandas_left.copy()

    # Two frames converted with from_pandas plus one plain pandas frame.
    mixed_dfs = [
        from_pandas(pandas_left, 2),
        from_pandas(pandas_right, 2),
        pandas_extra,
    ]

    with pytest.raises(NotImplementedError):
        rdf.concat(mixed_dfs, join="inner")
+3 -2
View File
@@ -5,10 +5,11 @@ from __future__ import print_function
import pytest
import numpy as np
import pandas as pd
import ray.dataframe as rdf
import ray.dataframe.io as io
import os
from ray.dataframe.utils import to_pandas
TEST_PARQUET_FILENAME = 'test.parquet'
TEST_CSV_FILENAME = 'test.csv'
SMALL_ROW_SIZE = 2000
@@ -17,7 +18,7 @@ LARGE_ROW_SIZE = 7e6
@pytest.fixture
def ray_df_equals_pandas(ray_df, pandas_df):
return rdf.to_pandas(ray_df).sort_index().equals(pandas_df.sort_index())
return to_pandas(ray_df).sort_index().equals(pandas_df.sort_index())
@pytest.fixture