[DataFrame] Implement loc, iloc (#1612)

* Add parquet-cpp to gitignore

* Add read_csv and read_parquet

* Gitignore pytest_cache

* Fix flake8

* Add io to __init__

* Changing Index. Currently running tests, but so far untested.

* Removing issue of reassigning DF in from_pandas

* Fixing lint

* Fix bug

* Fix bug

* Fix bug

* Better performance

* Fixing index issue with sum

* Address comments

* Update io with index

* Updating performance and implementation. Adding tests

* Fixing off-by-1

* Fix lint

* Address comments

* Make pop compatible with new to_pandas

* Format Code

* Clean up some index issues

* Bug fix: assigned reset_index back

* Implement loc and iloc

* Revert whitespace

* Format code

* Address comments
This commit is contained in:
Simon Mo
2018-02-27 01:57:52 -08:00
committed by Devin Petersohn
parent b79597dc00
commit 4ab16d7fb3
3 changed files with 169 additions and 23 deletions
+48 -18
View File
@@ -223,6 +223,9 @@ def test_int_dataframe():
test_notna(ray_df, pandas_df)
test_notnull(ray_df, pandas_df)
test_loc(ray_df, pandas_df)
test_iloc(ray_df, pandas_df)
labels = ['a', 'b', 'c', 'd']
test_set_axis(ray_df, pandas_df, labels, 0)
test_set_axis(ray_df, pandas_df, labels, 'rows')
@@ -325,6 +328,9 @@ def test_float_dataframe():
test_iteritems(ray_df, pandas_df)
test_itertuples(ray_df, pandas_df)
test_loc(ray_df, pandas_df)
test_iloc(ray_df, pandas_df)
labels = ['a', 'b', 'c', 'd']
test_set_axis(ray_df, pandas_df, labels, 0)
test_set_axis(ray_df, pandas_df, labels, 'rows')
@@ -348,10 +354,10 @@ def test_float_dataframe():
def test_mixed_dtype_dataframe():
pandas_df = pd.DataFrame({
'col1': [1, 2, 3, 4],
'col2': [4, 5, 6, 7],
'col3': [8.0, 9.4, 10.1, 11.3],
'col4': ['a', 'b', 'c', 'd']})
'col1': [1, 2, 3, 4],
'col2': [4, 5, 6, 7],
'col3': [8.0, 9.4, 10.1, 11.3],
'col4': ['a', 'b', 'c', 'd']})
ray_df = rdf.from_pandas(pandas_df, 2)
@@ -432,6 +438,9 @@ def test_mixed_dtype_dataframe():
test_iteritems(ray_df, pandas_df)
test_itertuples(ray_df, pandas_df)
test_loc(ray_df, pandas_df)
test_iloc(ray_df, pandas_df)
labels = ['a', 'b', 'c', 'd']
test_set_axis(ray_df, pandas_df, labels, 0)
test_set_axis(ray_df, pandas_df, labels, 'rows')
@@ -454,10 +463,10 @@ def test_mixed_dtype_dataframe():
def test_nan_dataframe():
pandas_df = pd.DataFrame({
'col1': [1, 2, 3, np.nan],
'col2': [4, 5, np.nan, 7],
'col3': [8, np.nan, 10, 11],
'col4': [np.nan, 13, 14, 15]})
'col1': [1, 2, 3, np.nan],
'col2': [4, 5, np.nan, 7],
'col3': [8, np.nan, 10, 11],
'col4': [np.nan, 13, 14, 15]})
ray_df = rdf.from_pandas(pandas_df, 2)
@@ -528,6 +537,9 @@ def test_nan_dataframe():
test_iteritems(ray_df, pandas_df)
test_itertuples(ray_df, pandas_df)
test_loc(ray_df, pandas_df)
test_iloc(ray_df, pandas_df)
labels = ['a', 'b', 'c', 'd']
test_set_axis(ray_df, pandas_df, labels, 0)
test_set_axis(ray_df, pandas_df, labels, 'rows')
@@ -860,14 +872,14 @@ def test_eq():
def test_equals():
pandas_df1 = pd.DataFrame({'col1': [2.9, 3, 3, 3],
'col2': [2, 3, 4, 1]})
'col2': [2, 3, 4, 1]})
ray_df1 = rdf.from_pandas(pandas_df1, 2)
ray_df2 = rdf.from_pandas(pandas_df1, 3)
assert ray_df1.equals(ray_df2)
pandas_df2 = pd.DataFrame({'col1': [2.9, 3, 3, 3],
'col2': [2, 3, 5, 1]})
'col2': [2, 3, 5, 1]})
ray_df3 = rdf.from_pandas(pandas_df2, 4)
assert not ray_df3.equals(ray_df1)
@@ -2074,11 +2086,20 @@ def test___rsub__():
ray_df.__rsub__(None, None, None)
def test_loc():
ray_df = create_test_dataframe()
@pytest.fixture
def test_loc(ray_df, pd_df):
# Singleton
assert ray_df.loc[0].equals(pd_df.loc[0])
assert ray_df.loc[0, 'col1'] == pd_df.loc[0, 'col1']
with pytest.raises(NotImplementedError):
ray_df.loc()
# List
assert ray_df.loc[[1, 2]].equals(pd_df.loc[[1, 2]])
assert ray_df.loc[[1, 2], ['col1']].equals(pd_df.loc[[1, 2], ['col1']])
# Slice
assert ray_df.loc[1:, 'col1'].equals(pd_df.loc[1:, 'col1'])
assert ray_df.loc[1:2, 'col1'].equals(pd_df.loc[1:2, 'col1'])
assert ray_df.loc[1:2, 'col1':'col2'].equals(pd_df.loc[1:2, 'col1':'col2'])
def test_is_copy():
@@ -2116,8 +2137,17 @@ def test_ix():
ray_df.ix()
def test_iloc():
ray_df = create_test_dataframe()
@pytest.fixture
def test_iloc(ray_df, pd_df):
# Singleton
assert ray_df.iloc[0].equals(pd_df.iloc[0])
assert ray_df.iloc[0, 1] == pd_df.iloc[0, 1]
with pytest.raises(NotImplementedError):
ray_df.iloc()
# List
assert ray_df.iloc[[1, 2]].equals(pd_df.iloc[[1, 2]])
assert ray_df.iloc[[1, 2], [1, 0]].equals(pd_df.iloc[[1, 2], [1, 0]])
# Slice
assert ray_df.iloc[1:, 0].equals(pd_df.iloc[1:, 0])
assert ray_df.iloc[1:2, 0].equals(pd_df.iloc[1:2, 0])
assert ray_df.iloc[1:2, 0:2].equals(pd_df.iloc[1:2, 0:2])