mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 01:00:10 +08:00
[DataFrame] Implements df.pipe (#1999)
* Add empty df test
* Fix flake8 issues
* rebase with master
* reset master tests
* Implement df.pipe
* fix tests
* Use test_pipe as a pytest.fixture
* Add newline at EOF
This commit is contained in:
committed by
Devin Petersohn
parent
a1d7bb31a4
commit
22d4950fae
@@ -2783,9 +2783,17 @@ class DataFrame(object):
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
def pipe(self, func, *args, **kwargs):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
"""Apply func(self, *args, **kwargs)
|
||||
|
||||
Args:
|
||||
func: function to apply to the df.
|
||||
args: positional arguments passed into ``func``.
|
||||
kwargs: a dictionary of keyword arguments passed into ``func``.
|
||||
|
||||
Returns:
|
||||
object: the return type of ``func``.
|
||||
"""
|
||||
return com._pipe(self, func, *args, **kwargs)
|
||||
|
||||
def pivot(self, index=None, columns=None, values=None):
|
||||
raise NotImplementedError(
|
||||
|
||||
@@ -262,6 +262,7 @@ def test_int_dataframe():
|
||||
test_cummin(ray_df, pandas_df)
|
||||
test_cumprod(ray_df, pandas_df)
|
||||
test_cumsum(ray_df, pandas_df)
|
||||
test_pipe(ray_df, pandas_df)
|
||||
|
||||
# test_loc(ray_df, pandas_df)
|
||||
# test_iloc(ray_df, pandas_df)
|
||||
@@ -405,6 +406,7 @@ def test_float_dataframe():
|
||||
test_cummin(ray_df, pandas_df)
|
||||
test_cumprod(ray_df, pandas_df)
|
||||
test_cumsum(ray_df, pandas_df)
|
||||
test_pipe(ray_df, pandas_df)
|
||||
|
||||
test___len__(ray_df, pandas_df)
|
||||
test_first_valid_index(ray_df, pandas_df)
|
||||
@@ -568,6 +570,7 @@ def test_mixed_dtype_dataframe():
|
||||
test_min(ray_df, pandas_df)
|
||||
test_notna(ray_df, pandas_df)
|
||||
test_notnull(ray_df, pandas_df)
|
||||
test_pipe(ray_df, pandas_df)
|
||||
|
||||
# TODO Fix pandas so that the behavior is correct
|
||||
# We discovered a bug where argmax does not always give the same result
|
||||
@@ -718,6 +721,7 @@ def test_nan_dataframe():
|
||||
test_cummin(ray_df, pandas_df)
|
||||
test_cumprod(ray_df, pandas_df)
|
||||
test_cumsum(ray_df, pandas_df)
|
||||
test_pipe(ray_df, pandas_df)
|
||||
|
||||
test___len__(ray_df, pandas_df)
|
||||
test_first_valid_index(ray_df, pandas_df)
|
||||
@@ -2151,11 +2155,34 @@ def test_pct_change():
|
||||
ray_df.pct_change()
|
||||
|
||||
|
||||
def test_pipe():
|
||||
ray_df = create_test_dataframe()
|
||||
@pytest.fixture
|
||||
def test_pipe(ray_df, pandas_df):
|
||||
n = len(ray_df.index)
|
||||
a, b, c = 2 % n, 0, 3 % n
|
||||
col = ray_df.columns[3 % len(ray_df.columns)]
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.pipe(None)
|
||||
def h(x):
|
||||
return x.drop(columns=[col])
|
||||
|
||||
def g(x, arg1=0):
|
||||
for _ in range(arg1):
|
||||
x = x.append(x)
|
||||
return x
|
||||
|
||||
def f(x, arg2=0, arg3=0):
|
||||
return x.drop([arg2, arg3])
|
||||
|
||||
assert ray_df_equals(f(g(h(ray_df), arg1=a), arg2=b, arg3=c),
|
||||
(ray_df.pipe(h)
|
||||
.pipe(g, arg1=a)
|
||||
.pipe(f, arg2=b, arg3=c)))
|
||||
|
||||
assert ray_df_equals_pandas((ray_df.pipe(h)
|
||||
.pipe(g, arg1=a)
|
||||
.pipe(f, arg2=b, arg3=c)),
|
||||
(pandas_df.pipe(h)
|
||||
.pipe(g, arg1=a)
|
||||
.pipe(f, arg2=b, arg3=c)))
|
||||
|
||||
|
||||
def test_pivot():
|
||||
|
||||
Reference in New Issue
Block a user