mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 22:20:31 +08:00
[DataFrame] Implement rank (#1991)
* rank method completed * added sanity checks * flake8 * updated sanity checks * flake8 * updated sanity checks and style * updated dtype logic * Fixing test
This commit is contained in:
committed by
Devin Petersohn
parent
857458c37c
commit
9f28529e2c
@@ -3166,9 +3166,52 @@ class DataFrame(object):
|
||||
|
||||
def rank(self, axis=0, method='average', numeric_only=None,
|
||||
na_option='keep', ascending=True, pct=False):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
"""
|
||||
Compute numerical data ranks (1 through n) along axis.
|
||||
Equal values are assigned a rank that is the [method] of
|
||||
the ranks of those values.
|
||||
|
||||
Args:
|
||||
axis (int): 0 or 'index' for row-wise,
|
||||
1 or 'columns' for column-wise
|
||||
method: {'average', 'min', 'max', 'first', 'dense'}
|
||||
Specifies which method to use for equal values
|
||||
numeric_only (boolean)
|
||||
Include only float, int, boolean data.
|
||||
na_option: {'keep', 'top', 'bottom'}
|
||||
Specifies how to handle NA values
|
||||
ascending (boolean):
|
||||
Decides ranking order
|
||||
pct (boolean):
|
||||
Computes percentage ranking of data
|
||||
Returns:
|
||||
A new DataFrame
|
||||
"""
|
||||
|
||||
def rank_helper(df):
|
||||
return df.rank(axis=axis, method=method,
|
||||
numeric_only=numeric_only,
|
||||
na_option=na_option,
|
||||
ascending=ascending, pct=pct)
|
||||
|
||||
axis = pd.DataFrame()._get_axis_number(axis)
|
||||
|
||||
if (axis == 1):
|
||||
new_cols = self.dtypes[self.dtypes.apply(
|
||||
lambda x: is_numeric_dtype(x))].index
|
||||
result = _map_partitions(rank_helper,
|
||||
self._row_partitions)
|
||||
return DataFrame(row_partitions=result,
|
||||
columns=new_cols,
|
||||
index=self.index)
|
||||
|
||||
if (axis == 0):
|
||||
result = _map_partitions(rank_helper,
|
||||
self._col_partitions)
|
||||
return DataFrame(col_partitions=result,
|
||||
columns=self.columns,
|
||||
index=self.index)
|
||||
|
||||
def rdiv(self, other, axis='columns', level=None, fill_value=None):
|
||||
return self._single_df_op_helper(
|
||||
|
||||
@@ -229,6 +229,7 @@ def test_int_dataframe():
|
||||
test_quantile(ray_df, pandas_df, .75)
|
||||
test_describe(ray_df, pandas_df)
|
||||
test_diff(ray_df, pandas_df)
|
||||
test_rank(ray_df, pandas_df)
|
||||
|
||||
test_all(ray_df, pandas_df)
|
||||
test_any(ray_df, pandas_df)
|
||||
@@ -396,6 +397,7 @@ def test_float_dataframe():
|
||||
test_quantile(ray_df, pandas_df, .75)
|
||||
test_describe(ray_df, pandas_df)
|
||||
test_diff(ray_df, pandas_df)
|
||||
test_rank(ray_df, pandas_df)
|
||||
|
||||
test_all(ray_df, pandas_df)
|
||||
test_any(ray_df, pandas_df)
|
||||
@@ -564,6 +566,9 @@ def test_mixed_dtype_dataframe():
|
||||
test_quantile(ray_df, pandas_df, .75)
|
||||
test_describe(ray_df, pandas_df)
|
||||
|
||||
# TODO Resolve once Pandas-20962 is resolved.
|
||||
# test_rank(ray_df, pandas_df)
|
||||
|
||||
test_all(ray_df, pandas_df)
|
||||
test_any(ray_df, pandas_df)
|
||||
test___getitem__(ray_df, pandas_df)
|
||||
@@ -722,6 +727,7 @@ def test_nan_dataframe():
|
||||
test_quantile(ray_df, pandas_df, .75)
|
||||
test_describe(ray_df, pandas_df)
|
||||
test_diff(ray_df, pandas_df)
|
||||
test_rank(ray_df, pandas_df)
|
||||
|
||||
test_all(ray_df, pandas_df)
|
||||
test_any(ray_df, pandas_df)
|
||||
@@ -2377,11 +2383,10 @@ def test_radd():
|
||||
test_inter_df_math_right_ops("radd")
|
||||
|
||||
|
||||
def test_rank():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.rank()
|
||||
@pytest.fixture
|
||||
def test_rank(ray_df, pandas_df):
|
||||
assert(ray_df_equals_pandas(ray_df.rank(), pandas_df.rank()))
|
||||
assert(ray_df_equals_pandas(ray_df.rank(axis=1), pandas_df.rank(axis=1)))
|
||||
|
||||
|
||||
def test_rdiv():
|
||||
|
||||
Reference in New Issue
Block a user