From 9f28529e2cb00b3b73d1fe4085b756f630e91d63 Mon Sep 17 00:00:00 2001
From: Rohan Singh <11rohans@gmail.com>
Date: Sun, 6 May 2018 09:32:33 -0700
Subject: [PATCH] [DataFrame] Implement rank (#1991)

* rank method completed

* added sanity checks

* flake8

* updated sanity checks

* flake8

* updated sanity checks and style

* updated dtype logic

* Fixing test
---
 python/ray/dataframe/dataframe.py           | 49 +++++++++++++++++++--
 python/ray/dataframe/test/test_dataframe.py | 15 ++++---
 2 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py
index b886691d5..bd6558fd3 100644
--- a/python/ray/dataframe/dataframe.py
+++ b/python/ray/dataframe/dataframe.py
@@ -3166,9 +3166,52 @@ class DataFrame(object):
 
     def rank(self, axis=0, method='average', numeric_only=None,
              na_option='keep', ascending=True, pct=False):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+
+        """
+        Compute numerical data ranks (1 through n) along axis.
+        Equal values are assigned a rank that is the [method] of
+        the ranks of those values.
+
+        Args:
+            axis (int): 0 or 'index' for row-wise,
+                        1 or 'columns' for column-wise
+            method: {'average', 'min', 'max', 'first', 'dense'}
+                Specifies which method to use for equal vals
+            numeric_only (boolean):
+                Include only float, int, boolean data.
+            na_option: {'keep', 'top', 'bottom'}
+                Specifies how to handle NA values
+            ascending (boolean):
+                Decides ranking order
+            pct (boolean):
+                Computes percentage ranking of data
+        Returns:
+            A new DataFrame
+        """
+
+        def rank_helper(df):
+            return df.rank(axis=axis, method=method,
+                           numeric_only=numeric_only,
+                           na_option=na_option,
+                           ascending=ascending, pct=pct)
+
+        axis = pd.DataFrame()._get_axis_number(axis)
+
+        if (axis == 1):
+            new_cols = self.dtypes[self.dtypes.apply(
+                                   lambda x: is_numeric_dtype(x))].index
+            result = _map_partitions(rank_helper,
+                                     self._row_partitions)
+            return DataFrame(row_partitions=result,
+                             columns=new_cols,
+                             index=self.index)
+
+        if (axis == 0):
+            result = _map_partitions(rank_helper,
+                                     self._col_partitions)
+            return DataFrame(col_partitions=result,
+                             columns=self.columns,
+                             index=self.index)
 
     def rdiv(self, other, axis='columns', level=None, fill_value=None):
         return self._single_df_op_helper(
diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py
index 43d11b6b4..fa3fb5667 100644
--- a/python/ray/dataframe/test/test_dataframe.py
+++ b/python/ray/dataframe/test/test_dataframe.py
@@ -229,6 +229,7 @@ def test_int_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
     test_diff(ray_df, pandas_df)
+    test_rank(ray_df, pandas_df)
 
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
@@ -396,6 +397,7 @@ def test_float_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
     test_diff(ray_df, pandas_df)
+    test_rank(ray_df, pandas_df)
 
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
@@ -564,6 +566,9 @@ def test_mixed_dtype_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
 
+    # TODO Re-enable once Pandas-20962 is resolved.
+    # test_rank(ray_df, pandas_df)
+
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
     test___getitem__(ray_df, pandas_df)
@@ -722,6 +727,7 @@ def test_nan_dataframe():
     test_quantile(ray_df, pandas_df, .75)
     test_describe(ray_df, pandas_df)
     test_diff(ray_df, pandas_df)
+    test_rank(ray_df, pandas_df)
 
     test_all(ray_df, pandas_df)
     test_any(ray_df, pandas_df)
@@ -2377,11 +2383,10 @@ def test_radd():
     test_inter_df_math_right_ops("radd")
 
 
-def test_rank():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.rank()
+@pytest.fixture
+def test_rank(ray_df, pandas_df):
+    assert(ray_df_equals_pandas(ray_df.rank(), pandas_df.rank()))
+    assert(ray_df_equals_pandas(ray_df.rank(axis=1), pandas_df.rank(axis=1)))
 
 
 def test_rdiv():