[DataFrame] Fixed repr, info, and memory_usage (#1874)

* working with dataframes with too many rows and columns * repr works for jupyter notebooks now * added comments and test file * added repr test file to .travis.yml * added back ray.dataframe as pd to test file * fixed pandas importing issues in test file * getting the front and back of df more efficiently * only keeping dataframe tests in travis * fixing numpy array for row and col lengths issue * doesn't add dimensions if df is small enough * implemented memory_usage() * completed memory_usage - still failing 2 tests * only failing one test for memory_usage * all repr and dataframes tests passing now * fixing error related to python2 in info() * fixing python2 errors * fixed linting errors * using _arithmetic_helper in memory_usage() * fixed last lint error * removed testing-specific code * adding back travis test * removing extra tests from travis * re-added concat test * fixes with new indexing scheme * code cleanup * fully working with new indexing scheme * added tests for info and memory_usage * removed test file
2026-06-30 21:46:28 +08:00 · 2018-04-11 08:07:07 -07:00
parent 806b2c844e
commit a3ddde398c
3 changed files with 196 additions and 43 deletions
@@ -1648,11 +1648,12 @@ def test_infer_objects():
        ray_df.infer_objects()


-def test_info():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.info()
+@pytest.fixture
+def test_info(ray_df):
+    info_string = ray_df.info()
+    assert '<class \'ray.dataframe.dataframe.DataFrame\'>\n' in info_string
+    info_string = ray_df.info(memory_usage=True)
+    assert 'memory_usage: ' in info_string


@pytest.fixture
@@ -1815,11 +1816,12 @@ def test_melt():
        ray_df.melt()


-def test_memory_usage():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.memory_usage()
+@pytest.fixture
+def test_memory_usage(ray_df):
+    assert type(ray_df.memory_usage()) is pd.core.series.Series
+    assert ray_df.memory_usage(index=True).at['Index'] is not None
+    assert ray_df.memory_usage(deep=True).sum() >= \
+        ray_df.memory_usage(deep=False).sum()


 def test_merge():