diff --git a/python/ray/dataframe/__init__.py b/python/ray/dataframe/__init__.py
index 5081df3d3..5ad6b85e8 100644
--- a/python/ray/dataframe/__init__.py
+++ b/python/ray/dataframe/__init__.py
@@ -30,8 +30,11 @@ def get_npartitions():
 # because they depend on npartitions.
 from .dataframe import DataFrame  # noqa: 402
 from .series import Series  # noqa: 402
-from .io import (read_csv, read_parquet)  # noqa: 402
+from .io import (read_csv, read_parquet, read_json, read_html,  # noqa: 402
+                 read_clipboard, read_excel, read_hdf, read_feather,  # noqa: 402
+                 read_msgpack, read_stata, read_sas, read_pickle,  # noqa: 402
+                 read_sql)  # noqa: 402
 from .concat import concat  # noqa: 402
 
 __all__ = [
     "DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval"
diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py
index 943d825c1..a069d1c24 100644
--- a/python/ray/dataframe/dataframe.py
+++ b/python/ray/dataframe/dataframe.py
@@ -9,7 +9,7 @@ from pandas.core.index import _ensure_index_from_sequences
 from pandas._libs import lib
 from pandas.core.dtypes.cast import maybe_upcast_putmask
 from pandas import compat
-from pandas.compat import lzip
+from pandas.compat import lzip, cPickle as pkl
 import pandas.core.common as com
 from pandas.core.dtypes.common import (
     is_bool_dtype,
@@ -2924,19 +2924,30 @@ class DataFrame(object):
             "github.com/ray-project/ray.")
 
     def to_clipboard(self, excel=None, sep=None, **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
-
-    def to_csv(self, path_or_buf=None, sep=', ', na_rep='', float_format=None,
+        """Copy this frame to the system clipboard via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_clipboard(excel, sep, **kwargs)
+
+    def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None,
                columns=None, header=True, index=True, index_label=None,
                mode='w', encoding=None, compression=None, quoting=None,
                quotechar='"', line_terminator='\n', chunksize=None,
                tupleize_cols=None, date_format=None, doublequote=True,
                escapechar=None, decimal='.'):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Write this frame as CSV via pandas; return its result."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        return port_frame.to_csv(path_or_buf, sep, na_rep, float_format,
+                                 columns, header, index, index_label,
+                                 mode, encoding, compression, quoting,
+                                 quotechar, line_terminator, chunksize,
+                                 tupleize_cols, date_format, doublequote,
+                                 escapechar, decimal)
 
     def to_dense(self):
         raise NotImplementedError(
@@ -2953,14 +2964,24 @@ class DataFrame(object):
                  index_label=None, startrow=0, startcol=0, engine=None,
                  merge_cells=True, encoding=None, inf_rep='inf', verbose=True,
                  freeze_panes=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Write this frame to an Excel sheet via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_excel(excel_writer, sheet_name, na_rep,
+                            float_format, columns, header, index,
+                            index_label, startrow, startcol, engine,
+                            merge_cells, encoding, inf_rep, verbose,
+                            freeze_panes)
 
     def to_feather(self, fname):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Write this frame in Feather format via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_feather(fname)
 
     def to_gbq(self, destination_table, project_id, chunksize=10000,
                verbose=True, reauth=False, if_exists='fail',
@@ -2970,9 +2991,12 @@ class DataFrame(object):
             "github.com/ray-project/ray.")
 
     def to_hdf(self, path_or_buf, key, **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Write this frame to an HDF5 file via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_hdf(path_or_buf, key, **kwargs)
 
     def to_html(self, buf=None, columns=None, col_space=None, header=True,
                 index=True, na_rep='np.NaN', formatters=None,
@@ -2980,16 +3004,29 @@ class DataFrame(object):
                 justify=None, bold_rows=True, classes=None, escape=True,
                 max_rows=None, max_cols=None, show_dimensions=False,
                 notebook=False, decimal='.', border=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Render this frame as HTML via pandas; return its result."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        return port_frame.to_html(buf, columns, col_space, header,
+                                  index, na_rep, formatters,
+                                  float_format, sparsify, index_names,
+                                  justify, bold_rows, classes, escape,
+                                  max_rows, max_cols, show_dimensions,
+                                  notebook, decimal, border)
 
     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
                 default_handler=None, lines=False, compression=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Convert this frame to JSON via pandas; return its result."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        return port_frame.to_json(path_or_buf, orient, date_format,
+                                  double_precision, force_ascii, date_unit,
+                                  default_handler, lines, compression)
 
     def to_latex(self, buf=None, columns=None, col_space=None, header=True,
                  index=True, na_rep='np.NaN', formatters=None,
@@ -3002,9 +3039,12 @@ class DataFrame(object):
             "github.com/ray-project/ray.")
 
     def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Serialize this frame to msgpack via pandas; return its result."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        return port_frame.to_msgpack(path_or_buf, encoding, **kwargs)
 
     def to_panel(self):
         raise NotImplementedError(
@@ -3013,19 +3053,26 @@ class DataFrame(object):
 
     def to_parquet(self, fname, engine='auto', compression='snappy',
                    **kwargs):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Write this frame to a Parquet file via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_parquet(fname, engine, compression, **kwargs)
 
     def to_period(self, freq=None, axis=0, copy=True):
         raise NotImplementedError(
             "To contribute to Pandas on Ray, please visit "
             "github.com/ray-project/ray.")
 
-    def to_pickle(self, path, compression='infer', protocol=4):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+    def to_pickle(self, path, compression='infer',
+                  protocol=pkl.HIGHEST_PROTOCOL):
+        """Pickle this frame to a file via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_pickle(path, compression, protocol)
 
     def to_records(self, index=True, convert_datetime64=True):
         raise NotImplementedError(
@@ -3039,16 +3086,25 @@ class DataFrame(object):
 
     def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
                index=True, index_label=None, chunksize=None, dtype=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Write this frame to a SQL database via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_sql(name, con, flavor, schema, if_exists,
+                          index, index_label, chunksize, dtype)
 
     def to_stata(self, fname, convert_dates=None, write_index=True,
                  encoding='latin-1', byteorder=None, time_stamp=None,
                  data_label=None, variable_labels=None):
-        raise NotImplementedError(
-            "To contribute to Pandas on Ray, please visit "
-            "github.com/ray-project/ray.")
+        """Write this frame to a Stata dta file via pandas."""
+        warnings.warn("Defaulting to Pandas implementation",
+                      PendingDeprecationWarning)
+
+        port_frame = to_pandas(self)
+        port_frame.to_stata(fname, convert_dates, write_index,
+                            encoding, byteorder, time_stamp,
+                            data_label, variable_labels)
 
     def to_string(self, buf=None, columns=None, col_space=None, header=True,
                   index=True, na_rep='np.NaN', formatters=None,
diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py
index 87b1af8c3..dc3b83d35 100644
--- a/python/ray/dataframe/test/test_dataframe.py
+++ b/python/ray/dataframe/test/test_dataframe.py
@@ -8,9 +8,8 @@ import pandas as pd
 import pandas.util.testing as tm
 import ray.dataframe as rdf
 from ray.dataframe.utils import (
-    to_pandas,
-    from_pandas
-)
+    from_pandas,
+    to_pandas)
 
 from pandas.tests.frame.common import TestData
 
@@ -2665,118 +2664,6 @@ def test_take():
         ray_df.take(None)
 
 
-def test_to_clipboard():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_clipboard()
-
-
-def test_to_csv():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_csv()
-
-
-def test_to_dense():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_dense()
-
-
-def test_to_dict():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_dict()
-
-
-def test_to_excel():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_excel(None)
-
-
-def test_to_feather():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_feather(None)
-
-
-def test_to_gbq():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_gbq(None, None)
-
-
-def test_to_hdf():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_hdf(None, None)
-
-
-def test_to_html():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_html()
-
-
-def test_to_json():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_json()
-
-
-def test_to_latex():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_latex()
-
-
-def test_to_msgpack():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_msgpack()
-
-
-def test_to_panel():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_panel()
-
-
-def test_to_parquet():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_parquet(None)
-
-
-def test_to_period():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_period()
-
-
-def test_to_pickle():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_pickle(None)
-
-
 def test_to_records():
     ray_df = create_test_dataframe()
 
@@ -2791,20 +2678,6 @@ def test_to_sparse():
         ray_df.to_sparse()
 
 
-def test_to_sql():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_sql(None, None)
-
-
-def test_to_stata():
-    ray_df = create_test_dataframe()
-
-    with pytest.raises(NotImplementedError):
-        ray_df.to_stata(None)
-
-
 def test_to_string():
     ray_df = create_test_dataframe()
 
diff --git a/python/ray/dataframe/test/test_io.py b/python/ray/dataframe/test/test_io.py
index 2bfbf7c43..c2ab544be 100644
--- a/python/ray/dataframe/test/test_io.py
+++ b/python/ray/dataframe/test/test_io.py
@@ -4,9 +4,9 @@ from __future__ import print_function
 
 import pytest
 import numpy as np
-import pandas as pd
+import pandas
 from ray.dataframe.utils import to_pandas
-import ray.dataframe.io as io
+import ray.dataframe as pd
 import os
 import sqlite3
 
@@ -36,13 +36,53 @@ def setup_parquet_file(row_size, force=False):
     if os.path.exists(TEST_PARQUET_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
         df.to_parquet(TEST_PARQUET_FILENAME)
 
 
+@pytest.fixture
+def create_test_ray_dataframe():
+    df = pd.DataFrame({'col1': [0, 1, 2, 3],
+                       'col2': [4, 5, 6, 7],
+                       'col3': [8, 9, 10, 11],
+                       'col4': [12, 13, 14, 15],
+                       'col5': [0, 0, 0, 0]})
+
+    return df
+
+
+@pytest.fixture
+def create_test_pandas_dataframe():
+    df = pandas.DataFrame({'col1': [0, 1, 2, 3],
+                           'col2': [4, 5, 6, 7],
+                           'col3': [8, 9, 10, 11],
+                           'col4': [12, 13, 14, 15],
+                           'col5': [0, 0, 0, 0]})
+
+    return df
+
+
+@pytest.fixture
+def test_files_eq(path1, path2):
+    with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
+        file1_content = file1.read()
+        file2_content = file2.read()
+
+        if file1_content == file2_content:
+            return True
+        else:
+            return False
+
+
+@pytest.fixture
+def teardown_test_file(test_path):
+    if os.path.exists(test_path):
+        os.remove(test_path)
+
+
 @pytest.fixture
 def teardown_parquet_file():
     if os.path.exists(TEST_PARQUET_FILENAME):
@@ -54,7 +94,7 @@ def setup_csv_file(row_size, force=False, delimiter=','):
     if os.path.exists(TEST_CSV_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -72,7 +112,7 @@ def setup_json_file(row_size, force=False):
     if os.path.exists(TEST_JSON_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -90,7 +130,7 @@ def setup_html_file(row_size, force=False):
     if os.path.exists(TEST_HTML_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -105,7 +145,7 @@ def teardown_html_file():
 
 @pytest.fixture
 def setup_clipboard(row_size, force=False):
-    df = pd.DataFrame({
+    df = pandas.DataFrame({
         'col1': np.arange(row_size),
         'col2': np.arange(row_size)
     })
@@ -117,7 +157,7 @@ def setup_excel_file(row_size, force=False):
     if os.path.exists(TEST_EXCEL_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -135,7 +175,7 @@ def setup_feather_file(row_size, force=False):
     if os.path.exists(TEST_FEATHER_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -153,7 +193,7 @@ def setup_hdf_file(row_size, force=False):
     if os.path.exists(TEST_HDF_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -171,7 +211,7 @@ def setup_msgpack_file(row_size, force=False):
     if os.path.exists(TEST_MSGPACK_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -189,7 +229,7 @@ def setup_stata_file(row_size, force=False):
     if os.path.exists(TEST_STATA_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -207,7 +247,7 @@ def setup_pickle_file(row_size, force=False):
     if os.path.exists(TEST_PICKLE_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({
+        df = pandas.DataFrame({
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
@@ -225,11 +265,11 @@ def setup_sql_file(conn, force=False):
     if os.path.exists(TEST_SQL_FILENAME) and not force:
         pass
     else:
-        df = pd.DataFrame({'col1': [0, 1, 2, 3],
-                           'col2': [4, 5, 6, 7],
-                           'col3': [8, 9, 10, 11],
-                           'col4': [12, 13, 14, 15],
-                           'col5': [0, 0, 0, 0]})
+        df = pandas.DataFrame({'col1': [0, 1, 2, 3],
+                               'col2': [4, 5, 6, 7],
+                               'col3': [8, 9, 10, 11],
+                               'col4': [12, 13, 14, 15],
+                               'col5': [0, 0, 0, 0]})
         df.to_sql(TEST_SQL_FILENAME.split(".")[0], conn)
 
 
@@ -243,9 +283,9 @@ def test_from_parquet_small():
 
     setup_parquet_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_parquet(TEST_PARQUET_FILENAME)
-    ray_df = io.read_parquet(TEST_PARQUET_FILENAME)
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
+    ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_parquet_file()
 
@@ -253,10 +293,10 @@ def test_from_parquet_small():
 def test_from_parquet_large():
     setup_parquet_file(LARGE_ROW_SIZE)
 
-    pd_df = pd.read_parquet(TEST_PARQUET_FILENAME)
-    ray_df = io.read_parquet(TEST_PARQUET_FILENAME)
+    pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
+    ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_parquet_file()
 
@@ -264,10 +304,10 @@ def test_from_parquet_large():
 def test_from_csv():
     setup_csv_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_csv(TEST_CSV_FILENAME)
-    ray_df = io.read_csv(TEST_CSV_FILENAME)
+    pandas_df = pandas.read_csv(TEST_CSV_FILENAME)
+    ray_df = pd.read_csv(TEST_CSV_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_csv_file()
 
@@ -275,10 +315,10 @@ def test_from_csv():
 def test_from_json():
     setup_json_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_json(TEST_JSON_FILENAME)
-    ray_df = io.read_json(TEST_JSON_FILENAME)
+    pandas_df = pandas.read_json(TEST_JSON_FILENAME)
+    ray_df = pd.read_json(TEST_JSON_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_json_file()
 
@@ -286,10 +326,10 @@ def test_from_json():
 def test_from_html():
     setup_html_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_html(TEST_HTML_FILENAME)[0]
-    ray_df = io.read_html(TEST_HTML_FILENAME)
+    pandas_df = pandas.read_html(TEST_HTML_FILENAME)[0]
+    ray_df = pd.read_html(TEST_HTML_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_html_file()
 
@@ -298,19 +338,19 @@ def test_from_html():
 def test_from_clipboard():
     setup_clipboard(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_clipboard()
-    ray_df = io.read_clipboard()
+    pandas_df = pandas.read_clipboard()
+    ray_df = pd.read_clipboard()
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
 
 def test_from_excel():
     setup_excel_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_excel(TEST_EXCEL_FILENAME)
-    ray_df = io.read_excel(TEST_EXCEL_FILENAME)
+    pandas_df = pandas.read_excel(TEST_EXCEL_FILENAME)
+    ray_df = pd.read_excel(TEST_EXCEL_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_excel_file()
 
@@ -318,10 +358,10 @@ def test_from_excel():
 def test_from_feather():
     setup_feather_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_feather(TEST_FEATHER_FILENAME)
-    ray_df = io.read_feather(TEST_FEATHER_FILENAME)
+    pandas_df = pandas.read_feather(TEST_FEATHER_FILENAME)
+    ray_df = pd.read_feather(TEST_FEATHER_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_feather_file()
 
@@ -330,10 +370,10 @@ def test_from_feather():
 def test_from_hdf():
     setup_hdf_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_hdf(TEST_HDF_FILENAME, key='test')
-    ray_df = io.read_hdf(TEST_HDF_FILENAME, key='test')
+    pandas_df = pandas.read_hdf(TEST_HDF_FILENAME, key='test')
+    ray_df = pd.read_hdf(TEST_HDF_FILENAME, key='test')
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_hdf_file()
 
@@ -341,10 +381,10 @@ def test_from_hdf():
 def test_from_msgpack():
     setup_msgpack_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_msgpack(TEST_MSGPACK_FILENAME)
-    ray_df = io.read_msgpack(TEST_MSGPACK_FILENAME)
+    pandas_df = pandas.read_msgpack(TEST_MSGPACK_FILENAME)
+    ray_df = pd.read_msgpack(TEST_MSGPACK_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_msgpack_file()
 
@@ -352,10 +392,10 @@ def test_from_msgpack():
 def test_from_stata():
     setup_stata_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_stata(TEST_STATA_FILENAME)
-    ray_df = io.read_stata(TEST_STATA_FILENAME)
+    pandas_df = pandas.read_stata(TEST_STATA_FILENAME)
+    ray_df = pd.read_stata(TEST_STATA_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_stata_file()
 
@@ -363,10 +403,10 @@ def test_from_stata():
 def test_from_pickle():
     setup_pickle_file(SMALL_ROW_SIZE)
 
-    pd_df = pd.read_pickle(TEST_PICKLE_FILENAME)
-    ray_df = io.read_pickle(TEST_PICKLE_FILENAME)
+    pandas_df = pandas.read_pickle(TEST_PICKLE_FILENAME)
+    ray_df = pd.read_pickle(TEST_PICKLE_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_pickle_file()
 
@@ -375,17 +415,261 @@ def test_from_sql():
     conn = sqlite3.connect(TEST_SQL_FILENAME)
     setup_sql_file(conn, True)
 
-    pd_df = pd.read_sql("select * from test", conn)
-    ray_df = io.read_sql("select * from test", conn)
+    pandas_df = pandas.read_sql("select * from test", conn)
+    ray_df = pd.read_sql("select * from test", conn)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
 
     teardown_sql_file()
 
 
 @pytest.mark.skip(reason="No SAS write methods in Pandas")
 def test_from_sas():
-    pd_df = pd.read_sas(TEST_SAS_FILENAME)
-    ray_df = io.read_sas(TEST_SAS_FILENAME)
+    pandas_df = pandas.read_sas(TEST_SAS_FILENAME)
+    ray_df = pd.read_sas(TEST_SAS_FILENAME)
 
-    assert ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pandas_df)
+
+
+def test_from_csv_delimiter():
+    setup_csv_file(SMALL_ROW_SIZE, delimiter='|')
+
+    pandas_df = pandas.read_csv(TEST_CSV_FILENAME)
+    ray_df = pd.read_csv(TEST_CSV_FILENAME)
+
+    assert ray_df_equals_pandas(ray_df, pandas_df)
+
+    teardown_csv_file()
+
+
+@pytest.mark.skip(reason="No clipboard on Travis")
+def test_to_clipboard():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    ray_df.to_clipboard()
+    ray_as_clip = pandas.read_clipboard()
+
+    pandas_df.to_clipboard()
+    pandas_as_clip = pandas.read_clipboard()
+
+    assert(ray_as_clip.equals(pandas_as_clip))
+
+
+def test_to_csv():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_CSV_DF_FILENAME = "test_df.csv"
+    TEST_CSV_pandas_FILENAME = "test_pandas.csv"
+
+    ray_df.to_csv(TEST_CSV_DF_FILENAME)
+    pandas_df.to_csv(TEST_CSV_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_CSV_DF_FILENAME,
+                         TEST_CSV_pandas_FILENAME))
+
+    teardown_test_file(TEST_CSV_pandas_FILENAME)
+    teardown_test_file(TEST_CSV_DF_FILENAME)
+
+
+def test_to_dense():
+    ray_df = create_test_ray_dataframe()
+
+    with pytest.raises(NotImplementedError):
+        ray_df.to_dense()
+
+
+def test_to_dict():
+    ray_df = create_test_ray_dataframe()
+
+    with pytest.raises(NotImplementedError):
+        ray_df.to_dict()
+
+
+def test_to_excel():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_EXCEL_DF_FILENAME = "test_df.xlsx"
+    TEST_EXCEL_pandas_FILENAME = "test_pandas.xlsx"
+
+    ray_writer = pandas.ExcelWriter(TEST_EXCEL_DF_FILENAME)
+    pandas_writer = pandas.ExcelWriter(TEST_EXCEL_pandas_FILENAME)
+
+    ray_df.to_excel(ray_writer)
+    pandas_df.to_excel(pandas_writer)
+
+    ray_writer.save()
+    pandas_writer.save()
+
+    assert(test_files_eq(TEST_EXCEL_DF_FILENAME,
+                         TEST_EXCEL_pandas_FILENAME))
+
+    teardown_test_file(TEST_EXCEL_DF_FILENAME)
+    teardown_test_file(TEST_EXCEL_pandas_FILENAME)
+
+
+def test_to_feather():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_FEATHER_DF_FILENAME = "test_df.feather"
+    TEST_FEATHER_pandas_FILENAME = "test_pandas.feather"
+
+    ray_df.to_feather(TEST_FEATHER_DF_FILENAME)
+    pandas_df.to_feather(TEST_FEATHER_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_FEATHER_DF_FILENAME,
+                         TEST_FEATHER_pandas_FILENAME))
+
+    teardown_test_file(TEST_FEATHER_pandas_FILENAME)
+    teardown_test_file(TEST_FEATHER_DF_FILENAME)
+
+
+def test_to_gbq():
+    ray_df = create_test_ray_dataframe()
+
+    TEST_GBQ_DF_FILENAME = "test_df.gbq"
+    with pytest.raises(NotImplementedError):
+        ray_df.to_gbq(TEST_GBQ_DF_FILENAME, None)
+
+
+def test_to_html():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_HTML_DF_FILENAME = "test_df.html"
+    TEST_HTML_pandas_FILENAME = "test_pandas.html"
+
+    ray_df.to_html(TEST_HTML_DF_FILENAME)
+    pandas_df.to_html(TEST_HTML_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_HTML_DF_FILENAME,
+                         TEST_HTML_pandas_FILENAME))
+
+    teardown_test_file(TEST_HTML_pandas_FILENAME)
+    teardown_test_file(TEST_HTML_DF_FILENAME)
+
+
+def test_to_json():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_JSON_DF_FILENAME = "test_df.json"
+    TEST_JSON_pandas_FILENAME = "test_pandas.json"
+
+    ray_df.to_json(TEST_JSON_DF_FILENAME)
+    pandas_df.to_json(TEST_JSON_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_JSON_DF_FILENAME,
+                         TEST_JSON_pandas_FILENAME))
+
+    teardown_test_file(TEST_JSON_pandas_FILENAME)
+    teardown_test_file(TEST_JSON_DF_FILENAME)
+
+
+def test_to_latex():
+    ray_df = create_test_ray_dataframe()
+
+    with pytest.raises(NotImplementedError):
+        ray_df.to_latex()
+
+
+def test_to_msgpack():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_MSGPACK_DF_FILENAME = "test_df.msgpack"
+    TEST_MSGPACK_pandas_FILENAME = "test_pandas.msgpack"
+
+    ray_df.to_msgpack(TEST_MSGPACK_DF_FILENAME)
+    pandas_df.to_msgpack(TEST_MSGPACK_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_MSGPACK_DF_FILENAME,
+                         TEST_MSGPACK_pandas_FILENAME))
+
+    teardown_test_file(TEST_MSGPACK_pandas_FILENAME)
+    teardown_test_file(TEST_MSGPACK_DF_FILENAME)
+
+
+def test_to_panel():
+    ray_df = create_test_ray_dataframe()
+
+    with pytest.raises(NotImplementedError):
+        ray_df.to_panel()
+
+
+def test_to_parquet():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_PARQUET_DF_FILENAME = "test_df.parquet"
+    TEST_PARQUET_pandas_FILENAME = "test_pandas.parquet"
+
+    ray_df.to_parquet(TEST_PARQUET_DF_FILENAME)
+    pandas_df.to_parquet(TEST_PARQUET_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_PARQUET_DF_FILENAME,
+                         TEST_PARQUET_pandas_FILENAME))
+
+    teardown_test_file(TEST_PARQUET_pandas_FILENAME)
+    teardown_test_file(TEST_PARQUET_DF_FILENAME)
+
+
+def test_to_period():
+    ray_df = create_test_ray_dataframe()
+
+    with pytest.raises(NotImplementedError):
+        ray_df.to_period()
+
+
+def test_to_pickle():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_PICKLE_DF_FILENAME = "test_df.pkl"
+    TEST_PICKLE_pandas_FILENAME = "test_pandas.pkl"
+
+    ray_df.to_pickle(TEST_PICKLE_DF_FILENAME)
+    pandas_df.to_pickle(TEST_PICKLE_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_PICKLE_DF_FILENAME,
+                         TEST_PICKLE_pandas_FILENAME))
+
+    teardown_test_file(TEST_PICKLE_pandas_FILENAME)
+    teardown_test_file(TEST_PICKLE_DF_FILENAME)
+
+
+def test_to_sql():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    # In-memory databases: nothing is left on disk to tear down.
+    ray_conn = sqlite3.connect(":memory:")
+    pandas_conn = sqlite3.connect(":memory:")
+
+    ray_df.to_sql("test", ray_conn)
+    pandas_df.to_sql("test", pandas_conn)
+
+    query = "select * from test"
+    ray_as_sql = pandas.read_sql(query, ray_conn)
+    pandas_as_sql = pandas.read_sql(query, pandas_conn)
+    assert(ray_as_sql.equals(pandas_as_sql))
+
+
+def test_to_stata():
+    ray_df = create_test_ray_dataframe()
+    pandas_df = create_test_pandas_dataframe()
+
+    TEST_STATA_DF_FILENAME = "test_df.stata"
+    TEST_STATA_pandas_FILENAME = "test_pandas.stata"
+
+    ray_df.to_stata(TEST_STATA_DF_FILENAME)
+    pandas_df.to_stata(TEST_STATA_pandas_FILENAME)
+
+    assert(test_files_eq(TEST_STATA_DF_FILENAME,
+                         TEST_STATA_pandas_FILENAME))
+
+    teardown_test_file(TEST_STATA_pandas_FILENAME)
+    teardown_test_file(TEST_STATA_DF_FILENAME)