[DataFrame] Implementing write methods (#1918)

* Add in write methods and functionality

* infer highest available pickle version

* Fix import rebase artifact

* formatting changes to test

* fix lint
This commit is contained in:
Kunal Gosar
2018-04-22 21:25:33 -07:00
committed by Devin Petersohn
parent baf97e450b
commit 7c9f39241e
4 changed files with 446 additions and 230 deletions
+5 -2
View File
@@ -30,8 +30,11 @@ def get_npartitions():
# because they depend on npartitions.
from .dataframe import DataFrame # noqa: 402
from .series import Series # noqa: 402
from .io import (read_csv, read_parquet) # noqa: 402
from .concat import concat # noqa: 402
from .io import (read_csv, read_parquet, read_json, read_html, # noqa: 402
read_clipboard, read_excel, read_hdf, read_feather, # noqa: 402
read_msgpack, read_stata, read_sas, read_pickle, # noqa: 402
read_sql) # noqa: 402
from .concat import concat # noqa: 402
__all__ = [
"DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval"
+95 -39
View File
@@ -9,7 +9,7 @@ from pandas.core.index import _ensure_index_from_sequences
from pandas._libs import lib
from pandas.core.dtypes.cast import maybe_upcast_putmask
from pandas import compat
from pandas.compat import lzip
from pandas.compat import lzip, cPickle as pkl
import pandas.core.common as com
from pandas.core.dtypes.common import (
is_bool_dtype,
@@ -2924,19 +2924,30 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def to_clipboard(self, excel=None, sep=None, **kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
def to_csv(self, path_or_buf=None, sep=', ', na_rep='', float_format=None,
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_clipboard(excel, sep, **kwargs)
def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None,
columns=None, header=True, index=True, index_label=None,
mode='w', encoding=None, compression=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_csv(path_or_buf, sep, na_rep, float_format,
columns, header, index, index_label,
mode, encoding, compression, quoting,
quotechar, line_terminator, chunksize,
tupleize_cols, date_format, doublequote,
escapechar, decimal)
def to_dense(self):
raise NotImplementedError(
@@ -2953,14 +2964,24 @@ class DataFrame(object):
index_label=None, startrow=0, startcol=0, engine=None,
merge_cells=True, encoding=None, inf_rep='inf', verbose=True,
freeze_panes=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_excel(excel_writer, sheet_name, na_rep,
float_format, columns, header, index,
index_label, startrow, startcol, engine,
merge_cells, encoding, inf_rep, verbose,
freeze_panes)
def to_feather(self, fname):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_feather(fname)
def to_gbq(self, destination_table, project_id, chunksize=10000,
verbose=True, reauth=False, if_exists='fail',
@@ -2970,9 +2991,12 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def to_hdf(self, path_or_buf, key, **kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_hdf(path_or_buf, key, **kwargs)
def to_html(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='np.NaN', formatters=None,
@@ -2980,16 +3004,29 @@ class DataFrame(object):
justify=None, bold_rows=True, classes=None, escape=True,
max_rows=None, max_cols=None, show_dimensions=False,
notebook=False, decimal='.', border=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_html(buf, columns, col_space, header,
index, na_rep, formatters,
float_format, sparsify, index_names,
justify, bold_rows, classes, escape,
max_rows, max_cols, show_dimensions,
notebook, decimal, border)
def to_json(self, path_or_buf=None, orient=None, date_format=None,
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False, compression=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_json(path_or_buf, orient, date_format,
double_precision, force_ascii, date_unit,
default_handler, lines, compression)
def to_latex(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='np.NaN', formatters=None,
@@ -3002,9 +3039,12 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_msgpack(path_or_buf, encoding, **kwargs)
def to_panel(self):
raise NotImplementedError(
@@ -3013,19 +3053,26 @@ class DataFrame(object):
def to_parquet(self, fname, engine='auto', compression='snappy',
**kwargs):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_parquet(fname, engine, compression, **kwargs)
def to_period(self, freq=None, axis=0, copy=True):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
def to_pickle(self, path, compression='infer', protocol=4):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
def to_pickle(self, path, compression='infer',
protocol=pkl.HIGHEST_PROTOCOL):
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_pickle(path, compression, protocol)
def to_records(self, index=True, convert_datetime64=True):
raise NotImplementedError(
@@ -3039,16 +3086,25 @@ class DataFrame(object):
def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
index=True, index_label=None, chunksize=None, dtype=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_sql(name, con, flavor, schema, if_exists,
index, index_label, chunksize, dtype)
def to_stata(self, fname, convert_dates=None, write_index=True,
encoding='latin-1', byteorder=None, time_stamp=None,
data_label=None, variable_labels=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
warnings.warn("Defaulting to Pandas implementation",
PendingDeprecationWarning)
port_frame = to_pandas(self)
port_frame.to_stata(fname, convert_dates, write_index,
encoding, byteorder, time_stamp,
data_label, variable_labels)
def to_string(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='np.NaN', formatters=None,
+2 -129
View File
@@ -8,9 +8,8 @@ import pandas as pd
import pandas.util.testing as tm
import ray.dataframe as rdf
from ray.dataframe.utils import (
to_pandas,
from_pandas
)
from_pandas,
to_pandas)
from pandas.tests.frame.common import TestData
@@ -2665,118 +2664,6 @@ def test_take():
ray_df.take(None)
def test_to_clipboard():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_clipboard()
def test_to_csv():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_csv()
def test_to_dense():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_dense()
def test_to_dict():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_dict()
def test_to_excel():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_excel(None)
def test_to_feather():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_feather(None)
def test_to_gbq():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_gbq(None, None)
def test_to_hdf():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_hdf(None, None)
def test_to_html():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_html()
def test_to_json():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_json()
def test_to_latex():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_latex()
def test_to_msgpack():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_msgpack()
def test_to_panel():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_panel()
def test_to_parquet():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_parquet(None)
def test_to_period():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_period()
def test_to_pickle():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_pickle(None)
def test_to_records():
ray_df = create_test_dataframe()
@@ -2791,20 +2678,6 @@ def test_to_sparse():
ray_df.to_sparse()
def test_to_sql():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_sql(None, None)
def test_to_stata():
ray_df = create_test_dataframe()
with pytest.raises(NotImplementedError):
ray_df.to_stata(None)
def test_to_string():
ray_df = create_test_dataframe()
+344 -60
View File
@@ -4,9 +4,9 @@ from __future__ import print_function
import pytest
import numpy as np
import pandas as pd
import pandas
from ray.dataframe.utils import to_pandas
import ray.dataframe.io as io
import ray.dataframe as pd
import os
import sqlite3
@@ -36,13 +36,53 @@ def setup_parquet_file(row_size, force=False):
if os.path.exists(TEST_PARQUET_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
df.to_parquet(TEST_PARQUET_FILENAME)
@pytest.fixture
def create_test_ray_dataframe():
    """Return a 4-row, 5-column frame built with ``pd.DataFrame``.

    In this module ``pd`` is ``ray.dataframe``, so the result is the Ray
    counterpart of the frame from ``create_test_pandas_dataframe``.
    """
    column_data = {
        'col1': [0, 1, 2, 3],
        'col2': [4, 5, 6, 7],
        'col3': [8, 9, 10, 11],
        'col4': [12, 13, 14, 15],
        'col5': [0, 0, 0, 0],
    }
    return pd.DataFrame(column_data)
@pytest.fixture
def create_test_pandas_dataframe():
    """Return a 4-row, 5-column ``pandas.DataFrame`` of small integers."""
    column_data = {
        'col1': [0, 1, 2, 3],
        'col2': [4, 5, 6, 7],
        'col3': [8, 9, 10, 11],
        'col4': [12, 13, 14, 15],
        'col5': [0, 0, 0, 0],
    }
    return pandas.DataFrame(column_data)
@pytest.fixture
def test_files_eq(path1, path2):
    """Return True when the two files hold exactly the same bytes.

    Both files are opened in binary mode and read in full before comparing.
    """
    with open(path1, 'rb') as first, open(path2, 'rb') as second:
        first_bytes = first.read()
        second_bytes = second.read()

    return first_bytes == second_bytes
@pytest.fixture
def teardown_test_file(test_path):
    """Delete ``test_path`` if it exists; do nothing otherwise."""
    if not os.path.exists(test_path):
        return
    os.remove(test_path)
@pytest.fixture
def teardown_parquet_file():
if os.path.exists(TEST_PARQUET_FILENAME):
@@ -54,7 +94,7 @@ def setup_csv_file(row_size, force=False, delimiter=','):
if os.path.exists(TEST_CSV_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -72,7 +112,7 @@ def setup_json_file(row_size, force=False):
if os.path.exists(TEST_JSON_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -90,7 +130,7 @@ def setup_html_file(row_size, force=False):
if os.path.exists(TEST_HTML_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -105,7 +145,7 @@ def teardown_html_file():
@pytest.fixture
def setup_clipboard(row_size, force=False):
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -117,7 +157,7 @@ def setup_excel_file(row_size, force=False):
if os.path.exists(TEST_EXCEL_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -135,7 +175,7 @@ def setup_feather_file(row_size, force=False):
if os.path.exists(TEST_FEATHER_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -153,7 +193,7 @@ def setup_hdf_file(row_size, force=False):
if os.path.exists(TEST_HDF_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -171,7 +211,7 @@ def setup_msgpack_file(row_size, force=False):
if os.path.exists(TEST_MSGPACK_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -189,7 +229,7 @@ def setup_stata_file(row_size, force=False):
if os.path.exists(TEST_STATA_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -207,7 +247,7 @@ def setup_pickle_file(row_size, force=False):
if os.path.exists(TEST_PICKLE_FILENAME) and not force:
pass
else:
df = pd.DataFrame({
df = pandas.DataFrame({
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
@@ -225,11 +265,11 @@ def setup_sql_file(conn, force=False):
if os.path.exists(TEST_SQL_FILENAME) and not force:
pass
else:
df = pd.DataFrame({'col1': [0, 1, 2, 3],
'col2': [4, 5, 6, 7],
'col3': [8, 9, 10, 11],
'col4': [12, 13, 14, 15],
'col5': [0, 0, 0, 0]})
df = pandas.DataFrame({'col1': [0, 1, 2, 3],
'col2': [4, 5, 6, 7],
'col3': [8, 9, 10, 11],
'col4': [12, 13, 14, 15],
'col5': [0, 0, 0, 0]})
df.to_sql(TEST_SQL_FILENAME.split(".")[0], conn)
@@ -243,9 +283,9 @@ def test_from_parquet_small():
setup_parquet_file(SMALL_ROW_SIZE)
pd_df = pd.read_parquet(TEST_PARQUET_FILENAME)
ray_df = io.read_parquet(TEST_PARQUET_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_parquet_file()
@@ -253,10 +293,10 @@ def test_from_parquet_small():
def test_from_parquet_large():
setup_parquet_file(LARGE_ROW_SIZE)
pd_df = pd.read_parquet(TEST_PARQUET_FILENAME)
ray_df = io.read_parquet(TEST_PARQUET_FILENAME)
pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_parquet_file()
@@ -264,10 +304,10 @@ def test_from_parquet_large():
def test_from_csv():
setup_csv_file(SMALL_ROW_SIZE)
pd_df = pd.read_csv(TEST_CSV_FILENAME)
ray_df = io.read_csv(TEST_CSV_FILENAME)
pandas_df = pandas.read_csv(TEST_CSV_FILENAME)
ray_df = pd.read_csv(TEST_CSV_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_csv_file()
@@ -275,10 +315,10 @@ def test_from_csv():
def test_from_json():
setup_json_file(SMALL_ROW_SIZE)
pd_df = pd.read_json(TEST_JSON_FILENAME)
ray_df = io.read_json(TEST_JSON_FILENAME)
pandas_df = pandas.read_json(TEST_JSON_FILENAME)
ray_df = pd.read_json(TEST_JSON_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_json_file()
@@ -286,10 +326,10 @@ def test_from_json():
def test_from_html():
setup_html_file(SMALL_ROW_SIZE)
pd_df = pd.read_html(TEST_HTML_FILENAME)[0]
ray_df = io.read_html(TEST_HTML_FILENAME)
pandas_df = pandas.read_html(TEST_HTML_FILENAME)[0]
ray_df = pd.read_html(TEST_HTML_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_html_file()
@@ -298,19 +338,19 @@ def test_from_html():
def test_from_clipboard():
setup_clipboard(SMALL_ROW_SIZE)
pd_df = pd.read_clipboard()
ray_df = io.read_clipboard()
pandas_df = pandas.read_clipboard()
ray_df = pd.read_clipboard()
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
def test_from_excel():
setup_excel_file(SMALL_ROW_SIZE)
pd_df = pd.read_excel(TEST_EXCEL_FILENAME)
ray_df = io.read_excel(TEST_EXCEL_FILENAME)
pandas_df = pandas.read_excel(TEST_EXCEL_FILENAME)
ray_df = pd.read_excel(TEST_EXCEL_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_excel_file()
@@ -318,10 +358,10 @@ def test_from_excel():
def test_from_feather():
setup_feather_file(SMALL_ROW_SIZE)
pd_df = pd.read_feather(TEST_FEATHER_FILENAME)
ray_df = io.read_feather(TEST_FEATHER_FILENAME)
pandas_df = pandas.read_feather(TEST_FEATHER_FILENAME)
ray_df = pd.read_feather(TEST_FEATHER_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_feather_file()
@@ -330,10 +370,10 @@ def test_from_feather():
def test_from_hdf():
setup_hdf_file(SMALL_ROW_SIZE)
pd_df = pd.read_hdf(TEST_HDF_FILENAME, key='test')
ray_df = io.read_hdf(TEST_HDF_FILENAME, key='test')
pandas_df = pandas.read_hdf(TEST_HDF_FILENAME, key='test')
ray_df = pd.read_hdf(TEST_HDF_FILENAME, key='test')
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_hdf_file()
@@ -341,10 +381,10 @@ def test_from_hdf():
def test_from_msgpack():
setup_msgpack_file(SMALL_ROW_SIZE)
pd_df = pd.read_msgpack(TEST_MSGPACK_FILENAME)
ray_df = io.read_msgpack(TEST_MSGPACK_FILENAME)
pandas_df = pandas.read_msgpack(TEST_MSGPACK_FILENAME)
ray_df = pd.read_msgpack(TEST_MSGPACK_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_msgpack_file()
@@ -352,10 +392,10 @@ def test_from_msgpack():
def test_from_stata():
setup_stata_file(SMALL_ROW_SIZE)
pd_df = pd.read_stata(TEST_STATA_FILENAME)
ray_df = io.read_stata(TEST_STATA_FILENAME)
pandas_df = pandas.read_stata(TEST_STATA_FILENAME)
ray_df = pd.read_stata(TEST_STATA_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_stata_file()
@@ -363,10 +403,10 @@ def test_from_stata():
def test_from_pickle():
setup_pickle_file(SMALL_ROW_SIZE)
pd_df = pd.read_pickle(TEST_PICKLE_FILENAME)
ray_df = io.read_pickle(TEST_PICKLE_FILENAME)
pandas_df = pandas.read_pickle(TEST_PICKLE_FILENAME)
ray_df = pd.read_pickle(TEST_PICKLE_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_pickle_file()
@@ -375,17 +415,261 @@ def test_from_sql():
conn = sqlite3.connect(TEST_SQL_FILENAME)
setup_sql_file(conn, True)
pd_df = pd.read_sql("select * from test", conn)
ray_df = io.read_sql("select * from test", conn)
pandas_df = pandas.read_sql("select * from test", conn)
ray_df = pd.read_sql("select * from test", conn)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
teardown_sql_file()
@pytest.mark.skip(reason="No SAS write methods in Pandas")
def test_from_sas():
pd_df = pd.read_sas(TEST_SAS_FILENAME)
ray_df = io.read_sas(TEST_SAS_FILENAME)
pandas_df = pandas.read_sas(TEST_SAS_FILENAME)
ray_df = pd.read_sas(TEST_SAS_FILENAME)
assert ray_df_equals_pandas(ray_df, pd_df)
assert ray_df_equals_pandas(ray_df, pandas_df)
def test_from_csv_delimiter():
    """Read a CSV set up with a '|' delimiter through pandas and Ray."""
    setup_csv_file(SMALL_ROW_SIZE, delimiter='|')

    # NOTE(review): neither reader is passed sep='|'; confirm that reading
    # with the default separator is what this test is meant to cover.
    expected = pandas.read_csv(TEST_CSV_FILENAME)
    actual = pd.read_csv(TEST_CSV_FILENAME)

    assert ray_df_equals_pandas(actual, expected)

    teardown_csv_file()
@pytest.mark.skip(reason="No clipboard on Travis")
def test_to_clipboard():
    """Clipboard contents written by Ray and by pandas must be equal."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    # Each write is read back immediately, before the next one replaces it.
    ray_frame.to_clipboard()
    from_ray = pandas.read_clipboard()

    pandas_frame.to_clipboard()
    from_pandas = pandas.read_clipboard()

    assert from_ray.equals(from_pandas)
def test_to_csv():
    """The Ray ``to_csv`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.csv"
    pandas_path = "test_pandas.csv"

    ray_frame.to_csv(ray_path)
    pandas_frame.to_csv(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)
def test_to_dense():
    """``to_dense`` is expected to raise ``NotImplementedError``."""
    frame = create_test_ray_dataframe()

    with pytest.raises(NotImplementedError):
        frame.to_dense()
def test_to_dict():
    """``to_dict`` is expected to raise ``NotImplementedError``."""
    frame = create_test_ray_dataframe()

    with pytest.raises(NotImplementedError):
        frame.to_dict()
def test_to_excel():
    """The Ray ``to_excel`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.xlsx"
    pandas_path = "test_pandas.xlsx"

    ray_writer = pandas.ExcelWriter(ray_path)
    pandas_writer = pandas.ExcelWriter(pandas_path)

    ray_frame.to_excel(ray_writer)
    pandas_frame.to_excel(pandas_writer)

    # The workbooks only reach disk when the writers are saved.
    for writer in (ray_writer, pandas_writer):
        writer.save()

    # NOTE(review): a byte-for-byte comparison assumes the Excel writer
    # embeds nothing run-specific (e.g. timestamps) in the file - confirm.
    assert test_files_eq(ray_path, pandas_path)

    for written in (ray_path, pandas_path):
        teardown_test_file(written)
def test_to_feather():
    """The Ray ``to_feather`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.feather"
    pandas_path = "test_pandas.feather"

    ray_frame.to_feather(ray_path)
    pandas_frame.to_feather(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)
def test_to_gbq():
    """``to_gbq`` is expected to raise ``NotImplementedError``."""
    frame = create_test_ray_dataframe()
    destination = "test_df.gbq"

    with pytest.raises(NotImplementedError):
        frame.to_gbq(destination, None)
def test_to_html():
    """The Ray ``to_html`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.html"
    pandas_path = "test_pandas.html"

    ray_frame.to_html(ray_path)
    pandas_frame.to_html(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)
def test_to_json():
    """The Ray ``to_json`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.json"
    pandas_path = "test_pandas.json"

    ray_frame.to_json(ray_path)
    pandas_frame.to_json(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)
def test_to_latex():
    """``to_latex`` is expected to raise ``NotImplementedError``."""
    frame = create_test_ray_dataframe()

    with pytest.raises(NotImplementedError):
        frame.to_latex()
def test_to_msgpack():
    """The Ray ``to_msgpack`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.msgpack"
    pandas_path = "test_pandas.msgpack"

    ray_frame.to_msgpack(ray_path)
    pandas_frame.to_msgpack(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)
def test_to_panel():
    """``to_panel`` is expected to raise ``NotImplementedError``."""
    frame = create_test_ray_dataframe()

    with pytest.raises(NotImplementedError):
        frame.to_panel()
def test_to_parquet():
    """The Ray ``to_parquet`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.parquet"
    pandas_path = "test_pandas.parquet"

    ray_frame.to_parquet(ray_path)
    pandas_frame.to_parquet(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)
def test_to_period():
    """``to_period`` is expected to raise ``NotImplementedError``."""
    frame = create_test_ray_dataframe()

    with pytest.raises(NotImplementedError):
        frame.to_period()
def test_to_pickle():
    """The Ray ``to_pickle`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.pkl"
    pandas_path = "test_pandas.pkl"

    ray_frame.to_pickle(ray_path)
    pandas_frame.to_pickle(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)
def test_to_sql():
    """The table written by the Ray ``to_sql`` must match pandas'.

    Each frame is written to its own SQLite database file under the same
    table name; both tables are then read back through pandas and compared
    as frames. (The previous version called ``to_pickle`` on both frames,
    so ``to_sql`` was never exercised.)
    """
    ray_df = create_test_ray_dataframe()
    pandas_df = create_test_pandas_dataframe()

    TEST_SQL_TABLE = "test"
    TEST_SQL_DF_FILENAME = "test_df.sql"
    TEST_SQL_pandas_FILENAME = "test_pandas.sql"
    query = "select * from {}".format(TEST_SQL_TABLE)

    # to_sql is called with its default if_exists='fail', so a database left
    # behind by an earlier aborted run has to be removed first.
    teardown_test_file(TEST_SQL_DF_FILENAME)
    teardown_test_file(TEST_SQL_pandas_FILENAME)

    ray_conn = sqlite3.connect(TEST_SQL_DF_FILENAME)
    pandas_conn = sqlite3.connect(TEST_SQL_pandas_FILENAME)
    try:
        ray_df.to_sql(TEST_SQL_TABLE, ray_conn)
        pandas_df.to_sql(TEST_SQL_TABLE, pandas_conn)

        ray_table = pandas.read_sql(query, ray_conn)
        pandas_table = pandas.read_sql(query, pandas_conn)
    finally:
        # Close the connections before deleting the files they hold open.
        ray_conn.close()
        pandas_conn.close()
        teardown_test_file(TEST_SQL_DF_FILENAME)
        teardown_test_file(TEST_SQL_pandas_FILENAME)

    assert ray_table.equals(pandas_table)
def test_to_stata():
    """The Ray ``to_stata`` file must be byte-identical to pandas'."""
    ray_frame = create_test_ray_dataframe()
    pandas_frame = create_test_pandas_dataframe()

    ray_path = "test_df.stata"
    pandas_path = "test_pandas.stata"

    ray_frame.to_stata(ray_path)
    pandas_frame.to_stata(pandas_path)

    assert test_files_eq(ray_path, pandas_path)

    # Remove both output files once the comparison has passed.
    for written in (pandas_path, ray_path):
        teardown_test_file(written)