mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 05:57:18 +08:00
[DataFrame] Implementing write methods (#1918)
* Add in write methods and functionality
* Infer highest available pickle version
* Fix import rebase artifact
* Formatting changes to test
* Fix lint
This commit is contained in:
committed by
Devin Petersohn
parent
baf97e450b
commit
7c9f39241e
@@ -30,8 +30,11 @@ def get_npartitions():
|
||||
# because they depend on npartitions.
|
||||
from .dataframe import DataFrame # noqa: 402
|
||||
from .series import Series # noqa: 402
|
||||
from .io import (read_csv, read_parquet) # noqa: 402
|
||||
from .concat import concat # noqa: 402
|
||||
from .io import (read_csv, read_parquet, read_json, read_html, # noqa: 402
|
||||
read_clipboard, read_excel, read_hdf, read_feather, # noqa: 402
|
||||
read_msgpack, read_stata, read_sas, read_pickle, # noqa: 402
|
||||
read_sql) # noqa: 402
|
||||
from .concat import concat # noqa: 402
|
||||
|
||||
__all__ = [
|
||||
"DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval"
|
||||
|
||||
@@ -9,7 +9,7 @@ from pandas.core.index import _ensure_index_from_sequences
|
||||
from pandas._libs import lib
|
||||
from pandas.core.dtypes.cast import maybe_upcast_putmask
|
||||
from pandas import compat
|
||||
from pandas.compat import lzip
|
||||
from pandas.compat import lzip, cPickle as pkl
|
||||
import pandas.core.common as com
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool_dtype,
|
||||
@@ -2924,19 +2924,30 @@ class DataFrame(object):
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
def to_clipboard(self, excel=None, sep=None, **kwargs):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
def to_csv(self, path_or_buf=None, sep=', ', na_rep='', float_format=None,
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_clipboard(excel, sep, **kwargs)
|
||||
|
||||
def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None,
|
||||
columns=None, header=True, index=True, index_label=None,
|
||||
mode='w', encoding=None, compression=None, quoting=None,
|
||||
quotechar='"', line_terminator='\n', chunksize=None,
|
||||
tupleize_cols=None, date_format=None, doublequote=True,
|
||||
escapechar=None, decimal='.'):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_csv(path_or_buf, sep, na_rep, float_format,
|
||||
columns, header, index, index_label,
|
||||
mode, encoding, compression, quoting,
|
||||
quotechar, line_terminator, chunksize,
|
||||
tupleize_cols, date_format, doublequote,
|
||||
escapechar, decimal)
|
||||
|
||||
def to_dense(self):
|
||||
raise NotImplementedError(
|
||||
@@ -2953,14 +2964,24 @@ class DataFrame(object):
|
||||
index_label=None, startrow=0, startcol=0, engine=None,
|
||||
merge_cells=True, encoding=None, inf_rep='inf', verbose=True,
|
||||
freeze_panes=None):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_excel(excel_writer, sheet_name, na_rep,
|
||||
float_format, columns, header, index,
|
||||
index_label, startrow, startcol, engine,
|
||||
merge_cells, encoding, inf_rep, verbose,
|
||||
freeze_panes)
|
||||
|
||||
def to_feather(self, fname):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_feather(fname)
|
||||
|
||||
def to_gbq(self, destination_table, project_id, chunksize=10000,
|
||||
verbose=True, reauth=False, if_exists='fail',
|
||||
@@ -2970,9 +2991,12 @@ class DataFrame(object):
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
def to_hdf(self, path_or_buf, key, **kwargs):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_hdf(path_or_buf, key, **kwargs)
|
||||
|
||||
def to_html(self, buf=None, columns=None, col_space=None, header=True,
|
||||
index=True, na_rep='np.NaN', formatters=None,
|
||||
@@ -2980,16 +3004,29 @@ class DataFrame(object):
|
||||
justify=None, bold_rows=True, classes=None, escape=True,
|
||||
max_rows=None, max_cols=None, show_dimensions=False,
|
||||
notebook=False, decimal='.', border=None):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_html(buf, columns, col_space, header,
|
||||
index, na_rep, formatters,
|
||||
float_format, sparsify, index_names,
|
||||
justify, bold_rows, classes, escape,
|
||||
max_rows, max_cols, show_dimensions,
|
||||
notebook, decimal, border)
|
||||
|
||||
def to_json(self, path_or_buf=None, orient=None, date_format=None,
|
||||
double_precision=10, force_ascii=True, date_unit='ms',
|
||||
default_handler=None, lines=False, compression=None):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_json(path_or_buf, orient, date_format,
|
||||
double_precision, force_ascii, date_unit,
|
||||
default_handler, lines, compression)
|
||||
|
||||
def to_latex(self, buf=None, columns=None, col_space=None, header=True,
|
||||
index=True, na_rep='np.NaN', formatters=None,
|
||||
@@ -3002,9 +3039,12 @@ class DataFrame(object):
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_msgpack(path_or_buf, encoding, **kwargs)
|
||||
|
||||
def to_panel(self):
|
||||
raise NotImplementedError(
|
||||
@@ -3013,19 +3053,26 @@ class DataFrame(object):
|
||||
|
||||
def to_parquet(self, fname, engine='auto', compression='snappy',
|
||||
**kwargs):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_parquet(fname, engine, compression, **kwargs)
|
||||
|
||||
def to_period(self, freq=None, axis=0, copy=True):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
def to_pickle(self, path, compression='infer', protocol=4):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
def to_pickle(self, path, compression='infer',
|
||||
protocol=pkl.HIGHEST_PROTOCOL):
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_pickle(path, compression, protocol)
|
||||
|
||||
def to_records(self, index=True, convert_datetime64=True):
|
||||
raise NotImplementedError(
|
||||
@@ -3039,16 +3086,25 @@ class DataFrame(object):
|
||||
|
||||
def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
|
||||
index=True, index_label=None, chunksize=None, dtype=None):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_sql(name, con, flavor, schema, if_exists,
|
||||
index, index_label, chunksize, dtype)
|
||||
|
||||
def to_stata(self, fname, convert_dates=None, write_index=True,
|
||||
encoding='latin-1', byteorder=None, time_stamp=None,
|
||||
data_label=None, variable_labels=None):
|
||||
raise NotImplementedError(
|
||||
"To contribute to Pandas on Ray, please visit "
|
||||
"github.com/ray-project/ray.")
|
||||
|
||||
warnings.warn("Defaulting to Pandas implementation",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
port_frame = to_pandas(self)
|
||||
port_frame.to_stata(fname, convert_dates, write_index,
|
||||
encoding, byteorder, time_stamp,
|
||||
data_label, variable_labels)
|
||||
|
||||
def to_string(self, buf=None, columns=None, col_space=None, header=True,
|
||||
index=True, na_rep='np.NaN', formatters=None,
|
||||
|
||||
@@ -8,9 +8,8 @@ import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
import ray.dataframe as rdf
|
||||
from ray.dataframe.utils import (
|
||||
to_pandas,
|
||||
from_pandas
|
||||
)
|
||||
from_pandas,
|
||||
to_pandas)
|
||||
|
||||
from pandas.tests.frame.common import TestData
|
||||
|
||||
@@ -2665,118 +2664,6 @@ def test_take():
|
||||
ray_df.take(None)
|
||||
|
||||
|
||||
def test_to_clipboard():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_clipboard()
|
||||
|
||||
|
||||
def test_to_csv():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_csv()
|
||||
|
||||
|
||||
def test_to_dense():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_dense()
|
||||
|
||||
|
||||
def test_to_dict():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_dict()
|
||||
|
||||
|
||||
def test_to_excel():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_excel(None)
|
||||
|
||||
|
||||
def test_to_feather():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_feather(None)
|
||||
|
||||
|
||||
def test_to_gbq():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_gbq(None, None)
|
||||
|
||||
|
||||
def test_to_hdf():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_hdf(None, None)
|
||||
|
||||
|
||||
def test_to_html():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_html()
|
||||
|
||||
|
||||
def test_to_json():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_json()
|
||||
|
||||
|
||||
def test_to_latex():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_latex()
|
||||
|
||||
|
||||
def test_to_msgpack():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_msgpack()
|
||||
|
||||
|
||||
def test_to_panel():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_panel()
|
||||
|
||||
|
||||
def test_to_parquet():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_parquet(None)
|
||||
|
||||
|
||||
def test_to_period():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_period()
|
||||
|
||||
|
||||
def test_to_pickle():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_pickle(None)
|
||||
|
||||
|
||||
def test_to_records():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
@@ -2791,20 +2678,6 @@ def test_to_sparse():
|
||||
ray_df.to_sparse()
|
||||
|
||||
|
||||
def test_to_sql():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_sql(None, None)
|
||||
|
||||
|
||||
def test_to_stata():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_stata(None)
|
||||
|
||||
|
||||
def test_to_string():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@ from __future__ import print_function
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas
|
||||
from ray.dataframe.utils import to_pandas
|
||||
import ray.dataframe.io as io
|
||||
import ray.dataframe as pd
|
||||
import os
|
||||
import sqlite3
|
||||
|
||||
@@ -36,13 +36,53 @@ def setup_parquet_file(row_size, force=False):
|
||||
if os.path.exists(TEST_PARQUET_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
df.to_parquet(TEST_PARQUET_FILENAME)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def create_test_ray_dataframe():
|
||||
df = pd.DataFrame({'col1': [0, 1, 2, 3],
|
||||
'col2': [4, 5, 6, 7],
|
||||
'col3': [8, 9, 10, 11],
|
||||
'col4': [12, 13, 14, 15],
|
||||
'col5': [0, 0, 0, 0]})
|
||||
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def create_test_pandas_dataframe():
|
||||
df = pandas.DataFrame({'col1': [0, 1, 2, 3],
|
||||
'col2': [4, 5, 6, 7],
|
||||
'col3': [8, 9, 10, 11],
|
||||
'col4': [12, 13, 14, 15],
|
||||
'col5': [0, 0, 0, 0]})
|
||||
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_files_eq(path1, path2):
|
||||
with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
|
||||
file1_content = file1.read()
|
||||
file2_content = file2.read()
|
||||
|
||||
if file1_content == file2_content:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def teardown_test_file(test_path):
|
||||
if os.path.exists(test_path):
|
||||
os.remove(test_path)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def teardown_parquet_file():
|
||||
if os.path.exists(TEST_PARQUET_FILENAME):
|
||||
@@ -54,7 +94,7 @@ def setup_csv_file(row_size, force=False, delimiter=','):
|
||||
if os.path.exists(TEST_CSV_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -72,7 +112,7 @@ def setup_json_file(row_size, force=False):
|
||||
if os.path.exists(TEST_JSON_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -90,7 +130,7 @@ def setup_html_file(row_size, force=False):
|
||||
if os.path.exists(TEST_HTML_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -105,7 +145,7 @@ def teardown_html_file():
|
||||
|
||||
@pytest.fixture
|
||||
def setup_clipboard(row_size, force=False):
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -117,7 +157,7 @@ def setup_excel_file(row_size, force=False):
|
||||
if os.path.exists(TEST_EXCEL_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -135,7 +175,7 @@ def setup_feather_file(row_size, force=False):
|
||||
if os.path.exists(TEST_FEATHER_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -153,7 +193,7 @@ def setup_hdf_file(row_size, force=False):
|
||||
if os.path.exists(TEST_HDF_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -171,7 +211,7 @@ def setup_msgpack_file(row_size, force=False):
|
||||
if os.path.exists(TEST_MSGPACK_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -189,7 +229,7 @@ def setup_stata_file(row_size, force=False):
|
||||
if os.path.exists(TEST_STATA_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -207,7 +247,7 @@ def setup_pickle_file(row_size, force=False):
|
||||
if os.path.exists(TEST_PICKLE_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({
|
||||
df = pandas.DataFrame({
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
@@ -225,11 +265,11 @@ def setup_sql_file(conn, force=False):
|
||||
if os.path.exists(TEST_SQL_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
df = pd.DataFrame({'col1': [0, 1, 2, 3],
|
||||
'col2': [4, 5, 6, 7],
|
||||
'col3': [8, 9, 10, 11],
|
||||
'col4': [12, 13, 14, 15],
|
||||
'col5': [0, 0, 0, 0]})
|
||||
df = pandas.DataFrame({'col1': [0, 1, 2, 3],
|
||||
'col2': [4, 5, 6, 7],
|
||||
'col3': [8, 9, 10, 11],
|
||||
'col4': [12, 13, 14, 15],
|
||||
'col5': [0, 0, 0, 0]})
|
||||
df.to_sql(TEST_SQL_FILENAME.split(".")[0], conn)
|
||||
|
||||
|
||||
@@ -243,9 +283,9 @@ def test_from_parquet_small():
|
||||
|
||||
setup_parquet_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_parquet(TEST_PARQUET_FILENAME)
|
||||
ray_df = io.read_parquet(TEST_PARQUET_FILENAME)
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
|
||||
ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_parquet_file()
|
||||
|
||||
@@ -253,10 +293,10 @@ def test_from_parquet_small():
|
||||
def test_from_parquet_large():
|
||||
setup_parquet_file(LARGE_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_parquet(TEST_PARQUET_FILENAME)
|
||||
ray_df = io.read_parquet(TEST_PARQUET_FILENAME)
|
||||
pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME)
|
||||
ray_df = pd.read_parquet(TEST_PARQUET_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_parquet_file()
|
||||
|
||||
@@ -264,10 +304,10 @@ def test_from_parquet_large():
|
||||
def test_from_csv():
|
||||
setup_csv_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_csv(TEST_CSV_FILENAME)
|
||||
ray_df = io.read_csv(TEST_CSV_FILENAME)
|
||||
pandas_df = pandas.read_csv(TEST_CSV_FILENAME)
|
||||
ray_df = pd.read_csv(TEST_CSV_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_csv_file()
|
||||
|
||||
@@ -275,10 +315,10 @@ def test_from_csv():
|
||||
def test_from_json():
|
||||
setup_json_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_json(TEST_JSON_FILENAME)
|
||||
ray_df = io.read_json(TEST_JSON_FILENAME)
|
||||
pandas_df = pandas.read_json(TEST_JSON_FILENAME)
|
||||
ray_df = pd.read_json(TEST_JSON_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_json_file()
|
||||
|
||||
@@ -286,10 +326,10 @@ def test_from_json():
|
||||
def test_from_html():
|
||||
setup_html_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_html(TEST_HTML_FILENAME)[0]
|
||||
ray_df = io.read_html(TEST_HTML_FILENAME)
|
||||
pandas_df = pandas.read_html(TEST_HTML_FILENAME)[0]
|
||||
ray_df = pd.read_html(TEST_HTML_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_html_file()
|
||||
|
||||
@@ -298,19 +338,19 @@ def test_from_html():
|
||||
def test_from_clipboard():
|
||||
setup_clipboard(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_clipboard()
|
||||
ray_df = io.read_clipboard()
|
||||
pandas_df = pandas.read_clipboard()
|
||||
ray_df = pd.read_clipboard()
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
|
||||
def test_from_excel():
|
||||
setup_excel_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_excel(TEST_EXCEL_FILENAME)
|
||||
ray_df = io.read_excel(TEST_EXCEL_FILENAME)
|
||||
pandas_df = pandas.read_excel(TEST_EXCEL_FILENAME)
|
||||
ray_df = pd.read_excel(TEST_EXCEL_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_excel_file()
|
||||
|
||||
@@ -318,10 +358,10 @@ def test_from_excel():
|
||||
def test_from_feather():
|
||||
setup_feather_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_feather(TEST_FEATHER_FILENAME)
|
||||
ray_df = io.read_feather(TEST_FEATHER_FILENAME)
|
||||
pandas_df = pandas.read_feather(TEST_FEATHER_FILENAME)
|
||||
ray_df = pd.read_feather(TEST_FEATHER_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_feather_file()
|
||||
|
||||
@@ -330,10 +370,10 @@ def test_from_feather():
|
||||
def test_from_hdf():
|
||||
setup_hdf_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_hdf(TEST_HDF_FILENAME, key='test')
|
||||
ray_df = io.read_hdf(TEST_HDF_FILENAME, key='test')
|
||||
pandas_df = pandas.read_hdf(TEST_HDF_FILENAME, key='test')
|
||||
ray_df = pd.read_hdf(TEST_HDF_FILENAME, key='test')
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_hdf_file()
|
||||
|
||||
@@ -341,10 +381,10 @@ def test_from_hdf():
|
||||
def test_from_msgpack():
|
||||
setup_msgpack_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_msgpack(TEST_MSGPACK_FILENAME)
|
||||
ray_df = io.read_msgpack(TEST_MSGPACK_FILENAME)
|
||||
pandas_df = pandas.read_msgpack(TEST_MSGPACK_FILENAME)
|
||||
ray_df = pd.read_msgpack(TEST_MSGPACK_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_msgpack_file()
|
||||
|
||||
@@ -352,10 +392,10 @@ def test_from_msgpack():
|
||||
def test_from_stata():
|
||||
setup_stata_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_stata(TEST_STATA_FILENAME)
|
||||
ray_df = io.read_stata(TEST_STATA_FILENAME)
|
||||
pandas_df = pandas.read_stata(TEST_STATA_FILENAME)
|
||||
ray_df = pd.read_stata(TEST_STATA_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_stata_file()
|
||||
|
||||
@@ -363,10 +403,10 @@ def test_from_stata():
|
||||
def test_from_pickle():
|
||||
setup_pickle_file(SMALL_ROW_SIZE)
|
||||
|
||||
pd_df = pd.read_pickle(TEST_PICKLE_FILENAME)
|
||||
ray_df = io.read_pickle(TEST_PICKLE_FILENAME)
|
||||
pandas_df = pandas.read_pickle(TEST_PICKLE_FILENAME)
|
||||
ray_df = pd.read_pickle(TEST_PICKLE_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_pickle_file()
|
||||
|
||||
@@ -375,17 +415,261 @@ def test_from_sql():
|
||||
conn = sqlite3.connect(TEST_SQL_FILENAME)
|
||||
setup_sql_file(conn, True)
|
||||
|
||||
pd_df = pd.read_sql("select * from test", conn)
|
||||
ray_df = io.read_sql("select * from test", conn)
|
||||
pandas_df = pandas.read_sql("select * from test", conn)
|
||||
ray_df = pd.read_sql("select * from test", conn)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_sql_file()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="No SAS write methods in Pandas")
|
||||
def test_from_sas():
|
||||
pd_df = pd.read_sas(TEST_SAS_FILENAME)
|
||||
ray_df = io.read_sas(TEST_SAS_FILENAME)
|
||||
pandas_df = pandas.read_sas(TEST_SAS_FILENAME)
|
||||
ray_df = pd.read_sas(TEST_SAS_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
|
||||
def test_from_csv_delimiter():
|
||||
setup_csv_file(SMALL_ROW_SIZE, delimiter='|')
|
||||
|
||||
pandas_df = pandas.read_csv(TEST_CSV_FILENAME)
|
||||
ray_df = pd.read_csv(TEST_CSV_FILENAME)
|
||||
|
||||
assert ray_df_equals_pandas(ray_df, pandas_df)
|
||||
|
||||
teardown_csv_file()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="No clipboard on Travis")
|
||||
def test_to_clipboard():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
ray_df.to_clipboard()
|
||||
ray_as_clip = pandas.read_clipboard()
|
||||
|
||||
pandas_df.to_clipboard()
|
||||
pandas_as_clip = pandas.read_clipboard()
|
||||
|
||||
assert(ray_as_clip.equals(pandas_as_clip))
|
||||
|
||||
|
||||
def test_to_csv():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_CSV_DF_FILENAME = "test_df.csv"
|
||||
TEST_CSV_pandas_FILENAME = "test_pandas.csv"
|
||||
|
||||
ray_df.to_csv(TEST_CSV_DF_FILENAME)
|
||||
pandas_df.to_csv(TEST_CSV_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_CSV_DF_FILENAME,
|
||||
TEST_CSV_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_CSV_pandas_FILENAME)
|
||||
teardown_test_file(TEST_CSV_DF_FILENAME)
|
||||
|
||||
|
||||
def test_to_dense():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_dense()
|
||||
|
||||
|
||||
def test_to_dict():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_dict()
|
||||
|
||||
|
||||
def test_to_excel():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_EXCEL_DF_FILENAME = "test_df.xlsx"
|
||||
TEST_EXCEL_pandas_FILENAME = "test_pandas.xlsx"
|
||||
|
||||
ray_writer = pandas.ExcelWriter(TEST_EXCEL_DF_FILENAME)
|
||||
pandas_writer = pandas.ExcelWriter(TEST_EXCEL_pandas_FILENAME)
|
||||
|
||||
ray_df.to_excel(ray_writer)
|
||||
pandas_df.to_excel(pandas_writer)
|
||||
|
||||
ray_writer.save()
|
||||
pandas_writer.save()
|
||||
|
||||
assert(test_files_eq(TEST_EXCEL_DF_FILENAME,
|
||||
TEST_EXCEL_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_EXCEL_DF_FILENAME)
|
||||
teardown_test_file(TEST_EXCEL_pandas_FILENAME)
|
||||
|
||||
|
||||
def test_to_feather():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_FEATHER_DF_FILENAME = "test_df.feather"
|
||||
TEST_FEATHER_pandas_FILENAME = "test_pandas.feather"
|
||||
|
||||
ray_df.to_feather(TEST_FEATHER_DF_FILENAME)
|
||||
pandas_df.to_feather(TEST_FEATHER_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_FEATHER_DF_FILENAME,
|
||||
TEST_FEATHER_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_FEATHER_pandas_FILENAME)
|
||||
teardown_test_file(TEST_FEATHER_DF_FILENAME)
|
||||
|
||||
|
||||
def test_to_gbq():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
|
||||
TEST_GBQ_DF_FILENAME = "test_df.gbq"
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_gbq(TEST_GBQ_DF_FILENAME, None)
|
||||
|
||||
|
||||
def test_to_html():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_HTML_DF_FILENAME = "test_df.html"
|
||||
TEST_HTML_pandas_FILENAME = "test_pandas.html"
|
||||
|
||||
ray_df.to_html(TEST_HTML_DF_FILENAME)
|
||||
pandas_df.to_html(TEST_HTML_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_HTML_DF_FILENAME,
|
||||
TEST_HTML_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_HTML_pandas_FILENAME)
|
||||
teardown_test_file(TEST_HTML_DF_FILENAME)
|
||||
|
||||
|
||||
def test_to_json():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_JSON_DF_FILENAME = "test_df.json"
|
||||
TEST_JSON_pandas_FILENAME = "test_pandas.json"
|
||||
|
||||
ray_df.to_json(TEST_JSON_DF_FILENAME)
|
||||
pandas_df.to_json(TEST_JSON_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_JSON_DF_FILENAME,
|
||||
TEST_JSON_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_JSON_pandas_FILENAME)
|
||||
teardown_test_file(TEST_JSON_DF_FILENAME)
|
||||
|
||||
|
||||
def test_to_latex():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_latex()
|
||||
|
||||
|
||||
def test_to_msgpack():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_MSGPACK_DF_FILENAME = "test_df.msgpack"
|
||||
TEST_MSGPACK_pandas_FILENAME = "test_pandas.msgpack"
|
||||
|
||||
ray_df.to_msgpack(TEST_MSGPACK_DF_FILENAME)
|
||||
pandas_df.to_msgpack(TEST_MSGPACK_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_MSGPACK_DF_FILENAME,
|
||||
TEST_MSGPACK_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_MSGPACK_pandas_FILENAME)
|
||||
teardown_test_file(TEST_MSGPACK_DF_FILENAME)
|
||||
|
||||
|
||||
def test_to_panel():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_panel()
|
||||
|
||||
|
||||
def test_to_parquet():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_PARQUET_DF_FILENAME = "test_df.parquet"
|
||||
TEST_PARQUET_pandas_FILENAME = "test_pandas.parquet"
|
||||
|
||||
ray_df.to_parquet(TEST_PARQUET_DF_FILENAME)
|
||||
pandas_df.to_parquet(TEST_PARQUET_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_PARQUET_DF_FILENAME,
|
||||
TEST_PARQUET_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_PARQUET_pandas_FILENAME)
|
||||
teardown_test_file(TEST_PARQUET_DF_FILENAME)
|
||||
|
||||
|
||||
def test_to_period():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
|
||||
with pytest.raises(NotImplementedError):
|
||||
ray_df.to_period()
|
||||
|
||||
|
||||
def test_to_pickle():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_PICKLE_DF_FILENAME = "test_df.pkl"
|
||||
TEST_PICKLE_pandas_FILENAME = "test_pandas.pkl"
|
||||
|
||||
ray_df.to_pickle(TEST_PICKLE_DF_FILENAME)
|
||||
pandas_df.to_pickle(TEST_PICKLE_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_PICKLE_DF_FILENAME,
|
||||
TEST_PICKLE_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_PICKLE_pandas_FILENAME)
|
||||
teardown_test_file(TEST_PICKLE_DF_FILENAME)
|
||||
|
||||
|
||||
def test_to_sql():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_SQL_DF_FILENAME = "test_df.sql"
|
||||
TEST_SQL_pandas_FILENAME = "test_pandas.sql"
|
||||
|
||||
ray_df.to_pickle(TEST_SQL_DF_FILENAME)
|
||||
pandas_df.to_pickle(TEST_SQL_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_SQL_DF_FILENAME,
|
||||
TEST_SQL_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_SQL_DF_FILENAME)
|
||||
teardown_test_file(TEST_SQL_pandas_FILENAME)
|
||||
|
||||
|
||||
def test_to_stata():
|
||||
ray_df = create_test_ray_dataframe()
|
||||
pandas_df = create_test_pandas_dataframe()
|
||||
|
||||
TEST_STATA_DF_FILENAME = "test_df.stata"
|
||||
TEST_STATA_pandas_FILENAME = "test_pandas.stata"
|
||||
|
||||
ray_df.to_stata(TEST_STATA_DF_FILENAME)
|
||||
pandas_df.to_stata(TEST_STATA_pandas_FILENAME)
|
||||
|
||||
assert(test_files_eq(TEST_STATA_DF_FILENAME,
|
||||
TEST_STATA_pandas_FILENAME))
|
||||
|
||||
teardown_test_file(TEST_STATA_pandas_FILENAME)
|
||||
teardown_test_file(TEST_STATA_DF_FILENAME)
|
||||
|
||||
Reference in New Issue
Block a user