mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 05:41:19 +08:00
[DataFrame] Pass read_csv kwargs to _infer_column (#1894)
* pass kwargs to _infer_column
* adding small test for non-comma delim
* fix lint
This commit is contained in:
committed by
Devin Petersohn
parent
cff37765b1
commit
f505f0642f
@@ -114,8 +114,8 @@ def _get_firstline(file_path):
|
||||
return first
|
||||
|
||||
|
||||
def _infer_column(first_line):
|
||||
return pd.read_csv(BytesIO(first_line)).columns
|
||||
def _infer_column(first_line, kwargs=None):
    """Infer the column labels of a CSV file from its first line.

    Args:
        first_line: Raw bytes of the file's first line.
        kwargs: Optional dict of keyword arguments forwarded unchanged to
            ``pd.read_csv`` (for example ``sep``), so the first line is
            parsed with the same options as the caller's read.

    Returns:
        The ``columns`` attribute of the frame pandas parses from
        ``first_line``.
    """
    # Use a None sentinel rather than a shared mutable ``{}`` default.
    if kwargs is None:
        kwargs = {}
    return pd.read_csv(BytesIO(first_line), **kwargs).columns
|
||||
|
||||
|
||||
@ray.remote
|
||||
@@ -247,7 +247,7 @@ def read_csv(filepath,
|
||||
offsets = _compute_offset(filepath, get_npartitions())
|
||||
|
||||
first_line = _get_firstline(filepath)
|
||||
columns = _infer_column(first_line)
|
||||
columns = _infer_column(first_line, kwargs=kwargs)
|
||||
|
||||
df_obj_ids = []
|
||||
for start, end in offsets:
|
||||
|
||||
@@ -40,7 +40,7 @@ def teardown_parquet_file():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def setup_csv_file(row_size, force=False):
|
||||
def setup_csv_file(row_size, force=False, delimiter=','):
|
||||
if os.path.exists(TEST_CSV_FILENAME) and not force:
|
||||
pass
|
||||
else:
|
||||
@@ -48,7 +48,7 @@ def setup_csv_file(row_size, force=False):
|
||||
'col1': np.arange(row_size),
|
||||
'col2': np.arange(row_size)
|
||||
})
|
||||
df.to_csv(TEST_CSV_FILENAME)
|
||||
df.to_csv(TEST_CSV_FILENAME, sep=delimiter)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -88,3 +88,14 @@ def test_from_csv():
|
||||
assert ray_df_equals_pandas(ray_df, pd_df)
|
||||
|
||||
teardown_csv_file()
|
||||
|
||||
|
||||
def test_from_csv_delimiter():
    """Check that read_csv agrees with pandas on a non-comma delimited file."""
    delimiter = '|'

    # force=True rewrites the file even when one already exists, so the data
    # on disk is guaranteed to use the delimiter under test.
    setup_csv_file(SMALL_ROW_SIZE, force=True, delimiter=delimiter)

    # Both readers must be told the delimiter; with the default separator
    # the comparison would pass without ever exercising `sep`.
    pd_df = pd.read_csv(TEST_CSV_FILENAME, sep=delimiter)
    ray_df = io.read_csv(TEST_CSV_FILENAME, sep=delimiter)

    assert ray_df_equals_pandas(ray_df, pd_df)

    teardown_csv_file()
|
||||
|
||||
Reference in New Issue
Block a user