[DataFrame] Pass read_csv kwargs to _infer_column (#1894)

* pass kwargs to _infer_column

* adding small test for non-comma delim

* fix lint
This commit is contained in:
Patrick Yang
2018-04-16 08:47:30 -07:00
committed by Devin Petersohn
parent cff37765b1
commit f505f0642f
2 changed files with 16 additions and 5 deletions
+3 -3
View File
@@ -114,8 +114,8 @@ def _get_firstline(file_path):
return first
def _infer_column(first_line):
return pd.read_csv(BytesIO(first_line)).columns
def _infer_column(first_line, kwargs={}):
return pd.read_csv(BytesIO(first_line), **kwargs).columns
@ray.remote
@@ -247,7 +247,7 @@ def read_csv(filepath,
offsets = _compute_offset(filepath, get_npartitions())
first_line = _get_firstline(filepath)
columns = _infer_column(first_line)
columns = _infer_column(first_line, kwargs=kwargs)
df_obj_ids = []
for start, end in offsets:
+13 -2
View File
@@ -40,7 +40,7 @@ def teardown_parquet_file():
@pytest.fixture
def setup_csv_file(row_size, force=False):
def setup_csv_file(row_size, force=False, delimiter=','):
if os.path.exists(TEST_CSV_FILENAME) and not force:
pass
else:
@@ -48,7 +48,7 @@ def setup_csv_file(row_size, force=False):
'col1': np.arange(row_size),
'col2': np.arange(row_size)
})
df.to_csv(TEST_CSV_FILENAME)
df.to_csv(TEST_CSV_FILENAME, sep=delimiter)
@pytest.fixture
@@ -88,3 +88,14 @@ def test_from_csv():
assert ray_df_equals_pandas(ray_df, pd_df)
teardown_csv_file()
def test_from_csv_delimiter():
    """Check that read_csv honours a non-comma delimiter.

    The fixture file is written with ``|`` as the separator and must be read
    back with the same separator on both the pandas and the ray side;
    otherwise both readers see a single fused column and the comparison
    passes without exercising the delimiter handling at all.
    """
    delimiter = '|'
    # force=True so a leftover comma-separated file is never silently reused.
    setup_csv_file(SMALL_ROW_SIZE, force=True, delimiter=delimiter)

    try:
        pd_df = pd.read_csv(TEST_CSV_FILENAME, sep=delimiter)
        ray_df = io.read_csv(TEST_CSV_FILENAME, sep=delimiter)

        assert ray_df_equals_pandas(ray_df, pd_df)
    finally:
        # Always remove the '|'-delimited file so later tests that reuse an
        # existing file do not pick it up.
        teardown_csv_file()