diff --git a/python/ray/dataframe/io.py b/python/ray/dataframe/io.py
index 45bbadd85..fdfe04cfc 100644
--- a/python/ray/dataframe/io.py
+++ b/python/ray/dataframe/io.py
@@ -114,8 +114,19 @@ def _get_firstline(file_path):
     return first
 
 
-def _infer_column(first_line):
-    return pd.read_csv(BytesIO(first_line)).columns
+def _infer_column(first_line, kwargs=None):
+    """Infer the column labels by parsing only the first line of a CSV.
+
+    Args:
+        first_line: Bytes of the file's first line (wrapped in BytesIO).
+        kwargs: Optional dict of keyword arguments forwarded to
+            pd.read_csv, e.g. a custom separator.
+
+    Returns:
+        The `columns` of the DataFrame pandas parses from that line.
+    """
+    # None instead of {} as the default: no dict is shared across calls.
+    return pd.read_csv(BytesIO(first_line), **(kwargs or {})).columns
 
 
 @ray.remote
@@ -247,7 +258,7 @@ def read_csv(filepath,
     offsets = _compute_offset(filepath, get_npartitions())
 
     first_line = _get_firstline(filepath)
-    columns = _infer_column(first_line)
+    columns = _infer_column(first_line, kwargs=kwargs)
 
     df_obj_ids = []
     for start, end in offsets:
diff --git a/python/ray/dataframe/test/test_io.py b/python/ray/dataframe/test/test_io.py
index 74aab1e2f..00b0a06ad 100644
--- a/python/ray/dataframe/test/test_io.py
+++ b/python/ray/dataframe/test/test_io.py
@@ -40,7 +40,7 @@ def teardown_parquet_file():
 
 
 @pytest.fixture
-def setup_csv_file(row_size, force=False):
+def setup_csv_file(row_size, force=False, delimiter=','):
     if os.path.exists(TEST_CSV_FILENAME) and not force:
         pass
     else:
@@ -48,7 +48,7 @@ def setup_csv_file(row_size, force=False):
             'col1': np.arange(row_size),
             'col2': np.arange(row_size)
         })
-        df.to_csv(TEST_CSV_FILENAME)
+        df.to_csv(TEST_CSV_FILENAME, sep=delimiter)
 
 
 @pytest.fixture
@@ -88,3 +88,16 @@ def test_from_csv():
     assert ray_df_equals_pandas(ray_df, pd_df)
 
     teardown_csv_file()
+
+
+def test_from_csv_delimiter():
+    # force=True so a stale comma-separated file is never reused.
+    setup_csv_file(SMALL_ROW_SIZE, force=True, delimiter='|')
+
+    # Both readers must be told the separator, or the test is vacuous.
+    pd_df = pd.read_csv(TEST_CSV_FILENAME, sep='|')
+    ray_df = io.read_csv(TEST_CSV_FILENAME, sep='|')
+
+    assert ray_df_equals_pandas(ray_df, pd_df)
+
+    teardown_csv_file()