mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 19:32:11 +08:00
[DataFrame] Fixes dropna subset bug (#2018)
* fix dropna * resolve comment
This commit is contained in:
committed by
Devin Petersohn
parent
72a3a6cb02
commit
b79912ec74
@@ -806,17 +806,22 @@ class DataFrame(object):
|
||||
if how is None and thresh is None:
|
||||
raise TypeError('must specify how or thresh')
|
||||
|
||||
indices = None
|
||||
if subset is not None:
|
||||
subset = set(subset)
|
||||
|
||||
if axis == 1:
|
||||
subset = [item for item in self.index if item in subset]
|
||||
indices = self.index.get_indexer_for(subset)
|
||||
check = indices == -1
|
||||
if check.any():
|
||||
raise KeyError(list(np.compress(check, subset)))
|
||||
else:
|
||||
subset = [item for item in self.columns if item in subset]
|
||||
indices = self.columns.get_indexer_for(subset)
|
||||
check = indices == -1
|
||||
if check.any():
|
||||
raise KeyError(list(np.compress(check, subset)))
|
||||
|
||||
def dropna_helper(df):
|
||||
new_df = df.dropna(axis=axis, how=how, thresh=thresh,
|
||||
subset=subset, inplace=False)
|
||||
subset=indices, inplace=False)
|
||||
|
||||
if axis == 1:
|
||||
new_index = new_df.columns
|
||||
|
||||
@@ -842,10 +842,15 @@ def test_dense_nan_df():
|
||||
[np.nan, np.nan, np.nan, 5]],
|
||||
columns=list('ABCD'))
|
||||
|
||||
column_subsets = [list('AD'), list('BC'), list('CD')]
|
||||
row_subsets = [[0, 1], [0, 1, 2], [2, 0]]
|
||||
|
||||
test_dropna(ray_df, pd_df)
|
||||
test_dropna_inplace(ray_df, pd_df)
|
||||
test_dropna_multiple_axes(ray_df, pd_df)
|
||||
test_dropna_multiple_axes_inplace(ray_df, pd_df)
|
||||
test_dropna_subset(ray_df, pd_df, column_subsets, row_subsets)
|
||||
test_dropna_subset_error(ray_df)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -1402,6 +1407,40 @@ def test_dropna_multiple_axes_inplace(ray_df, pd_df):
|
||||
assert ray_df_equals_pandas(ray_df_copy, pd_df_copy)
|
||||
|
||||
|
||||
@pytest.fixture
def test_dropna_subset(ray_df, pd_df, column_subsets, row_subsets):
    """Compare ``dropna(subset=...)`` results between the two frames.

    Every subset is exercised with both ``how='all'`` and ``how='any'``;
    each result from ``ray_df`` must compare equal (via
    ``ray_df_equals_pandas``) to the result of the same call on ``pd_df``.

    Args:
        ray_df: The frame under test.
        pd_df: The reference frame the results are compared against.
        column_subsets: Iterable of subsets passed with the default axis.
        row_subsets: Iterable of subsets passed together with ``axis=1``.
    """
    modes = ('all', 'any')

    # Default axis: the subset is forwarded without an explicit ``axis``.
    for cols in column_subsets:
        for mode in modes:
            ray_result = ray_df.dropna(how=mode, subset=cols)
            pd_result = pd_df.dropna(how=mode, subset=cols)
            assert ray_df_equals_pandas(ray_result, pd_result)

    # axis=1: the same checks, with the subset applied along the other axis.
    for rows in row_subsets:
        for mode in modes:
            ray_result = ray_df.dropna(how=mode, axis=1, subset=rows)
            pd_result = pd_df.dropna(how=mode, axis=1, subset=rows)
            assert ray_df_equals_pandas(ray_result, pd_result)
|
||||
|
||||
|
||||
@pytest.fixture
def test_dropna_subset_error(ray_df):
    """Check that ``dropna`` raises ``KeyError`` for the given bad subsets.

    Two calls are made, one with the default axis and one with ``axis=1``;
    each is expected to raise ``KeyError``.

    Args:
        ray_df: The frame under test. The subsets used here are presumably
            absent from its labels -- confirm against the caller's fixture.
    """
    failing_calls = (
        {'subset': list('EF')},
        {'axis': 1, 'subset': [4, 5]},
    )

    for kwargs in failing_calls:
        with pytest.raises(KeyError):
            ray_df.dropna(**kwargs)
|
||||
|
||||
|
||||
def test_duplicated():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user