[DataFrame] Fixes dropna subset bug (#2018)

* fix dropna

* resolve comment
This commit is contained in:
Kunal Gosar
2018-05-10 08:25:24 -07:00
committed by Devin Petersohn
parent 72a3a6cb02
commit b79912ec74
2 changed files with 49 additions and 5 deletions
+10 -5
View File
@@ -806,17 +806,22 @@ class DataFrame(object):
if how is None and thresh is None:
raise TypeError('must specify how or thresh')
indices = None
if subset is not None:
subset = set(subset)
if axis == 1:
subset = [item for item in self.index if item in subset]
indices = self.index.get_indexer_for(subset)
check = indices == -1
if check.any():
raise KeyError(list(np.compress(check, subset)))
else:
subset = [item for item in self.columns if item in subset]
indices = self.columns.get_indexer_for(subset)
check = indices == -1
if check.any():
raise KeyError(list(np.compress(check, subset)))
def dropna_helper(df):
new_df = df.dropna(axis=axis, how=how, thresh=thresh,
subset=subset, inplace=False)
subset=indices, inplace=False)
if axis == 1:
new_index = new_df.columns
@@ -842,10 +842,15 @@ def test_dense_nan_df():
[np.nan, np.nan, np.nan, 5]],
columns=list('ABCD'))
column_subsets = [list('AD'), list('BC'), list('CD')]
row_subsets = [[0, 1], [0, 1, 2], [2, 0]]
test_dropna(ray_df, pd_df)
test_dropna_inplace(ray_df, pd_df)
test_dropna_multiple_axes(ray_df, pd_df)
test_dropna_multiple_axes_inplace(ray_df, pd_df)
test_dropna_subset(ray_df, pd_df, column_subsets, row_subsets)
test_dropna_subset_error(ray_df)
@pytest.fixture
@@ -1402,6 +1407,40 @@ def test_dropna_multiple_axes_inplace(ray_df, pd_df):
assert ray_df_equals_pandas(ray_df_copy, pd_df_copy)
@pytest.fixture
def test_dropna_subset(ray_df, pd_df, column_subsets, row_subsets):
    """Check that subset-restricted ``dropna`` agrees between the two frames.

    Every entry of ``column_subsets`` is passed as ``subset`` without an
    explicit axis, and every entry of ``row_subsets`` is passed as
    ``subset`` together with ``axis=1``. For each subset both ``how='all'``
    and ``how='any'`` are tried, and the result from ``ray_df`` is compared
    against the result from ``pd_df`` via ``ray_df_equals_pandas``.
    """
    for columns in column_subsets:
        for how in ('all', 'any'):
            ray_result = ray_df.dropna(how=how, subset=columns)
            pd_result = pd_df.dropna(how=how, subset=columns)
            assert ray_df_equals_pandas(ray_result, pd_result)

    for rows in row_subsets:
        for how in ('all', 'any'):
            ray_result = ray_df.dropna(how=how, axis=1, subset=rows)
            pd_result = pd_df.dropna(how=how, axis=1, subset=rows)
            assert ray_df_equals_pandas(ray_result, pd_result)
@pytest.fixture
def test_dropna_subset_error(ray_df):
    """Check that ``dropna`` raises ``KeyError`` for the subsets tried here.

    Two calls are made: one with the labels 'E' and 'F' and no explicit
    axis, one with the labels 4 and 5 and ``axis=1``.
    NOTE(review): presumably none of these labels exist in the frame the
    caller passes in -- confirm against the caller.
    """
    column_labels = list('EF')
    with pytest.raises(KeyError):
        ray_df.dropna(subset=column_labels)

    row_labels = [4, 5]
    with pytest.raises(KeyError):
        ray_df.dropna(axis=1, subset=row_labels)
def test_duplicated():
ray_df = create_test_dataframe()