mirror of
https://github.com/wassname/ray.git
synced 2026-07-06 01:07:38 +08:00
[DataFrame] Refactor __delitem__ (#2080)
* Implement the bug fix
* Fix flake8
This commit is contained in:
committed by
Devin Petersohn
parent
25e7aa1e79
commit
0b07602c89
@@ -4904,10 +4904,6 @@ class DataFrame(object):
|
||||
df.columns = pd.RangeIndex(0, len(df.columns))
|
||||
return df
|
||||
|
||||
to_delete = self.columns.get_loc(key)
|
||||
self._row_partitions = _map_partitions(
|
||||
del_helper, self._row_partitions, to_delete)
|
||||
|
||||
# This structure is used to get the correct index inside the partition.
|
||||
del_df = self._col_metadata[key]
|
||||
|
||||
@@ -4920,15 +4916,18 @@ class DataFrame(object):
|
||||
# Cast cols as pd.Series as duplicate columns mean result may be
|
||||
# np.int64 or pd.Series
|
||||
col_parts_to_del = \
|
||||
pd.Series(self._col_metadata[key, 'partition']).unique()
|
||||
pd.Series(del_df['partition'].copy()).unique()
|
||||
self._col_metadata.drop(key)
|
||||
|
||||
for i in col_parts_to_del:
|
||||
# Compute the correct index inside the partition to delete.
|
||||
to_delete_in_partition = \
|
||||
del_df[del_df['partition'] == i]['index_within_partition']
|
||||
|
||||
self._col_partitions[i] = _deploy_func.remote(
|
||||
del_helper, self._col_partitions[i], to_delete_in_partition)
|
||||
for j in range(self._block_partitions.shape[0]):
|
||||
self._block_partitions[j, i] = _deploy_func.remote(
|
||||
del_helper, self._block_partitions[j, i],
|
||||
to_delete_in_partition)
|
||||
|
||||
self._col_metadata.reset_partition_coords(col_parts_to_del)
|
||||
|
||||
|
||||
@@ -203,15 +203,15 @@ class _IndexMetadata(object):
|
||||
# updated as well.
|
||||
try:
|
||||
self._coord_df.loc[partition_mask,
|
||||
'index_within_partition'] = [
|
||||
p for p in range(sum(partition_mask))]
|
||||
'index_within_partition'] = np.arange(
|
||||
sum(partition_mask)).astype(int)
|
||||
except ValueError:
|
||||
# Copy the arrow sealed dataframe so we can mutate it.
|
||||
# We only do this the first time we try to mutate the sealed dataframe.
|
||||
self._coord_df = self._coord_df.copy()
|
||||
self._coord_df.loc[partition_mask,
|
||||
'index_within_partition'] = [
|
||||
p for p in range(sum(partition_mask))]
|
||||
'index_within_partition'] = np.arange(
|
||||
sum(partition_mask)).astype(int)
|
||||
|
||||
def insert(self, key, loc=None, partition=None,
|
||||
index_within_partition=None):
|
||||
@@ -357,7 +357,14 @@ class _IndexMetadata(object):
|
||||
|
||||
# Update lengths first to prevent possible length inconsistencies
|
||||
if isinstance(dropped, pd.DataFrame):
|
||||
drop_per_part = dropped.groupby(["partition"]).size()\
|
||||
try:
|
||||
drop_per_part = dropped.groupby(["partition"]).size()\
|
||||
.reindex(index=pd.RangeIndex(len(self._lengths)),
|
||||
fill_value=0)
|
||||
except ValueError:
|
||||
# Copy the arrow sealed dataframe so we can mutate it.
|
||||
dropped = dropped.copy()
|
||||
drop_per_part = dropped.groupby(["partition"]).size()\
|
||||
.reindex(index=pd.RangeIndex(len(self._lengths)),
|
||||
fill_value=0)
|
||||
elif isinstance(dropped, pd.Series):
|
||||
|
||||
@@ -3274,6 +3274,12 @@ def test___delitem__(ray_df, pd_df):
|
||||
pd_df.__delitem__('col1')
|
||||
ray_df_equals_pandas(ray_df, pd_df)
|
||||
|
||||
# Issue 2027
|
||||
last_label = pd_df.iloc[:, -1].name
|
||||
ray_df.__delitem__(last_label)
|
||||
pd_df.__delitem__(last_label)
|
||||
ray_df_equals_pandas(ray_df, pd_df)
|
||||
|
||||
|
||||
def test___finalize__():
|
||||
ray_df = create_test_dataframe()
|
||||
|
||||
Reference in New Issue
Block a user