mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 02:22:13 +08:00
[DataFrame] Implements DataFrame.rename, DataFrame.rename_axis, and Index.set_names (#1573)
* Index update * Fixed transpose bug with nan values * Fix lint * Add rename tests * Implement DataFrame.rename, DataFrame.rename_axis, and Index.set_names * Temp * Fixing rename for new index implementation Fix rebase merges * Fix rename and rename_axis to work with new index. Re-add pytest fixture Clean up rebase artifacts Remove index.py file * Addressing minor points * Addressing comments
This commit is contained in:
committed by
Devin Petersohn
parent
15a4392156
commit
6455ec934b
@@ -1742,14 +1742,79 @@ class DataFrame(object):
|
||||
|
||||
def rename(self, mapper=None, index=None, columns=None, axis=None,
           copy=True, inplace=False, level=None):
    """Alter axis labels using a mapping or a function.

    Args:
        mapper: dict-like or function applied to the labels of `axis`.
            When `axis` is omitted the mapper targets the index, which
            is the pandas default.
        index: dict-like or function applied to the index labels.
        columns: dict-like or function applied to the column labels.
        axis: Axis that `mapper` targets.
        copy: Forwarded to the underlying pandas `rename` calls.
        inplace: Whether to modify `self` directly or return a new
            DataFrame.
        level: Forwarded to the underlying pandas `rename` calls.

    Returns:
        The renamed DataFrame, or None if inplace=True.

    Raises:
        TypeError: If none of `mapper`, `index` and `columns` is given.
    """
    if mapper is None and index is None and columns is None:
        raise TypeError('must pass an index to rename')

    if axis is None:
        if mapper is not None and index is None and columns is None:
            # A bare mapper used to fall through both branches below and
            # was silently ignored; pandas applies it to the index.
            index = mapper

        if columns is not None:
            new_parts = [
                _deploy_func.remote(
                    lambda df: df.rename(columns=columns,
                                         copy=copy, level=level),
                    part
                )
                for part in self._df
            ]
            # Rename an empty pandas frame carrying the current labels so
            # the new column labels are computed locally.
            new_columns = pd.DataFrame(columns=self.columns)\
                .rename(columns=columns, copy=copy, level=level)\
                .columns
            new_df = DataFrame(new_parts, new_columns, self.index)
        else:
            new_df = self.copy()

        if index is not None:
            new_df.index = self._index.rename(index=index, copy=copy,
                                              level=level).index
    else:
        new_df = self._map_partitions(
            lambda df: df.rename(mapper=mapper, axis=axis, copy=copy,
                                 level=level)
        )
        # NOTE(review): the same mapper/axis pair is applied to both the
        # index metadata frame and the column labels -- confirm this is
        # the intended behaviour for axis=1.
        new_df._index = new_df._index.rename(mapper=mapper, axis=axis,
                                             copy=copy, level=level)
        new_df.columns = pd.DataFrame(columns=new_df.columns)\
            .rename(mapper=mapper, axis=axis, copy=copy,
                    level=level).columns

    if inplace:
        self._update_inplace(
            df=new_df._df,
            columns=new_df.columns,
            index=new_df.index
        )
    else:
        return new_df
|
||||
|
||||
def rename_axis(self, mapper, axis=0, copy=True, inplace=False):
    """Set the name of the index or of the columns.

    Args:
        mapper: Value assigned as the axis name.
        axis: 1 or 'columns' targets the columns; any other value
            targets the index.
        copy: Forwarded to the index metadata's `rename_axis` call.
        inplace: Whether to modify `self` directly or return a copy.

    Returns:
        The renamed DataFrame, or None if inplace=True.
    """
    axes_is_columns = axis == 1 or axis == "columns"
    renamed = self if inplace else self.copy()
    if axes_is_columns:
        renamed.columns.name = mapper
    else:
        # `renamed` is already the object to mutate (self or a copy), so
        # the index metadata is always updated in place here.
        renamed._index.rename_axis(mapper, axis=axis, copy=copy,
                                   inplace=True)
    if not inplace:
        return renamed
|
||||
|
||||
def _set_axis_name(self, name, axis=0, inplace=False):
|
||||
"""Alter the name or names of the axis.
|
||||
|
||||
Args:
|
||||
name: Name for the Index, or list of names for the MultiIndex
|
||||
axis: 0 or 'index' for the index; 1 or 'columns' for the columns
|
||||
inplace: Whether to modify `self` directly or return a copy
|
||||
|
||||
Returns:
|
||||
Type of caller or None if inplace=True.
|
||||
"""
|
||||
axes_is_columns = axis == 1 or axis == "columns"
|
||||
renamed = self if inplace else self.copy()
|
||||
if axes_is_columns:
|
||||
renamed.columns.set_names(name)
|
||||
else:
|
||||
renamed._index.set_names(name)
|
||||
|
||||
if not inplace:
|
||||
return renamed
|
||||
|
||||
def reorder_levels(self, order, axis=0):
|
||||
raise NotImplementedError(
|
||||
|
||||
@@ -1960,18 +1960,257 @@ def test_reindex_like():
|
||||
ray_df.reindex_like(None)
|
||||
|
||||
|
||||
# Renaming


def test_rename():
    """Run every rename scenario against a Ray DataFrame."""
    test_rename_sanity()
    test_rename_multiindex()
    # TODO: Uncomment when __setitem__ is implemented
    # test_rename_nocopy()
    test_rename_inplace()
    test_rename_bug()
|
||||
|
||||
|
||||
def test_rename_sanity(num_partitions=2):
    """Check basic column and index renames against pandas.

    Not decorated as a fixture: test_rename calls this helper directly,
    and pytest >= 4 raises an error when a fixture function is called.
    """
    test_data = TestData()
    mapping = {'A': 'a', 'B': 'b', 'C': 'c', 'D': 'd'}

    ray_df = rdf.from_pandas(test_data.frame, num_partitions)
    assert ray_df_equals_pandas(
        ray_df.rename(columns=mapping),
        test_data.frame.rename(columns=mapping)
    )

    renamed2 = test_data.frame.rename(columns=str.lower)
    assert ray_df_equals_pandas(
        ray_df.rename(columns=str.lower),
        renamed2
    )

    ray_df = rdf.from_pandas(renamed2, num_partitions)
    assert ray_df_equals_pandas(
        ray_df.rename(columns=str.upper),
        renamed2.rename(columns=str.upper)
    )

    # index
    data = {'A': {'foo': 0, 'bar': 1}}
    swap = {'foo': 'bar', 'bar': 'foo'}

    # gets sorted alphabetical
    df = pd.DataFrame(data)
    ray_df = rdf.from_pandas(df, num_partitions)
    tm.assert_index_equal(
        ray_df.rename(index=swap).index,
        df.rename(index=swap).index
    )

    tm.assert_index_equal(
        ray_df.rename(index=str.upper).index,
        df.rename(index=str.upper).index
    )

    # have to pass something
    pytest.raises(TypeError, ray_df.rename)

    # partial columns
    partial = {'C': 'foo', 'D': 'bar'}
    renamed = test_data.frame.rename(columns=partial)
    ray_df = rdf.from_pandas(test_data.frame, num_partitions)
    ray_renamed = ray_df.rename(columns=partial)
    tm.assert_index_equal(ray_renamed.index, renamed.index)
    # A column rename must be visible in the column labels as well
    tm.assert_index_equal(ray_renamed.columns, renamed.columns)

    # TODO: Uncomment when transpose works
    # other axis
    # renamed = test_data.frame.T.rename(index={'C': 'foo', 'D': 'bar'})
    # tm.assert_index_equal(
    #     test_data.frame.T.rename(index={'C': 'foo', 'D': 'bar'}).index,
    #     ray_df.T.rename(index={'C': 'foo', 'D': 'bar'}).index
    # )

    # index with name
    index = pd.Index(['foo', 'bar'], name='name')
    renamer = pd.DataFrame(data, index=index)

    ray_df = rdf.from_pandas(renamer, num_partitions)
    renamed = renamer.rename(index=swap)
    ray_renamed = ray_df.rename(index=swap)
    tm.assert_index_equal(renamed.index, ray_renamed.index)

    assert renamed.index.name == ray_renamed.index.name
|
||||
|
||||
|
||||
def test_rename_multiindex(num_partitions=2):
    """Check renames on MultiIndex rows and columns against pandas.

    Not decorated as a fixture: test_rename calls this helper directly,
    and pytest >= 4 raises an error when a fixture function is called.
    """
    tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')]
    tuples_columns = [('fizz1', 'buzz1'), ('fizz2', 'buzz2')]
    index = pd.MultiIndex.from_tuples(tuples_index, names=['foo', 'bar'])
    columns = pd.MultiIndex.from_tuples(
        tuples_columns, names=['fizz', 'buzz'])
    df = pd.DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
    ray_df = rdf.from_pandas(df, num_partitions)

    index_map = {'foo1': 'foo3', 'bar2': 'bar3'}
    column_map = {'fizz1': 'fizz3', 'buzz2': 'buzz3'}

    #
    # without specifying level -> across all levels
    renamed = df.rename(index=index_map, columns=column_map)
    ray_renamed = ray_df.rename(index=index_map, columns=column_map)
    tm.assert_index_equal(renamed.index, ray_renamed.index)
    tm.assert_index_equal(renamed.columns, ray_renamed.columns)
    assert renamed.index.names == ray_renamed.index.names
    assert renamed.columns.names == ray_renamed.columns.names

    #
    # with specifying a level (GH13766), by position and by name,
    # first with a dict and then with a function
    for renamer in (column_map, str.upper):
        for level in (0, 'fizz', 1, 'buzz'):
            renamed = df.rename(columns=renamer, level=level)
            ray_renamed = ray_df.rename(columns=renamer, level=level)
            tm.assert_index_equal(renamed.columns, ray_renamed.columns)

    # index
    renamed = df.rename(index=index_map, level=0)
    ray_renamed = ray_df.rename(index=index_map, level=0)
    tm.assert_index_equal(ray_renamed.index, renamed.index)
|
||||
|
||||
|
||||
@pytest.fixture
def test_rename_nocopy(num_partitions=2):
    """Check that a copy=False rename writes through to the source frame.

    Currently dormant: its call in test_rename is commented out until
    __setitem__ is implemented.
    """
    source_frame = TestData().frame
    ray_source = rdf.from_pandas(source_frame, num_partitions)

    # Write through the renamed frame ...
    ray_view = ray_source.rename(columns={'C': 'foo'}, copy=False)
    ray_view['foo'] = 1

    # ... and expect the write under the old label of the source frame.
    assert (ray_source['C'] == 1).all()
|
||||
|
||||
|
||||
def test_rename_inplace(num_partitions=2):
    """Check that rename(inplace=True) matches pandas.

    Not decorated as a fixture: test_rename calls this helper directly,
    and pytest >= 4 raises an error when a fixture function is called.
    """
    test_data = TestData().frame
    ray_df = rdf.from_pandas(test_data, num_partitions)
    mapping = {'C': 'foo'}

    assert ray_df_equals_pandas(
        ray_df.rename(columns=mapping),
        test_data.rename(columns=mapping)
    )

    # Rename copies in place so the source frames stay untouched
    frame = test_data.copy()
    ray_frame = ray_df.copy()
    frame.rename(columns=mapping, inplace=True)
    ray_frame.rename(columns=mapping, inplace=True)

    assert ray_df_equals_pandas(
        ray_frame,
        frame
    )
|
||||
|
||||
|
||||
def test_rename_bug(num_partitions=2):
    """Check two consecutive column renames against pandas (GH 5344).

    Not decorated as a fixture: test_rename calls this helper directly,
    and pytest >= 4 raises an error when a fixture function is called.
    """
    # GH 5344
    # rename set ref_locs, and set_index was not resetting
    df = pd.DataFrame({0: ['foo', 'bar'], 1: ['bah', 'bas'], 2: [1, 2]})
    ray_df = rdf.from_pandas(df, num_partitions)

    df = df.rename(columns={0: 'a'})
    df = df.rename(columns={1: 'b'})
    # TODO: Uncomment when set_index is implemented
    # df = df.set_index(['a', 'b'])
    # df.columns = ['2001-01-01']

    ray_df = ray_df.rename(columns={0: 'a'})
    ray_df = ray_df.rename(columns={1: 'b'})
    # TODO: Uncomment when set_index is implemented
    # ray_df = ray_df.set_index(['a', 'b'])
    # ray_df.columns = ['2001-01-01']

    assert ray_df_equals_pandas(
        ray_df,
        df
    )
|
||||
|
||||
|
||||
def test_rename_axis():
    """Run the rename_axis scenarios against a Ray DataFrame."""
    test_rename_axis_inplace()
|
||||
|
||||
def test_rename_axis_inplace(num_partitions=2):
    """Check rename_axis(inplace=True) on both axes against pandas.

    Not decorated as a fixture: test_rename_axis calls this helper
    directly, and pytest >= 4 raises an error when a fixture function is
    called.
    """
    test_frame = TestData().frame
    ray_df = rdf.from_pandas(test_frame, num_partitions)

    # GH 15704
    result = test_frame.copy()
    ray_result = ray_df.copy()
    no_return = result.rename_axis('foo', inplace=True)
    ray_no_return = ray_result.rename_axis('foo', inplace=True)

    # Both in-place calls are expected to return the same object
    assert no_return is ray_no_return
    assert ray_df_equals_pandas(
        ray_result,
        result
    )

    result = test_frame.copy()
    ray_result = ray_df.copy()
    no_return = result.rename_axis('bar', axis=1, inplace=True)
    ray_no_return = ray_result.rename_axis('bar', axis=1, inplace=True)

    assert no_return is ray_no_return
    assert ray_df_equals_pandas(
        ray_result,
        result
    )
|
||||
|
||||
|
||||
def test_reorder_levels():
|
||||
|
||||
Reference in New Issue
Block a user