[DataFrame] Implements df.as_matrix (#2001)

* Implement df.as_matrix

* Addressing comments

* Addressing comments
This commit is contained in:
Peter Veerman
2018-05-06 23:36:39 -07:00
committed by Devin Petersohn
parent 1848745223
commit 12da021717
2 changed files with 37 additions and 8 deletions
+13 -5
View File
@@ -1189,9 +1189,17 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def as_matrix(self, columns=None):
raise NotImplementedError(
"To contribute to Pandas on Ray, please visit "
"github.com/ray-project/ray.")
"""Convert the frame to its Numpy-array representation.
Args:
columns: If None, return all columns, otherwise,
returns specified columns.
Returns:
values: ndarray
"""
# TODO: This is very inefficient: the frame is first converted with
# to_pandas and the call is then delegated to the resulting object's
# as_matrix; also see __array__.
return to_pandas(self).as_matrix(columns)
def asfreq(self, freq, method=None, how=None, normalize=False,
fill_value=None):
@@ -4633,8 +4641,8 @@ class DataFrame(object):
"github.com/ray-project/ray.")
def __array__(self, dtype=None):
# TODO: This is very inefficient and needs a fix.
# This statement wraps the to_pandas result in np.array; dtype is not used.
return np.array(to_pandas(self))
# TODO: This is very inefficient and needs a fix; also see as_matrix.
# This statement forwards dtype to __array__ of the to_pandas result.
return to_pandas(self).__array__(dtype=dtype)
def __array_wrap__(self, result, context=None):
raise NotImplementedError(
+24 -3
View File
@@ -994,10 +994,31 @@ def test_as_blocks():
def test_as_matrix():
ray_df = create_test_dataframe()
test_data = TestData()
frame = rdf.DataFrame(test_data.frame)
mat = frame.as_matrix()
with pytest.raises(NotImplementedError):
ray_df.as_matrix()
frame_columns = frame.columns
# Compare every matrix element with the frame cell at the same
# (row, column) position; NaN cells are checked with np.isnan because
# NaN never compares equal to itself.
for i, row in enumerate(mat):
for j, value in enumerate(row):
col = frame_columns[j]
if np.isnan(value):
assert np.isnan(frame[col][i])
else:
assert value == frame[col][i]
# mixed type: select columns 'foo' and 'A'; the first cell must be the
# string 'bar'
mat = rdf.DataFrame(test_data.mixed_frame).as_matrix(['foo', 'A'])
assert mat[0, 0] == 'bar'
# frame built from one integer column and one complex column
df = rdf.DataFrame({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]})
mat = df.as_matrix()
assert mat[0, 0] == 1j
# single block corner case: the result for columns ['A', 'B'] must match
# the values of the source frame reindexed to those columns
mat = rdf.DataFrame(test_data.frame).as_matrix(['A', 'B'])
expected = test_data.frame.reindex(columns=['A', 'B']).values
tm.assert_almost_equal(mat, expected)
def test_asfreq():