From 84cfce3be2cc0be36d7f4dfc6756fdd5c23930fa Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Sun, 4 Sep 2016 00:15:05 -0700 Subject: [PATCH] introduce base object --- python/src/pynumbuf/adapters/numpy.cc | 8 ++++++-- python/src/pynumbuf/adapters/numpy.h | 2 +- python/src/pynumbuf/adapters/python.cc | 22 +++++++++++----------- python/src/pynumbuf/adapters/python.h | 6 +++--- python/src/pynumbuf/numbuf.cc | 5 +++-- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/python/src/pynumbuf/adapters/numpy.cc b/python/src/pynumbuf/adapters/numpy.cc index 95e90249c..92319f181 100644 --- a/python/src/pynumbuf/adapters/numpy.cc +++ b/python/src/pynumbuf/adapters/numpy.cc @@ -25,11 +25,15 @@ namespace numbuf { type* data = const_cast(values->raw_data()) \ + content->offset(offset); \ *out = PyArray_SimpleNewFromData(num_dims, dim.data(), NPY_##TYPE, \ - reinterpret_cast(data)); \ + reinterpret_cast(data)); \ + if (base != Py_None) { \ + PyArray_SetBaseObject((PyArrayObject*) *out, base); \ + } \ + Py_XINCREF(base); \ } \ return Status::OK(); -Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject** out) { +Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject* base, PyObject** out) { DCHECK(array); auto tensor = std::dynamic_pointer_cast(array); DCHECK(tensor); diff --git a/python/src/pynumbuf/adapters/numpy.h b/python/src/pynumbuf/adapters/numpy.h index a57e5c974..c0fcbb7f6 100644 --- a/python/src/pynumbuf/adapters/numpy.h +++ b/python/src/pynumbuf/adapters/numpy.h @@ -15,7 +15,7 @@ namespace numbuf { arrow::Status SerializeArray(PyArrayObject* array, SequenceBuilder& builder, std::vector& subdicts); -arrow::Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject** out); +arrow::Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject* base, PyObject** out); } diff --git a/python/src/pynumbuf/adapters/python.cc b/python/src/pynumbuf/adapters/python.cc index 3b9b6b2c2..173767a1a 100644 --- a/python/src/pynumbuf/adapters/python.cc +++ b/python/src/pynumbuf/adapters/python.cc @@ -11,7 +11,7 @@ extern PyObject* numbuf_deserialize_callback; namespace numbuf { -PyObject* get_value(ArrayPtr arr, int32_t index, int32_t type) { +PyObject* get_value(ArrayPtr arr, int32_t index, int32_t type, PyObject* base) { PyObject* result; switch (arr->type()->type) { case Type::BOOL: @@ -36,13 +36,13 @@ PyObject* get_value(ArrayPtr arr, int32_t index, int32_t type) { auto s = std::static_pointer_cast(arr); auto l = std::static_pointer_cast(s->field(0)); if (s->type()->child(0)->name == "list") { - ARROW_CHECK_OK(DeserializeList(l->values(), l->value_offset(index), l->value_offset(index+1), &result)); + ARROW_CHECK_OK(DeserializeList(l->values(), l->value_offset(index), l->value_offset(index+1), base, &result)); } else if (s->type()->child(0)->name == "tuple") { - ARROW_CHECK_OK(DeserializeTuple(l->values(), l->value_offset(index), l->value_offset(index+1), &result)); + ARROW_CHECK_OK(DeserializeTuple(l->values(), l->value_offset(index), l->value_offset(index+1), base, &result)); } else if (s->type()->child(0)->name == "dict") { - ARROW_CHECK_OK(DeserializeDict(l->values(), l->value_offset(index), l->value_offset(index+1), &result)); + ARROW_CHECK_OK(DeserializeDict(l->values(), l->value_offset(index), l->value_offset(index+1), base, &result)); } else { - ARROW_CHECK_OK(DeserializeArray(arr, index, &result)); + ARROW_CHECK_OK(DeserializeArray(arr, index, base, &result)); } return result; } @@ -181,17 +181,17 @@ Status SerializeSequences(std::vector sequences, std::shared_ptrValue(i); \ int8_t type = types->Value(i); \ ArrayPtr arr = data->child(type); \ - SET_ITEM(result, i-start_idx, get_value(arr, offset, type)); \ + SET_ITEM(result, i-start_idx, get_value(arr, offset, type, base)); \ } \ } \ *out = result; \ return Status::OK(); -Status DeserializeList(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject** out) { +Status DeserializeList(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out) { DESERIALIZE_SEQUENCE(PyList_New, PyList_SetItem) } -Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject** out) { +Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out) { DESERIALIZE_SEQUENCE(PyTuple_New, PyTuple_SetItem) } @@ -227,13 +227,13 @@ Status SerializeDict(std::vector dicts, std::shared_ptr* out) return Status::OK(); } -Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject** out) { +Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out) { auto data = std::dynamic_pointer_cast(array); // TODO(pcm): error handling, get rid of the temporary copy of the list PyObject *keys, *vals; PyObject* result = PyDict_New(); - ARROW_RETURN_NOT_OK(DeserializeList(data->field(0), start_idx, stop_idx, &keys)); - ARROW_RETURN_NOT_OK(DeserializeList(data->field(1), start_idx, stop_idx, &vals)); + ARROW_RETURN_NOT_OK(DeserializeList(data->field(0), start_idx, stop_idx, base, &keys)); + ARROW_RETURN_NOT_OK(DeserializeList(data->field(1), start_idx, stop_idx, base, &vals)); for (size_t i = start_idx; i < stop_idx; ++i) { PyDict_SetItem(result, PyList_GetItem(keys, i - start_idx), PyList_GetItem(vals, i - start_idx)); } diff --git a/python/src/pynumbuf/adapters/python.h b/python/src/pynumbuf/adapters/python.h index 66efa3005..13ee629d3 100644 --- a/python/src/pynumbuf/adapters/python.h +++ b/python/src/pynumbuf/adapters/python.h @@ -13,9 +13,9 @@ namespace numbuf { arrow::Status SerializeSequences(std::vector sequences, std::shared_ptr* out); arrow::Status SerializeDict(std::vector dicts, std::shared_ptr* out); -arrow::Status DeserializeList(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject** out); -arrow::Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject** out); -arrow::Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject** out); +arrow::Status DeserializeList(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out); +arrow::Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out); +arrow::Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out); arrow::Status python_error_to_status(); diff --git a/python/src/pynumbuf/numbuf.cc b/python/src/pynumbuf/numbuf.cc index 104747b4d..d89913d6e 100644 --- a/python/src/pynumbuf/numbuf.cc +++ b/python/src/pynumbuf/numbuf.cc @@ -126,11 +126,12 @@ static PyObject* read_from_buffer(PyObject* self, PyObject* args) { /* Documented in doc/numbuf.rst in ray-core */ static PyObject* deserialize_list(PyObject* self, PyObject* args) { std::shared_ptr* data; - if (!PyArg_ParseTuple(args, "O&", &PyObjectToArrow, &data)) { + PyObject* base = Py_None; + if (!PyArg_ParseTuple(args, "O&|O", &PyObjectToArrow, &data, &base)) { return NULL; } PyObject* result; - ARROW_CHECK_OK(DeserializeList((*data)->column(0), 0, (*data)->num_rows(), &result)); + ARROW_CHECK_OK(DeserializeList((*data)->column(0), 0, (*data)->num_rows(), base, &result)); return result; }