From 05e0226047c4fbbaad2197ebdf3f60feaaec0d46 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Sat, 19 Nov 2016 13:24:51 -0800 Subject: [PATCH] clang-fixes --- numbuf/{cpp => }/.clang-format | 0 numbuf/cpp/src/numbuf/dict.cc | 13 +- numbuf/cpp/src/numbuf/dict.h | 18 +-- numbuf/cpp/src/numbuf/sequence.cc | 37 ++--- numbuf/cpp/src/numbuf/sequence.h | 16 +- numbuf/cpp/src/numbuf/tensor.cc | 17 ++- numbuf/cpp/src/numbuf/tensor.h | 25 ++-- numbuf/python/src/pynumbuf/adapters/numpy.cc | 44 +++--- numbuf/python/src/pynumbuf/adapters/numpy.h | 11 +- numbuf/python/src/pynumbuf/adapters/python.cc | 139 ++++++++++-------- numbuf/python/src/pynumbuf/adapters/python.h | 16 +- numbuf/python/src/pynumbuf/adapters/scalars.h | 30 ++-- numbuf/python/src/pynumbuf/memory.h | 33 ++--- numbuf/python/src/pynumbuf/numbuf.cc | 83 +++++------ 14 files changed, 239 insertions(+), 243 deletions(-) rename numbuf/{cpp => }/.clang-format (100%) diff --git a/numbuf/cpp/.clang-format b/numbuf/.clang-format similarity index 100% rename from numbuf/cpp/.clang-format rename to numbuf/.clang-format diff --git a/numbuf/cpp/src/numbuf/dict.cc b/numbuf/cpp/src/numbuf/dict.cc index 74a9cfadf..047191d8c 100644 --- a/numbuf/cpp/src/numbuf/dict.cc +++ b/numbuf/cpp/src/numbuf/dict.cc @@ -4,12 +4,9 @@ using namespace arrow; namespace numbuf { -Status DictBuilder::Finish( - std::shared_ptr key_tuple_data, - std::shared_ptr val_list_data, - std::shared_ptr val_tuple_data, - std::shared_ptr val_dict_data, - std::shared_ptr* out) { +Status DictBuilder::Finish(std::shared_ptr key_tuple_data, + std::shared_ptr val_list_data, std::shared_ptr val_tuple_data, + std::shared_ptr val_dict_data, std::shared_ptr* out) { // lists and dicts can't be keys of dicts in Python, that is why for // the keys we do not need to collect sublists std::shared_ptr keys, vals; @@ -17,11 +14,11 @@ Status DictBuilder::Finish( RETURN_NOT_OK(vals_.Finish(val_list_data, val_tuple_data, val_dict_data, &vals)); auto keys_field = std::make_shared("keys", keys->type()); auto vals_field = std::make_shared("vals", vals->type()); - auto type = std::make_shared(std::vector({keys_field, vals_field})); + auto type = + std::make_shared(std::vector({keys_field, vals_field})); std::vector field_arrays({keys, vals}); DCHECK(keys->length() == vals->length()); out->reset(new StructArray(type, keys->length(), field_arrays)); return Status::OK(); } - } diff --git a/numbuf/cpp/src/numbuf/dict.h b/numbuf/cpp/src/numbuf/dict.h index 6497b3eb7..c8f5925a7 100644 --- a/numbuf/cpp/src/numbuf/dict.h +++ b/numbuf/cpp/src/numbuf/dict.h @@ -13,9 +13,8 @@ namespace numbuf { can be obtained via the Finish method. */ class DictBuilder { -public: - DictBuilder(arrow::MemoryPool* pool = nullptr) - : keys_(pool), vals_(pool) {} + public: + DictBuilder(arrow::MemoryPool* pool = nullptr) : keys_(pool), vals_(pool) {} //! Builder for the keys of the dictionary SequenceBuilder& keys() { return keys_; } @@ -33,18 +32,15 @@ public: List containing the data from nested dictionaries in the value list of the dictionary */ - arrow::Status Finish( - std::shared_ptr key_tuple_data, - std::shared_ptr val_list_data, - std::shared_ptr val_tuple_data, - std::shared_ptr val_dict_data, - std::shared_ptr* out); + arrow::Status Finish(std::shared_ptr key_tuple_data, + std::shared_ptr val_list_data, + std::shared_ptr val_tuple_data, + std::shared_ptr val_dict_data, std::shared_ptr* out); -private: + private: SequenceBuilder keys_; SequenceBuilder vals_; }; - } #endif diff --git a/numbuf/cpp/src/numbuf/sequence.cc b/numbuf/cpp/src/numbuf/sequence.cc index 9672f5e5d..b1b5fe2e6 100644 --- a/numbuf/cpp/src/numbuf/sequence.cc +++ b/numbuf/cpp/src/numbuf/sequence.cc @@ -25,15 +25,17 @@ SequenceBuilder::SequenceBuilder(MemoryPool* pool) int64_tensors_(std::make_shared(), pool), float_tensors_(std::make_shared(), pool), double_tensors_(std::make_shared(), pool), - list_offsets_({0}), tuple_offsets_({0}), dict_offsets_({0}) {} + list_offsets_({0}), + tuple_offsets_({0}), + dict_offsets_({0}) {} -#define UPDATE(OFFSET, TAG) \ - if (TAG == -1) { \ - TAG = num_tags; \ - num_tags += 1; \ - } \ - RETURN_NOT_OK(offsets_.Append(OFFSET)); \ - RETURN_NOT_OK(types_.Append(TAG)); \ +#define UPDATE(OFFSET, TAG) \ + if (TAG == -1) { \ + TAG = num_tags; \ + num_tags += 1; \ + } \ + RETURN_NOT_OK(offsets_.Append(OFFSET)); \ + RETURN_NOT_OK(types_.Append(TAG)); \ RETURN_NOT_OK(nones_.AppendToBitmap(true)); Status SequenceBuilder::AppendNone() { @@ -79,9 +81,7 @@ Status SequenceBuilder::AppendDouble(double data) { #define DEF_TENSOR_APPEND(NAME, TYPE, TAG) \ Status SequenceBuilder::AppendTensor(const std::vector& dims, TYPE* data) { \ - if (TAG == -1) { \ - NAME.Start(); \ - } \ + if (TAG == -1) { NAME.Start(); } \ UPDATE(NAME.length(), TAG); \ return NAME.Append(dims, data); \ } @@ -138,12 +138,9 @@ Status SequenceBuilder::AppendDict(int32_t size) { DCHECK(OFFSETS.size() == 1); \ } -Status SequenceBuilder::Finish( - std::shared_ptr list_data, - std::shared_ptr tuple_data, - std::shared_ptr dict_data, - std::shared_ptr* out) { - +Status SequenceBuilder::Finish(std::shared_ptr list_data, + std::shared_ptr tuple_data, std::shared_ptr dict_data, + std::shared_ptr* out) { std::vector> types(num_tags); std::vector children(num_tags); @@ -174,10 +171,8 @@ Status SequenceBuilder::Finish( std::vector type_ids = {}; TypePtr type = TypePtr(new UnionType(types, type_ids, UnionMode::DENSE)); - out->reset(new UnionArray(type, types_.length(), - children, types_.data(), offsets_.data(), - nones_.null_count(), nones_.null_bitmap())); + out->reset(new UnionArray(type, types_.length(), children, types_.data(), + offsets_.data(), nones_.null_count(), nones_.null_bitmap())); return Status::OK(); } - } diff --git a/numbuf/cpp/src/numbuf/sequence.h b/numbuf/cpp/src/numbuf/sequence.h index bcb1c1afd..4c87d81a6 100644 --- a/numbuf/cpp/src/numbuf/sequence.h +++ b/numbuf/cpp/src/numbuf/sequence.h @@ -1,9 +1,9 @@ #ifndef NUMBUF_LIST_H #define NUMBUF_LIST_H +#include "tensor.h" #include #include -#include "tensor.h" namespace numbuf { @@ -25,7 +25,7 @@ class SequenceBuilder { //! Appending an uint64_t to the sequence arrow::Status AppendUInt64(uint64_t data); - + //! Append a list of bytes to the sequence arrow::Status AppendBytes(const uint8_t* data, int32_t length); @@ -79,11 +79,9 @@ class SequenceBuilder { arrow::Status AppendDict(int32_t size); //! Finish building the sequence and return the result - arrow::Status Finish( - std::shared_ptr list_data, - std::shared_ptr tuple_data, - std::shared_ptr dict_data, - std::shared_ptr* out); + arrow::Status Finish(std::shared_ptr list_data, + std::shared_ptr tuple_data, std::shared_ptr dict_data, + std::shared_ptr* out); private: arrow::MemoryPool* pool_; @@ -139,6 +137,6 @@ class SequenceBuilder { int8_t num_tags = 0; }; -} // namespace numbuf +} // namespace numbuf -#endif // NUMBUF_LIST_H +#endif // NUMBUF_LIST_H diff --git a/numbuf/cpp/src/numbuf/tensor.cc b/numbuf/cpp/src/numbuf/tensor.cc index 5d95a94a7..db9325563 100644 --- a/numbuf/cpp/src/numbuf/tensor.cc +++ b/numbuf/cpp/src/numbuf/tensor.cc @@ -4,11 +4,11 @@ using namespace arrow; namespace numbuf { -template +template TensorBuilder::TensorBuilder(const TypePtr& dtype, MemoryPool* pool) : dtype_(dtype), pool_(pool) {} -template +template Status TensorBuilder::Start() { dim_data_ = std::make_shared(pool_, std::make_shared()); dims_ = std::make_shared(pool_, dim_data_); @@ -16,12 +16,14 @@ Status TensorBuilder::Start() { values_ = std::make_shared(pool_, value_data_); auto dims_field = std::make_shared("dims", dims_->type()); auto values_field = std::make_shared("data", values_->type()); - auto type = std::make_shared(std::vector({dims_field, values_field})); - tensors_ = std::make_shared(pool_, type, std::vector>({dims_, values_})); + auto type = + std::make_shared(std::vector({dims_field, values_field})); + tensors_ = std::make_shared( + pool_, type, std::vector>({dims_, values_})); return Status::OK(); } -template +template Status TensorBuilder::Append(const std::vector& dims, const elem_type* data) { DCHECK(tensors_); RETURN_NOT_OK(tensors_->Append()); @@ -33,10 +35,10 @@ Status TensorBuilder::Append(const std::vector& dims, const elem_typ RETURN_NOT_OK(dim_data_->Append(dim)); } RETURN_NOT_OK(value_data_->Append(data, size)); - return Status::OK(); // tensors_->Append(); + return Status::OK(); // tensors_->Append(); } -template +template Status TensorBuilder::Finish(std::shared_ptr* out) { return tensors_->Finish(out); } @@ -51,5 +53,4 @@ template class TensorBuilder; template class TensorBuilder; template class TensorBuilder; template class TensorBuilder; - } diff --git a/numbuf/cpp/src/numbuf/tensor.h b/numbuf/cpp/src/numbuf/tensor.h index e3725fcc9..2725f4615 100644 --- a/numbuf/cpp/src/numbuf/tensor.h +++ b/numbuf/cpp/src/numbuf/tensor.h @@ -1,9 +1,9 @@ #ifndef NUMBUF_TENSOR_H #define NUMBUF_TENSOR_H -#include -#include #include +#include +#include namespace numbuf { @@ -12,13 +12,13 @@ namespace numbuf { columns, "dims" which contains an array of dimensions for each Tensor and "data" which contains data buffer of the Tensor as a flattened array. */ -template +template class TensorBuilder { -public: + public: typedef typename T::c_type elem_type; TensorBuilder(const arrow::TypePtr& dtype, arrow::MemoryPool* pool = nullptr); - + arrow::Status Start(); /*! Append a new tensor. @@ -36,16 +36,12 @@ public: arrow::Status Finish(std::shared_ptr* out); //! Number of tensors in the column - int32_t length() { - return tensors_->length(); - } + int32_t length() { return tensors_->length(); } - const arrow::TypePtr& type() { - return tensors_->type(); - } + const arrow::TypePtr& type() { return tensors_->type(); } -private: - arrow::TypePtr dtype_; + private: + arrow::TypePtr dtype_; arrow::MemoryPool* pool_; std::shared_ptr dim_data_; std::shared_ptr dims_; @@ -64,7 +60,6 @@ typedef TensorBuilder UInt64TensorBuilder; typedef TensorBuilder Int64TensorBuilder; typedef TensorBuilder FloatTensorBuilder; typedef TensorBuilder DoubleTensorBuilder; - } -#endif // NUMBUF_TENSOR_H +#endif // NUMBUF_TENSOR_H diff --git a/numbuf/python/src/pynumbuf/adapters/numpy.cc b/numbuf/python/src/pynumbuf/adapters/numpy.cc index 32901ffad..4964f61f2 100644 --- a/numbuf/python/src/pynumbuf/adapters/numpy.cc +++ b/numbuf/python/src/pynumbuf/adapters/numpy.cc @@ -8,8 +8,8 @@ using namespace arrow; extern "C" { - extern PyObject *numbuf_serialize_callback; - extern PyObject *numbuf_deserialize_callback; +extern PyObject* numbuf_serialize_callback; +extern PyObject* numbuf_deserialize_callback; } namespace numbuf { @@ -18,22 +18,20 @@ namespace numbuf { case Type::TYPE: \ return NPY_##TYPE; -#define DESERIALIZE_ARRAY_CASE(TYPE, ArrayType, type) \ - case Type::TYPE: { \ - auto values = std::dynamic_pointer_cast(content->values()); \ - DCHECK(values); \ - type* data = const_cast(values->raw_data()) \ - + content->offset(offset); \ - *out = PyArray_SimpleNewFromData(num_dims, dim.data(), NPY_##TYPE, \ - reinterpret_cast(data)); \ - if (base != Py_None) { \ - PyArray_SetBaseObject((PyArrayObject*) *out, base); \ - } \ - Py_XINCREF(base); \ - } \ - return Status::OK(); +#define DESERIALIZE_ARRAY_CASE(TYPE, ArrayType, type) \ + case Type::TYPE: { \ + auto values = std::dynamic_pointer_cast(content->values()); \ + DCHECK(values); \ + type* data = const_cast(values->raw_data()) + content->offset(offset); \ + *out = PyArray_SimpleNewFromData( \ + num_dims, dim.data(), NPY_##TYPE, reinterpret_cast(data)); \ + if (base != Py_None) { PyArray_SetBaseObject((PyArrayObject*)*out, base); } \ + Py_XINCREF(base); \ + } \ + return Status::OK(); -Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject* base, PyObject** out) { +Status DeserializeArray( + std::shared_ptr array, int32_t offset, PyObject* base, PyObject** out) { DCHECK(array); auto tensor = std::dynamic_pointer_cast(array); DCHECK(tensor); @@ -41,9 +39,9 @@ Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject* auto content = std::dynamic_pointer_cast(tensor->field(1)); npy_intp num_dims = dims->value_length(offset); std::vector dim(num_dims); - for (int i = dims->offset(offset); i < dims->offset(offset+1); ++i) { + for (int i = dims->offset(offset); i < dims->offset(offset + 1); ++i) { dim[i - dims->offset(offset)] = - std::dynamic_pointer_cast(dims->values())->Value(i); + std::dynamic_pointer_cast(dims->values())->Value(i); } switch (content->value_type()->type) { DESERIALIZE_ARRAY_CASE(INT8, Int8Array, int8_t) @@ -62,8 +60,8 @@ Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject* return Status::OK(); } -Status SerializeArray(PyArrayObject* array, SequenceBuilder& builder, - std::vector& subdicts) { +Status SerializeArray( + PyArrayObject* array, SequenceBuilder& builder, std::vector& subdicts) { size_t ndim = PyArray_NDIM(array); int dtype = PyArray_TYPE(array); std::vector dims(ndim); @@ -119,7 +117,8 @@ Status SerializeArray(PyArrayObject* array, SequenceBuilder& builder, PyObject* result = PyObject_CallObject(numbuf_serialize_callback, arglist); Py_XDECREF(arglist); if (!result) { - return Status::NotImplemented("python error"); // TODO(pcm): https://github.com/ray-project/numbuf/issues/10 + return Status::NotImplemented("python error"); // TODO(pcm): + // https://github.com/ray-project/numbuf/issues/10 } builder.AppendDict(PyDict_Size(result)); subdicts.push_back(result); @@ -128,5 +127,4 @@ Status SerializeArray(PyArrayObject* array, SequenceBuilder& builder, Py_XDECREF(contiguous); return Status::OK(); } - } diff --git a/numbuf/python/src/pynumbuf/adapters/numpy.h b/numbuf/python/src/pynumbuf/adapters/numpy.h index c0fcbb7f6..91f0473da 100644 --- a/numbuf/python/src/pynumbuf/adapters/numpy.h +++ b/numbuf/python/src/pynumbuf/adapters/numpy.h @@ -1,22 +1,23 @@ #ifndef PYNUMBUF_NUMPY_H #define PYNUMBUF_NUMPY_H -#include #include +#include #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define NO_IMPORT_ARRAY #define PY_ARRAY_UNIQUE_SYMBOL NUMBUF_ARRAY_API #include -#include #include +#include namespace numbuf { -arrow::Status SerializeArray(PyArrayObject* array, SequenceBuilder& builder, std::vector& subdicts); -arrow::Status DeserializeArray(std::shared_ptr array, int32_t offset, PyObject* base, PyObject** out); - +arrow::Status SerializeArray( + PyArrayObject* array, SequenceBuilder& builder, std::vector& subdicts); +arrow::Status DeserializeArray( + std::shared_ptr array, int32_t offset, PyObject* base, PyObject** out); } #endif diff --git a/numbuf/python/src/pynumbuf/adapters/python.cc b/numbuf/python/src/pynumbuf/adapters/python.cc index 2d41cbaa3..eada6e958 100644 --- a/numbuf/python/src/pynumbuf/adapters/python.cc +++ b/numbuf/python/src/pynumbuf/adapters/python.cc @@ -12,46 +12,54 @@ extern "C" { extern PyObject* numbuf_serialize_callback; extern PyObject* numbuf_deserialize_callback; - } namespace numbuf { -Status get_value(ArrayPtr arr, int32_t index, int32_t type, PyObject* base, PyObject** result) { +Status get_value( + ArrayPtr arr, int32_t index, int32_t type, PyObject* base, PyObject** result) { switch (arr->type()->type) { case Type::BOOL: - *result = PyBool_FromLong(std::static_pointer_cast(arr)->Value(index)); + *result = + PyBool_FromLong(std::static_pointer_cast(arr)->Value(index)); return Status::OK(); case Type::INT64: *result = PyInt_FromLong(std::static_pointer_cast(arr)->Value(index)); return Status::OK(); case Type::BINARY: { int32_t nchars; - const uint8_t* str = std::static_pointer_cast(arr)->GetValue(index, &nchars); + const uint8_t* str = + std::static_pointer_cast(arr)->GetValue(index, &nchars); *result = PyString_FromStringAndSize(reinterpret_cast(str), nchars); return Status::OK(); } case Type::STRING: { int32_t nchars; - const uint8_t* str = std::static_pointer_cast(arr)->GetValue(index, &nchars); + const uint8_t* str = + std::static_pointer_cast(arr)->GetValue(index, &nchars); *result = PyUnicode_FromStringAndSize(reinterpret_cast(str), nchars); return Status::OK(); } case Type::FLOAT: - *result = PyFloat_FromDouble(std::static_pointer_cast(arr)->Value(index)); + *result = + PyFloat_FromDouble(std::static_pointer_cast(arr)->Value(index)); return Status::OK(); case Type::DOUBLE: - *result = PyFloat_FromDouble(std::static_pointer_cast(arr)->Value(index)); + *result = + PyFloat_FromDouble(std::static_pointer_cast(arr)->Value(index)); return Status::OK(); case Type::STRUCT: { auto s = std::static_pointer_cast(arr); auto l = std::static_pointer_cast(s->field(0)); if (s->type()->child(0)->name == "list") { - return DeserializeList(l->values(), l->value_offset(index), l->value_offset(index+1), base, result); + return DeserializeList(l->values(), l->value_offset(index), + l->value_offset(index + 1), base, result); } else if (s->type()->child(0)->name == "tuple") { - return DeserializeTuple(l->values(), l->value_offset(index), l->value_offset(index+1), base, result); + return DeserializeTuple(l->values(), l->value_offset(index), + l->value_offset(index + 1), base, result); } else if (s->type()->child(0)->name == "dict") { - return DeserializeDict(l->values(), l->value_offset(index), l->value_offset(index+1), base, result); + return DeserializeDict(l->values(), l->value_offset(index), + l->value_offset(index + 1), base, result); } else { return DeserializeArray(arr, index, base, result); } @@ -62,10 +70,8 @@ Status get_value(ArrayPtr arr, int32_t index, int32_t type, PyObject* base, PyOb return Status::OK(); } -Status append(PyObject* elem, SequenceBuilder& builder, - std::vector& sublists, - std::vector& subtuples, - std::vector& subdicts) { +Status append(PyObject* elem, SequenceBuilder& builder, std::vector& sublists, + std::vector& subtuples, std::vector& subdicts) { // The bool case must precede the int case (PyInt_Check passes for bools) if (PyBool_Check(elem)) { RETURN_NOT_OK(builder.AppendBool(elem == Py_True)); @@ -75,9 +81,7 @@ Status append(PyObject* elem, SequenceBuilder& builder, int overflow = 0; int64_t data = PyLong_AsLongLongAndOverflow(elem, &overflow); RETURN_NOT_OK(builder.AppendInt64(data)); - if(overflow) { - return Status::NotImplemented("long overflow"); - } + if (overflow) { return Status::NotImplemented("long overflow"); } } else if (PyInt_Check(elem)) { RETURN_NOT_OK(builder.AppendInt64(static_cast(PyInt_AS_LONG(elem)))); } else if (PyString_Check(elem)) { @@ -86,13 +90,14 @@ Status append(PyObject* elem, SequenceBuilder& builder, RETURN_NOT_OK(builder.AppendBytes(data, size)); } else if (PyUnicode_Check(elem)) { Py_ssize_t size; - #if PY_MAJOR_VERSION >= 3 - char* data = PyUnicode_AsUTF8AndSize(elem, &size); // TODO(pcm): Check if this is correct - #else - PyObject* str = PyUnicode_AsUTF8String(elem); - char* data = PyString_AS_STRING(str); - size = PyString_GET_SIZE(str); - #endif +#if PY_MAJOR_VERSION >= 3 + char* data = + PyUnicode_AsUTF8AndSize(elem, &size); // TODO(pcm): Check if this is correct +#else + PyObject* str = PyUnicode_AsUTF8String(elem); + char* data = PyString_AS_STRING(str); + size = PyString_GET_SIZE(str); +#endif Status s = builder.AppendString(data, size); Py_XDECREF(str); RETURN_NOT_OK(s); @@ -108,14 +113,14 @@ Status append(PyObject* elem, SequenceBuilder& builder, } else if (PyArray_IsScalar(elem, Generic)) { RETURN_NOT_OK(AppendScalar(elem, builder)); } else if (PyArray_Check(elem)) { - RETURN_NOT_OK(SerializeArray((PyArrayObject*) elem, builder, subdicts)); + RETURN_NOT_OK(SerializeArray((PyArrayObject*)elem, builder, subdicts)); } else if (elem == Py_None) { RETURN_NOT_OK(builder.AppendNone()); } else { if (!numbuf_serialize_callback) { std::stringstream ss; ss << "data type of " << PyString_AS_STRING(PyObject_Repr(elem)) - << " not recognized and custom serialization handler not registered"; + << " not recognized and custom serialization handler not registered"; return Status::NotImplemented(ss.str()); } else { PyObject* arglist = Py_BuildValue("(O)", elem); @@ -124,7 +129,8 @@ Status append(PyObject* elem, SequenceBuilder& builder, PyObject* result = PyObject_CallObject(numbuf_serialize_callback, arglist); Py_XDECREF(arglist); if (!result) { - return Status::NotImplemented("python error"); // TODO(pcm): https://github.com/ray-project/numbuf/issues/10 + return Status::NotImplemented("python error"); // TODO(pcm): + // https://github.com/ray-project/numbuf/issues/10 } builder.AppendDict(PyDict_Size(result)); subdicts.push_back(result); @@ -133,13 +139,16 @@ Status append(PyObject* elem, SequenceBuilder& builder, return Status::OK(); } -Status SerializeSequences(std::vector sequences, int32_t recursion_depth, std::shared_ptr* out) { +Status SerializeSequences(std::vector sequences, int32_t recursion_depth, + std::shared_ptr* out) { DCHECK(out); if (recursion_depth >= MAX_RECURSION_DEPTH) { - return Status::NotImplemented("This object exceeds the maximum recursion depth. It may contain itself recursively."); + return Status::NotImplemented( + "This object exceeds the maximum recursion depth. It may contain itself " + "recursively."); } SequenceBuilder builder(nullptr); - std::vector sublists, subtuples, subdicts; + std::vector sublists, subtuples, subdicts; for (const auto& sequence : sequences) { PyObject* item; PyObject* iterator = PyObject_GetIter(sequence); @@ -169,42 +178,47 @@ Status SerializeSequences(std::vector sequences, int32_t recursion_de return builder.Finish(list, tuple, dict, out); } -#define DESERIALIZE_SEQUENCE(CREATE, SET_ITEM) \ - auto data = std::dynamic_pointer_cast(array); \ - int32_t size = array->length(); \ - PyObject* result = CREATE(stop_idx - start_idx); \ - auto types = std::make_shared(size, data->types()); \ - auto offsets = std::make_shared(size, data->offset_buf()); \ - for (size_t i = start_idx; i < stop_idx; ++i) { \ - if (data->IsNull(i)) { \ - Py_INCREF(Py_None); \ - SET_ITEM(result, i-start_idx, Py_None); \ - } else { \ - int32_t offset = offsets->Value(i); \ - int8_t type = types->Value(i); \ - ArrayPtr arr = data->child(type); \ - PyObject* value; \ - RETURN_NOT_OK(get_value(arr, offset, type, base, &value)); \ - SET_ITEM(result, i-start_idx, value); \ - } \ - } \ - *out = result; \ +#define DESERIALIZE_SEQUENCE(CREATE, SET_ITEM) \ + auto data = std::dynamic_pointer_cast(array); \ + int32_t size = array->length(); \ + PyObject* result = CREATE(stop_idx - start_idx); \ + auto types = std::make_shared(size, data->types()); \ + auto offsets = std::make_shared(size, data->offset_buf()); \ + for (size_t i = start_idx; i < stop_idx; ++i) { \ + if (data->IsNull(i)) { \ + Py_INCREF(Py_None); \ + SET_ITEM(result, i - start_idx, Py_None); \ + } else { \ + int32_t offset = offsets->Value(i); \ + int8_t type = types->Value(i); \ + ArrayPtr arr = data->child(type); \ + PyObject* value; \ + RETURN_NOT_OK(get_value(arr, offset, type, base, &value)); \ + SET_ITEM(result, i - start_idx, value); \ + } \ + } \ + *out = result; \ return Status::OK(); -Status DeserializeList(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out) { +Status DeserializeList(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, + PyObject* base, PyObject** out) { DESERIALIZE_SEQUENCE(PyList_New, PyList_SetItem) } -Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out) { +Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, + PyObject* base, PyObject** out) { DESERIALIZE_SEQUENCE(PyTuple_New, PyTuple_SetItem) } -Status SerializeDict(std::vector dicts, int32_t recursion_depth, std::shared_ptr* out) { +Status SerializeDict( + std::vector dicts, int32_t recursion_depth, std::shared_ptr* out) { DictBuilder result; if (recursion_depth >= MAX_RECURSION_DEPTH) { - return Status::NotImplemented("This object exceeds the maximum recursion depth. It may contain itself recursively."); + return Status::NotImplemented( + "This object exceeds the maximum recursion depth. It may contain itself " + "recursively."); } - std::vector key_tuples, val_lists, val_tuples, val_dicts, dummy; + std::vector key_tuples, val_lists, val_tuples, val_dicts, dummy; for (const auto& dict : dicts) { PyObject *key, *value; Py_ssize_t pos = 0; @@ -248,7 +262,8 @@ Status SerializeDict(std::vector dicts, int32_t recursion_depth, std: return Status::OK(); } -Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out) { +Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, + PyObject* base, PyObject** out) { auto data = std::dynamic_pointer_cast(array); // TODO(pcm): error handling, get rid of the temporary copy of the list PyObject *keys, *vals; @@ -256,10 +271,11 @@ Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t ARROW_RETURN_NOT_OK(DeserializeList(data->field(0), start_idx, stop_idx, base, &keys)); ARROW_RETURN_NOT_OK(DeserializeList(data->field(1), start_idx, stop_idx, base, &vals)); for (size_t i = start_idx; i < stop_idx; ++i) { - PyDict_SetItem(result, PyList_GetItem(keys, i - start_idx), PyList_GetItem(vals, i - start_idx)); + PyDict_SetItem( + result, PyList_GetItem(keys, i - start_idx), PyList_GetItem(vals, i - start_idx)); } - Py_XDECREF(keys); // PyList_GetItem(keys, ...) incremented the reference count - Py_XDECREF(vals); // PyList_GetItem(vals, ...) incremented the reference count + Py_XDECREF(keys); // PyList_GetItem(keys, ...) incremented the reference count + Py_XDECREF(vals); // PyList_GetItem(vals, ...) incremented the reference count static PyObject* py_type = PyString_FromString("_pytype_"); if (PyDict_Contains(result, py_type) && numbuf_deserialize_callback) { PyObject* arglist = Py_BuildValue("(O)", result); @@ -270,12 +286,11 @@ Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t Py_XDECREF(result); result = callback_result; if (!callback_result) { - return Status::NotImplemented("python error"); // TODO(pcm): https://github.com/ray-project/numbuf/issues/10 + return Status::NotImplemented("python error"); // TODO(pcm): + // https://github.com/ray-project/numbuf/issues/10 } } *out = result; return Status::OK(); } - - } diff --git a/numbuf/python/src/pynumbuf/adapters/python.h b/numbuf/python/src/pynumbuf/adapters/python.h index 024ebebbf..dc25ad018 100644 --- a/numbuf/python/src/pynumbuf/adapters/python.h +++ b/numbuf/python/src/pynumbuf/adapters/python.h @@ -11,12 +11,16 @@ namespace numbuf { -arrow::Status SerializeSequences(std::vector sequences, int32_t recursion_depth, std::shared_ptr* out); -arrow::Status SerializeDict(std::vector dicts, int32_t recursion_depth, std::shared_ptr* out); -arrow::Status DeserializeList(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out); -arrow::Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out); -arrow::Status DeserializeDict(std::shared_ptr array, int32_t start_idx, int32_t stop_idx, PyObject* base, PyObject** out); - +arrow::Status SerializeSequences(std::vector sequences, + int32_t recursion_depth, std::shared_ptr* out); +arrow::Status SerializeDict(std::vector dicts, int32_t recursion_depth, + std::shared_ptr* out); +arrow::Status DeserializeList(std::shared_ptr array, int32_t start_idx, + int32_t stop_idx, PyObject* base, PyObject** out); +arrow::Status DeserializeTuple(std::shared_ptr array, int32_t start_idx, + int32_t stop_idx, PyObject* base, PyObject** out); +arrow::Status DeserializeDict(std::shared_ptr array, int32_t start_idx, + int32_t stop_idx, PyObject* base, PyObject** out); } #endif diff --git a/numbuf/python/src/pynumbuf/adapters/scalars.h b/numbuf/python/src/pynumbuf/adapters/scalars.h index e680b446f..e64027c9e 100644 --- a/numbuf/python/src/pynumbuf/adapters/scalars.h +++ b/numbuf/python/src/pynumbuf/adapters/scalars.h @@ -16,39 +16,39 @@ namespace numbuf { arrow::Status AppendScalar(PyObject* obj, SequenceBuilder& builder) { if (PyArray_IsScalar(obj, Bool)) { - return builder.AppendBool(((PyBoolScalarObject *)obj)->obval != 0); + return builder.AppendBool(((PyBoolScalarObject*)obj)->obval != 0); } else if (PyArray_IsScalar(obj, Float)) { - return builder.AppendFloat(((PyFloatScalarObject *)obj)->obval); + return builder.AppendFloat(((PyFloatScalarObject*)obj)->obval); } else if (PyArray_IsScalar(obj, Double)) { - return builder.AppendDouble(((PyDoubleScalarObject *)obj)->obval); + return builder.AppendDouble(((PyDoubleScalarObject*)obj)->obval); } int64_t value = 0; if (PyArray_IsScalar(obj, Byte)) { - value = ((PyByteScalarObject *)obj)->obval; + value = ((PyByteScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, UByte)) { - value = ((PyUByteScalarObject *)obj)->obval; + value = ((PyUByteScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, Short)) { - value = ((PyShortScalarObject *)obj)->obval; + value = ((PyShortScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, UShort)) { - value = ((PyUShortScalarObject *)obj)->obval; + value = ((PyUShortScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, Int)) { - value = ((PyIntScalarObject *)obj)->obval; + value = ((PyIntScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, UInt)) { - value = ((PyUIntScalarObject *)obj)->obval; + value = ((PyUIntScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, Long)) { - value = ((PyLongScalarObject *)obj)->obval; + value = ((PyLongScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, ULong)) { - value = ((PyULongScalarObject *)obj)->obval; + value = ((PyULongScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, LongLong)) { - value = ((PyLongLongScalarObject *)obj)->obval; + value = ((PyLongLongScalarObject*)obj)->obval; } else if (PyArray_IsScalar(obj, ULongLong)) { - value = ((PyULongLongScalarObject *)obj)->obval; + value = ((PyULongLongScalarObject*)obj)->obval; } else { DCHECK(false) << "scalar type not recognized"; } return builder.AppendInt64(value); } -} // namespace +} // namespace -#endif // PYNUMBUF_SCALARS_H +#endif // PYNUMBUF_SCALARS_H diff --git a/numbuf/python/src/pynumbuf/memory.h b/numbuf/python/src/pynumbuf/memory.h index eb926dbcb..acd62f523 100644 --- a/numbuf/python/src/pynumbuf/memory.h +++ b/numbuf/python/src/pynumbuf/memory.h @@ -5,16 +5,18 @@ namespace numbuf { -class FixedBufferStream : public arrow::io::OutputStream, public arrow::io::ReadableFileInterface { +class FixedBufferStream : public arrow::io::OutputStream, + public arrow::io::ReadableFileInterface { public: virtual ~FixedBufferStream() {} explicit FixedBufferStream(uint8_t* data, int64_t nbytes) - : data_(data), position_(0), size_(nbytes) {} + : data_(data), position_(0), size_(nbytes) {} arrow::Status Read(int64_t nbytes, std::shared_ptr* out) override { DCHECK(out); - DCHECK(position_ + nbytes <= size_) << "position: " << position_ << " nbytes: " << nbytes << "size: " << size_; + DCHECK(position_ + nbytes <= size_) << "position: " << position_ + << " nbytes: " << nbytes << "size: " << size_; *out = std::make_shared(data_ + position_, nbytes); position_ += nbytes; return arrow::Status::OK(); @@ -30,9 +32,7 @@ class FixedBufferStream : public arrow::io::OutputStream, public arrow::io::Read return arrow::Status::OK(); } - arrow::Status Close() override { - return arrow::Status::OK(); - } + arrow::Status Close() override { return arrow::Status::OK(); } arrow::Status Tell(int64_t* position) override { *position = position_; @@ -41,28 +41,27 @@ class FixedBufferStream : public arrow::io::OutputStream, public arrow::io::Read arrow::Status Write(const uint8_t* data, int64_t nbytes) override { DCHECK(position_ >= 0 && position_ < size_); - DCHECK(position_ + nbytes <= size_) << "position: " << position_ << " nbytes: " << nbytes << "size: " << size_; + DCHECK(position_ + nbytes <= size_) << "position: " << position_ + << " nbytes: " << nbytes << "size: " << size_; uint8_t* dst = data_ + position_; memcpy(dst, data, nbytes); position_ += nbytes; return arrow::Status::OK(); } - arrow::Status GetSize(int64_t *size) override { + arrow::Status GetSize(int64_t* size) override { *size = size_; return arrow::Status::OK(); } - bool supports_zero_copy() const override { - return true; - } + bool supports_zero_copy() const override { return true; } -private: - uint8_t* data_; - int64_t position_; - int64_t size_; + private: + uint8_t* data_; + int64_t position_; + int64_t size_; }; -} // namespace numbuf +} // namespace numbuf -#endif // PYNUMBUF_MEMORY_H +#endif // PYNUMBUF_MEMORY_H diff --git a/numbuf/python/src/pynumbuf/numbuf.cc b/numbuf/python/src/pynumbuf/numbuf.cc index ba2483810..70be81b26 100644 --- a/numbuf/python/src/pynumbuf/numbuf.cc +++ b/numbuf/python/src/pynumbuf/numbuf.cc @@ -23,14 +23,15 @@ std::shared_ptr make_row_batch(std::shared_ptr data) { extern "C" { -static PyObject *NumbufError; +static PyObject* NumbufError; -PyObject *numbuf_serialize_callback = NULL; -PyObject *numbuf_deserialize_callback = NULL; +PyObject* numbuf_serialize_callback = NULL; +PyObject* numbuf_deserialize_callback = NULL; -int PyObjectToArrow(PyObject* object, std::shared_ptr **result) { +int PyObjectToArrow(PyObject* object, std::shared_ptr** result) { if (PyCapsule_IsValid(object, "arrow")) { - *result = reinterpret_cast*>(PyCapsule_GetPointer(object, "arrow")); + *result = reinterpret_cast*>( + PyCapsule_GetPointer(object, "arrow")); return 1; } else { PyErr_SetString(PyExc_TypeError, "must be an 'arrow' capsule"); @@ -39,25 +40,23 @@ int PyObjectToArrow(PyObject* object, std::shared_ptr **result) { } static void ArrowCapsule_Destructor(PyObject* capsule) { - delete reinterpret_cast*>(PyCapsule_GetPointer(capsule, "arrow")); + delete reinterpret_cast*>( + PyCapsule_GetPointer(capsule, "arrow")); } /* Documented in doc/numbuf.rst in ray-core */ static PyObject* serialize_list(PyObject* self, PyObject* args) { PyObject* value; - if (!PyArg_ParseTuple(args, "O", &value)) { - return NULL; - } + if (!PyArg_ParseTuple(args, "O", &value)) { return NULL; } std::shared_ptr array; if (PyList_Check(value)) { int32_t recursion_depth = 0; - Status s = SerializeSequences(std::vector({value}), recursion_depth, &array); + Status s = + SerializeSequences(std::vector({value}), recursion_depth, &array); if (!s.ok()) { // If this condition is true, there was an error in the callback that // needs to be passed through - if (!PyErr_Occurred()) { - PyErr_SetString(NumbufError, s.ToString().c_str()); - } + if (!PyErr_Occurred()) { PyErr_SetString(NumbufError, s.ToString().c_str()); } return NULL; } @@ -74,8 +73,8 @@ static PyObject* serialize_list(PyObject* self, PyObject* args) { PyObject* r = PyTuple_New(3); PyTuple_SetItem(r, 0, PyByteArray_FromStringAndSize(ptr, buffer->size())); PyTuple_SetItem(r, 1, PyInt_FromLong(size)); - PyTuple_SetItem(r, 2, PyCapsule_New(reinterpret_cast(batch), - "arrow", &ArrowCapsule_Destructor)); + PyTuple_SetItem(r, 2, + PyCapsule_New(reinterpret_cast(batch), "arrow", &ArrowCapsule_Destructor)); return r; } return NULL; @@ -88,14 +87,14 @@ static PyObject* write_to_buffer(PyObject* self, PyObject* args) { if (!PyArg_ParseTuple(args, "O&O", &PyObjectToArrow, &batch, &memoryview)) { return NULL; } - if (!PyMemoryView_Check(memoryview)) { - return NULL; - } + if (!PyMemoryView_Check(memoryview)) { return NULL; } Py_buffer* buffer = PyMemoryView_GET_BUFFER(memoryview); - auto target = std::make_shared(reinterpret_cast(buffer->buf), buffer->len); + auto target = std::make_shared( + reinterpret_cast(buffer->buf), buffer->len); int64_t body_end_offset; int64_t header_end_offset; - ARROW_CHECK_OK(ipc::WriteRecordBatch((*batch)->columns(), (*batch)->num_rows(), target.get(), &body_end_offset, &header_end_offset)); + ARROW_CHECK_OK(ipc::WriteRecordBatch((*batch)->columns(), (*batch)->num_rows(), + target.get(), &body_end_offset, &header_end_offset)); return PyInt_FromLong(header_end_offset); } @@ -118,31 +117,28 @@ static PyObject* read_from_buffer(PyObject* self, PyObject* args) { ARROW_CHECK_OK(schema_msg->GetSchema(&schema)); Py_buffer* buffer = PyMemoryView_GET_BUFFER(memoryview); - auto source = std::make_shared(reinterpret_cast(buffer->buf), buffer->len); + auto source = std::make_shared( + reinterpret_cast(buffer->buf), buffer->len); std::shared_ptr reader; - ARROW_CHECK_OK(arrow::ipc::RecordBatchReader::Open(source.get(), metadata_offset, &reader)); + ARROW_CHECK_OK( + arrow::ipc::RecordBatchReader::Open(source.get(), metadata_offset, &reader)); auto batch = new std::shared_ptr(); ARROW_CHECK_OK(reader->GetRecordBatch(schema, batch)); - return PyCapsule_New(reinterpret_cast(batch), - "arrow", &ArrowCapsule_Destructor); + return PyCapsule_New(reinterpret_cast(batch), "arrow", &ArrowCapsule_Destructor); } /* Documented in doc/numbuf.rst in ray-core */ static PyObject* deserialize_list(PyObject* self, PyObject* args) { std::shared_ptr* data; PyObject* base = Py_None; - if (!PyArg_ParseTuple(args, "O&|O", &PyObjectToArrow, &data, &base)) { - return NULL; - } + if (!PyArg_ParseTuple(args, "O&|O", &PyObjectToArrow, &data, &base)) { return NULL; } PyObject* result; Status s = DeserializeList((*data)->column(0), 0, (*data)->num_rows(), base, &result); if (!s.ok()) { // If this condition is true, there was an error in the callback that // needs to be passed through - if (!PyErr_Occurred()) { - PyErr_SetString(NumbufError, s.ToString().c_str()); - } + if (!PyErr_Occurred()) { PyErr_SetString(NumbufError, s.ToString().c_str()); } return NULL; } return result; @@ -152,7 +148,8 @@ static PyObject* register_callbacks(PyObject* self, PyObject* args) { PyObject* result = NULL; PyObject* serialize_callback; PyObject* deserialize_callback; - if (PyArg_ParseTuple(args, "OO:register_callbacks", &serialize_callback, &deserialize_callback)) { + if (PyArg_ParseTuple( + args, "OO:register_callbacks", &serialize_callback, &deserialize_callback)) { if (!PyCallable_Check(serialize_callback)) { PyErr_SetString(PyExc_TypeError, "serialize_callback must be callable"); return NULL; @@ -161,10 +158,10 @@ static PyObject* register_callbacks(PyObject* self, PyObject* args) { PyErr_SetString(PyExc_TypeError, "deserialize_callback must be callable"); return NULL; } - Py_XINCREF(serialize_callback); // Add a reference to new serialization callback - Py_XINCREF(deserialize_callback); // Add a reference to new deserialization callback - Py_XDECREF(numbuf_serialize_callback); // Dispose of old serialization callback - Py_XDECREF(numbuf_deserialize_callback); // Dispose of old deserialization callback + Py_XINCREF(serialize_callback); // Add a reference to new serialization callback + Py_XINCREF(deserialize_callback); // Add a reference to new deserialization callback + Py_XDECREF(numbuf_serialize_callback); // Dispose of old serialization callback + Py_XDECREF(numbuf_deserialize_callback); // Dispose of old deserialization callback numbuf_serialize_callback = serialize_callback; numbuf_deserialize_callback = deserialize_callback; Py_INCREF(Py_None); @@ -174,13 +171,14 @@ static PyObject* register_callbacks(PyObject* self, PyObject* args) { } static PyMethodDef NumbufMethods[] = { - { "serialize_list", serialize_list, METH_VARARGS, "serialize a Python list" }, - { "deserialize_list", deserialize_list, METH_VARARGS, "deserialize a Python list" }, - { "write_to_buffer", write_to_buffer, METH_VARARGS, "write serialized data to buffer"}, - { "read_from_buffer", read_from_buffer, METH_VARARGS, "read serialized data from buffer"}, - { "register_callbacks", register_callbacks, METH_VARARGS, "set serialization and deserialization callbacks"}, - { NULL, NULL, 0, NULL } -}; + {"serialize_list", serialize_list, METH_VARARGS, "serialize a Python list"}, + {"deserialize_list", deserialize_list, METH_VARARGS, "deserialize a Python list"}, + {"write_to_buffer", write_to_buffer, METH_VARARGS, "write serialized data to buffer"}, + {"read_from_buffer", read_from_buffer, METH_VARARGS, + "read serialized data from buffer"}, + {"register_callbacks", register_callbacks, METH_VARARGS, + "set serialization and deserialization callbacks"}, + {NULL, NULL, 0, NULL}}; PyMODINIT_FUNC initlibnumbuf(void) { PyObject* m; @@ -191,5 +189,4 @@ PyMODINIT_FUNC initlibnumbuf(void) { PyModule_AddObject(m, "numbuf_error", NumbufError); import_array(); } - }