#include "numpy.h"

#include <cstdint>
#include <memory>
#include <sstream>
#include <vector>

using namespace arrow;

namespace numbuf {

// Expands to a `case` label that maps the Arrow type id Type::TYPE to the
// NumPy type number carrying the same suffix (NPY_##TYPE). Meant to be used
// inside a `switch` in a function that returns the NumPy type number.
#define ARROW_TYPE_TO_NUMPY_CASE(TYPE) \
  case Type::TYPE:                     \
    return NPY_##TYPE;

// Expands to one `case` of the switch in DeserializeArray. It expects the
// locals `content`, `offset`, `num_dims`, `dim` and `out` to be in scope:
// the values of `content` are cast to ArrayType, the element pointer is
// advanced to the start of the tensor at `offset`, and a NumPy array is
// created around that pointer with PyArray_SimpleNewFromData (which wraps the
// given memory rather than copying it). The case then returns Status::OK().
#define DESERIALIZE_ARRAY_CASE(TYPE, ArrayType, type)                       \
  case Type::TYPE: {                                                        \
    auto values = std::dynamic_pointer_cast<ArrayType>(content->values());  \
    DCHECK(values);                                                         \
    type* data =                                                            \
        const_cast<type*>(values->raw_data()) + content->offset(offset);    \
    *out = PyArray_SimpleNewFromData(num_dims, dim.data(), NPY_##TYPE,      \
                                     reinterpret_cast<void*>(data));        \
  }                                                                         \
    return Status::OK();

// Creates a NumPy array for the tensor stored at index `offset` of `array`.
//
// `array` is treated as a struct array whose field 0 is a list array holding
// the dimensions of each tensor and whose field 1 is a list array holding the
// flattened element data.
//
// @param array   Arrow array containing the serialized tensors.
// @param offset  Index of the tensor to deserialize.
// @param out     Receives the newly created NumPy array object.
// @return Status::OK() on success; Status::NotImplemented if the element type
//         of the tensor is not one of the supported numeric types (in that
//         case `*out` is left untouched).
//
// NOTE(review): the NumPy array wraps the Arrow memory directly, so the Arrow
// data presumably has to outlive the returned object -- confirm with callers.
Status DeserializeArray(std::shared_ptr<Array> array, int32_t offset,
                        PyObject** out) {
  DCHECK(array);
  auto tensor = std::dynamic_pointer_cast<StructArray>(array);
  DCHECK(tensor);
  auto dims = std::dynamic_pointer_cast<ListArray>(tensor->field(0));
  DCHECK(dims);
  auto content = std::dynamic_pointer_cast<ListArray>(tensor->field(1));
  DCHECK(content);

  // Copy the shape of the requested tensor into a buffer NumPy can consume.
  npy_intp num_dims = dims->value_length(offset);
  std::vector<npy_intp> dim(num_dims);
  // The cast and the slice bounds do not change inside the loop; compute
  // them once instead of on every iteration.
  auto dim_values = std::dynamic_pointer_cast<Int64Array>(dims->values());
  DCHECK(dim_values);
  const int first = dims->offset(offset);
  const int last = dims->offset(offset + 1);
  for (int i = first; i < last; ++i) {
    dim[i - first] = dim_values->Value(i);
  }

  switch (content->value_type()->type) {
    DESERIALIZE_ARRAY_CASE(INT8, Int8Array, int8_t)
    DESERIALIZE_ARRAY_CASE(INT16, Int16Array, int16_t)
    DESERIALIZE_ARRAY_CASE(INT32, Int32Array, int32_t)
    DESERIALIZE_ARRAY_CASE(INT64, Int64Array, int64_t)
    DESERIALIZE_ARRAY_CASE(UINT8, UInt8Array, uint8_t)
    DESERIALIZE_ARRAY_CASE(UINT16, UInt16Array, uint16_t)
    DESERIALIZE_ARRAY_CASE(UINT32, UInt32Array, uint32_t)
    DESERIALIZE_ARRAY_CASE(UINT64, UInt64Array, uint64_t)
    DESERIALIZE_ARRAY_CASE(FLOAT, FloatArray, float)
    DESERIALIZE_ARRAY_CASE(DOUBLE, DoubleArray, double)
    default: {
      // DCHECK is compiled out of release builds; without the explicit error
      // return the caller would receive OK together with an unset `*out`.
      std::stringstream message;
      message << "arrow type not recognized: " << content->value_type()->type;
      DCHECK(false) << message.str();
      return Status::NotImplemented(message.str());
    }
  }
  return Status::OK();
}

// Expands to one `case` of the switch in SerializeArray. It expects the
// locals `status`, `builder`, `dims` and `data` to be in scope and stores the
// result of AppendTensor in `status` instead of returning, so that the
// cleanup after the switch always runs.
#define SERIALIZE_ARRAY_CASE(NPY_TYPE, type)                              \
  case NPY_TYPE:                                                          \
    status = builder.AppendTensor(dims, reinterpret_cast<type*>(data));   \
    break;

// Appends the NumPy array `array` to `builder` as a tensor.
//
// The shape is read from `array`; the element data is read from a contiguous
// version of the array obtained with PyArray_GETCONTIGUOUS, whose reference is
// released before returning on every path.
//
// @param array    NumPy array to serialize.
// @param builder  Sequence builder the tensor is appended to.
// @return The status of AppendTensor on success or failure of the append;
//         Status::Invalid if no contiguous array could be obtained;
//         Status::NotImplemented if the dtype is not one of the supported
//         numeric types.
Status SerializeArray(PyArrayObject* array, SequenceBuilder& builder) {
  const int ndim = PyArray_NDIM(array);
  const int dtype = PyArray_TYPE(array);
  std::vector<int64_t> dims(ndim);
  for (int i = 0; i < ndim; ++i) {
    dims[i] = PyArray_DIM(array, i);
  }
  // TODO(pcm): Once we don't use builders any more below and directly share
  // the memory buffer, we need to be more careful about this and not
  // decrease the reference count of "contiguous" before the serialization
  // is finished
  auto contiguous = PyArray_GETCONTIGUOUS(array);
  if (contiguous == nullptr) {
    // PyArray_GETCONTIGUOUS failed; do not dereference the null result.
    return Status::Invalid("could not obtain a contiguous numpy array");
  }
  auto data = PyArray_DATA(contiguous);

  // Collect the outcome in `status` rather than returning from inside the
  // switch, so the reference held by `contiguous` is dropped on every path.
  Status status = Status::OK();
  switch (dtype) {
    SERIALIZE_ARRAY_CASE(NPY_UINT8, uint8_t)
    SERIALIZE_ARRAY_CASE(NPY_INT8, int8_t)
    SERIALIZE_ARRAY_CASE(NPY_UINT16, uint16_t)
    SERIALIZE_ARRAY_CASE(NPY_INT16, int16_t)
    SERIALIZE_ARRAY_CASE(NPY_UINT32, uint32_t)
    SERIALIZE_ARRAY_CASE(NPY_INT32, int32_t)
    SERIALIZE_ARRAY_CASE(NPY_UINT64, uint64_t)
    SERIALIZE_ARRAY_CASE(NPY_INT64, int64_t)
    SERIALIZE_ARRAY_CASE(NPY_FLOAT, float)
    SERIALIZE_ARRAY_CASE(NPY_DOUBLE, double)
    default: {
      // DCHECK is compiled out of release builds; report the unsupported
      // dtype instead of returning OK without having appended anything.
      std::stringstream message;
      message << "numpy data type not recognized: " << dtype;
      DCHECK(false) << message.str();
      status = Status::NotImplemented(message.str());
    }
  }
  Py_XDECREF(contiguous);
  return status;
}

#undef SERIALIZE_ARRAY_CASE

}  // namespace numbuf