diff --git a/.gitmodules b/.gitmodules index 89d94b72e..29d35a973 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "thirdparty/arrow"] path = thirdparty/arrow - url = https://github.com/ray-project/arrow.git + url = https://github.com/pcmoritz/arrow.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 760bdde11..a8d400ee6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,10 @@ project(numbuf) list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) +# Make libnumbuf.so look for shared libraries in the folder libnumbuf.so is in +set(CMAKE_INSTALL_RPATH "$ORIGIN/") +set(CMAKE_MACOSX_RPATH 1) + if(NOT APPLE) find_package(PythonInterp REQUIRED) find_package(PythonLibs REQUIRED) @@ -53,8 +57,25 @@ endif() set(ARROW_DIR "${CMAKE_SOURCE_DIR}/thirdparty/arrow/" CACHE STRING "Path of the arrow source directory") -set(ARROW_STATIC_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow.a" CACHE STRING - "Path to libarrow.a (needs to be changed if arrow is build in debug mode)") +if (APPLE) + set(ARROW_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow.dylib" CACHE STRING + "Path to libarrow.dylib (needs to be changed if arrow is build in debug mode)") + + set(ARROW_IO_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_io.dylib" CACHE STRING + "Path to libarrow_io.dylib (needs to be changed if arrow is build in debug mode)") + + set(ARROW_IPC_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_ipc.dylib" CACHE STRING + "Path to libarrow_ipc.dylib (needs to be changed if arrow is build in debug mode)") +else() + set(ARROW_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow.so" CACHE STRING + "Path to libarrow.so (needs to be changed if arrow is build in debug mode)") + + set(ARROW_IO_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_io.so" CACHE STRING + "Path to libarrow_io.so (needs to be changed if arrow is build in debug mode)") + 
+ set(ARROW_IPC_LIB "${CMAKE_SOURCE_DIR}/thirdparty/arrow/cpp/build/release/libarrow_ipc.so" CACHE STRING + "Path to libarrow_ipc.so (needs to be changed if arrow is build in debug mode)") +endif() include_directories("${ARROW_DIR}/cpp/src/") include_directories("cpp/src/") @@ -75,8 +96,18 @@ if(APPLE) add_custom_command(TARGET numbuf POST_BUILD COMMAND ${CMAKE_INSTALL_NAME_TOOL} -change ${PYTHON_SHARED_LIBRARY} ${PYTHON_LIBRARIES} libnumbuf.so) + add_custom_command(TARGET numbuf + POST_BUILD COMMAND + ${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libarrow.dylib" "@loader_path/libarrow.dylib" libnumbuf.so) + add_custom_command(TARGET numbuf + POST_BUILD COMMAND + ${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libarrow_io.dylib" "@loader_path/libarrow_io.dylib" libnumbuf.so) + add_custom_command(TARGET numbuf + POST_BUILD COMMAND + ${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libarrow_ipc.dylib" "@loader_path/libarrow_ipc.dylib" libnumbuf.so) endif(APPLE) -target_link_libraries(numbuf ${ARROW_STATIC_LIB} ${PYTHON_LIBRARIES}) +target_link_libraries(numbuf ${ARROW_LIB} ${ARROW_IO_LIB} ${ARROW_IPC_LIB} ${PYTHON_LIBRARIES}) -install(TARGETS numbuf DESTINATION ${CMAKE_SOURCE_DIR}) +install(TARGETS numbuf DESTINATION ${CMAKE_SOURCE_DIR}/numbuf/) +install(FILES ${ARROW_LIB} ${ARROW_IO_LIB} ${ARROW_IPC_LIB} DESTINATION ${CMAKE_SOURCE_DIR}/numbuf/) diff --git a/build.sh b/build.sh index d82905fb5..087c1471f 100755 --- a/build.sh +++ b/build.sh @@ -15,6 +15,6 @@ fi mkdir -p "$ROOT_DIR/build" pushd "$ROOT_DIR/build" - cmake -DCMAKE_BUILD_TYPE=Release .. + cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-g" -DCMAKE_CXX_FLAGS="-g" .. 
make install -j$PARALLEL popd diff --git a/cpp/src/numbuf/dict.cc b/cpp/src/numbuf/dict.cc index 98b03894b..74a9cfadf 100644 --- a/cpp/src/numbuf/dict.cc +++ b/cpp/src/numbuf/dict.cc @@ -4,21 +4,24 @@ using namespace arrow; namespace numbuf { -std::shared_ptr DictBuilder::Finish( +Status DictBuilder::Finish( std::shared_ptr key_tuple_data, std::shared_ptr val_list_data, std::shared_ptr val_tuple_data, - std::shared_ptr val_dict_data) { + std::shared_ptr val_dict_data, + std::shared_ptr* out) { // lists and dicts can't be keys of dicts in Python, that is why for // the keys we do not need to collect sublists - auto keys = keys_.Finish(nullptr, key_tuple_data, nullptr); - auto vals = vals_.Finish(val_list_data, val_tuple_data, val_dict_data); + std::shared_ptr keys, vals; + RETURN_NOT_OK(keys_.Finish(nullptr, key_tuple_data, nullptr, &keys)); + RETURN_NOT_OK(vals_.Finish(val_list_data, val_tuple_data, val_dict_data, &vals)); auto keys_field = std::make_shared("keys", keys->type()); auto vals_field = std::make_shared("vals", vals->type()); auto type = std::make_shared(std::vector({keys_field, vals_field})); std::vector field_arrays({keys, vals}); DCHECK(keys->length() == vals->length()); - return std::make_shared(type, keys->length(), field_arrays); + out->reset(new StructArray(type, keys->length(), field_arrays)); + return Status::OK(); } } diff --git a/cpp/src/numbuf/dict.h b/cpp/src/numbuf/dict.h index dcb38fb6b..6497b3eb7 100644 --- a/cpp/src/numbuf/dict.h +++ b/cpp/src/numbuf/dict.h @@ -33,11 +33,12 @@ public: List containing the data from nested dictionaries in the value list of the dictionary */ - std::shared_ptr Finish( + arrow::Status Finish( std::shared_ptr key_tuple_data, std::shared_ptr val_list_data, std::shared_ptr val_tuple_data, - std::shared_ptr val_dict_data); + std::shared_ptr val_dict_data, + std::shared_ptr* out); private: SequenceBuilder keys_; diff --git a/cpp/src/numbuf/sequence.cc b/cpp/src/numbuf/sequence.cc index a5e1cf8ca..9672f5e5d 100644 
--- a/cpp/src/numbuf/sequence.cc +++ b/cpp/src/numbuf/sequence.cc @@ -5,13 +5,16 @@ using namespace arrow; namespace numbuf { SequenceBuilder::SequenceBuilder(MemoryPool* pool) - : pool_(pool), types_(pool), offsets_(pool), + : pool_(pool), + types_(pool, std::make_shared()), + offsets_(pool, std::make_shared()), nones_(pool, std::make_shared()), bools_(pool, std::make_shared()), - ints_(pool), + ints_(pool, std::make_shared()), bytes_(pool, std::make_shared()), strings_(pool, std::make_shared()), - floats_(pool), doubles_(pool), + floats_(pool, std::make_shared()), + doubles_(pool, std::make_shared()), uint8_tensors_(std::make_shared(), pool), int8_tensors_(std::make_shared(), pool), uint16_tensors_(std::make_shared(), pool), @@ -76,6 +79,9 @@ Status SequenceBuilder::AppendDouble(double data) { #define DEF_TENSOR_APPEND(NAME, TYPE, TAG) \ Status SequenceBuilder::AppendTensor(const std::vector& dims, TYPE* data) { \ + if (TAG == -1) { \ + RETURN_NOT_OK(NAME.Start()); \ + } \ UPDATE(NAME.length(), TAG); \ return NAME.Append(dims, data); \ } @@ -109,11 +115,11 @@ Status SequenceBuilder::AppendDict(int32_t size) { return Status::OK(); } -#define ADD_ELEMENT(VARNAME, TAG) \ - if (TAG != -1) { \ - types[TAG] = VARNAME.type(); \ - children[TAG] = VARNAME.Finish(); \ - ARROW_CHECK_OK(nones_.AppendToBitmap(true)); \ +#define ADD_ELEMENT(VARNAME, TAG) \ + if (TAG != -1) { \ + types[TAG] = std::make_shared("", VARNAME.type()); \ + RETURN_NOT_OK(VARNAME.Finish(&children[TAG])); \ + RETURN_NOT_OK(nones_.AppendToBitmap(true)); \ } #define ADD_SUBSEQUENCE(DATA, OFFSETS, BUILDER, TAG, NAME) \ @@ -132,12 +138,13 @@ Status SequenceBuilder::AppendDict(int32_t size) { DCHECK(OFFSETS.size() == 1); \ } -std::shared_ptr SequenceBuilder::Finish( +Status SequenceBuilder::Finish( std::shared_ptr list_data, std::shared_ptr tuple_data, - std::shared_ptr dict_data) { + std::shared_ptr dict_data, + std::shared_ptr* out) { - std::vector types(num_tags); + std::vector> types(num_tags); std::vector 
children(num_tags); ADD_ELEMENT(bools_, bool_tag); @@ -165,11 +172,12 @@ std::shared_ptr SequenceBuilder::Finish( ADD_SUBSEQUENCE(tuple_data, tuple_offsets_, tuple_builder, tuple_tag, "tuple"); ADD_SUBSEQUENCE(dict_data, dict_offsets_, dict_builder, dict_tag, "dict"); - TypePtr type = TypePtr(new DenseUnionType(types)); - - return std::make_shared(type, types_.length(), + std::vector type_ids = {}; + TypePtr type = TypePtr(new UnionType(types, type_ids, UnionMode::DENSE)); + out->reset(new UnionArray(type, types_.length(), children, types_.data(), offsets_.data(), - nones_.null_count(), nones_.null_bitmap()); + nones_.null_count(), nones_.null_bitmap())); + return Status::OK(); } } diff --git a/cpp/src/numbuf/sequence.h b/cpp/src/numbuf/sequence.h index 7237dbd83..bcb1c1afd 100644 --- a/cpp/src/numbuf/sequence.h +++ b/cpp/src/numbuf/sequence.h @@ -79,10 +79,11 @@ class SequenceBuilder { arrow::Status AppendDict(int32_t size); //! Finish building the sequence and return the result - std::shared_ptr Finish( + arrow::Status Finish( std::shared_ptr list_data, std::shared_ptr tuple_data, - std::shared_ptr dict_data); + std::shared_ptr dict_data, + std::shared_ptr* out); private: arrow::MemoryPool* pool_; diff --git a/cpp/src/numbuf/tensor.cc b/cpp/src/numbuf/tensor.cc index f0f7647de..5d95a94a7 100644 --- a/cpp/src/numbuf/tensor.cc +++ b/cpp/src/numbuf/tensor.cc @@ -6,19 +6,24 @@ namespace numbuf { template TensorBuilder::TensorBuilder(const TypePtr& dtype, MemoryPool* pool) - : dtype_(dtype) { - dim_data_ = std::make_shared(pool); - dims_ = std::make_shared(pool, dim_data_); - value_data_ = std::make_shared>(pool, dtype); - values_ = std::make_shared(pool, value_data_); + : dtype_(dtype), pool_(pool) {} + +template +Status TensorBuilder::Start() { + dim_data_ = std::make_shared(pool_, std::make_shared()); + dims_ = std::make_shared(pool_, dim_data_); + value_data_ = std::make_shared>(pool_, dtype_); + values_ = std::make_shared(pool_, value_data_); auto dims_field = 
std::make_shared("dims", dims_->type()); auto values_field = std::make_shared("data", values_->type()); auto type = std::make_shared(std::vector({dims_field, values_field})); - tensors_ = std::make_shared(pool, type, std::vector>({dims_, values_})); -}; + tensors_ = std::make_shared(pool_, type, std::vector>({dims_, values_})); + return Status::OK(); +} template Status TensorBuilder::Append(const std::vector& dims, const elem_type* data) { + DCHECK(tensors_); RETURN_NOT_OK(tensors_->Append()); RETURN_NOT_OK(dims_->Append()); RETURN_NOT_OK(values_->Append()); @@ -32,8 +37,8 @@ Status TensorBuilder::Append(const std::vector& dims, const elem_typ } template -std::shared_ptr TensorBuilder::Finish() { - return tensors_->Finish(); +Status TensorBuilder::Finish(std::shared_ptr* out) { + return tensors_->Finish(out); } template class TensorBuilder; diff --git a/cpp/src/numbuf/tensor.h b/cpp/src/numbuf/tensor.h index 351892758..e3725fcc9 100644 --- a/cpp/src/numbuf/tensor.h +++ b/cpp/src/numbuf/tensor.h @@ -18,6 +18,8 @@ public: typedef typename T::c_type elem_type; TensorBuilder(const arrow::TypePtr& dtype, arrow::MemoryPool* pool = nullptr); + + arrow::Status Start(); /*! Append a new tensor. @@ -31,7 +33,7 @@ public: arrow::Status Append(const std::vector& dims, const elem_type* data); //! Convert the tensors to an Arrow StructArray - std::shared_ptr Finish(); + arrow::Status Finish(std::shared_ptr* out); //! 
Number of tensors in the column int32_t length() { @@ -44,6 +46,7 @@ public: private: arrow::TypePtr dtype_; + arrow::MemoryPool* pool_; std::shared_ptr dim_data_; std::shared_ptr dims_; std::shared_ptr> value_data_; diff --git a/python/src/pynumbuf/adapters/python.cc b/python/src/pynumbuf/adapters/python.cc index fd425a861..2d41cbaa3 100644 --- a/python/src/pynumbuf/adapters/python.cc +++ b/python/src/pynumbuf/adapters/python.cc @@ -166,12 +166,11 @@ Status SerializeSequences(std::vector sequences, int32_t recursion_de if (subdicts.size() > 0) { RETURN_NOT_OK(SerializeDict(subdicts, recursion_depth + 1, &dict)); } - *out = builder.Finish(list, tuple, dict); - return Status::OK(); + return builder.Finish(list, tuple, dict, out); } #define DESERIALIZE_SEQUENCE(CREATE, SET_ITEM) \ - auto data = std::dynamic_pointer_cast(array); \ + auto data = std::dynamic_pointer_cast(array); \ int32_t size = array->length(); \ PyObject* result = CREATE(stop_idx - start_idx); \ auto types = std::make_shared(size, data->types()); \ @@ -231,7 +230,7 @@ Status SerializeDict(std::vector dicts, int32_t recursion_depth, std: if (val_dicts.size() > 0) { RETURN_NOT_OK(SerializeDict(val_dicts, recursion_depth + 1, &val_dict_arr)); } - *out = result.Finish(key_tuples_arr, val_list_arr, val_tuples_arr, val_dict_arr); + RETURN_NOT_OK(result.Finish(key_tuples_arr, val_list_arr, val_tuples_arr, val_dict_arr, out)); // This block is used to decrement the reference counts of the results // returned by the serialization callback, which is called in SerializeArray diff --git a/python/src/pynumbuf/memory.h b/python/src/pynumbuf/memory.h index ebaa22c51..eb926dbcb 100644 --- a/python/src/pynumbuf/memory.h +++ b/python/src/pynumbuf/memory.h @@ -1,22 +1,32 @@ #ifndef PYNUMBUF_MEMORY_H #define PYNUMBUF_MEMORY_H -#include +#include namespace numbuf { -class BufferSource : public arrow::ipc::MemorySource { +class FixedBufferStream : public arrow::io::OutputStream, public arrow::io::ReadableFileInterface { public: - 
virtual ~BufferSource() {} + virtual ~FixedBufferStream() {} - explicit BufferSource(uint8_t* data, int64_t nbytes) - : data_(data), size_(nbytes) {} + explicit FixedBufferStream(uint8_t* data, int64_t nbytes) + : data_(data), position_(0), size_(nbytes) {} - arrow::Status ReadAt(int64_t position, int64_t nbytes, - std::shared_ptr* out) override { + arrow::Status Read(int64_t nbytes, std::shared_ptr* out) override { DCHECK(out); - DCHECK(position + nbytes <= size_) << "position: " << position << " nbytes: " << nbytes << "size: " << size_; - *out = std::make_shared(data_ + position, nbytes); + DCHECK(position_ + nbytes <= size_) << "position: " << position_ << " nbytes: " << nbytes << "size: " << size_; + *out = std::make_shared(data_ + position_, nbytes); + position_ += nbytes; + return arrow::Status::OK(); + } + + arrow::Status Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) { + assert(0); + return arrow::Status::NotImplemented("FixedBufferStream: copying Read is not implemented"); + } + + arrow::Status Seek(int64_t position) override { + position_ = position; return arrow::Status::OK(); } @@ -24,24 +34,35 @@ class BufferSource : public arrow::ipc::MemorySource { return arrow::Status::OK(); } - arrow::Status Write(int64_t position, const uint8_t* data, - int64_t nbytes) override { - DCHECK(position >= 0 && position < size_); - DCHECK(position + nbytes <= size_) << "position: " << position << " nbytes: " << nbytes << "size: " << size_; - uint8_t* dst = data_ + position; - memcpy(dst, data, nbytes); + arrow::Status Tell(int64_t* position) override { + *position = position_; return arrow::Status::OK(); } - int64_t Size() const override { - return size_; + arrow::Status Write(const uint8_t* data, int64_t nbytes) override { + DCHECK(position_ >= 0 && position_ < size_); + DCHECK(position_ + nbytes <= size_) << "position: " << position_ << " nbytes: " << nbytes << "size: " << size_; + uint8_t* dst = data_ + position_; + memcpy(dst, data, nbytes); + position_ += nbytes; + return arrow::Status::OK(); + } + + arrow::Status 
GetSize(int64_t *size) override { + *size = size_; + return arrow::Status::OK(); + } + + bool supports_zero_copy() const override { + return true; } private: uint8_t* data_; + int64_t position_; int64_t size_; }; -} // namespace +} // namespace numbuf #endif // PYNUMBUF_MEMORY_H diff --git a/python/src/pynumbuf/numbuf.cc b/python/src/pynumbuf/numbuf.cc index 8951b07be..ba2483810 100644 --- a/python/src/pynumbuf/numbuf.cc +++ b/python/src/pynumbuf/numbuf.cc @@ -1,6 +1,5 @@ #include #include -#include #include #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define PY_ARRAY_UNIQUE_SYMBOL NUMBUF_ARRAY_API @@ -16,10 +15,10 @@ using namespace arrow; using namespace numbuf; -std::shared_ptr make_row_batch(std::shared_ptr data) { +std::shared_ptr make_row_batch(std::shared_ptr data) { auto field = std::make_shared("list", data->type()); std::shared_ptr schema(new Schema({field})); - return std::shared_ptr(new RowBatch(schema, data->length(), {data})); + return std::shared_ptr(new RecordBatch(schema, data->length(), {data})); } extern "C" { @@ -29,9 +28,9 @@ static PyObject *NumbufError; PyObject *numbuf_serialize_callback = NULL; PyObject *numbuf_deserialize_callback = NULL; -int PyObjectToArrow(PyObject* object, std::shared_ptr **result) { +int PyObjectToArrow(PyObject* object, std::shared_ptr **result) { if (PyCapsule_IsValid(object, "arrow")) { - *result = reinterpret_cast*>(PyCapsule_GetPointer(object, "arrow")); + *result = reinterpret_cast*>(PyCapsule_GetPointer(object, "arrow")); return 1; } else { PyErr_SetString(PyExc_TypeError, "must be an 'arrow' capsule"); @@ -40,7 +39,7 @@ int PyObjectToArrow(PyObject* object, std::shared_ptr **result) { } static void ArrowCapsule_Destructor(PyObject* capsule) { - delete reinterpret_cast*>(PyCapsule_GetPointer(capsule, "arrow")); + delete reinterpret_cast*>(PyCapsule_GetPointer(capsule, "arrow")); } /* Documented in doc/numbuf.rst in ray-core */ @@ -62,11 +61,11 @@ static PyObject* serialize_list(PyObject* self, PyObject* 
args) { return NULL; } - auto batch = new std::shared_ptr(); + auto batch = new std::shared_ptr(); *batch = make_row_batch(array); int64_t size = 0; - ARROW_CHECK_OK(arrow::ipc::GetRowBatchSize(batch->get(), &size)); + ARROW_CHECK_OK(arrow::ipc::GetRecordBatchSize(batch->get(), &size)); std::shared_ptr buffer; ARROW_CHECK_OK(ipc::WriteSchema((*batch)->schema().get(), &buffer)); @@ -84,7 +83,7 @@ static PyObject* serialize_list(PyObject* self, PyObject* args) { /* Documented in doc/numbuf.rst in ray-core */ static PyObject* write_to_buffer(PyObject* self, PyObject* args) { - std::shared_ptr* batch; + std::shared_ptr* batch; PyObject* memoryview; if (!PyArg_ParseTuple(args, "O&O", &PyObjectToArrow, &batch, &memoryview)) { return NULL; @@ -93,10 +92,11 @@ static PyObject* write_to_buffer(PyObject* self, PyObject* args) { return NULL; } Py_buffer* buffer = PyMemoryView_GET_BUFFER(memoryview); - auto target = std::make_shared(reinterpret_cast(buffer->buf), buffer->len); - int64_t metadata_offset; - ARROW_CHECK_OK(ipc::WriteRowBatch(target.get(), batch->get(), 0, &metadata_offset)); - return PyInt_FromLong(metadata_offset); + auto target = std::make_shared(reinterpret_cast(buffer->buf), buffer->len); + int64_t body_end_offset; + int64_t header_end_offset; + ARROW_CHECK_OK(ipc::WriteRecordBatch((*batch)->columns(), (*batch)->num_rows(), target.get(), &body_end_offset, &header_end_offset)); + return PyInt_FromLong(header_end_offset); } /* Documented in doc/numbuf.rst in ray-core */ @@ -118,11 +118,11 @@ static PyObject* read_from_buffer(PyObject* self, PyObject* args) { ARROW_CHECK_OK(schema_msg->GetSchema(&schema)); Py_buffer* buffer = PyMemoryView_GET_BUFFER(memoryview); - auto source = std::make_shared(reinterpret_cast(buffer->buf), buffer->len); - std::shared_ptr reader; - ARROW_CHECK_OK(arrow::ipc::RowBatchReader::Open(source.get(), metadata_offset, &reader)); - auto batch = new std::shared_ptr(); - ARROW_CHECK_OK(reader->GetRowBatch(schema, batch)); + auto source = 
std::make_shared(reinterpret_cast(buffer->buf), buffer->len); + std::shared_ptr reader; + ARROW_CHECK_OK(arrow::ipc::RecordBatchReader::Open(source.get(), metadata_offset, &reader)); + auto batch = new std::shared_ptr(); + ARROW_CHECK_OK(reader->GetRecordBatch(schema, batch)); return PyCapsule_New(reinterpret_cast(batch), "arrow", &ArrowCapsule_Destructor); @@ -130,7 +130,7 @@ static PyObject* read_from_buffer(PyObject* self, PyObject* args) { /* Documented in doc/numbuf.rst in ray-core */ static PyObject* deserialize_list(PyObject* self, PyObject* args) { - std::shared_ptr* data; + std::shared_ptr* data; PyObject* base = Py_None; if (!PyArg_ParseTuple(args, "O&|O", &PyObjectToArrow, &data, &base)) { return NULL; diff --git a/setup.py b/setup.py index ce9a51bc7..066ca4d0c 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,13 @@ import subprocess from setuptools import setup, find_packages, Extension import setuptools.command.install as _install +from sys import platform + +extension = "" +if platform == "linux" or platform == "linux2": + extension = ".so" +elif platform == "darwin": + extension = ".dylib" # Because of relative paths, this must be run from inside numbuf/. @@ -8,7 +15,6 @@ class install(_install.install): def run(self): subprocess.check_call(["./setup.sh"]) subprocess.check_call(["./build.sh"]) - subprocess.check_call(["cp", "libnumbuf.so", "numbuf/"]) # Calling _install.install.run(self) does not fetch required packages and # instead performs an old-style install. See command/install.py in # setuptools. So, calling do_egg_install() manually here. 
@@ -17,7 +23,7 @@ class install(_install.install): setup(name="numbuf", version="0.0.1", packages=find_packages(), - package_data={"numbuf": ["libnumbuf.so"]}, + package_data={"numbuf": ["libnumbuf.so", "libarrow" + extension, "libarrow_io" + extension, "libarrow_ipc" + extension]}, cmdclass={"install": install}, setup_requires=["numpy"], include_package_data=True, diff --git a/thirdparty/arrow b/thirdparty/arrow index 1407126ec..58bd7bedc 160000 --- a/thirdparty/arrow +++ b/thirdparty/arrow @@ -1 +1 @@ -Subproject commit 1407126ec0956b6dc6f9873bdeeef4d042ca032f +Subproject commit 58bd7bedc63d66d5898297bab25b54dfb67665db diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh index 7c307f816..e5c4b5b45 100755 --- a/thirdparty/build_thirdparty.sh +++ b/thirdparty/build_thirdparty.sh @@ -20,8 +20,7 @@ fi echo "building arrow" cd $TP_DIR/arrow/cpp -source setup_build_env.sh mkdir -p $TP_DIR/arrow/cpp/build cd $TP_DIR/arrow/cpp/build -cmake -DLIBARROW_LINKAGE=STATIC -DCMAKE_BUILD_TYPE=Release .. +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-g" -DCMAKE_CXX_FLAGS="-g" .. make VERBOSE=1 -j$PARALLEL