[DataFrame] Add Parquet Support in Build Process (#1531)

* Add shell script for building parquet

* Use parquet ci script; remove anaconda

* Remove gcc flag, use default

* Add BOOST_ROOT

* Fix $TP_DIR reference issue

* fix the PR

* Check out specific parquet-cpp commit
This commit is contained in:
Simon Mo
2018-02-16 07:18:42 -08:00
committed by Philipp Moritz
parent 844a6afcdd
commit a24cc28773
7 changed files with 51 additions and 6 deletions
+1 -1
View File
@@ -72,7 +72,7 @@ matrix:
install:
- ./.travis/install-dependencies.sh
# This command should be kept in sync with ray/python/README-building-wheels.md.
- docker run --rm -w /ray -v `pwd`:/ray -ti quay.io/xhochy/arrow_manylinux1_x86_64_base:ARROW-1024 /ray/python/build-wheel-manylinux1.sh
- docker run --rm -w /ray -v `pwd`:/ray -ti quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /ray/python/build-wheel-manylinux1.sh
script:
- ./.travis/test-wheels.sh
+1 -1
View File
@@ -4,7 +4,7 @@
FROM ubuntu:xenial
RUN apt-get update \
&& apt-get install -y vim git wget \
&& apt-get install -y cmake pkg-config build-essential autoconf curl libtool unzip
&& apt-get install -y cmake pkg-config build-essential autoconf curl libtool unzip flex bison
RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh \
&& wget --quiet 'https://repo.continuum.io/archive/Anaconda2-4.2.0-Linux-x86_64.sh' -O /tmp/anaconda.sh \
&& /bin/bash /tmp/anaconda.sh -b -p /opt/conda \
+1 -1
View File
@@ -9,7 +9,7 @@ produce .whl files owned by root.
Inside the root directory (i.e., one level above this python directory), run
```
docker run --rm -w /ray -v `pwd`:/ray -ti quay.io/xhochy/arrow_manylinux1_x86_64_base:ARROW-1024 /ray/python/build-wheel-manylinux1.sh
docker run --rm -w /ray -v `pwd`:/ray -ti quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /ray/python/build-wheel-manylinux1.sh
```
The wheel files will be placed in the .whl directory.
+1 -1
View File
@@ -28,6 +28,6 @@ if [[ ! -d $TP_DIR/boost ]]; then
# Compile boost.
pushd $TP_DIR/boost_$BOOST_VERSION_UNDERSCORE
./bootstrap.sh
./bjam cxxflags=-fPIC cflags=-fPIC --prefix=$TP_DIR/boost --with-filesystem --with-system install > /dev/null
./bjam cxxflags=-fPIC cflags=-fPIC variant=release link=static --prefix=$TP_DIR/boost --with-filesystem --with-system --with-regex install > /dev/null
popd
fi
+33
View File
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Configure, build and install the parquet-cpp checkout that lives next to this
# script ($TP_DIR/parquet-cpp).
#
# Inputs (environment):
#   PARQUET_HOME  Install prefix passed to CMake. When unset or empty it falls
#                 back to the Arrow install directory used below.
#
# On macOS the build prerequisites are installed through Homebrew; on other
# platforms BOOST_ROOT is pointed at $TP_DIR/boost.

# Echo every command as it runs.
set -x

# Cause the script to exit if a single command fails.
set -e

unamestr="$(uname)"
# Absolute path of the directory containing this script.
TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)

if [ "$unamestr" == "Darwin" ]; then
  brew update > /dev/null
  # The trailing "&& true" keeps `set -e` from aborting the script when this
  # particular command returns a non-zero status.
  brew install boost && true
  brew install openssl
  brew install bison
  export OPENSSL_ROOT_DIR=/usr/local/opt/openssl
  export LD_LIBRARY_PATH=/usr/local/opt/openssl/lib:$LD_LIBRARY_PATH
  # Put the Homebrew bison ahead of any other bison on the PATH.
  export PATH="/usr/local/opt/bison/bin:$PATH"
else
  export BOOST_ROOT="$TP_DIR/boost"
fi

cd "$TP_DIR/parquet-cpp"

# Exported so that the cmake child process can see it: a plain assignment only
# reaches child processes if the caller had already exported the variable.
# NOTE(review): presumably read by parquet-cpp's CMake configuration to locate
# Arrow -- confirm against the pinned parquet-cpp commit.
export ARROW_HOME="$TP_DIR/arrow/cpp/build/cpp-install"

# Avoid handing CMake an empty install prefix when the script is run without
# PARQUET_HOME set by the caller.
PARQUET_HOME="${PARQUET_HOME:-$ARROW_HOME}"

cmake -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_INSTALL_PREFIX="$PARQUET_HOME" \
      -DPARQUET_BUILD_BENCHMARKS=off \
      -DPARQUET_BUILD_EXECUTABLES=off \
      -DPARQUET_BUILD_TESTS=off \
      .

make -j4
make install
+6 -1
View File
@@ -67,7 +67,6 @@ cmake -DCMAKE_BUILD_TYPE=Release \
-DARROW_JEMALLOC=off \
-DARROW_WITH_BROTLI=off \
-DARROW_WITH_LZ4=off \
-DARROW_WITH_SNAPPY=off \
-DARROW_WITH_ZLIB=off \
-DARROW_WITH_ZSTD=off \
..
@@ -81,6 +80,10 @@ if [[ -d $ARROW_HOME/lib64 ]]; then
cp -r $ARROW_HOME/lib64 $ARROW_HOME/lib
fi
export PARQUET_HOME=$TP_DIR/arrow/cpp/build/cpp-install
bash "$TP_DIR/build_parquet.sh"
echo "installing pyarrow"
cd $TP_DIR/arrow/python
# We set PKG_CONFIG_PATH, which is important so that in cmake, pkg-config can
@@ -92,7 +95,9 @@ $PYTHON_EXECUTABLE setup.py build
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \
PYARROW_WITH_PLASMA=1 \
PYARROW_BUNDLE_ARROW_CPP=1 \
PYARROW_WITH_PARQUET=1 \
$PYTHON_EXECUTABLE setup.py build_ext
# Find the pyarrow directory that was just built and copy it to ray/python/ray/
# so that pyarrow can be packaged along with ray.
pushd .
+8 -1
View File
@@ -12,5 +12,12 @@ if [ ! -d $TP_DIR/arrow ]; then
fi
cd $TP_DIR/arrow
git fetch origin master
git checkout e26f3dad3675288564ef0c0330a5c9afcac652f1
cd $TP_DIR
if [ ! -d $TP_DIR/parquet-cpp ]; then
git clone https://github.com/apache/parquet-cpp.git "$TP_DIR/parquet-cpp"
pushd $TP_DIR/parquet-cpp
git checkout 76388ea4eb8b23656283116bc656b0c8f5db093b
popd
fi