Package pyarrow along with ray. (#822)

* Rough pass at installing pyarrow along with Ray.

* Remove hardcoded path and try to find correct path automatically.

* Add print.

* Fix linting.

* Copy pyarrow files to a location that we manually add to the Python path, in order to avoid interfering with pre-existing pyarrow installations.

* Move call to build.sh back into build_ext in setup.py.

* Ignore some linting errors.

* Fix a problem in which the pyarrow files to copy were listed before they were built.

* Fix tests by importing ray before pyarrow.
This commit is contained in:
Robert Nishihara
2017-08-07 21:17:28 -07:00
committed by Philipp Moritz
parent 0e6e38115f
commit 03f2325780
6 changed files with 59 additions and 46 deletions
+31 -37
View File
@@ -10,35 +10,42 @@ import sys
from setuptools import setup, find_packages, Distribution
import setuptools.command.build_ext as _build_ext
# This used to be the first line of the run method in the build_ext class.
# However, we moved it here because the previous approach seemed to fail in
# Docker. Inside of the build.sh script, we install the pyarrow Python module.
# Something about calling "python setup.py install" inside of the build_ext
# run method doesn't work (this is easily reproducible in Docker with just a
# couple files to simulate two Python modules). The problem is that the pyarrow
# module doesn't get added to the easy-install.pth file, so it never gets added
# to the Python path even though the package is built and copied to the right
# location. An alternative fix would be to manually modify the easy-install.pth
# file. TODO(rkn): Fix all of this.
#
# Note: We are passing in sys.executable so that we use the same version of
# Python to build pyarrow inside the build.sh script. Note that certain flags
# will not be passed along such as --user or sudo. TODO(rkn): Fix this.
subprocess.check_call(["../build.sh", sys.executable])
# Ideally, we could include these files by putting them in a
# MANIFEST.in or using the package_data argument to setup, but the
# MANIFEST.in gets applied at the very beginning when setup.py runs
# before these files have been created, so we have to move the files
# manually.
ray_files = [
"ray/core/src/common/thirdparty/redis/src/redis-server",
"ray/core/src/common/redis_module/libray_redis_module.so",
"ray/core/src/plasma/plasma_store",
"ray/core/src/plasma/plasma_manager",
"ray/core/src/local_scheduler/local_scheduler",
"ray/core/src/local_scheduler/liblocal_scheduler_library.so",
"ray/core/src/numbuf/libnumbuf.so",
"ray/core/src/global_scheduler/global_scheduler",
"ray/WebUI.ipynb"
]
class build_ext(_build_ext.build_ext):
def run(self):
# The line below has been moved outside of the build_ext class. See the
# explanation there.
# subprocess.check_call(["../build.sh"])
# Note: We are passing in sys.executable so that we use the same
# version of Python to build pyarrow inside the build.sh script. Note
# that certain flags will not be passed along such as --user or sudo.
# TODO(rkn): Fix this.
subprocess.check_call(["../build.sh", sys.executable])
# We also need to install pyarrow along with Ray, so make sure that the
# relevant non-Python pyarrow files get copied.
pyarrow_files = [
os.path.join("ray/pyarrow_files/pyarrow", filename)
for filename in os.listdir("./ray/pyarrow_files/pyarrow")
if not os.path.isdir(os.path.join("ray/pyarrow_files/pyarrow",
filename))]
files_to_include = ray_files + pyarrow_files
# Ideally, we could include these files by putting them in a
# MANIFEST.in or using the package_data argument to setup, but the
# MANIFEST.in gets applied at the very beginning when setup.py runs
# before these files have been created, so we have to move the files
# manually.
for filename in files_to_include:
self.move_file(filename)
# Copy over the autogenerated flatbuffer Python bindings.
@@ -62,19 +69,6 @@ class build_ext(_build_ext.build_ext):
shutil.copy(source, destination)
files_to_include = [
"ray/core/src/common/thirdparty/redis/src/redis-server",
"ray/core/src/common/redis_module/libray_redis_module.so",
"ray/core/src/plasma/plasma_store",
"ray/core/src/plasma/plasma_manager",
"ray/core/src/local_scheduler/local_scheduler",
"ray/core/src/local_scheduler/liblocal_scheduler_library.so",
"ray/core/src/numbuf/libnumbuf.so",
"ray/core/src/global_scheduler/global_scheduler",
"ray/WebUI.ipynb"
]
class BinaryDistribution(Distribution):
def has_ext_modules(self):
return True