diff --git a/python/ray/__init__.py b/python/ray/__init__.py index 59bfebda1..9d0a68722 100644 --- a/python/ray/__init__.py +++ b/python/ray/__init__.py @@ -2,11 +2,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os +import sys +# Add the directory containing pyarrow to the Python path so that we find the +# pyarrow version packaged with ray and not a pre-existing pyarrow. +pyarrow_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + "pyarrow_files") +sys.path.insert(0, pyarrow_path) + from ray.worker import (register_class, error_info, init, connect, disconnect, get, put, wait, remote, log_event, log_span, - flush_log, get_gpu_ids) -from ray.worker import SCRIPT_MODE, WORKER_MODE, PYTHON_MODE, SILENT_MODE -from ray.worker import global_state + flush_log, get_gpu_ids) # noqa: E402 +from ray.worker import (SCRIPT_MODE, WORKER_MODE, PYTHON_MODE, + SILENT_MODE) # noqa: E402 +from ray.worker import global_state # noqa: E402 # We import ray.actor because some code is run in actor.py which initializes # some functions in the worker. import ray.actor # noqa: F401 @@ -20,7 +29,7 @@ __all__ = ["register_class", "error_info", "init", "connect", "disconnect", "flush_log", "actor", "get_gpu_ids", "SCRIPT_MODE", "WORKER_MODE", "PYTHON_MODE", "SILENT_MODE", "global_state", "__version__"] -import ctypes +import ctypes # noqa: E402 # Windows only if hasattr(ctypes, "windll"): # Makes sure that all child processes die when we die. Also makes sure that diff --git a/python/ray/global_scheduler/test/test.py b/python/ray/global_scheduler/test/test.py index 18634d768..91df772ea 100644 --- a/python/ray/global_scheduler/test/test.py +++ b/python/ray/global_scheduler/test/test.py @@ -10,14 +10,15 @@ import sys import time import unittest -import pyarrow as pa +# The ray import must come before the pyarrow import because ray modifies the +# python path so that the right version of pyarrow is found. import ray.global_scheduler as global_scheduler import ray.local_scheduler as local_scheduler import ray.plasma as plasma from ray.plasma.utils import create_object - from ray import services from ray.experimental import state +import pyarrow as pa USE_VALGRIND = False PLASMA_STORE_MEMORY = 1000000000 diff --git a/python/ray/plasma/test/test.py b/python/ray/plasma/test/test.py index dba6253d3..c5292d9df 100644 --- a/python/ray/plasma/test/test.py +++ b/python/ray/plasma/test/test.py @@ -13,12 +13,14 @@ import threading import time import unittest -import pyarrow as pa -import pyarrow.plasma as plasma +# The ray import must come before the pyarrow import because ray modifies the +# python path so that the right version of pyarrow is found. import ray from ray.plasma.utils import (random_object_id, create_object_with_id, create_object) from ray import services +import pyarrow as pa +import pyarrow.plasma as plasma USE_VALGRIND = False PLASMA_STORE_MEMORY = 1000000000 diff --git a/python/ray/pyarrow_files/.gitkeep b/python/ray/pyarrow_files/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/python/setup.py b/python/setup.py index 04bd430b9..372b2da06 100644 --- a/python/setup.py +++ b/python/setup.py @@ -10,35 +10,42 @@ import sys from setuptools import setup, find_packages, Distribution import setuptools.command.build_ext as _build_ext - -# This used to be the first line of the run method in the build_ext class. -# However, we moved it here because the previous approach seemed to fail in -# Docker. Inside of the build.sh script, we install the pyarrow Python module. -# Something about calling "python setup.py install" inside of the build_ext -# run method doesn't work (this is easily reproducible in Docker with just a -# couple files to simulate two Python modules). The problem is that the pyarrow -# module doesn't get added to the easy-install.pth file, so it never gets added -# to the Python path even though the package is built and copied to the right -# location. An alternative fix would be to manually modify the easy-install.pth -# file. TODO(rkn): Fix all of this. -# -# Note: We are passing in sys.executable so that we use the same version of -# Python to build pyarrow inside the build.sh script. Note that certain flags -# will not be passed along such as --user or sudo. TODO(rkn): Fix this. -subprocess.check_call(["../build.sh", sys.executable]) +# Ideally, we could include these files by putting them in a +# MANIFEST.in or using the package_data argument to setup, but the +# MANIFEST.in gets applied at the very beginning when setup.py runs +# before these files have been created, so we have to move the files +# manually. +ray_files = [ + "ray/core/src/common/thirdparty/redis/src/redis-server", + "ray/core/src/common/redis_module/libray_redis_module.so", + "ray/core/src/plasma/plasma_store", + "ray/core/src/plasma/plasma_manager", + "ray/core/src/local_scheduler/local_scheduler", + "ray/core/src/local_scheduler/liblocal_scheduler_library.so", + "ray/core/src/numbuf/libnumbuf.so", + "ray/core/src/global_scheduler/global_scheduler", + "ray/WebUI.ipynb" +] class build_ext(_build_ext.build_ext): def run(self): - # The line below has been moved outside of the build_ext class. See the - # explanation there. - # subprocess.check_call(["../build.sh"]) + # Note: We are passing in sys.executable so that we use the same + # version of Python to build pyarrow inside the build.sh script. Note + # that certain flags will not be passed along such as --user or sudo. + # TODO(rkn): Fix this. + subprocess.check_call(["../build.sh", sys.executable]) + + # We also need to install pyarrow along with Ray, so make sure that the + # relevant non-Python pyarrow files get copied. + pyarrow_files = [ + os.path.join("ray/pyarrow_files/pyarrow", filename) + for filename in os.listdir("./ray/pyarrow_files/pyarrow") + if not os.path.isdir(os.path.join("ray/pyarrow_files/pyarrow", + filename))] + + files_to_include = ray_files + pyarrow_files - # Ideally, we could include these files by putting them in a - # MANIFEST.in or using the package_data argument to setup, but the - # MANIFEST.in gets applied at the very beginning when setup.py runs - # before these files have been created, so we have to move the files - # manually. for filename in files_to_include: self.move_file(filename) # Copy over the autogenerated flatbuffer Python bindings. @@ -62,19 +69,6 @@ class build_ext(_build_ext.build_ext): shutil.copy(source, destination) -files_to_include = [ - "ray/core/src/common/thirdparty/redis/src/redis-server", - "ray/core/src/common/redis_module/libray_redis_module.so", - "ray/core/src/plasma/plasma_store", - "ray/core/src/plasma/plasma_manager", - "ray/core/src/local_scheduler/local_scheduler", - "ray/core/src/local_scheduler/liblocal_scheduler_library.so", - "ray/core/src/numbuf/libnumbuf.so", - "ray/core/src/global_scheduler/global_scheduler", - "ray/WebUI.ipynb" -] - - class BinaryDistribution(Distribution): def has_ext_modules(self): return True diff --git a/src/thirdparty/build_thirdparty.sh b/src/thirdparty/build_thirdparty.sh index c4f75e1c4..b60207b9a 100755 --- a/src/thirdparty/build_thirdparty.sh +++ b/src/thirdparty/build_thirdparty.sh @@ -62,4 +62,11 @@ cd $TP_DIR/arrow/python # We set PKG_CONFIG_PATH, which is important so that in cmake, pkg-config can # find plasma. ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install -PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig PYARROW_WITH_PLASMA=1 PYARROW_BUNDLE_ARROW_CPP=1 $PYTHON_EXECUTABLE setup.py install +PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig PYARROW_WITH_PLASMA=1 PYARROW_BUNDLE_ARROW_CPP=1 $PYTHON_EXECUTABLE setup.py build +PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig PYARROW_WITH_PLASMA=1 PYARROW_BUNDLE_ARROW_CPP=1 $PYTHON_EXECUTABLE setup.py build_ext +# Find the pyarrow directory that was just built and copy it to ray/python/ray/ +# so that pyarrow can be packaged along with ray. TODO(rkn): This doesn't seem +# very robust. Fix this. +PYARROW_BUILD_LIB_DIR=$(find $TP_DIR/arrow/python/build -type d -maxdepth 1 -print | grep -m1 'lib') +echo "copying pyarrow files from $PYARROW_BUILD_LIB_DIR/pyarrow" +cp -r $PYARROW_BUILD_LIB_DIR/pyarrow $TP_DIR/../../python/ray/pyarrow_files/