mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 21:38:18 +08:00
Package pyarrow along with ray. (#822)
* Rough pass at installing pyarrow along with Ray.
* Remove hardcoded path and try to find correct path automatically.
* Add print.
* Fix linting.
* Copy pyarrow files to a location that we manually add to python path in order to avoid interfering with pre-existing pyarrow installations.
* Move call to build.sh back into build_ext in setup.py.
* Ignore some linting errors.
* Fix problem in which pyarrow files to copy were listed before they were built.
* Fix tests by importing ray before pyarrow.
This commit is contained in:
committed by
Philipp Moritz
parent
0e6e38115f
commit
03f2325780
+13
-4
@@ -2,11 +2,20 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
# Add the directory containing pyarrow to the Python path so that we find the
|
||||
# pyarrow version packaged with ray and not a pre-existing pyarrow.
|
||||
pyarrow_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
|
||||
"pyarrow_files")
|
||||
sys.path.insert(0, pyarrow_path)
|
||||
|
||||
from ray.worker import (register_class, error_info, init, connect, disconnect,
|
||||
get, put, wait, remote, log_event, log_span,
|
||||
flush_log, get_gpu_ids)
|
||||
from ray.worker import SCRIPT_MODE, WORKER_MODE, PYTHON_MODE, SILENT_MODE
|
||||
from ray.worker import global_state
|
||||
flush_log, get_gpu_ids) # noqa: E402
|
||||
from ray.worker import (SCRIPT_MODE, WORKER_MODE, PYTHON_MODE,
|
||||
SILENT_MODE) # noqa: E402
|
||||
from ray.worker import global_state # noqa: E402
|
||||
# We import ray.actor because some code is run in actor.py which initializes
|
||||
# some functions in the worker.
|
||||
import ray.actor # noqa: F401
|
||||
@@ -20,7 +29,7 @@ __all__ = ["register_class", "error_info", "init", "connect", "disconnect",
|
||||
"flush_log", "actor", "get_gpu_ids", "SCRIPT_MODE", "WORKER_MODE",
|
||||
"PYTHON_MODE", "SILENT_MODE", "global_state", "__version__"]
|
||||
|
||||
import ctypes
|
||||
import ctypes # noqa: E402
|
||||
# Windows only
|
||||
if hasattr(ctypes, "windll"):
|
||||
# Makes sure that all child processes die when we die. Also makes sure that
|
||||
|
||||
@@ -10,14 +10,15 @@ import sys
|
||||
import time
|
||||
import unittest
|
||||
|
||||
import pyarrow as pa
|
||||
# The ray import must come before the pyarrow import because ray modifies the
|
||||
# python path so that the right version of pyarrow is found.
|
||||
import ray.global_scheduler as global_scheduler
|
||||
import ray.local_scheduler as local_scheduler
|
||||
import ray.plasma as plasma
|
||||
from ray.plasma.utils import create_object
|
||||
|
||||
from ray import services
|
||||
from ray.experimental import state
|
||||
import pyarrow as pa
|
||||
|
||||
USE_VALGRIND = False
|
||||
PLASMA_STORE_MEMORY = 1000000000
|
||||
|
||||
@@ -13,12 +13,14 @@ import threading
|
||||
import time
|
||||
import unittest
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.plasma as plasma
|
||||
# The ray import must come before the pyarrow import because ray modifies the
|
||||
# python path so that the right version of pyarrow is found.
|
||||
import ray
|
||||
from ray.plasma.utils import (random_object_id,
|
||||
create_object_with_id, create_object)
|
||||
from ray import services
|
||||
import pyarrow as pa
|
||||
import pyarrow.plasma as plasma
|
||||
|
||||
USE_VALGRIND = False
|
||||
PLASMA_STORE_MEMORY = 1000000000
|
||||
|
||||
+31
-37
@@ -10,35 +10,42 @@ import sys
|
||||
from setuptools import setup, find_packages, Distribution
|
||||
import setuptools.command.build_ext as _build_ext
|
||||
|
||||
|
||||
# This used to be the first line of the run method in the build_ext class.
|
||||
# However, we moved it here because the previous approach seemed to fail in
|
||||
# Docker. Inside of the build.sh script, we install the pyarrow Python module.
|
||||
# Something about calling "python setup.py install" inside of the build_ext
|
||||
# run method doesn't work (this is easily reproducible in Docker with just a
|
||||
# couple files to simulate two Python modules). The problem is that the pyarrow
|
||||
# module doesn't get added to the easy-install.pth file, so it never gets added
|
||||
# to the Python path even though the package is built and copied to the right
|
||||
# location. An alternative fix would be to manually modify the easy-install.pth
|
||||
# file. TODO(rkn): Fix all of this.
|
||||
#
|
||||
# Note: We are passing in sys.executable so that we use the same version of
|
||||
# Python to build pyarrow inside the build.sh script. Note that certain flags
|
||||
# will not be passed along such as --user or sudo. TODO(rkn): Fix this.
|
||||
subprocess.check_call(["../build.sh", sys.executable])
|
||||
# Ideally, we could include these files by putting them in a
|
||||
# MANIFEST.in or using the package_data argument to setup, but the
|
||||
# MANIFEST.in gets applied at the very beginning when setup.py runs
|
||||
# before these files have been created, so we have to move the files
|
||||
# manually.
|
||||
ray_files = [
|
||||
"ray/core/src/common/thirdparty/redis/src/redis-server",
|
||||
"ray/core/src/common/redis_module/libray_redis_module.so",
|
||||
"ray/core/src/plasma/plasma_store",
|
||||
"ray/core/src/plasma/plasma_manager",
|
||||
"ray/core/src/local_scheduler/local_scheduler",
|
||||
"ray/core/src/local_scheduler/liblocal_scheduler_library.so",
|
||||
"ray/core/src/numbuf/libnumbuf.so",
|
||||
"ray/core/src/global_scheduler/global_scheduler",
|
||||
"ray/WebUI.ipynb"
|
||||
]
|
||||
|
||||
|
||||
class build_ext(_build_ext.build_ext):
|
||||
def run(self):
|
||||
# The line below has been moved outside of the build_ext class. See the
|
||||
# explanation there.
|
||||
# subprocess.check_call(["../build.sh"])
|
||||
# Note: We are passing in sys.executable so that we use the same
|
||||
# version of Python to build pyarrow inside the build.sh script. Note
|
||||
# that certain flags will not be passed along such as --user or sudo.
|
||||
# TODO(rkn): Fix this.
|
||||
subprocess.check_call(["../build.sh", sys.executable])
|
||||
|
||||
# We also need to install pyarrow along with Ray, so make sure that the
|
||||
# relevant non-Python pyarrow files get copied.
|
||||
pyarrow_files = [
|
||||
os.path.join("ray/pyarrow_files/pyarrow", filename)
|
||||
for filename in os.listdir("./ray/pyarrow_files/pyarrow")
|
||||
if not os.path.isdir(os.path.join("ray/pyarrow_files/pyarrow",
|
||||
filename))]
|
||||
|
||||
files_to_include = ray_files + pyarrow_files
|
||||
|
||||
# Ideally, we could include these files by putting them in a
|
||||
# MANIFEST.in or using the package_data argument to setup, but the
|
||||
# MANIFEST.in gets applied at the very beginning when setup.py runs
|
||||
# before these files have been created, so we have to move the files
|
||||
# manually.
|
||||
for filename in files_to_include:
|
||||
self.move_file(filename)
|
||||
# Copy over the autogenerated flatbuffer Python bindings.
|
||||
@@ -62,19 +69,6 @@ class build_ext(_build_ext.build_ext):
|
||||
shutil.copy(source, destination)
|
||||
|
||||
|
||||
files_to_include = [
|
||||
"ray/core/src/common/thirdparty/redis/src/redis-server",
|
||||
"ray/core/src/common/redis_module/libray_redis_module.so",
|
||||
"ray/core/src/plasma/plasma_store",
|
||||
"ray/core/src/plasma/plasma_manager",
|
||||
"ray/core/src/local_scheduler/local_scheduler",
|
||||
"ray/core/src/local_scheduler/liblocal_scheduler_library.so",
|
||||
"ray/core/src/numbuf/libnumbuf.so",
|
||||
"ray/core/src/global_scheduler/global_scheduler",
|
||||
"ray/WebUI.ipynb"
|
||||
]
|
||||
|
||||
|
||||
class BinaryDistribution(Distribution):
|
||||
def has_ext_modules(self):
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user