From 5df801605e12de297fb660b360500d17bccdbd3b Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Tue, 18 Feb 2020 13:43:19 -0800 Subject: [PATCH] Add ray.util package and move libraries from experimental (#7100) --- .travis.yml | 2 +- BUILD.bazel | 1 + README.rst | 2 +- ci/jenkins_tests/run_tune_tests.sh | 30 ++-- ci/long_running_tests/workloads/serve.py | 4 +- .../run_application_stress_tests.sh | 2 +- ci/travis/determine_tests_to_run.py | 2 +- .../doc_code/raysgd_torch_signatures.py | 4 +- doc/examples/overview.rst | 10 +- doc/source/actors.rst | 10 +- doc/source/advanced.rst | 2 +- doc/source/async_api.rst | 4 +- doc/source/cluster-index.rst | 1 + doc/source/development.rst | 8 +- doc/source/index.rst | 23 +-- doc/source/iter.rst | 36 ++-- doc/source/joblib.rst | 20 +-- doc/source/memory-management.rst | 7 +- doc/source/multiprocessing.rst | 16 +- doc/source/raysgd/raysgd.rst | 19 +- doc/source/raysgd/raysgd_pytorch.rst | 25 +-- doc/source/raysgd/raysgd_ref.rst | 6 +- doc/source/raysgd/raysgd_tensorflow.rst | 11 +- doc/source/rllib-dev.rst | 4 +- doc/source/rllib-training.rst | 2 +- doc/source/serve.rst | 17 +- doc/source/signals.rst | 168 ------------------ doc/source/tune.rst | 6 +- doc/source/using-ray.rst | 1 + python/ray/__init__.py | 2 + python/ray/actor.py | 6 +- python/ray/experimental/__init__.py | 20 --- python/ray/experimental/gcs_flush_policy.py | 89 ---------- python/ray/experimental/sgd/__init__.py | 4 - python/ray/experimental/sgd/tf/__init__.py | 3 - python/ray/monitor.py | 64 ------- python/ray/scripts/scripts.py | 2 +- python/ray/{experimental => }/serve/BUILD | 0 .../ray/{experimental => }/serve/__init__.py | 6 +- python/ray/{experimental => }/serve/api.py | 27 ++- .../serve/backend_config.py | 0 .../ray/{experimental => }/serve/constants.py | 0 .../ray/{experimental => }/serve/context.py | 2 +- .../serve/examples/benchmark.py | 4 +- .../{experimental => }/serve/examples/echo.py | 4 +- .../serve/examples/echo_actor.py | 4 +- .../serve/examples/echo_actor_batch.py | 6 +- .../serve/examples/echo_batching.py | 4 +- .../serve/examples/echo_error.py | 4 +- .../serve/examples/echo_fixed_packing.py | 4 +- .../serve/examples/echo_full.py | 4 +- .../serve/examples/echo_pipeline.py | 2 +- .../serve/examples/echo_round_robin.py | 4 +- .../serve/examples/echo_slo_reverse.py | 2 +- .../serve/examples/echo_split.py | 4 +- .../{experimental => }/serve/exceptions.py | 0 .../{experimental => }/serve/global_state.py | 24 ++- python/ray/{experimental => }/serve/handle.py | 10 +- .../ray/{experimental => }/serve/http_util.py | 0 .../serve/kv_store_service.py | 7 +- python/ray/{experimental => }/serve/metric.py | 0 python/ray/{experimental => }/serve/policy.py | 4 +- python/ray/{experimental => }/serve/queues.py | 2 +- .../serve/request_params.py | 2 +- .../ray/{experimental => }/serve/scripts.py | 2 +- python/ray/{experimental => }/serve/server.py | 10 +- .../{experimental => }/serve/task_runner.py | 8 +- .../serve/tests/conftest.py | 2 +- .../serve/tests/test_api.py | 6 +- .../serve/tests/test_metric.py | 2 +- .../serve/tests/test_persistence.py | 4 +- .../serve/tests/test_queue.py | 4 +- .../serve/tests/test_routing.py | 4 +- .../serve/tests/test_serve.py | 0 .../serve/tests/test_task_runner.py | 10 +- .../serve/tests/test_util.py | 2 +- python/ray/{experimental => }/serve/utils.py | 4 +- python/ray/tests/test_actor_advanced.py | 12 +- python/ray/tests/test_actor_pool.py | 2 +- python/ray/tests/test_iter.py | 2 +- python/ray/tests/test_joblib.py | 4 +- python/ray/tests/test_multi_node.py | 4 +- python/ray/tests/test_multi_node_2.py | 1 - python/ray/util/__init__.py | 10 ++ .../ray/{experimental => util}/actor_pool.py | 0 python/ray/{experimental => util}/iter.py | 8 +- .../{experimental => util}/joblib/__init__.py | 2 +- .../joblib/ray_backend.py | 4 +- .../{experimental => util}/named_actors.py | 0 python/ray/util/sgd/__init__.py | 4 + .../sgd/pytorch/__init__.py | 4 +- .../sgd/pytorch/distributed_pytorch_runner.py | 2 +- .../pytorch/examples/cifar_pytorch_example.py | 6 +- .../sgd/pytorch/examples/dcgan.py | 7 +- .../sgd/pytorch/examples/example-sgd.yaml | 0 .../sgd/pytorch/examples/mnist_cnn.pt | Bin .../sgd/pytorch/examples/train_example.py | 2 +- .../sgd/pytorch/examples/tune_example.py | 2 +- .../sgd/pytorch/pytorch_runner.py | 4 +- .../sgd/pytorch/pytorch_trainer.py | 8 +- .../sgd/pytorch/resnet.py | 0 .../sgd/pytorch/utils.py | 2 +- .../sgd/tests/test_pytorch.py | 10 +- .../sgd/tests/test_pytorch_runner.py | 2 +- .../sgd/tests/test_tensorflow.py | 6 +- python/ray/util/sgd/tf/__init__.py | 3 + .../sgd/tf/examples/cifar_tf_example.py | 2 +- .../tf/examples/tensorflow_train_example.py | 2 +- .../sgd/tf/examples/tf-example-sgd.yaml | 0 .../sgd/tf/tf_runner.py | 2 +- .../sgd/tf/tf_trainer.py | 2 +- .../ray/{experimental => util}/sgd/utils.py | 0 rllib/evaluation/rollout_worker.py | 2 +- 113 files changed, 305 insertions(+), 637 deletions(-) delete mode 100644 doc/source/signals.rst delete mode 100644 python/ray/experimental/gcs_flush_policy.py delete mode 100644 python/ray/experimental/sgd/__init__.py delete mode 100644 python/ray/experimental/sgd/tf/__init__.py rename python/ray/{experimental => }/serve/BUILD (100%) rename python/ray/{experimental => }/serve/__init__.py (69%) rename python/ray/{experimental => }/serve/api.py (95%) rename python/ray/{experimental => }/serve/backend_config.py (100%) rename python/ray/{experimental => }/serve/constants.py (100%) rename python/ray/{experimental => }/serve/context.py (93%) rename python/ray/{experimental => }/serve/examples/benchmark.py (86%) rename python/ray/{experimental => }/serve/examples/echo.py (84%) rename python/ray/{experimental => }/serve/examples/echo_actor.py (92%) rename python/ray/{experimental => }/serve/examples/echo_actor_batch.py (92%) rename python/ray/{experimental => }/serve/examples/echo_batching.py (95%) rename python/ray/{experimental => }/serve/examples/echo_error.py (91%) rename python/ray/{experimental => }/serve/examples/echo_fixed_packing.py (92%) rename python/ray/{experimental => }/serve/examples/echo_full.py (95%) rename python/ray/{experimental => }/serve/examples/echo_pipeline.py (97%) rename python/ray/{experimental => }/serve/examples/echo_round_robin.py (89%) rename python/ray/{experimental => }/serve/examples/echo_slo_reverse.py (98%) rename python/ray/{experimental => }/serve/examples/echo_split.py (89%) rename python/ray/{experimental => }/serve/exceptions.py (100%) rename python/ray/{experimental => }/serve/global_state.py (88%) rename python/ray/{experimental => }/serve/handle.py (92%) rename python/ray/{experimental => }/serve/http_util.py (100%) rename python/ray/{experimental => }/serve/kv_store_service.py (98%) rename python/ray/{experimental => }/serve/metric.py (100%) rename python/ray/{experimental => }/serve/policy.py (98%) rename python/ray/{experimental => }/serve/queues.py (99%) rename python/ray/{experimental => }/serve/request_params.py (94%) rename python/ray/{experimental => }/serve/scripts.py (96%) rename python/ray/{experimental => }/serve/server.py (95%) rename python/ray/{experimental => }/serve/task_runner.py (96%) rename python/ray/{experimental => }/serve/tests/conftest.py (94%) rename python/ray/{experimental => }/serve/tests/test_api.py (98%) rename python/ray/{experimental => }/serve/tests/test_metric.py (97%) rename python/ray/{experimental => }/serve/tests/test_persistence.py (89%) rename python/ray/{experimental => }/serve/tests/test_queue.py (98%) rename python/ray/{experimental => }/serve/tests/test_routing.py (90%) rename python/ray/{experimental => }/serve/tests/test_serve.py (100%) rename python/ray/{experimental => }/serve/tests/test_task_runner.py (89%) rename python/ray/{experimental => }/serve/tests/test_util.py (79%) rename python/ray/{experimental => }/serve/utils.py (95%) rename python/ray/{experimental => util}/actor_pool.py (100%) rename python/ray/{experimental => util}/iter.py (98%) rename python/ray/{experimental => util}/joblib/__init__.py (88%) rename python/ray/{experimental => util}/joblib/ray_backend.py (95%) rename python/ray/{experimental => util}/named_actors.py (100%) create mode 100644 python/ray/util/sgd/__init__.py rename python/ray/{experimental => util}/sgd/pytorch/__init__.py (64%) rename python/ray/{experimental => util}/sgd/pytorch/distributed_pytorch_runner.py (98%) rename python/ray/{experimental => util}/sgd/pytorch/examples/cifar_pytorch_example.py (96%) rename python/ray/{experimental => util}/sgd/pytorch/examples/dcgan.py (97%) rename python/ray/{experimental => util}/sgd/pytorch/examples/example-sgd.yaml (100%) rename python/ray/{experimental => util}/sgd/pytorch/examples/mnist_cnn.pt (100%) rename python/ray/{experimental => util}/sgd/pytorch/examples/train_example.py (98%) rename python/ray/{experimental => util}/sgd/pytorch/examples/tune_example.py (97%) rename python/ray/{experimental => util}/sgd/pytorch/pytorch_runner.py (99%) rename python/ray/{experimental => util}/sgd/pytorch/pytorch_trainer.py (98%) rename python/ray/{experimental => util}/sgd/pytorch/resnet.py (100%) rename python/ray/{experimental => util}/sgd/pytorch/utils.py (99%) rename python/ray/{experimental => util}/sgd/tests/test_pytorch.py (97%) rename python/ray/{experimental => util}/sgd/tests/test_pytorch_runner.py (98%) rename python/ray/{experimental => util}/sgd/tests/test_tensorflow.py (94%) create mode 100644 python/ray/util/sgd/tf/__init__.py rename python/ray/{experimental => util}/sgd/tf/examples/cifar_tf_example.py (99%) rename python/ray/{experimental => util}/sgd/tf/examples/tensorflow_train_example.py (98%) rename python/ray/{experimental => util}/sgd/tf/examples/tf-example-sgd.yaml (100%) rename python/ray/{experimental => util}/sgd/tf/tf_runner.py (99%) rename python/ray/{experimental => util}/sgd/tf/tf_trainer.py (99%) rename python/ray/{experimental => util}/sgd/utils.py (100%) diff --git a/.travis.yml b/.travis.yml index eb9a5faf8..535ee11ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -294,7 +294,7 @@ script: - ./ci/suppress_output bazel test --build_tests_only --show_progress_rate_limit=100 --test_output=errors -- //:all -rllib/... # ray serve tests - - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors --test_tag_filters=-jenkins_only python/ray/experimental/serve/...; fi + - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors --test_tag_filters=-jenkins_only python/ray/serve/...; fi # ray operator tests - cd ./deploy/ray-operator/ diff --git a/BUILD.bazel b/BUILD.bazel index 658bd9fb2..7aafbb259 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1176,6 +1176,7 @@ filegroup( "python/ray/core/generated/ray/protocol/__init__.py", "python/ray/dashboard/dashboard.py", "python/ray/experimental/*.py", + "python/ray/util/*.py", "python/ray/internal/*.py", "python/ray/projects/*.py", "python/ray/projects/schema.json", diff --git a/README.rst b/README.rst index adaacaa02..7c6c5b0a4 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,7 @@ Ray is packaged with the following libraries for accelerating machine learning w - `Tune`_: Scalable Hyperparameter Tuning - `RLlib`_: Scalable Reinforcement Learning -- `Distributed Training `__ +- `RaySGD `__: Distributed Training Wrappers Install Ray with: ``pip install ray``. For nightly wheels, see the `Installation page `__. diff --git a/ci/jenkins_tests/run_tune_tests.sh b/ci/jenkins_tests/run_tune_tests.sh index 19b5cf083..cb9d84a29 100755 --- a/ci/jenkins_tests/run_tune_tests.sh +++ b/ci/jenkins_tests/run_tune_tests.sh @@ -144,49 +144,49 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} ######################## SGD TESTS ################################# $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python -m pytest /ray/python/ray/experimental/sgd/tests + python -m pytest /ray/python/ray/util/sgd/tests $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ python /ray/doc/examples/doc_code/raysgd_torch_signatures.py $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/train_example.py + python /ray/python/ray/util/sgd/pytorch/examples/train_example.py $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/train_example.py --num-replicas=2 + python /ray/python/ray/util/sgd/pytorch/examples/train_example.py --num-replicas=2 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/tune_example.py + python /ray/python/ray/util/sgd/pytorch/examples/tune_example.py $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/tune_example.py --num-replicas=2 + python /ray/python/ray/util/sgd/pytorch/examples/tune_example.py --num-replicas=2 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/cifar_pytorch_example.py --smoke-test + python /ray/python/ray/util/sgd/pytorch/examples/cifar_pytorch_example.py --smoke-test $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/cifar_pytorch_example.py --smoke-test --num-replicas=2 + python /ray/python/ray/util/sgd/pytorch/examples/cifar_pytorch_example.py --smoke-test --num-replicas=2 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/cifar_pytorch_example.py --smoke-test --tune + python /ray/python/ray/util/sgd/pytorch/examples/cifar_pytorch_example.py --smoke-test --tune $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/pytorch/examples/dcgan.py --smoke-test --num-replicas=2 + python /ray/python/ray/util/sgd/pytorch/examples/dcgan.py --smoke-test --num-replicas=2 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/tf/examples/tensorflow_train_example.py + python /ray/python/ray/util/sgd/tf/examples/tensorflow_train_example.py $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/tf/examples/tensorflow_train_example.py --num-replicas=2 + python /ray/python/ray/util/sgd/tf/examples/tensorflow_train_example.py --num-replicas=2 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/tf/examples/tensorflow_train_example.py --tune + python /ray/python/ray/util/sgd/tf/examples/tensorflow_train_example.py --tune $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/tf/examples/cifar_tf_example.py --smoke-test + python /ray/python/ray/util/sgd/tf/examples/cifar_tf_example.py --smoke-test $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/tf/examples/cifar_tf_example.py --num-replicas 2 --smoke-test + python /ray/python/ray/util/sgd/tf/examples/cifar_tf_example.py --num-replicas 2 --smoke-test $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/experimental/sgd/tf/examples/cifar_tf_example.py --num-replicas 2 --smoke-test --augment-data + python /ray/python/ray/util/sgd/tf/examples/cifar_tf_example.py --num-replicas 2 --smoke-test --augment-data diff --git a/ci/long_running_tests/workloads/serve.py b/ci/long_running_tests/workloads/serve.py index d1cb8fa73..a0963142d 100644 --- a/ci/long_running_tests/workloads/serve.py +++ b/ci/long_running_tests/workloads/serve.py @@ -5,9 +5,9 @@ from subprocess import PIPE import requests import ray -from ray.experimental import serve +from ray import serve from ray.cluster_utils import Cluster -from ray.experimental.serve.kv_store_service import RayInternalKVStore +from ray.serve.kv_store_service import RayInternalKVStore num_redis_shards = 1 redis_max_memory = 10**8 diff --git a/ci/stress_tests/run_application_stress_tests.sh b/ci/stress_tests/run_application_stress_tests.sh index 1c5480b48..f4df1ef73 100755 --- a/ci/stress_tests/run_application_stress_tests.sh +++ b/ci/stress_tests/run_application_stress_tests.sh @@ -127,7 +127,7 @@ test_sgd(){ echo "Try running SGD stress test." { - SGD_DIR=$ROOT_DIR/../../python/ray/experimental/sgd/ + SGD_DIR=$ROOT_DIR/../../python/ray/util/sgd/ ray --logging-level=DEBUG up -y "$CLUSTER" && # TODO: fix submit so that args work ray rsync_up "$CLUSTER" "$SGD_DIR/mnist_example.py" mnist_example.py && diff --git a/ci/travis/determine_tests_to_run.py b/ci/travis/determine_tests_to_run.py index 41739c53b..4fbf36daa 100644 --- a/ci/travis/determine_tests_to_run.py +++ b/ci/travis/determine_tests_to_run.py @@ -65,7 +65,7 @@ if __name__ == "__main__": RAY_CI_RLLIB_FULL_AFFECTED = 1 RAY_CI_LINUX_WHEELS_AFFECTED = 1 RAY_CI_MACOS_WHEELS_AFFECTED = 1 - elif changed_file.startswith("python/ray/experimental/serve"): + elif changed_file.startswith("python/ray/serve"): RAY_CI_SERVE_AFFECTED = 1 RAY_CI_LINUX_WHEELS_AFFECTED = 1 RAY_CI_MACOS_WHEELS_AFFECTED = 1 diff --git a/doc/examples/doc_code/raysgd_torch_signatures.py b/doc/examples/doc_code/raysgd_torch_signatures.py index b8fe06180..8dc0d85e7 100644 --- a/doc/examples/doc_code/raysgd_torch_signatures.py +++ b/doc/examples/doc_code/raysgd_torch_signatures.py @@ -46,7 +46,7 @@ def optimizer_creator(model, config): # __torch_data_start__ -from ray.experimental.sgd.pytorch.examples.train_example import LinearDataset +from ray.util.sgd.pytorch.examples.train_example import LinearDataset def data_creator(config): """Constructs torch.utils.data.Dataset objects. @@ -108,7 +108,7 @@ ray.init() # __torch_ray_end__ # __torch_trainer_start__ -from ray.experimental.sgd import PyTorchTrainer +from ray.util.sgd import PyTorchTrainer trainer = PyTorchTrainer( model_creator, diff --git a/doc/examples/overview.rst b/doc/examples/overview.rst index 9ede56417..e7bd2145b 100644 --- a/doc/examples/overview.rst +++ b/doc/examples/overview.rst @@ -1,5 +1,11 @@ -Examples Overview -================= +Ray Tutorials and Examples +========================== + +Get started with Ray, Tune, and RLlib with these notebooks that you can run online in CoLab or Binder: + * `Ray Tutorial Notebooks `__ + +Example Gallery +--------------- .. customgalleryitem:: :tooltip: Build a simple parameter server using Ray. diff --git a/doc/source/actors.rst b/doc/source/actors.rst index 219f3ad5e..c0c8939ee 100644 --- a/doc/source/actors.rst +++ b/doc/source/actors.rst @@ -197,19 +197,19 @@ If we instantiate an actor, we can pass the handle around to various tasks. print(ray.get(counter.get_counter.remote())) -Actor Pool (Experimental) -------------------------- +Actor Pool +---------- -The ``ray.experimental`` module contains a utility class, ``ActorPool``. +The ``ray.util`` module contains a utility class, ``ActorPool``. This class is similar to multiprocessing.Pool and lets you schedule Ray tasks over a fixed pool of actors. .. code-block:: - from ray.experimental import ActorPool + from ray.util import ActorPool a1, a2 = Actor.remote(), Actor.remote() pool = ActorPool([a1, a2]) print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4])) # [2, 4, 6, 8] -See the `package reference `_ for more information. +See the `package reference `_ for more information. diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 166e57c1f..b44d84433 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -259,7 +259,7 @@ driver: .. code-block:: python - counter = ray.experimental.get_actor("CounterActor") + counter = ray.util.get_actor("CounterActor") print(ray.get(counter.get_counter.remote())) Note that just creating a named actor is allowed, this actor will be cleaned diff --git a/doc/source/async_api.rst b/doc/source/async_api.rst index 0c49a438f..b4e01c6d1 100644 --- a/doc/source/async_api.rst +++ b/doc/source/async_api.rst @@ -1,5 +1,5 @@ -Async API (Experimental) -======================== +AsyncIO / Concurrency for Actors +================================ Since Python 3.5, it is possible to write concurrent code using the ``async/await`` `syntax `__. diff --git a/doc/source/cluster-index.rst b/doc/source/cluster-index.rst index bf60c016d..ad0fd9c47 100644 --- a/doc/source/cluster-index.rst +++ b/doc/source/cluster-index.rst @@ -12,3 +12,4 @@ How to setup your cluster and use Ray most effectively. deploy-on-yarn.rst deploy-on-kubernetes.rst deploying-on-slurm.rst + projects.rst diff --git a/doc/source/development.rst b/doc/source/development.rst index 837da97f2..6025ebb2e 100644 --- a/doc/source/development.rst +++ b/doc/source/development.rst @@ -237,12 +237,8 @@ scripts. Some of the examples include: * ``bazel test --build_tests_only //:all`` * Ray serving test commands: - * ``python -m pytest python/ray/experimental/serve/tests`` - * ``python python/ray/experimental/serve/examples/echo_full.py`` - -* Ray test commands: - * ``python/ray/experimental/test/async_test.py`` - * ``python/ray/tests/py3_test.py`` + * ``python -m pytest python/ray/serve/tests`` + * ``python python/ray/serve/examples/echo_full.py`` If a Travis-CI build exception doesn't appear to be related to your change, please visit `this link `_ to diff --git a/doc/source/index.rst b/doc/source/index.rst index 98b25d82d..46869e28b 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -16,7 +16,7 @@ Ray is packaged with the following libraries for accelerating machine learning w - `Tune`_: Scalable Hyperparameter Tuning - `RLlib`_: Scalable Reinforcement Learning -- `RaySGD`_: Distributed Training +- `RaySGD`_: Distributed Training Wrappers Star us on `on GitHub`_. You can also get started by visiting our `Tutorials `_. For the latest wheels (nightlies), see the `installation page `__. @@ -239,8 +239,7 @@ Getting Involved using-ray.rst configure.rst cluster-index.rst - Tutorials - Examples + Tutorial and Examples package-ref.rst .. toctree:: @@ -254,9 +253,9 @@ Getting Involved tune-distributed.rst tune-schedulers.rst tune-searchalg.rst - tune-package-ref.rst tune-design.rst tune-examples.rst + tune-package-ref.rst tune-contrib.rst .. toctree:: @@ -272,31 +271,27 @@ Getting Involved rllib-offline.rst rllib-concepts.rst rllib-examples.rst - rllib-dev.rst rllib-package-ref.rst + rllib-dev.rst .. toctree:: :maxdepth: -1 - :caption: RaySGD + :caption: Ray SGD raysgd/raysgd.rst raysgd/raysgd_pytorch.rst raysgd/raysgd_tensorflow.rst raysgd/raysgd_ref.rst - .. toctree:: :maxdepth: -1 - :caption: Experimental + :caption: Other Libraries - pandas_on_ray.rst - projects.rst - signals.rst - async_api.rst - serve.rst - iter.rst multiprocessing.rst joblib.rst + iter.rst + pandas_on_ray.rst + serve.rst .. toctree:: :maxdepth: -1 diff --git a/doc/source/iter.rst b/doc/source/iter.rst index f3e068307..1ea42fbd5 100644 --- a/doc/source/iter.rst +++ b/doc/source/iter.rst @@ -1,13 +1,19 @@ -Parallel Iterator API (Experimental) -==================================== +Distributed Iterators +===================== -``ray.experimental.iter`` provides a parallel iterator API for simple data ingest -and processing. It can be thought of as syntactic sugar around Ray actors and ``ray.wait`` loops. +.. _`issue on GitHub`: https://github.com/ray-project/ray/issues + +``ray.util.iter`` provides a parallel iterator API for simple data ingest and processing. It can be thought of as syntactic sugar around Ray actors and ``ray.wait`` loops. Parallel iterators are lazy and can operate over infinite sequences of items. Iterator transformations are only executed when the user calls ``next()`` to fetch the next output item from the iterator. +.. note:: + + This API is new and may be revised in future Ray releases. If you encounter + any bugs, please file an `issue on GitHub`_. + Concepts -------- @@ -18,20 +24,20 @@ create a worker actor that produces the data for each shard of the iterator: .. code-block:: python # Create an iterator with 2 worker actors over the list [1, 2, 3, 4]. - >>> it = ray.experimental.iter.from_items([1, 2, 3, 4], num_shards=2) + >>> it = ray.util.iter.from_items([1, 2, 3, 4], num_shards=2) ParallelIterator[from_items[int, 4, shards=2]] # Create an iterator with 32 worker actors over range(1000000). - >>> it = ray.experimental.iter.from_range(1000000, num_shards=32) + >>> it = ray.util.iter.from_range(1000000, num_shards=32) ParallelIterator[from_range[1000000, shards=32]] # Create an iterator over two range(10) generators. - >>> it = ray.experimental.iter.from_iterators([range(10), range(10)]) + >>> it = ray.util.iter.from_iterators([range(10), range(10)]) ParallelIterator[from_iterators[shards=2]] # Create an iterator from existing worker actors. These actors must # implement the ParallelIteratorWorker interface. - >>> it = ray.experimental.iter.from_actors([a1, a2, a3, a4]) + >>> it = ray.util.iter.from_actors([a1, a2, a3, a4]) ParallelIterator[from_actors[shards=4]] Simple transformations can be chained on the iterator, such as mapping, @@ -58,7 +64,7 @@ correspond to ``ray.get`` and ``ray.wait`` loops over the actors respectively: .. code-block:: python # Gather items synchronously (deterministic round robin across shards): - >>> it = ray.experimental.iter.from_range(1000000, 1) + >>> it = ray.util.iter.from_range(1000000, 1) >>> it = it.gather_sync() LocalIterator[ParallelIterator[from_range[1000000, shards=1]].gather_sync()] @@ -72,7 +78,7 @@ correspond to ``ray.get`` and ``ray.wait`` loops over the actors respectively: [0, 2, 4, 6, 8] # Async gather can be used for better performance, but it is non-deterministic. - >>> it = ray.experimental.iter.from_range(1000, 4).gather_async() + >>> it = ray.util.iter.from_range(1000, 4).gather_async() >>> it.take(5) [0, 250, 500, 750, 1] @@ -83,7 +89,7 @@ each shard should only be read by one process at a time: .. code-block:: python # Get local iterators representing the shards of this ParallelIterator: - >>> it = ray.experimental.iter.from_range(10000, 3) + >>> it = ray.util.iter.from_range(10000, 3) >>> [s0, s1, s2] = it.shards() [LocalIterator[from_range[10000, shards=3].shard[0]], LocalIterator[from_range[10000, shards=3].shard[1]], @@ -122,7 +128,7 @@ This means that you can pass a stateful callable to ``.foreach()``: self.total += x return (self.total, x) - it = ray.experimental.iter.from_range(5, 1) + it = ray.util.iter.from_range(5, 1) for x in it.for_each(CumulativeSum()).gather_sync(): print(x) @@ -150,7 +156,7 @@ streaming grep: file_list = glob.glob("/var/log/syslog*.gz") it = ( - ray.experimental.iter.from_items(file_list, num_shards=4) + ray.util.iter.from_items(file_list, num_shards=4) .for_each(lambda f: gzip.open(f).readlines()) .flatten() .for_each(lambda line: line.decode("utf-8")) @@ -184,7 +190,7 @@ distributed training: print("train on", batch) # perform model update with batch it = ( - ray.experimental.iter.from_range(1000000, num_shards=4, repeat=True) + ray.util.iter.from_range(1000000, num_shards=4, repeat=True) .batch(1024) .for_each(np.array) ) @@ -195,7 +201,7 @@ distributed training: API Reference ------------- -.. automodule:: ray.experimental.iter +.. automodule:: ray.util.iter :members: :show-inheritance: :special-members: diff --git a/doc/source/joblib.rst b/doc/source/joblib.rst index 54b6027d0..b5a3235a8 100644 --- a/doc/source/joblib.rst +++ b/doc/source/joblib.rst @@ -1,12 +1,5 @@ -sklearn Ray Backend API (Experimental) -======================================= - -.. warning:: - - Support for running scikit-learn on Ray is an experimental feature, - so it may be changed at any time without warning. If you encounter any - bugs/shortcomings/incompatibilities, please file an `issue on GitHub`_. - Contributions are always welcome! +Distributed Scikit-learn / Joblib +================================= .. _`issue on GitHub`: https://github.com/ray-project/ray/issues @@ -15,6 +8,11 @@ implementing a Ray backend for `joblib`_ using `Ray Actors `__ instead of local processes. This makes it easy to scale existing applications that use scikit-learn from a single node to a cluster. +.. note:: + + This API is new and may be revised in future Ray releases. If you encounter + any bugs, please file an `issue on GitHub`_. + .. _`joblib`: https://joblib.readthedocs.io .. _`scikit-learn`: https://scikit-learn.org @@ -22,7 +20,7 @@ Quickstart ---------- To get started, first `install Ray `__, then use -``from ray.experimental.joblib import register_ray`` and run ``register_ray()``. +``from ray.util.joblib import register_ray`` and run ``register_ray()``. This will register Ray as a joblib backend for scikit-learn to use. Then run your original scikit-learn code inside ``with joblib.parallel_backend('ray')``. This will start a local Ray cluster. @@ -46,7 +44,7 @@ a multi-node Ray cluster instead. search = RandomizedSearchCV(model, param_space, cv=5, n_iter=300, verbose=10) import joblib - from ray.experimental.joblib import register_ray + from ray.util.joblib import register_ray register_ray() with joblib.parallel_backend('ray'): search.fit(digits.data, digits.target) diff --git a/doc/source/memory-management.rst b/doc/source/memory-management.rst index 2b8eda42a..9d20fc3c9 100644 --- a/doc/source/memory-management.rst +++ b/doc/source/memory-management.rst @@ -84,10 +84,11 @@ Heap memory quota When Ray starts, it queries the available memory on a node / container not reserved for Redis and the object store or being used by other applications. This is considered "available memory" that actors and tasks can request memory out of. You can also set ``memory=`` on Ray init to tell Ray explicitly how much memory is available. -.. note:: +.. important:: - Setting available memory for the node does not impose any limits on memory usage - of tasks. To set per-task limits, see the following sections. + Setting available memory for the node does NOT impose any limits on memory usage + unless you specify memory resource requirements in decorators. By default, tasks + and actors request no memory (and hence have no limit). To tell the Ray scheduler a task or actor requires a certain amount of available memory to run, set the ``memory`` argument. The Ray scheduler will then reserve the specified amount of available memory during scheduling, similar to how it handles CPU and GPU resources: diff --git a/doc/source/multiprocessing.rst b/doc/source/multiprocessing.rst index bd28ebff3..9db205df6 100644 --- a/doc/source/multiprocessing.rst +++ b/doc/source/multiprocessing.rst @@ -1,12 +1,5 @@ -multiprocessing.Pool API -======================== - -.. warning:: - - Support for the multiprocessing.Pool API on Ray is an experimental feature, - so it may be changed at any time without warning. If you encounter any - bugs/shortcomings/incompatibilities, please file an `issue on GitHub`_. - Contributions are always welcome! +Distributed multiprocessing.Pool +================================ .. _`issue on GitHub`: https://github.com/ray-project/ray/issues @@ -15,6 +8,11 @@ using `Ray Actors `__ instead of local processes. This makes it eas to scale existing applications that use ``multiprocessing.Pool`` from a single node to a cluster. +.. note:: + + This API is new and may be revised in future Ray releases. If you encounter + any bugs, please file an `issue on GitHub`_. + .. _`multiprocessing.Pool API`: https://docs.python.org/3/library/multiprocessing.html#module-multiprocessing.pool Quickstart diff --git a/doc/source/raysgd/raysgd.rst b/doc/source/raysgd/raysgd.rst index 426464641..0160c39cb 100644 --- a/doc/source/raysgd/raysgd.rst +++ b/doc/source/raysgd/raysgd.rst @@ -1,9 +1,7 @@ -RaySGD: Distributed Deep Learning -================================= +RaySGD: Distributed Training Wrappers +===================================== -.. image:: raysgdlogo.png - :scale: 20% - :align: center +.. _`issue on GitHub`: https://github.com/ray-project/ray/issues RaySGD is a lightweight library for distributed deep learning, providing thin wrappers around PyTorch and TensorFlow native modules for data parallel training. @@ -13,7 +11,10 @@ The main features are: - **Composability**: RaySGD is built on top of the Ray Actor API, enabling seamless integration with existing Ray applications such as RLlib, Tune, and Ray.Serve. - **Scale up and down**: Start on single CPU. Scale up to multi-node, multi-CPU, or multi-GPU clusters by changing 2 lines of code. -.. tip:: We need your feedback! RaySGD is currently early in its development, and we're hoping to get feedback from people using or considering it. We'd love `to get in touch `_! +.. note:: + + This API is new and may be revised in future Ray releases. If you encounter + any bugs, please file an `issue on GitHub`_. Getting Started @@ -28,8 +29,8 @@ You can start a ``PyTorchTrainer`` with the following: import torch.nn as nn from torch import distributed - from ray.experimental.sgd import PyTorchTrainer - from ray.experimental.sgd.examples.train_example import LinearDataset + from ray.util.sgd import PyTorchTrainer + from ray.util.sgd.examples.train_example import LinearDataset def model_creator(config): @@ -61,3 +62,5 @@ You can start a ``PyTorchTrainer`` with the following: print(stats) trainer1.shutdown() print("success!") + +.. tip:: Get in touch with us if you're using or considering using `RaySGD `_! diff --git a/doc/source/raysgd/raysgd_pytorch.rst b/doc/source/raysgd/raysgd_pytorch.rst index 7dcbf6a69..e85fc10a4 100644 --- a/doc/source/raysgd/raysgd_pytorch.rst +++ b/doc/source/raysgd/raysgd_pytorch.rst @@ -1,8 +1,5 @@ -RaySGD Pytorch -============== - -.. image:: raysgd-pytorch.svg - :align: center +Distributed PyTorch +=================== The RaySGD ``PyTorchTrainer`` simplifies distributed model training for PyTorch. The ``PyTorchTrainer`` is a wrapper around ``torch.distributed.launch`` with a Python API to easily incorporate distributed training into a larger Python application, as opposed to needing to wrap your training code in bash scripts. @@ -16,7 +13,7 @@ For end to end examples leveraging RaySGD PyTorchTrainer, jump to :ref:`raysgd-p Setting up training ------------------- -.. tip:: We need your feedback! RaySGD is currently early in its development, and we're hoping to get feedback from people using or considering it. We'd love `to get in touch `_! +.. tip:: Get in touch with us if you're using or considering using `RaySGD `_! The ``PyTorchTrainer`` can be constructed with functions that wrap components of the training script. Specifically, it requires constructors for the Model, Data, Optimizer, Loss, and ``lr_scheduler`` to create replicated copies across different devices and machines. @@ -154,8 +151,6 @@ After training, you may want to reappropriate the Ray cluster. To release Ray re Initialization Functions ------------------------ -.. warning:: This is still an experimental API and is subject to change without warning. - You may want to run some initializers on each worker when they are started. This may be something like setting an environment variable or downloading some data. You can do this via the ``initialization_hook`` parameter: .. code-block:: python @@ -305,11 +300,9 @@ Users can set ``checkpoint="auto"`` to always checkpoint the current model befor Advanced: Hyperparameter Tuning ------------------------------- -.. warning:: This is still an experimental API and is subject to change without warning. - ``PyTorchTrainer`` naturally integrates with Tune via the ``PyTorchTrainable`` interface. The same arguments to ``PyTorchTrainer`` should be passed into the ``tune.run(config=...)`` as shown below. -.. literalinclude:: ../../../python/ray/experimental/sgd/pytorch/examples/tune_example.py +.. literalinclude:: ../../../python/ray/util/sgd/pytorch/examples/tune_example.py :language: python :start-after: __torch_tune_example__ @@ -321,7 +314,7 @@ In certain scenarios such as training GANs, you may want to use multiple models If multiple models, optimizers, or schedulers are returned, you will need to provide a custom training function (and custom validation function if you plan to call ``validate``). -You can see the `DCGAN script `_ for an end-to-end example. +You can see the `DCGAN script `_ for an end-to-end example. .. code-block:: python @@ -362,8 +355,6 @@ You can see the `DCGAN script `_. -- `PyTorch training example `__: +- `PyTorch training example `__: Simple example of using Ray's PyTorchTrainer. -- `CIFAR10 example `__: +- `CIFAR10 example `__: Training a ResNet18 model on CIFAR10. It uses a custom training function, a custom validation function, and custom initialization code for each worker. -- `DCGAN example `__: +- `DCGAN example `__: Training a Deep Convolutional GAN on MNIST. It constructs two models and two optimizers and uses a custom training and validation function. diff --git a/doc/source/raysgd/raysgd_ref.rst b/doc/source/raysgd/raysgd_ref.rst index 13c313c1e..d7ac5020b 100644 --- a/doc/source/raysgd/raysgd_ref.rst +++ b/doc/source/raysgd/raysgd_ref.rst @@ -6,7 +6,7 @@ Package Reference PyTorchTrainer -------------- -.. autoclass:: ray.experimental.sgd.pytorch.PyTorchTrainer +.. autoclass:: ray.util.sgd.pytorch.PyTorchTrainer :members: .. automethod:: __init__ @@ -15,13 +15,13 @@ PyTorchTrainer PyTorchTrainable ---------------- -.. autoclass:: ray.experimental.sgd.pytorch.PyTorchTrainable +.. autoclass:: ray.util.sgd.pytorch.PyTorchTrainable :members: TFTrainer --------- -.. autoclass:: ray.experimental.sgd.tf.TFTrainer +.. autoclass:: ray.util.sgd.tf.TFTrainer :members: .. automethod:: __init__ diff --git a/doc/source/raysgd/raysgd_tensorflow.rst b/doc/source/raysgd/raysgd_tensorflow.rst index b031153bb..ce9dacd85 100644 --- a/doc/source/raysgd/raysgd_tensorflow.rst +++ b/doc/source/raysgd/raysgd_tensorflow.rst @@ -1,9 +1,12 @@ -RaySGD TensorFlow -================= +Distributed TensorFlow +====================== RaySGD's ``TFTrainer`` simplifies distributed model training for Tensorflow. The ``TFTrainer`` is a wrapper around ``MultiWorkerMirroredStrategy`` with a Python API to easily incorporate distributed training into a larger Python application, as opposed to write custom logic of setting environments and starting separate processes. -.. important:: This API has only been tested with TensorFlow2.0rc and is still highly experimental. Please file bug reports if you run into any - thanks! +Under the hood, ``TFTrainer`` will create *replicas* of your model (controlled by ``num_replicas``), each of which is managed by a Ray actor. + +.. image:: raysgd-actors.svg + :align: center .. tip:: We need your feedback! RaySGD is currently early in its development, and we're hoping to get feedback from people using or considering it. We'd love `to get in touch `_! @@ -69,6 +72,6 @@ TFTrainer Example Below is an example of using Ray's TFTrainer. Under the hood, ``TFTrainer`` will create *replicas* of your model (controlled by ``num_replicas``) which are each managed by a worker. -.. literalinclude:: ../../../python/ray/experimental/sgd/tf/examples/tensorflow_train_example.py +.. literalinclude:: ../../../python/ray/util/sgd/tf/examples/tensorflow_train_example.py :language: python diff --git a/doc/source/rllib-dev.rst b/doc/source/rllib-dev.rst index 18e18a739..543beb78c 100644 --- a/doc/source/rllib-dev.rst +++ b/doc/source/rllib-dev.rst @@ -1,5 +1,5 @@ -RLlib Development -================= +Contributing to RLlib +===================== Development Install ------------------- diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst index f078c06f9..61140ff3f 100644 --- a/doc/source/rllib-training.rst +++ b/doc/source/rllib-training.rst @@ -442,7 +442,7 @@ Sometimes, it is necessary to coordinate between pieces of code that live in dif .. code-block:: python - from ray.experimental import named_actors + from ray.util import named_actors @ray.remote class Counter: diff --git a/doc/source/serve.rst b/doc/source/serve.rst index 06781b40a..4f5425f7e 100644 --- a/doc/source/serve.rst +++ b/doc/source/serve.rst @@ -1,20 +1,23 @@ -Ray Serve (Experimental) -======================== +Ray Serve +========= + +.. _`issue on GitHub`: https://github.com/ray-project/ray/issues Ray Serve is a serving library that exposes python function/classes to HTTP. It has built-in support for flexible traffic policy. This means you can easy split incoming traffic to multiple implementations. -With Ray Serve, you can deploy your services at any scale. - .. warning:: - Ray Serve is Python 3 only. + + Ray Serve is under development and its API may be revised in future Ray releases. If you encounter any bugs, please file an `issue on GitHub`_. + +With Ray Serve, you can deploy your services at any scale. Quickstart ---------- -.. literalinclude:: ../../python/ray/experimental/serve/examples/echo_full.py +.. literalinclude:: ../../python/ray/serve/examples/echo_full.py API --- -.. automodule:: ray.experimental.serve +.. automodule:: ray.serve :members: diff --git a/doc/source/signals.rst b/doc/source/signals.rst deleted file mode 100644 index 76f9984d6..000000000 --- a/doc/source/signals.rst +++ /dev/null @@ -1,168 +0,0 @@ -Signal API (Experimental) -========================= - -This experimental API allows tasks and actors to generate signals which can -be received by other tasks and actors. In addition, task failures and actor -method failures generate error signals. The error signals enable applications -to detect failures and potentially recover from failures. - -.. autofunction:: ray.experimental.signal.send - -Here is a simple example of a remote function that sends a user-defined signal. - -.. code-block:: python - - import ray.experimental.signal as signal - - # Define an application level signal. - class UserSignal(signal.Signal): - def __init__(self, value): - self.value = value - - def get_value(self): - return self.value - - # Define a remote function that sends a user-defined signal. - @ray.remote - def send_signal(value): - signal.send(UserSignal(value)) - -.. autofunction:: ray.experimental.signal.receive - -Here is a simple example of how to receive signals from an actor or task identified -by ``a``. Note that an actor is identified by its handle, and a task by one of its -object ID return values. - -.. code-block:: python - - import ray.experimental.signal as signal - - # This returns a possibly empty list of all signals that have been sent by 'a' - # since the last invocation of signal.receive from within this process. If 'a' - # did not send any signals, then this will wait for up to 10 seconds to receive - # a signal from 'a'. - signal_list = signal.receive([a], timeout=10) - -.. autofunction:: ray.experimental.signal.reset - - -Example: sending a user signal ------------------------------- - -The code below show a simple example in which a task, called ``send_signal()`` -sends a user signal and the driver gets it by invoking ``signal.receive()``. - -.. code-block:: python - - import ray.experimental.signal as signal - - # Define a user signal. - class UserSignal(signal.Signal): - def __init__(self, value): - self.value = value - - def get_value(self): - return self.value - - @ray.remote - def send_signal(value): - signal.send(UserSignal(value)) - return - - signal_value = 'simple signal' - object_id = send_signal.remote(signal_value) - # Wait up to 10sec to receive a signal from the task. Note the task is - # identified by the object_id it returns. - result_list = signal.receive([object_id], timeout=10) - # Print signal values. This should print "simple_signal". - # Note that result_list[0] is the signal we expect from the task. - # The signal is a tuple where the first element is the first object ID - # returned by the task and the second element is the signal object. - print(result_list[0][1].get_value()) - -Example: Getting an error signals ---------------------------------- - -This is a simple example in which a driver gets an error signal caused -by the failure of ``task()``. - -.. code-block:: python - - @ray.remote - def task(): - raise Exception('exception message') - - object_id = task.remote() - try: - ray.get(object_id) - except Exception as e: - pass - finally: - result_list = signal.receive([object_id], timeout=10) - # Expected signal is 'ErrorSignal'. - assert type(result_list[0][1]) == signal.ErrorSignal - # Print the error. - print(result_list[0][1].get_error()) - - -Example: Sending signals between multiple actors ------------------------------------------------- - -This is a more involved example in which two actors ``a1`` and ``a2`` each -generate five signals, and another actor ``b`` waits to receive all signals -generated by ``a1`` and ``a2``, respectively. Note that ``b`` recursively calls -its own method ``get_signals()`` until it gets all signals it expects. - -.. code-block:: python - - @ray.remote - class ActorSendSignals(object): - def send_signals(self, value, count): - for i in range(count): - signal.send(UserSignal(value + str(i))) - - @ray.remote - class ActorGetAllSignals(object): - def __init__(self, num_expected_signals, *source_ids): - self.received_signals = [] - self.num_expected_signals = num_expected_signals - self.source_ids = source_ids - - def register_handle(self, handle): - self.this_actor = handle - - def get_signals(self): - new_signals = signal.receive(self.source_ids, timeout=10) - self.received_signals.extend(new_signals) - if len(self.received_signals) < self.num_expected_signals: - self.this_actor.get_signals.remote() - - def get_count(self): - return len(self.received_signals) - - # Create two actors to send signals. - a1 = ActorSendSignals.remote() - a2 = ActorSendSignals.remote() - signal_value = 'simple signal' - count = 5 - # Each actor sends five signals. - a1.send_signals.remote(signal_value, count) - a2.send_signals.remote(signal_value, count) - - # Create an actor that waits for all five signals sent by each actor. - b = ActorGetAllSignals.remote(2 * count, *[a1, a2]) - # Provide actor to its own handle, so it can recursively call itself - # to get all signals from a1, and a2, respectively. This enables the actor - # execute other methods if needed. - ray.get(b.register_handle.remote(b)) - b.get_signals.remote() - # Print total number of signals. This should be 2*count = 10. - print(ray.get(b.get_count.remote())) - -Note ----- - -A failed actor (e.g., an actor that crashed) generates an error message only -when another actor or task invokes one of its methods. - -Please `let us know `__ any issues you encounter. diff --git a/doc/source/tune.rst b/doc/source/tune.rst index 36024ca42..d8ddba936 100644 --- a/doc/source/tune.rst +++ b/doc/source/tune.rst @@ -1,11 +1,11 @@ -Tune: A Scalable Hyperparameter Tuning Library -============================================== +Tune: Scalable Hyperparameter Tuning +==================================== .. image:: images/tune.png :scale: 30% :align: center -Tune is a Python library for hyperparameter tuning at any scale. Core features: +Tune is a Python library for experiment execution and hyperparameter tuning at any scale. Core features: * Launch a multi-node `distributed hyperparameter sweep `_ in less than 10 lines of code. * Supports any machine learning framework, including PyTorch, XGBoost, MXNet, and Keras. See `examples here `_. diff --git a/doc/source/using-ray.rst b/doc/source/using-ray.rst index cc22b9540..cf3744d88 100644 --- a/doc/source/using-ray.rst +++ b/doc/source/using-ray.rst @@ -12,6 +12,7 @@ Finally, we've also included some content on using core Ray APIs with `Tensorflo starting-ray.rst actors.rst + async_api.rst using-ray-with-gpus.rst serialization.rst memory-management.rst diff --git a/python/ray/__init__.py b/python/ray/__init__.py index 70df47cdc..9874d36c2 100644 --- a/python/ray/__init__.py +++ b/python/ray/__init__.py @@ -96,6 +96,7 @@ import ray.actor # noqa: F401 from ray.actor import method # noqa: E402 from ray.runtime_context import _get_runtime_context # noqa: E402 from ray.cross_language import java_function, java_actor_class # noqa: E402 +from ray import util # noqa: E402 # Ray version string. __version__ = "0.9.0.dev0" @@ -140,6 +141,7 @@ __all__ = [ "Language", "java_function", "java_actor_class", + "util", ] # ID types diff --git a/python/ray/actor.py b/python/ray/actor.py index 480ece45b..40f045f71 100644 --- a/python/ray/actor.py +++ b/python/ray/actor.py @@ -483,14 +483,14 @@ class ActorClass: # Check whether the name is already taken. if name is not None: try: - ray.experimental.get_actor(name) + ray.util.get_actor(name) except ValueError: # name is not taken, expected. pass else: raise ValueError( "The name {name} is already taken. Please use " "a different name or get existing actor using " - "ray.experimental.get_actor('{name}')".format(name=name)) + "ray.util.get_actor('{name}')".format(name=name)) # Set the actor's default resources if not already set. First three # conditions are to check that no resources were specified in the @@ -585,7 +585,7 @@ class ActorClass: original_handle=True) if name is not None: - ray.experimental.register_actor(name, actor_handle) + ray.util.register_actor(name, actor_handle) return actor_handle diff --git a/python/ray/experimental/__init__.py b/python/ray/experimental/__init__.py index 1b214d6df..81e7f995c 100644 --- a/python/ray/experimental/__init__.py +++ b/python/ray/experimental/__init__.py @@ -1,28 +1,8 @@ -from .gcs_flush_policy import (set_flushing_policy, GcsFlushPolicy, - SimpleGcsFlushPolicy) -from .named_actors import get_actor, register_actor from .api import get, wait -from .actor_pool import ActorPool from .dynamic_resources import set_resource -from . import iter - - -def TensorFlowVariables(*args, **kwargs): - raise DeprecationWarning( - "'ray.experimental.TensorFlowVariables' is deprecated. Instead, please" - " do 'from ray.experimental.tf_utils import TensorFlowVariables'.") - __all__ = [ - "TensorFlowVariables", - "get_actor", - "register_actor", "get", "wait", - "set_flushing_policy", - "GcsFlushPolicy", - "SimpleGcsFlushPolicy", "set_resource", - "ActorPool", - "iter", ] diff --git a/python/ray/experimental/gcs_flush_policy.py b/python/ray/experimental/gcs_flush_policy.py deleted file mode 100644 index 00d9e85fe..000000000 --- a/python/ray/experimental/gcs_flush_policy.py +++ /dev/null @@ -1,89 +0,0 @@ -import os -import time - -import ray -import ray.cloudpickle as pickle - - -class GcsFlushPolicy: - """Experimental: a policy to control GCS flushing. - - Used by Monitor to enable automatic control of memory usage. - """ - - def should_flush(self, redis_client): - """Returns a bool, whether a flush request should be issued.""" - pass - - def num_entries_to_flush(self): - """Returns an upper bound for number of entries to flush next.""" - pass - - def record_flush(self): - """Must be called after a flush has been performed.""" - pass - - -class SimpleGcsFlushPolicy(GcsFlushPolicy): - """A simple policy with constant flush rate, after a warmup period. - - Example policy values: - - flush_when_at_least_bytes 2GB - - flush_period_secs 10s - - flush_num_entries_each_time 10k - - This means: (1) If the GCS shard uses less than 2GB of memory, - no flushing would take place. This should cover most Ray runs. (2) The - GCS shard will only honor a flush request, if it's issued after 10 - seconds since the last processed flush. In particular this means it's - okay for the Monitor to issue requests more frequently than this param. - (3) When processing a flush, the shard will flush at most 10k entries. - This is to control the latency of each request. - - Note, flush rate == (flush period) * (num entries each time). So - applications that have a heavier GCS load can tune these params. - """ - - def __init__(self, - flush_when_at_least_bytes=(1 << 31), - flush_period_secs=10, - flush_num_entries_each_time=10000): - self.flush_when_at_least_bytes = flush_when_at_least_bytes - self.flush_period_secs = flush_period_secs - self.flush_num_entries_each_time = flush_num_entries_each_time - self.last_flush_timestamp = time.time() - - def should_flush(self, redis_client): - if time.time() - self.last_flush_timestamp < self.flush_period_secs: - return False - - used_memory = redis_client.info("memory")["used_memory"] - assert used_memory > 0 - - return used_memory >= self.flush_when_at_least_bytes - - def num_entries_to_flush(self): - return self.flush_num_entries_each_time - - def record_flush(self): - self.last_flush_timestamp = time.time() - - def serialize(self): - return pickle.dumps(self) - - -def set_flushing_policy(flushing_policy): - """Serialize this policy for Monitor to pick up.""" - if "RAY_USE_NEW_GCS" not in os.environ: - raise Exception( - "set_flushing_policy() is only available when environment " - "variable RAY_USE_NEW_GCS is present at both compile and run time." - ) - ray.worker.global_worker.check_connected() - redis_client = ray.worker.global_worker.redis_client - - serialized = pickle.dumps(flushing_policy) - redis_client.set("gcs_flushing_policy", serialized) diff --git a/python/ray/experimental/sgd/__init__.py b/python/ray/experimental/sgd/__init__.py deleted file mode 100644 index 9223335cc..000000000 --- a/python/ray/experimental/sgd/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from ray.experimental.sgd.pytorch import PyTorchTrainer -from ray.experimental.sgd.tf import TFTrainer - -__all__ = ["PyTorchTrainer", "TFTrainer"] diff --git a/python/ray/experimental/sgd/tf/__init__.py b/python/ray/experimental/sgd/tf/__init__.py deleted file mode 100644 index 54701ab20..000000000 --- a/python/ray/experimental/sgd/tf/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from ray.experimental.sgd.tf.tf_trainer import (TFTrainer, TFTrainable) - -__all__ = ["TFTrainer", "TFTrainable"] diff --git a/python/ray/monitor.py b/python/ray/monitor.py index d15a984f9..b325205b7 100644 --- a/python/ray/monitor.py +++ b/python/ray/monitor.py @@ -5,11 +5,8 @@ import time import traceback import json -import redis - import ray from ray.autoscaler.autoscaler import LoadMetrics, StandardAutoscaler -import ray.cloudpickle as pickle import ray.gcs_utils import ray.utils import ray.ray_constants as ray_constants @@ -54,34 +51,6 @@ class Monitor: self.autoscaler = None self.autoscaling_config = None - # Experimental feature: GCS flushing. - self.issue_gcs_flushes = "RAY_USE_NEW_GCS" in os.environ - self.gcs_flush_policy = None - if self.issue_gcs_flushes: - # Data is stored under the first data shard, so we issue flushes to - # that redis server. - addr_port = self.redis.lrange("RedisShards", 0, -1) - if len(addr_port) > 1: - logger.warning( - "Monitor: " - "TODO: if launching > 1 redis shard, flushing needs to " - "touch shards in parallel.") - self.issue_gcs_flushes = False - else: - addr_port = addr_port[0].split(b":") - self.redis_shard = redis.StrictRedis( - host=addr_port[0], - port=addr_port[1], - password=redis_password) - try: - self.redis_shard.execute_command("HEAD.FLUSH 0") - except redis.exceptions.ResponseError as e: - logger.info( - "Monitor: " - "Turning off flushing due to exception: {}".format( - str(e))) - self.issue_gcs_flushes = False - def __del__(self): """Destruct the monitor object.""" # We close the pubsub client to avoid leaking file descriptors. @@ -288,37 +257,6 @@ class Monitor: ip_address += ":" + str(raylet_info["NodeManagerPort"]) self.raylet_id_to_ip_map[node_id] = ip_address - def _maybe_flush_gcs(self): - """Experimental: issue a flush request to the GCS. - - The purpose of this feature is to control GCS memory usage. - - To activate this feature, Ray must be compiled with the flag - RAY_USE_NEW_GCS set, and Ray must be started at run time with the flag - as well. - """ - if not self.issue_gcs_flushes: - return - if self.gcs_flush_policy is None: - serialized = self.redis.get("gcs_flushing_policy") - if serialized is None: - # Client has not set any policy; by default flushing is off. - return - self.gcs_flush_policy = pickle.loads(serialized) - - if not self.gcs_flush_policy.should_flush(self.redis_shard): - return - - max_entries_to_flush = self.gcs_flush_policy.num_entries_to_flush() - num_flushed = self.redis_shard.execute_command( - "HEAD.FLUSH {}".format(max_entries_to_flush)) - logger.info("Monitor: num_flushed {}".format(num_flushed)) - - # This flushes event log and log files. - ray.experimental.flush_redis_unsafe(self.redis) - - self.gcs_flush_policy.record_flush() - def _run(self): """Run the monitor. @@ -346,8 +284,6 @@ class Monitor: if self.autoscaler: self.autoscaler.update() - self._maybe_flush_gcs() - # Process a round of messages. self.process_messages() diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index 0aeec706d..51d22e964 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -977,7 +977,7 @@ cli.add_command(project_cli) cli.add_command(session_cli) try: - from ray.experimental.serve.scripts import serve_cli + from ray.serve.scripts import serve_cli cli.add_command(serve_cli) except Exception as e: logger.debug( diff --git a/python/ray/experimental/serve/BUILD b/python/ray/serve/BUILD similarity index 100% rename from python/ray/experimental/serve/BUILD rename to python/ray/serve/BUILD diff --git a/python/ray/experimental/serve/__init__.py b/python/ray/serve/__init__.py similarity index 69% rename from python/ray/experimental/serve/__init__.py rename to python/ray/serve/__init__.py index 6ebfec9cd..e5b7eb314 100644 --- a/python/ray/experimental/serve/__init__.py +++ b/python/ray/serve/__init__.py @@ -1,6 +1,6 @@ -from ray.experimental.serve.backend_config import BackendConfig -from ray.experimental.serve.policy import RoutePolicy -from ray.experimental.serve.api import ( +from ray.serve.backend_config import BackendConfig +from ray.serve.policy import RoutePolicy +from ray.serve.api import ( init, create_backend, create_endpoint, link, split, get_handle, stat, set_backend_config, get_backend_config, accept_batch, route) # noqa: E402 diff --git a/python/ray/experimental/serve/api.py b/python/ray/serve/api.py similarity index 95% rename from python/ray/experimental/serve/api.py rename to python/ray/serve/api.py index d04cb77be..ccf033d62 100644 --- a/python/ray/experimental/serve/api.py +++ b/python/ray/serve/api.py @@ -7,18 +7,17 @@ from multiprocessing import cpu_count import numpy as np import ray -from ray.experimental.serve.constants import ( - DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT, SERVE_NURSERY_NAME) -from ray.experimental.serve.global_state import (GlobalState, - start_initial_state) -from ray.experimental.serve.kv_store_service import SQLiteKVStore -from ray.experimental.serve.task_runner import RayServeMixin, TaskRunnerActor -from ray.experimental.serve.utils import (block_until_http_ready, - get_random_letters, expand) -from ray.experimental.serve.exceptions import RayServeException -from ray.experimental.serve.backend_config import BackendConfig -from ray.experimental.serve.policy import RoutePolicy -from ray.experimental.serve.queues import Query +from ray.serve.constants import (DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT, + SERVE_NURSERY_NAME) +from ray.serve.global_state import (GlobalState, start_initial_state) +from ray.serve.kv_store_service import SQLiteKVStore +from ray.serve.task_runner import RayServeMixin, TaskRunnerActor +from ray.serve.utils import (block_until_http_ready, get_random_letters, + expand) +from ray.serve.exceptions import RayServeException +from ray.serve.backend_config import BackendConfig +from ray.serve.policy import RoutePolicy +from ray.serve.queues import Query global_state = None @@ -114,7 +113,7 @@ def init(kv_store_connector=None, # Try to get serve nursery if there exists try: - ray.experimental.get_actor(SERVE_NURSERY_NAME) + ray.util.get_actor(SERVE_NURSERY_NAME) global_state = GlobalState() return except ValueError: @@ -440,7 +439,7 @@ def get_handle(endpoint_name, relative_slo_ms=None, absolute_slo_ms=None): global_state.route_table.list_service(include_headless=True).values()) # Delay import due to it's dependency on global_state - from ray.experimental.serve.handle import RayServeHandle + from ray.serve.handle import RayServeHandle return RayServeHandle(global_state.init_or_get_router(), endpoint_name, relative_slo_ms, absolute_slo_ms) diff --git a/python/ray/experimental/serve/backend_config.py b/python/ray/serve/backend_config.py similarity index 100% rename from python/ray/experimental/serve/backend_config.py rename to python/ray/serve/backend_config.py diff --git a/python/ray/experimental/serve/constants.py b/python/ray/serve/constants.py similarity index 100% rename from python/ray/experimental/serve/constants.py rename to python/ray/serve/constants.py diff --git a/python/ray/experimental/serve/context.py b/python/ray/serve/context.py similarity index 93% rename from python/ray/experimental/serve/context.py rename to python/ray/serve/context.py index 25ccf0329..1069bbe21 100644 --- a/python/ray/experimental/serve/context.py +++ b/python/ray/serve/context.py @@ -1,6 +1,6 @@ from enum import IntEnum -from ray.experimental.serve.exceptions import RayServeException +from ray.serve.exceptions import RayServeException class TaskContext(IntEnum): diff --git a/python/ray/experimental/serve/examples/benchmark.py b/python/ray/serve/examples/benchmark.py similarity index 86% rename from python/ray/experimental/serve/examples/benchmark.py rename to python/ray/serve/examples/benchmark.py index 6ed4c54cc..18b355fbb 100644 --- a/python/ray/experimental/serve/examples/benchmark.py +++ b/python/ray/serve/examples/benchmark.py @@ -1,5 +1,5 @@ -from ray.experimental import serve -from ray.experimental.serve.constants import DEFAULT_HTTP_ADDRESS +from ray import serve +from ray.serve.constants import DEFAULT_HTTP_ADDRESS import requests import time import pandas as pd diff --git a/python/ray/experimental/serve/examples/echo.py b/python/ray/serve/examples/echo.py similarity index 84% rename from python/ray/experimental/serve/examples/echo.py rename to python/ray/serve/examples/echo.py index e124367ea..578a630fb 100644 --- a/python/ray/experimental/serve/examples/echo.py +++ b/python/ray/serve/examples/echo.py @@ -6,8 +6,8 @@ import time import requests -from ray.experimental import serve -from ray.experimental.serve.utils import pformat_color_json +from ray import serve +from ray.serve.utils import pformat_color_json def echo(flask_request): diff --git a/python/ray/experimental/serve/examples/echo_actor.py b/python/ray/serve/examples/echo_actor.py similarity index 92% rename from python/ray/experimental/serve/examples/echo_actor.py rename to python/ray/serve/examples/echo_actor.py index 49ed23fe7..dfc165513 100644 --- a/python/ray/experimental/serve/examples/echo_actor.py +++ b/python/ray/serve/examples/echo_actor.py @@ -10,8 +10,8 @@ import time import requests import ray -from ray.experimental import serve -from ray.experimental.serve.utils import pformat_color_json +from ray import serve +from ray.serve.utils import pformat_color_json class MagicCounter: diff --git a/python/ray/experimental/serve/examples/echo_actor_batch.py b/python/ray/serve/examples/echo_actor_batch.py similarity index 92% rename from python/ray/experimental/serve/examples/echo_actor_batch.py rename to python/ray/serve/examples/echo_actor_batch.py index 51844e6b6..5420e60ad 100644 --- a/python/ray/experimental/serve/examples/echo_actor_batch.py +++ b/python/ray/serve/examples/echo_actor_batch.py @@ -10,9 +10,9 @@ import time import requests import ray -from ray.experimental import serve -from ray.experimental.serve.utils import pformat_color_json -from ray.experimental.serve import BackendConfig +from ray import serve +from ray.serve.utils import pformat_color_json +from ray.serve import BackendConfig class MagicCounter: diff --git a/python/ray/experimental/serve/examples/echo_batching.py b/python/ray/serve/examples/echo_batching.py similarity index 95% rename from python/ray/experimental/serve/examples/echo_batching.py rename to python/ray/serve/examples/echo_batching.py index 0622c2efe..6bea509de 100644 --- a/python/ray/experimental/serve/examples/echo_batching.py +++ b/python/ray/serve/examples/echo_batching.py @@ -3,8 +3,8 @@ This example has backend which has batching functionality enabled. """ import ray -from ray.experimental import serve -from ray.experimental.serve import BackendConfig +from ray import serve +from ray.serve import BackendConfig class MagicCounter: diff --git a/python/ray/experimental/serve/examples/echo_error.py b/python/ray/serve/examples/echo_error.py similarity index 91% rename from python/ray/experimental/serve/examples/echo_error.py rename to python/ray/serve/examples/echo_error.py index a6ed56b99..b286dc31d 100644 --- a/python/ray/experimental/serve/examples/echo_error.py +++ b/python/ray/serve/examples/echo_error.py @@ -18,8 +18,8 @@ import time import requests import ray -from ray.experimental import serve -from ray.experimental.serve.utils import pformat_color_json +from ray import serve +from ray.serve.utils import pformat_color_json def echo(_): diff --git a/python/ray/experimental/serve/examples/echo_fixed_packing.py b/python/ray/serve/examples/echo_fixed_packing.py similarity index 92% rename from python/ray/experimental/serve/examples/echo_fixed_packing.py rename to python/ray/serve/examples/echo_fixed_packing.py index d5de07f45..fa0258a4b 100644 --- a/python/ray/experimental/serve/examples/echo_fixed_packing.py +++ b/python/ray/serve/examples/echo_fixed_packing.py @@ -10,8 +10,8 @@ import time import requests -from ray.experimental import serve -from ray.experimental.serve.utils import pformat_color_json +from ray import serve +from ray.serve.utils import pformat_color_json def echo_v1(_): diff --git a/python/ray/experimental/serve/examples/echo_full.py b/python/ray/serve/examples/echo_full.py similarity index 95% rename from python/ray/experimental/serve/examples/echo_full.py rename to python/ray/serve/examples/echo_full.py index 56e22d436..23b9c64ae 100644 --- a/python/ray/experimental/serve/examples/echo_full.py +++ b/python/ray/serve/examples/echo_full.py @@ -7,8 +7,8 @@ import time import requests import ray -import ray.experimental.serve as serve -from ray.experimental.serve.utils import pformat_color_json +import ray.serve as serve +from ray.serve.utils import pformat_color_json # initialize ray serve system. # blocking=True will wait for HTTP server to be ready to serve request. diff --git a/python/ray/experimental/serve/examples/echo_pipeline.py b/python/ray/serve/examples/echo_pipeline.py similarity index 97% rename from python/ray/experimental/serve/examples/echo_pipeline.py rename to python/ray/serve/examples/echo_pipeline.py index 772ad7883..a28a2a044 100644 --- a/python/ray/experimental/serve/examples/echo_pipeline.py +++ b/python/ray/serve/examples/echo_pipeline.py @@ -2,7 +2,7 @@ Ray serve pipeline example """ import ray -import ray.experimental.serve as serve +import ray.serve as serve import time # initialize ray serve system. diff --git a/python/ray/experimental/serve/examples/echo_round_robin.py b/python/ray/serve/examples/echo_round_robin.py similarity index 89% rename from python/ray/experimental/serve/examples/echo_round_robin.py rename to python/ray/serve/examples/echo_round_robin.py index 01b74b92e..d17924df5 100644 --- a/python/ray/experimental/serve/examples/echo_round_robin.py +++ b/python/ray/serve/examples/echo_round_robin.py @@ -6,8 +6,8 @@ import time import requests -from ray.experimental import serve -from ray.experimental.serve.utils import pformat_color_json +from ray import serve +from ray.serve.utils import pformat_color_json def echo_v1(_): diff --git a/python/ray/experimental/serve/examples/echo_slo_reverse.py b/python/ray/serve/examples/echo_slo_reverse.py similarity index 98% rename from python/ray/experimental/serve/examples/echo_slo_reverse.py rename to python/ray/serve/examples/echo_slo_reverse.py index 6e5224b20..7232320f5 100644 --- a/python/ray/experimental/serve/examples/echo_slo_reverse.py +++ b/python/ray/serve/examples/echo_slo_reverse.py @@ -7,7 +7,7 @@ import time import requests import ray -import ray.experimental.serve as serve +import ray.serve as serve # initialize ray serve system. # blocking=True will wait for HTTP server to be ready to serve request. diff --git a/python/ray/experimental/serve/examples/echo_split.py b/python/ray/serve/examples/echo_split.py similarity index 89% rename from python/ray/experimental/serve/examples/echo_split.py rename to python/ray/serve/examples/echo_split.py index 6942db5a5..a9a7dabe9 100644 --- a/python/ray/experimental/serve/examples/echo_split.py +++ b/python/ray/serve/examples/echo_split.py @@ -6,8 +6,8 @@ import time import requests -from ray.experimental import serve -from ray.experimental.serve.utils import pformat_color_json +from ray import serve +from ray.serve.utils import pformat_color_json def echo_v1(_): diff --git a/python/ray/experimental/serve/exceptions.py b/python/ray/serve/exceptions.py similarity index 100% rename from python/ray/experimental/serve/exceptions.py rename to python/ray/serve/exceptions.py diff --git a/python/ray/experimental/serve/global_state.py b/python/ray/serve/global_state.py similarity index 88% rename from python/ray/experimental/serve/global_state.py rename to python/ray/serve/global_state.py index ae90423dc..363098cd7 100644 --- a/python/ray/experimental/serve/global_state.py +++ b/python/ray/serve/global_state.py @@ -1,19 +1,18 @@ import ray -from ray.experimental.serve.constants import ( - BOOTSTRAP_KV_STORE_CONN_KEY, DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT, - SERVE_NURSERY_NAME, ASYNC_CONCURRENCY) -from ray.experimental.serve.kv_store_service import ( - BackendTable, RoutingTable, TrafficPolicyTable) -from ray.experimental.serve.metric import (MetricMonitor, - start_metric_monitor_loop) +from ray.serve.constants import (BOOTSTRAP_KV_STORE_CONN_KEY, + DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT, + SERVE_NURSERY_NAME, ASYNC_CONCURRENCY) +from ray.serve.kv_store_service import (BackendTable, RoutingTable, + TrafficPolicyTable) +from ray.serve.metric import (MetricMonitor, start_metric_monitor_loop) -from ray.experimental.serve.policy import RoutePolicy -from ray.experimental.serve.server import HTTPActor +from ray.serve.policy import RoutePolicy +from ray.serve.server import HTTPActor def start_initial_state(kv_store_connector): nursery_handle = ActorNursery.remote() - ray.experimental.register_actor(SERVE_NURSERY_NAME, nursery_handle) + ray.util.register_actor(SERVE_NURSERY_NAME, nursery_handle) ray.get( nursery_handle.store_bootstrap_state.remote( @@ -92,8 +91,7 @@ class GlobalState: def __init__(self, actor_nursery_handle=None): # Get actor nursery handle if actor_nursery_handle is None: - actor_nursery_handle = ray.experimental.get_actor( - SERVE_NURSERY_NAME) + actor_nursery_handle = ray.util.get_actor(SERVE_NURSERY_NAME) self.actor_nursery_handle = actor_nursery_handle # Connect to all the table @@ -126,7 +124,7 @@ class GlobalState: return_policy = default_policy # check if there is already a queue_actor running # with policy as p.name for the case where - # serve nursery exists: ray.experimental.get_actor(SERVE_NURSERY_NAME) + # serve nursery exists: ray.util.get_actor(SERVE_NURSERY_NAME) for p in RoutePolicy: queue_actor_tag = "queue_actor::" + p.name if queue_actor_tag in self.actor_handle_cache: diff --git a/python/ray/experimental/serve/handle.py b/python/ray/serve/handle.py similarity index 92% rename from python/ray/experimental/serve/handle.py rename to python/ray/serve/handle.py index a468fe178..3efce5308 100644 --- a/python/ray/experimental/serve/handle.py +++ b/python/ray/serve/handle.py @@ -1,8 +1,8 @@ -from ray.experimental import serve -from ray.experimental.serve.context import TaskContext -from ray.experimental.serve.exceptions import RayServeException -from ray.experimental.serve.constants import DEFAULT_HTTP_ADDRESS -from ray.experimental.serve.request_params import RequestMetadata +from ray import serve +from ray.serve.context import TaskContext +from ray.serve.exceptions import RayServeException +from ray.serve.constants import DEFAULT_HTTP_ADDRESS +from ray.serve.request_params import RequestMetadata class RayServeHandle: diff --git a/python/ray/experimental/serve/http_util.py b/python/ray/serve/http_util.py similarity index 100% rename from python/ray/experimental/serve/http_util.py rename to python/ray/serve/http_util.py diff --git a/python/ray/experimental/serve/kv_store_service.py b/python/ray/serve/kv_store_service.py similarity index 98% rename from python/ray/experimental/serve/kv_store_service.py rename to python/ray/serve/kv_store_service.py index 7d24b84b9..5960e0b97 100644 --- a/python/ray/experimental/serve/kv_store_service.py +++ b/python/ray/serve/kv_store_service.py @@ -1,13 +1,12 @@ import json import sqlite3 from abc import ABC +from typing import Union from ray import cloudpickle as pickle - import ray.experimental.internal_kv as ray_kv -from ray.experimental.serve.utils import logger -from typing import Union -from ray.experimental.serve.constants import NO_ROUTE_KEY +from ray.serve.utils import logger +from ray.serve.constants import NO_ROUTE_KEY class NamespacedKVStore(ABC): diff --git a/python/ray/experimental/serve/metric.py b/python/ray/serve/metric.py similarity index 100% rename from python/ray/experimental/serve/metric.py rename to python/ray/serve/metric.py diff --git a/python/ray/experimental/serve/policy.py b/python/ray/serve/policy.py similarity index 98% rename from python/ray/experimental/serve/policy.py rename to python/ray/serve/policy.py index 1b32283a2..66fd73e64 100644 --- a/python/ray/experimental/serve/policy.py +++ b/python/ray/serve/policy.py @@ -4,8 +4,8 @@ import itertools import numpy as np import ray -from ray.experimental.serve.queues import (CentralizedQueues) -from ray.experimental.serve.utils import logger +from ray.serve.queues import (CentralizedQueues) +from ray.serve.utils import logger class RandomPolicyQueue(CentralizedQueues): diff --git a/python/ray/experimental/serve/queues.py b/python/ray/serve/queues.py similarity index 99% rename from python/ray/experimental/serve/queues.py rename to python/ray/serve/queues.py index f456f9ebf..4e944e967 100644 --- a/python/ray/experimental/serve/queues.py +++ b/python/ray/serve/queues.py @@ -13,7 +13,7 @@ import pickle import blist import ray -from ray.experimental.serve.utils import logger +from ray.serve.utils import logger class Query: diff --git a/python/ray/experimental/serve/request_params.py b/python/ray/serve/request_params.py similarity index 94% rename from python/ray/experimental/serve/request_params.py rename to python/ray/serve/request_params.py index b6545fdec..06e0e1b04 100644 --- a/python/ray/experimental/serve/request_params.py +++ b/python/ray/serve/request_params.py @@ -1,5 +1,5 @@ import time -from ray.experimental.serve.constants import DEFAULT_LATENCY_SLO_MS +from ray.serve.constants import DEFAULT_LATENCY_SLO_MS class RequestMetadata: diff --git a/python/ray/experimental/serve/scripts.py b/python/ray/serve/scripts.py similarity index 96% rename from python/ray/experimental/serve/scripts.py rename to python/ray/serve/scripts.py index b3830559f..812ebf0dd 100644 --- a/python/ray/experimental/serve/scripts.py +++ b/python/ray/serve/scripts.py @@ -3,7 +3,7 @@ import json import click import ray -import ray.experimental.serve as serve +import ray.serve as serve @click.group("serve", help="Commands working with ray serve") diff --git a/python/ray/experimental/serve/server.py b/python/ray/serve/server.py similarity index 95% rename from python/ray/experimental/serve/server.py rename to python/ray/serve/server.py index 6ec55e3b4..514530bd8 100644 --- a/python/ray/experimental/serve/server.py +++ b/python/ray/serve/server.py @@ -5,10 +5,10 @@ import uvicorn import ray from ray.experimental.async_api import _async_init -from ray.experimental.serve.constants import HTTP_ROUTER_CHECKER_INTERVAL_S -from ray.experimental.serve.context import TaskContext -from ray.experimental.serve.utils import BytesEncoder -from ray.experimental.serve.request_params import RequestMetadata +from ray.serve.constants import HTTP_ROUTER_CHECKER_INTERVAL_S +from ray.serve.context import TaskContext +from ray.serve.utils import BytesEncoder +from ray.serve.request_params import RequestMetadata from urllib.parse import parse_qs @@ -62,7 +62,7 @@ class HTTPProxy: assert ray.is_initialized() # Delay import due to GlobalState depends on HTTP actor - from ray.experimental.serve.global_state import GlobalState + from ray.serve.global_state import GlobalState self.serve_global_state = GlobalState() self.route_table_cache = dict() diff --git a/python/ray/experimental/serve/task_runner.py b/python/ray/serve/task_runner.py similarity index 96% rename from python/ray/experimental/serve/task_runner.py rename to python/ray/serve/task_runner.py index a7b186432..7e0704656 100644 --- a/python/ray/experimental/serve/task_runner.py +++ b/python/ray/serve/task_runner.py @@ -2,11 +2,11 @@ import time import traceback import ray -from ray.experimental.serve import context as serve_context -from ray.experimental.serve.context import FakeFlaskRequest +from ray.serve import context as serve_context +from ray.serve.context import FakeFlaskRequest from collections import defaultdict -from ray.experimental.serve.utils import parse_request_item -from ray.experimental.serve.exceptions import RayServeException +from ray.serve.utils import parse_request_item +from ray.serve.exceptions import RayServeException class TaskRunner: diff --git a/python/ray/experimental/serve/tests/conftest.py b/python/ray/serve/tests/conftest.py similarity index 94% rename from python/ray/experimental/serve/tests/conftest.py rename to python/ray/serve/tests/conftest.py index cf8047e42..2f6d629bb 100644 --- a/python/ray/experimental/serve/tests/conftest.py +++ b/python/ray/serve/tests/conftest.py @@ -4,7 +4,7 @@ import tempfile import pytest import ray -from ray.experimental import serve +from ray import serve @pytest.fixture(scope="session") diff --git a/python/ray/experimental/serve/tests/test_api.py b/python/ray/serve/tests/test_api.py similarity index 98% rename from python/ray/experimental/serve/tests/test_api.py rename to python/ray/serve/tests/test_api.py index c432dd35f..93ba92f6e 100644 --- a/python/ray/experimental/serve/tests/test_api.py +++ b/python/ray/serve/tests/test_api.py @@ -2,10 +2,10 @@ import time import pytest import requests -from ray.experimental import serve -from ray.experimental.serve import BackendConfig +from ray import serve +from ray.serve import BackendConfig import ray -from ray.experimental.serve.constants import NO_ROUTE_KEY +from ray.serve.constants import NO_ROUTE_KEY def test_e2e(serve_instance): diff --git a/python/ray/experimental/serve/tests/test_metric.py b/python/ray/serve/tests/test_metric.py similarity index 97% rename from python/ray/experimental/serve/tests/test_metric.py rename to python/ray/serve/tests/test_metric.py index 54e0ac6db..ed8d69c81 100644 --- a/python/ray/experimental/serve/tests/test_metric.py +++ b/python/ray/serve/tests/test_metric.py @@ -2,7 +2,7 @@ import numpy as np import pytest import ray -from ray.experimental.serve.metric import MetricMonitor +from ray.serve.metric import MetricMonitor @pytest.fixture(scope="session") diff --git a/python/ray/experimental/serve/tests/test_persistence.py b/python/ray/serve/tests/test_persistence.py similarity index 89% rename from python/ray/experimental/serve/tests/test_persistence.py rename to python/ray/serve/tests/test_persistence.py index 52c4cdc92..602f3a825 100644 --- a/python/ray/experimental/serve/tests/test_persistence.py +++ b/python/ray/serve/tests/test_persistence.py @@ -3,7 +3,7 @@ import subprocess import tempfile import ray -from ray.experimental import serve +from ray import serve def test_new_driver(serve_instance): @@ -11,7 +11,7 @@ def test_new_driver(serve_instance): import ray ray.init(address="auto") -from ray.experimental import serve +from ray import serve serve.init() @serve.route("/driver") diff --git a/python/ray/experimental/serve/tests/test_queue.py b/python/ray/serve/tests/test_queue.py similarity index 98% rename from python/ray/experimental/serve/tests/test_queue.py rename to python/ray/serve/tests/test_queue.py index 83e72c675..b5a2e8f66 100644 --- a/python/ray/experimental/serve/tests/test_queue.py +++ b/python/ray/serve/tests/test_queue.py @@ -3,10 +3,10 @@ import asyncio import pytest import ray -from ray.experimental.serve.policy import ( +from ray.serve.policy import ( RandomPolicyQueue, RandomPolicyQueueActor, RoundRobinPolicyQueueActor, PowerOfTwoPolicyQueueActor, FixedPackingPolicyQueueActor) -from ray.experimental.serve.request_params import RequestMetadata +from ray.serve.request_params import RequestMetadata pytestmark = pytest.mark.asyncio diff --git a/python/ray/experimental/serve/tests/test_routing.py b/python/ray/serve/tests/test_routing.py similarity index 90% rename from python/ray/experimental/serve/tests/test_routing.py rename to python/ray/serve/tests/test_routing.py index 7cbe237b2..dc19ba2bd 100644 --- a/python/ray/experimental/serve/tests/test_routing.py +++ b/python/ray/serve/tests/test_routing.py @@ -1,8 +1,8 @@ import os import tempfile -from ray.experimental.serve.kv_store_service import ( - InMemoryKVStore, RayInternalKVStore, SQLiteKVStore) +from ray.serve.kv_store_service import (InMemoryKVStore, RayInternalKVStore, + SQLiteKVStore) def test_default_in_memory_kv(): diff --git a/python/ray/experimental/serve/tests/test_serve.py b/python/ray/serve/tests/test_serve.py similarity index 100% rename from python/ray/experimental/serve/tests/test_serve.py rename to python/ray/serve/tests/test_serve.py diff --git a/python/ray/experimental/serve/tests/test_task_runner.py b/python/ray/serve/tests/test_task_runner.py similarity index 89% rename from python/ray/experimental/serve/tests/test_task_runner.py rename to python/ray/serve/tests/test_task_runner.py index ff4ebd59f..5a0236c6e 100644 --- a/python/ray/experimental/serve/tests/test_task_runner.py +++ b/python/ray/serve/tests/test_task_runner.py @@ -1,11 +1,11 @@ import pytest import ray -import ray.experimental.serve.context as context -from ray.experimental.serve.policy import RoundRobinPolicyQueueActor -from ray.experimental.serve.task_runner import ( - RayServeMixin, TaskRunner, TaskRunnerActor, wrap_to_ray_error) -from ray.experimental.serve.request_params import RequestMetadata +import ray.serve.context as context +from ray.serve.policy import RoundRobinPolicyQueueActor +from ray.serve.task_runner import (RayServeMixin, TaskRunner, TaskRunnerActor, + wrap_to_ray_error) +from ray.serve.request_params import RequestMetadata pytestmark = pytest.mark.asyncio diff --git a/python/ray/experimental/serve/tests/test_util.py b/python/ray/serve/tests/test_util.py similarity index 79% rename from python/ray/experimental/serve/tests/test_util.py rename to python/ray/serve/tests/test_util.py index be17e4ba6..4cc944643 100644 --- a/python/ray/experimental/serve/tests/test_util.py +++ b/python/ray/serve/tests/test_util.py @@ -1,6 +1,6 @@ import json -from ray.experimental.serve.utils import BytesEncoder +from ray.serve.utils import BytesEncoder def test_bytes_encoder(): diff --git a/python/ray/experimental/serve/utils.py b/python/ray/serve/utils.py similarity index 95% rename from python/ray/experimental/serve/utils.py rename to python/ray/serve/utils.py index 42283d06c..eb78eab6d 100644 --- a/python/ray/experimental/serve/utils.py +++ b/python/ray/serve/utils.py @@ -8,8 +8,8 @@ import os import requests from pygments import formatters, highlight, lexers -from ray.experimental.serve.context import FakeFlaskRequest, TaskContext -from ray.experimental.serve.http_util import build_flask_request +from ray.serve.context import FakeFlaskRequest, TaskContext +from ray.serve.http_util import build_flask_request import itertools diff --git a/python/ray/tests/test_actor_advanced.py b/python/ray/tests/test_actor_advanced.py index 66b7e9447..cb6641bf7 100644 --- a/python/ray/tests/test_actor_advanced.py +++ b/python/ray/tests/test_actor_advanced.py @@ -628,22 +628,22 @@ def test_register_and_get_named_actors(ray_start_regular): f1 = Foo.remote() # Test saving f. - ray.experimental.register_actor("f1", f1) + ray.util.register_actor("f1", f1) # Test getting f. - f2 = ray.experimental.get_actor("f1") + f2 = ray.util.get_actor("f1") assert f1._actor_id == f2._actor_id # Test same name register shall raise error. with pytest.raises(ValueError): - ray.experimental.register_actor("f1", f2) + ray.util.register_actor("f1", f2) # Test register with wrong object type. with pytest.raises(TypeError): - ray.experimental.register_actor("f3", 1) + ray.util.register_actor("f3", 1) # Test getting a nonexistent actor. with pytest.raises(ValueError): - ray.experimental.get_actor("nonexistent") + ray.util.get_actor("nonexistent") # Test method assert ray.get(f1.method.remote()) == 1 @@ -682,7 +682,7 @@ ray.get(actor.ping.remote()) """.format(redis_address, actor_name) run_string_as_driver(driver_script) - detached_actor = ray.experimental.get_actor(actor_name) + detached_actor = ray.util.get_actor(actor_name) assert ray.get(detached_actor.ping.remote()) == "pong" diff --git a/python/ray/tests/test_actor_pool.py b/python/ray/tests/test_actor_pool.py index 1e186f92d..a81d79fad 100644 --- a/python/ray/tests/test_actor_pool.py +++ b/python/ray/tests/test_actor_pool.py @@ -2,7 +2,7 @@ import time import pytest import ray -from ray.experimental import ActorPool +from ray.util import ActorPool @pytest.fixture diff --git a/python/ray/tests/test_iter.py b/python/ray/tests/test_iter.py index a811aaa12..ebef30cc9 100644 --- a/python/ray/tests/test_iter.py +++ b/python/ray/tests/test_iter.py @@ -2,7 +2,7 @@ import time from collections import Counter import ray -from ray.experimental.iter import from_items, from_iterators, from_range, \ +from ray.util.iter import from_items, from_iterators, from_range, \ from_actors, ParallelIteratorWorker diff --git a/python/ray/tests/test_joblib.py b/python/ray/tests/test_joblib.py index a5e0d972b..ea51f1aef 100644 --- a/python/ray/tests/test_joblib.py +++ b/python/ray/tests/test_joblib.py @@ -16,7 +16,7 @@ from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier from sklearn.model_selection import cross_val_score -from ray.experimental.joblib import register_ray +from ray.util.joblib import register_ray import ray @@ -28,7 +28,7 @@ def test_register_ray(): def test_ray_backend(shutdown_only): register_ray() - from ray.experimental.joblib.ray_backend import RayBackend + from ray.util.joblib.ray_backend import RayBackend with joblib.parallel_backend("ray"): assert type(joblib.parallel.get_active_backend()[0]) == RayBackend diff --git a/python/ray/tests/test_multi_node.py b/python/ray/tests/test_multi_node.py index bcdb5afae..1d397f43f 100644 --- a/python/ray/tests/test_multi_node.py +++ b/python/ray/tests/test_multi_node.py @@ -188,7 +188,7 @@ class Counter: self.count += 1 return self.count counter = Counter.remote() -ray.experimental.register_actor("Counter", counter) +ray.util.register_actor("Counter", counter) time.sleep(100) """.format(address) @@ -199,7 +199,7 @@ import time ray.init(address="{}") while True: try: - counter = ray.experimental.get_actor("Counter") + counter = ray.util.get_actor("Counter") break except ValueError: time.sleep(1) diff --git a/python/ray/tests/test_multi_node_2.py b/python/ray/tests/test_multi_node_2.py index 0a49464a6..495709d93 100644 --- a/python/ray/tests/test_multi_node_2.py +++ b/python/ray/tests/test_multi_node_2.py @@ -61,7 +61,6 @@ def setup_monitor(address): monitor.subscribe(ray.gcs_utils.XRAY_HEARTBEAT_BATCH_CHANNEL) monitor.subscribe(ray.gcs_utils.XRAY_JOB_CHANNEL) # TODO: Remove? monitor.update_raylet_map(_append_port=True) - monitor._maybe_flush_gcs() return monitor diff --git a/python/ray/util/__init__.py b/python/ray/util/__init__.py index e69de29bb..f12c344d3 100644 --- a/python/ray/util/__init__.py +++ b/python/ray/util/__init__.py @@ -0,0 +1,10 @@ +from .named_actors import get_actor, register_actor +from .actor_pool import ActorPool +from . import iter + +__all__ = [ + "ActorPool", + "iter", + "get_actor", + "register_actor", +] diff --git a/python/ray/experimental/actor_pool.py b/python/ray/util/actor_pool.py similarity index 100% rename from python/ray/experimental/actor_pool.py rename to python/ray/util/actor_pool.py diff --git a/python/ray/experimental/iter.py b/python/ray/util/iter.py similarity index 98% rename from python/ray/experimental/iter.py rename to python/ray/util/iter.py index 89c6e6262..2d5ba9a06 100644 --- a/python/ray/experimental/iter.py +++ b/python/ray/util/iter.py @@ -112,7 +112,7 @@ class ParallelIterator(Generic[T]): Examples: >>> # Applying a function over items in parallel. - >>> it = ray.experimental.iter.from_items([1, 2, 3], num_shards=2) + >>> it = ray.util.iter.from_items([1, 2, 3], num_shards=2) ... <__main__.ParallelIterator object> >>> it = it.for_each(lambda x: x * 2).gather_sync() ... <__main__.LocalIterator object> @@ -120,13 +120,13 @@ class ParallelIterator(Generic[T]): ... [2, 4, 6] >>> # Creating from generators. - >>> it = ray.experimental.iter.from_iterators([range(3), range(3)]) + >>> it = ray.util.iter.from_iterators([range(3), range(3)]) ... <__main__.ParallelIterator object> >>> print(list(it.gather_sync())) ... [0, 0, 1, 1, 2, 2] >>> # Accessing the individual shards of an iterator. - >>> it = ray.experimental.iter.from_range(10, num_shards=2) + >>> it = ray.util.iter.from_range(10, num_shards=2) ... <__main__.ParallelIterator object> >>> it0 = it.get_shard(0) ... <__main__.LocalIterator object> @@ -138,7 +138,7 @@ class ParallelIterator(Generic[T]): ... [5, 6, 7, 8, 9] >>> # Gathering results from actors synchronously in parallel. - >>> it = ray.experimental.iter.from_actors(workers) + >>> it = ray.util.iter.from_actors(workers) ... <__main__.ParallelIterator object> >>> it = it.batch_across_shards() ... <__main__.LocalIterator object> diff --git a/python/ray/experimental/joblib/__init__.py b/python/ray/util/joblib/__init__.py similarity index 88% rename from python/ray/experimental/joblib/__init__.py rename to python/ray/util/joblib/__init__.py index 275cae2ea..77633c9cd 100644 --- a/python/ray/experimental/joblib/__init__.py +++ b/python/ray/util/joblib/__init__.py @@ -4,7 +4,7 @@ from joblib.parallel import register_parallel_backend def register_ray(): """ Register Ray Backend to be called with parallel_backend("ray"). """ try: - from ray.experimental.joblib.ray_backend import RayBackend + from ray.util.joblib.ray_backend import RayBackend register_parallel_backend("ray", RayBackend) except ImportError: msg = ("To use the ray backend you must install ray." diff --git a/python/ray/experimental/joblib/ray_backend.py b/python/ray/util/joblib/ray_backend.py similarity index 95% rename from python/ray/experimental/joblib/ray_backend.py rename to python/ray/util/joblib/ray_backend.py index 56c1d7878..660c8cb1c 100644 --- a/python/ray/experimental/joblib/ray_backend.py +++ b/python/ray/util/joblib/ray_backend.py @@ -2,7 +2,7 @@ from joblib._parallel_backends import MultiprocessingBackend from joblib.pool import PicklingPool import logging -from ray.experimental.multiprocessing.pool import Pool +from ray.util.multiprocessing.pool import Pool import ray RAY_ADDRESS_ENV = "RAY_ADDRESS" @@ -24,7 +24,7 @@ class RayBackend(MultiprocessingBackend): """Make Ray Pool the father class of PicklingPool. PicklingPool is a father class that inherits Pool from multiprocessing.pool. The next line is a patch, which changes the inheritance of Pool to be from - ray.experimental.multiprocessing.pool. + ray.util.multiprocessing.pool. """ PicklingPool.__bases__ = (Pool, ) """Use all available resources when n_jobs == -1. Must set RAY_ADDRESS diff --git a/python/ray/experimental/named_actors.py b/python/ray/util/named_actors.py similarity index 100% rename from python/ray/experimental/named_actors.py rename to python/ray/util/named_actors.py diff --git a/python/ray/util/sgd/__init__.py b/python/ray/util/sgd/__init__.py new file mode 100644 index 000000000..edd6c4493 --- /dev/null +++ b/python/ray/util/sgd/__init__.py @@ -0,0 +1,4 @@ +from ray.util.sgd.pytorch import PyTorchTrainer +from ray.util.sgd.tf import TFTrainer + +__all__ = ["PyTorchTrainer", "TFTrainer"] diff --git a/python/ray/experimental/sgd/pytorch/__init__.py b/python/ray/util/sgd/pytorch/__init__.py similarity index 64% rename from python/ray/experimental/sgd/pytorch/__init__.py rename to python/ray/util/sgd/pytorch/__init__.py index 20d6394a0..dd284cb9e 100644 --- a/python/ray/experimental/sgd/pytorch/__init__.py +++ b/python/ray/util/sgd/pytorch/__init__.py @@ -7,8 +7,8 @@ PyTorchTrainable = None try: import torch # noqa: F401 - from ray.experimental.sgd.pytorch.pytorch_trainer import (PyTorchTrainer, - PyTorchTrainable) + from ray.util.sgd.pytorch.pytorch_trainer import (PyTorchTrainer, + PyTorchTrainable) __all__ = ["PyTorchTrainer", "PyTorchTrainable"] except ImportError: diff --git a/python/ray/experimental/sgd/pytorch/distributed_pytorch_runner.py b/python/ray/util/sgd/pytorch/distributed_pytorch_runner.py similarity index 98% rename from python/ray/experimental/sgd/pytorch/distributed_pytorch_runner.py rename to python/ray/util/sgd/pytorch/distributed_pytorch_runner.py index 0917eef65..8db45dfe4 100644 --- a/python/ray/experimental/sgd/pytorch/distributed_pytorch_runner.py +++ b/python/ray/util/sgd/pytorch/distributed_pytorch_runner.py @@ -7,7 +7,7 @@ import torch.distributed as dist import torch.utils.data from torch.nn.parallel import DistributedDataParallel -from ray.experimental.sgd.pytorch.pytorch_runner import PyTorchRunner +from ray.util.sgd.pytorch.pytorch_runner import PyTorchRunner logger = logging.getLogger(__name__) diff --git a/python/ray/experimental/sgd/pytorch/examples/cifar_pytorch_example.py b/python/ray/util/sgd/pytorch/examples/cifar_pytorch_example.py similarity index 96% rename from python/ray/experimental/sgd/pytorch/examples/cifar_pytorch_example.py rename to python/ray/util/sgd/pytorch/examples/cifar_pytorch_example.py index 531740649..e039ec839 100644 --- a/python/ray/experimental/sgd/pytorch/examples/cifar_pytorch_example.py +++ b/python/ray/util/sgd/pytorch/examples/cifar_pytorch_example.py @@ -8,9 +8,9 @@ import torchvision import torchvision.transforms as transforms import ray -from ray.experimental.sgd.pytorch import (PyTorchTrainer, PyTorchTrainable) -from ray.experimental.sgd.pytorch.resnet import ResNet18 -from ray.experimental.sgd.pytorch.utils import TEST_MODE +from ray.util.sgd.pytorch import (PyTorchTrainer, PyTorchTrainable) +from ray.util.sgd.pytorch.resnet import ResNet18 +from ray.util.sgd.pytorch.utils import TEST_MODE def initialization_hook(runner): diff --git a/python/ray/experimental/sgd/pytorch/examples/dcgan.py b/python/ray/util/sgd/pytorch/examples/dcgan.py similarity index 97% rename from python/ray/experimental/sgd/pytorch/examples/dcgan.py rename to python/ray/util/sgd/pytorch/examples/dcgan.py index 91f41e265..110697d10 100644 --- a/python/ray/experimental/sgd/pytorch/examples/dcgan.py +++ b/python/ray/util/sgd/pytorch/examples/dcgan.py @@ -15,8 +15,8 @@ from torch.nn import functional as F from scipy.stats import entropy import ray -from ray.experimental.sgd import PyTorchTrainer -from ray.experimental.sgd.pytorch.utils import TEST_MODE +from ray.util.sgd import PyTorchTrainer +from ray.util.sgd.pytorch.utils import TEST_MODE # Training parameters TRAIN_BATCHES = 5 @@ -256,8 +256,7 @@ if __name__ == "__main__": ray.init(address=args.address) path = os.path.dirname(ray.__file__) - model_path = os.path.join( - path, "experimental/sgd/pytorch/examples/mnist_cnn.pt") + model_path = os.path.join(path, "util/sgd/pytorch/examples/mnist_cnn.pt") # load the pretrained mnist classification model for inception_score trainer = train_example( diff --git a/python/ray/experimental/sgd/pytorch/examples/example-sgd.yaml b/python/ray/util/sgd/pytorch/examples/example-sgd.yaml similarity index 100% rename from python/ray/experimental/sgd/pytorch/examples/example-sgd.yaml rename to python/ray/util/sgd/pytorch/examples/example-sgd.yaml diff --git a/python/ray/experimental/sgd/pytorch/examples/mnist_cnn.pt b/python/ray/util/sgd/pytorch/examples/mnist_cnn.pt similarity index 100% rename from python/ray/experimental/sgd/pytorch/examples/mnist_cnn.pt rename to python/ray/util/sgd/pytorch/examples/mnist_cnn.pt diff --git a/python/ray/experimental/sgd/pytorch/examples/train_example.py b/python/ray/util/sgd/pytorch/examples/train_example.py similarity index 98% rename from python/ray/experimental/sgd/pytorch/examples/train_example.py rename to python/ray/util/sgd/pytorch/examples/train_example.py index 4b1f94bdb..bc7ca7c21 100644 --- a/python/ray/experimental/sgd/pytorch/examples/train_example.py +++ b/python/ray/util/sgd/pytorch/examples/train_example.py @@ -13,7 +13,7 @@ import numpy as np import torch import torch.nn as nn -from ray.experimental.sgd import PyTorchTrainer +from ray.util.sgd import PyTorchTrainer class LinearDataset(torch.utils.data.Dataset): diff --git a/python/ray/experimental/sgd/pytorch/examples/tune_example.py b/python/ray/util/sgd/pytorch/examples/tune_example.py similarity index 97% rename from python/ray/experimental/sgd/pytorch/examples/tune_example.py rename to python/ray/util/sgd/pytorch/examples/tune_example.py index bacfa8eac..9a178e230 100644 --- a/python/ray/experimental/sgd/pytorch/examples/tune_example.py +++ b/python/ray/util/sgd/pytorch/examples/tune_example.py @@ -14,7 +14,7 @@ import torch.nn as nn import ray from ray import tune -from ray.experimental.sgd.pytorch.pytorch_trainer import PyTorchTrainable +from ray.util.sgd.pytorch.pytorch_trainer import PyTorchTrainable class LinearDataset(torch.utils.data.Dataset): diff --git a/python/ray/experimental/sgd/pytorch/pytorch_runner.py b/python/ray/util/sgd/pytorch/pytorch_runner.py similarity index 99% rename from python/ray/experimental/sgd/pytorch/pytorch_runner.py rename to python/ray/util/sgd/pytorch/pytorch_runner.py index 5587574d0..7c8803896 100644 --- a/python/ray/experimental/sgd/pytorch/pytorch_runner.py +++ b/python/ray/util/sgd/pytorch/pytorch_runner.py @@ -8,8 +8,8 @@ import torch.utils.data from torch.utils.data import Dataset import ray -from ray.experimental.sgd.pytorch import utils as pytorch_utils -from ray.experimental.sgd import utils +from ray.util.sgd.pytorch import utils as pytorch_utils +from ray.util.sgd import utils logger = logging.getLogger(__name__) amp = None diff --git a/python/ray/experimental/sgd/pytorch/pytorch_trainer.py b/python/ray/util/sgd/pytorch/pytorch_trainer.py similarity index 98% rename from python/ray/experimental/sgd/pytorch/pytorch_trainer.py rename to python/ray/util/sgd/pytorch/pytorch_trainer.py index a10933532..6eec29ba3 100644 --- a/python/ray/experimental/sgd/pytorch/pytorch_trainer.py +++ b/python/ray/util/sgd/pytorch/pytorch_trainer.py @@ -11,11 +11,11 @@ import ray from ray.tune import Trainable from ray.tune.trial import Resources -from ray.experimental.sgd.pytorch.distributed_pytorch_runner import ( +from ray.util.sgd.pytorch.distributed_pytorch_runner import ( DistributedPyTorchRunner) -from ray.experimental.sgd import utils -from ray.experimental.sgd.pytorch.pytorch_runner import PyTorchRunner -from ray.experimental.sgd.pytorch import utils as pytorch_utils +from ray.util.sgd import utils +from ray.util.sgd.pytorch.pytorch_runner import PyTorchRunner +from ray.util.sgd.pytorch import utils as pytorch_utils logger = logging.getLogger(__name__) RESIZE_COOLDOWN_S = 10 diff --git a/python/ray/experimental/sgd/pytorch/resnet.py b/python/ray/util/sgd/pytorch/resnet.py similarity index 100% rename from python/ray/experimental/sgd/pytorch/resnet.py rename to python/ray/util/sgd/pytorch/resnet.py diff --git a/python/ray/experimental/sgd/pytorch/utils.py b/python/ray/util/sgd/pytorch/utils.py similarity index 99% rename from python/ray/experimental/sgd/pytorch/utils.py rename to python/ray/util/sgd/pytorch/utils.py index 295499d19..a28407983 100644 --- a/python/ray/experimental/sgd/pytorch/utils.py +++ b/python/ray/util/sgd/pytorch/utils.py @@ -2,7 +2,7 @@ import collections import time import torch -from ray.experimental.sgd.utils import TimerStat +from ray.util.sgd.utils import TimerStat amp = None diff --git a/python/ray/experimental/sgd/tests/test_pytorch.py b/python/ray/util/sgd/tests/test_pytorch.py similarity index 97% rename from python/ray/experimental/sgd/tests/test_pytorch.py rename to python/ray/util/sgd/tests/test_pytorch.py index 8e174200f..08eb28172 100644 --- a/python/ray/experimental/sgd/tests/test_pytorch.py +++ b/python/ray/util/sgd/tests/test_pytorch.py @@ -11,12 +11,12 @@ import torch.distributed as dist import ray from ray import tune from ray.tests.conftest import ray_start_2_cpus # noqa: F401 -from ray.experimental.sgd.pytorch import PyTorchTrainer, PyTorchTrainable -from ray.experimental.sgd.pytorch.utils import (train, BATCH_COUNT, TEST_MODE, - SCHEDULER_STEP) -from ray.experimental.sgd.utils import check_for_failure +from ray.util.sgd.pytorch import PyTorchTrainer, PyTorchTrainable +from ray.util.sgd.pytorch.utils import (train, BATCH_COUNT, TEST_MODE, + SCHEDULER_STEP) +from ray.util.sgd.utils import check_for_failure -from ray.experimental.sgd.pytorch.examples.train_example import ( +from ray.util.sgd.pytorch.examples.train_example import ( model_creator, optimizer_creator, data_creator, LinearDataset) diff --git a/python/ray/experimental/sgd/tests/test_pytorch_runner.py b/python/ray/util/sgd/tests/test_pytorch_runner.py similarity index 98% rename from python/ray/experimental/sgd/tests/test_pytorch_runner.py rename to python/ray/util/sgd/tests/test_pytorch_runner.py index f3036fe5e..e341194a6 100644 --- a/python/ray/experimental/sgd/tests/test_pytorch_runner.py +++ b/python/ray/util/sgd/tests/test_pytorch_runner.py @@ -4,7 +4,7 @@ import torch.nn as nn import unittest from unittest.mock import MagicMock -from ray.experimental.sgd.pytorch.pytorch_runner import PyTorchRunner +from ray.util.sgd.pytorch.pytorch_runner import PyTorchRunner class LinearDataset(torch.utils.data.Dataset): diff --git a/python/ray/experimental/sgd/tests/test_tensorflow.py b/python/ray/util/sgd/tests/test_tensorflow.py similarity index 94% rename from python/ray/experimental/sgd/tests/test_tensorflow.py rename to python/ray/util/sgd/tests/test_tensorflow.py index 7e2014772..c541607fc 100644 --- a/python/ray/experimental/sgd/tests/test_tensorflow.py +++ b/python/ray/util/sgd/tests/test_tensorflow.py @@ -6,10 +6,10 @@ import shutil from ray import tune from ray.tests.conftest import ray_start_2_cpus # noqa: F401 -from ray.experimental.sgd.tf import TFTrainer, TFTrainable +from ray.util.sgd.tf import TFTrainer, TFTrainable -from ray.experimental.sgd.tf.examples.tensorflow_train_example import ( - simple_model, simple_dataset) +from ray.util.sgd.tf.examples.tensorflow_train_example import (simple_model, + simple_dataset) SIMPLE_CONFIG = { "batch_size": 128, diff --git a/python/ray/util/sgd/tf/__init__.py b/python/ray/util/sgd/tf/__init__.py new file mode 100644 index 000000000..99006cde4 --- /dev/null +++ b/python/ray/util/sgd/tf/__init__.py @@ -0,0 +1,3 @@ +from ray.util.sgd.tf.tf_trainer import (TFTrainer, TFTrainable) + +__all__ = ["TFTrainer", "TFTrainable"] diff --git a/python/ray/experimental/sgd/tf/examples/cifar_tf_example.py b/python/ray/util/sgd/tf/examples/cifar_tf_example.py similarity index 99% rename from python/ray/experimental/sgd/tf/examples/cifar_tf_example.py rename to python/ray/util/sgd/tf/examples/cifar_tf_example.py index dd908bc3a..7f82edf29 100644 --- a/python/ray/experimental/sgd/tf/examples/cifar_tf_example.py +++ b/python/ray/util/sgd/tf/examples/cifar_tf_example.py @@ -16,7 +16,7 @@ import os from filelock import FileLock import ray -from ray.experimental.sgd.tf.tf_trainer import TFTrainer +from ray.util.sgd.tf.tf_trainer import TFTrainer num_classes = 10 diff --git a/python/ray/experimental/sgd/tf/examples/tensorflow_train_example.py b/python/ray/util/sgd/tf/examples/tensorflow_train_example.py similarity index 98% rename from python/ray/experimental/sgd/tf/examples/tensorflow_train_example.py rename to python/ray/util/sgd/tf/examples/tensorflow_train_example.py index df1e7d9cc..5982f7b85 100644 --- a/python/ray/experimental/sgd/tf/examples/tensorflow_train_example.py +++ b/python/ray/util/sgd/tf/examples/tensorflow_train_example.py @@ -6,7 +6,7 @@ import numpy as np import ray from ray import tune -from ray.experimental.sgd.tf.tf_trainer import TFTrainer, TFTrainable +from ray.util.sgd.tf.tf_trainer import TFTrainer, TFTrainable NUM_TRAIN_SAMPLES = 1000 NUM_TEST_SAMPLES = 400 diff --git a/python/ray/experimental/sgd/tf/examples/tf-example-sgd.yaml b/python/ray/util/sgd/tf/examples/tf-example-sgd.yaml similarity index 100% rename from python/ray/experimental/sgd/tf/examples/tf-example-sgd.yaml rename to python/ray/util/sgd/tf/examples/tf-example-sgd.yaml diff --git a/python/ray/experimental/sgd/tf/tf_runner.py b/python/ray/util/sgd/tf/tf_runner.py similarity index 99% rename from python/ray/experimental/sgd/tf/tf_runner.py rename to python/ray/util/sgd/tf/tf_runner.py index 2f8965dfd..575f97159 100644 --- a/python/ray/experimental/sgd/tf/tf_runner.py +++ b/python/ray/util/sgd/tf/tf_runner.py @@ -5,7 +5,7 @@ import numpy as np import ray import ray.services -from ray.experimental.sgd import utils +from ray.util.sgd import utils logger = logging.getLogger(__name__) diff --git a/python/ray/experimental/sgd/tf/tf_trainer.py b/python/ray/util/sgd/tf/tf_trainer.py similarity index 99% rename from python/ray/experimental/sgd/tf/tf_trainer.py rename to python/ray/util/sgd/tf/tf_trainer.py index 55cdbeb82..ff8ea2c41 100644 --- a/python/ray/experimental/sgd/tf/tf_trainer.py +++ b/python/ray/util/sgd/tf/tf_trainer.py @@ -7,7 +7,7 @@ import ray from ray.tune import Trainable from ray.tune.resources import Resources -from ray.experimental.sgd.tf.tf_runner import TFRunner +from ray.util.sgd.tf.tf_runner import TFRunner logger = logging.getLogger(__name__) diff --git a/python/ray/experimental/sgd/utils.py b/python/ray/util/sgd/utils.py similarity index 100% rename from python/ray/experimental/sgd/utils.py rename to python/ray/util/sgd/utils.py diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py index 1bc7c604e..31483a64b 100644 --- a/rllib/evaluation/rollout_worker.py +++ b/rllib/evaluation/rollout_worker.py @@ -842,7 +842,7 @@ class RolloutWorker(EvaluatorInterface): def find_free_port(self): """Finds a free port on the current node.""" - from ray.experimental.sgd import utils + from ray.util.sgd import utils return utils.find_free_port() def __del__(self):