diff --git a/python/README-benchmarks.rst b/python/README-benchmarks.rst index 2837fc296..3dbd71af6 100644 --- a/python/README-benchmarks.rst +++ b/python/README-benchmarks.rst @@ -8,22 +8,31 @@ You can run the benchmark suite by doing the following: To run ASV inside docker, you can use the following command: ``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_asv.sh'`` +``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_rllib_asv.sh'`` + Visualizing Benchmarks ====================== -To visualize benchmarks, you must copy the S3 bucket down to `$RAY_DIR/python`. Assuming asv is installed, +To visualize the regular Ray benchmarks, you must copy the S3 bucket down to `$RAY_DIR/python`. .. code-block:: cd $RAY_DIR/python aws s3 sync s3://$BUCKET/ASV/ . -Then, you can run: +For RLlib, you must sync a *particular* folder down to `$RLLIB_DIR` (`ray/python/ray/rllib`). + +.. code-block:: + + cd $RAY_DIR/python/ray/rllib + aws s3 sync s3://$BUCKET/ASV/RLLIB_RESULTS/ ./RLLIB_RESULTS + +Then, in the corresponding directory, you can run: .. code-block:: asv publish --no-pull asv preview -This creates the directory and then launches a server. +This creates the HTML output directory and then launches a local server where you can view the results. diff --git a/python/ray/rllib/asv.conf.json b/python/ray/rllib/asv.conf.json new file mode 100644 index 000000000..71b9ae6a4 --- /dev/null +++ b/python/ray/rllib/asv.conf.json @@ -0,0 +1,141 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "rllib", + + // The project's homepage + "project_url": "http://rllib.io", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "../../../", + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). 
+ "branches": ["master"], // for git + // "branches": ["default"], // for mercurial + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "http://github.com/ray-project/ray/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + "pythons": ["3.6"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + // "matrix": { + // "numpy": ["1.6", "1.7"], + // "six": ["", null], // test with and without six installed + // "pip+emcee": [""], // emcee is only available for install with pip. + // }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. 
Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "six": null}, // don't run without six on conda + // ], + // + // "include": [ + // // additional env for python2.7 + // {"python": "2.7", "numpy": "1.8"}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + "benchmark_dir": "tuned_examples/regression_tests", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + // "env_dir": "env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": "RLLIB_RESULTS", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + // "html_dir": "html", + + // The number of characters to retain in the commit hashes. 
+ // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // number of builds to keep, per environment. + // "wheel_cache_size": 0 + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // } + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. + // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // } +} diff --git a/python/ray/rllib/tuned_examples/regression_tests/__init__.py b/python/ray/rllib/tuned_examples/regression_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/ray/rllib/tuned_examples/regression_tests/regression_test.py b/python/ray/rllib/tuned_examples/regression_tests/regression_test.py new file mode 100644 index 000000000..dc2e7e940 --- /dev/null +++ b/python/ray/rllib/tuned_examples/regression_tests/regression_test.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +""" +This class runs the regression YAMLs in the ASV format. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import defaultdict +import numpy as np +import os +import yaml + +import ray +from ray import tune + + +CONFIG_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def _evaulate_config(filename): + with open(os.path.join(CONFIG_DIR, filename)) as f: + experiments = yaml.load(f) + ray.init() + trials = tune.run_experiments(experiments) + results = defaultdict(list) + for t in trials: + results["time_total_s"] += [t.last_result.time_total_s] + results["episode_reward_mean"] += [t.last_result.episode_reward_mean] + results["training_iteration"] += [t.last_result.training_iteration] + + return {k: np.median(v) for k, v in results.items()} + + +class Regression(): + def setup_cache(self): + # We need to implement this in separate classes + # below so that ASV will register the setup/class + # as a separate test. + raise NotImplementedError + + def teardown(self, *args): + ray.worker.cleanup() + + def track_time(self, result): + return result["time_total_s"] + + def track_reward(self, result): + return result["episode_reward_mean"] + + def track_iterations(self, result): + return result["training_iteration"] + + +class TestCartPolePPO(Regression): + _file = "cartpole-ppo.yaml" + + def setup_cache(self): + return _evaulate_config(self._file) + + +class TestCartPolePG(Regression): + _file = "cartpole-pg.yaml" + + def setup_cache(self): + return _evaulate_config(self._file) + + +class TestPendulumDDPG(Regression): + _file = "pendulum-ddpg.yaml" + + def setup_cache(self): + return _evaulate_config(self._file) + + +class TestCartPoleES(Regression): + _file = "cartpole-es.yaml" + + def setup_cache(self): + return _evaulate_config(self._file) + + +class TestCartPoleDQN(Regression): + _file = "cartpole-dqn.yaml" + + def setup_cache(self): + return _evaulate_config(self._file) + + +class TestCartPoleA3C(Regression): + _file = 
"cartpole-a3c.yaml" + + def setup_cache(self): + return _evaulate_config(self._file) + + +class TestCartPoleA3CPyTorch(Regression): + _file = "cartpole-a3c-pytorch.yaml" + + def setup_cache(self): + return _evaulate_config(self._file) diff --git a/test/jenkins_tests/run_rllib_asv.sh b/test/jenkins_tests/run_rllib_asv.sh new file mode 100755 index 000000000..2788bc3c6 --- /dev/null +++ b/test/jenkins_tests/run_rllib_asv.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails. +set -e + +# Show explicitly which commands are currently running. +set -x + +BUCKET_NAME=ray-integration-testing/ASV +COMMIT=$(cat /ray/git-rev) +RLLIB_RESULTS=RLLIB_RESULTS +RLLIB_RESULTS_DIR=/ray/python/ray/rllib/RLLIB_RESULTS +pip install awscli + +# Install Ray fork of ASV +git clone https://github.com/ray-project/asv.git /tmp/asv/ || true +cd /tmp/asv/ +pip install -e . + +cd /ray/python/ray/rllib/ +asv machine --machine jenkins +mkdir $RLLIB_RESULTS_DIR || true +aws s3 cp s3://$BUCKET_NAME/RLLIB_RESULTS/benchmarks.json $RLLIB_RESULTS_DIR/benchmarks.json || true + +asv run --show-stderr --python=same --force-record-commit=$COMMIT + +aws s3 cp $RLLIB_RESULTS_DIR/benchmarks.json s3://$BUCKET_NAME/RLLIB_RESULTS/benchmarks_$COMMIT.json +aws s3 sync $RLLIB_RESULTS_DIR/ s3://$BUCKET_NAME/RLLIB_RESULTS/