mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 17:49:47 +08:00
ci: Redo format.sh --all script & backfill lint fixes (#9956)
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import ray
|
||||
|
||||
|
||||
ray.init()
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import ray
|
||||
|
||||
|
||||
ray.init()
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -313,7 +313,7 @@ lint_readme() {
|
||||
}
|
||||
|
||||
lint_scripts() {
|
||||
"${ROOT_DIR}"/format.sh --all
|
||||
FORMAT_SH_PRINT_DIFF=1 "${ROOT_DIR}"/format.sh --all
|
||||
}
|
||||
|
||||
lint_bazel() {
|
||||
|
||||
+64
-36
@@ -46,11 +46,6 @@ builtin cd "$(dirname "${BASH_SOURCE:-$0}")"
|
||||
ROOT="$(git rev-parse --show-toplevel)"
|
||||
builtin cd "$ROOT" || exit 1
|
||||
|
||||
# Add the upstream remote if it doesn't exist
|
||||
if ! git remote -v | grep -q upstream; then
|
||||
git remote add 'upstream' 'https://github.com/ray-project/ray.git'
|
||||
fi
|
||||
|
||||
FLAKE8_VERSION=$(flake8 --version | awk '{print $1}')
|
||||
YAPF_VERSION=$(yapf --version | awk '{print $2}')
|
||||
SHELLCHECK_VERSION=$(shellcheck --version | awk '/^version:/ {print $2}')
|
||||
@@ -73,9 +68,6 @@ else
|
||||
echo "WARNING: clang-format is not installed!"
|
||||
fi
|
||||
|
||||
# Only fetch master since that's the branch we're diffing against.
|
||||
git fetch upstream master || true
|
||||
|
||||
SHELLCHECK_FLAGS=(
|
||||
--exclude=1090 # "Can't follow non-constant source. Use a directive to specify location."
|
||||
--exclude=1091 # "Not following {file} due to some error"
|
||||
@@ -99,7 +91,16 @@ YAPF_EXCLUDES=(
|
||||
'--exclude' 'python/ray/thirdparty_files/*'
|
||||
)
|
||||
|
||||
FLAKE8_EXCLUDES="python/ray/core/generated/,streaming/python/generated,doc/source/conf.py,python/ray/cloudpickle/,python/ray/thirdparty_files/"
|
||||
GIT_LS_EXCLUDES=(
|
||||
':(exclude)python/ray/cloudpickle/'
|
||||
)
|
||||
|
||||
# TODO(barakmich): This should be cleaned up. I've at least excised the copies
|
||||
# of these arguments to this location, but the long-term answer is to actually
|
||||
# make a flake8 config file
|
||||
FLAKE8_EXCLUDE="--exclude=python/ray/core/generated/,streaming/python/generated,doc/source/conf.py,python/ray/cloudpickle/,python/ray/thirdparty_files/,python/build/,python/.eggs/"
|
||||
FLAKE8_IGNORES="--ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605"
|
||||
FLAKE8_PYX_IGNORES="--ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605"
|
||||
|
||||
shellcheck_scripts() {
|
||||
shellcheck "${SHELLCHECK_FLAGS[@]}" "$@"
|
||||
@@ -110,7 +111,7 @@ shellcheck_bazel() {
|
||||
}
|
||||
|
||||
# Format specified files
|
||||
format() {
|
||||
format_files() {
|
||||
local shell_files=() python_files=() bazel_files=()
|
||||
|
||||
local name
|
||||
@@ -162,6 +163,46 @@ format() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Format every tracked file in place, then lint.
#
# Steps, in order:
#   1. yapf --in-place over tracked *.py files.
#   2. flake8 over tracked *.py and *.pyx/*.pxd/*.pxi files, only when flake8
#      is installed.
#   3. clang-format -i over tracked *.cc / *.h files, only when installed.
#   4. shellcheck_bazel, then shellcheck_scripts over tracked *.sh files plus
#      tracked non-.sh files whose shebang line names sh or bash, only when
#      shellcheck is installed.
# Steps 1-3 skip paths matched by GIT_LS_EXCLUDES.
#
# This function does not print a diff itself; the caller decides whether to
# run `git diff` afterwards.
#
# Globals read:    GIT_LS_EXCLUDES, YAPF_EXCLUDES, YAPF_FLAGS, FLAKE8_EXCLUDE,
#                  FLAKE8_IGNORES, FLAKE8_PYX_IGNORES
# Globals written: HAS_FLAKE8 (exit status of `command -v flake8`; 0 = found)
# Arguments:       none are used
# Outputs:         timestamped progress lines on stdout
format_all() {
    # Record the lookup status without letting a non-zero status abort the
    # script, and compare it numerically below. A bare `[ $HAS_FLAKE8 ]` only
    # checks for a non-empty string, which is true for both "0" and "1".
    HAS_FLAKE8=0
    command -v flake8 &> /dev/null || HAS_FLAKE8=$?

    echo "$(date)" "YAPF...."
    git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 10 \
      yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
    if [ "$HAS_FLAKE8" -eq 0 ]; then
      echo "$(date)" "Flake8...."
      git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 \
        flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_IGNORES"

      git ls-files -- '*.pyx' '*.pxd' '*.pxi' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 \
        flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_PYX_IGNORES"
    fi

    echo "$(date)" "clang-format...."
    if command -v clang-format >/dev/null; then
      git ls-files -- '*.cc' '*.h' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 clang-format -i
    fi

    if command -v shellcheck >/dev/null; then
      echo "$(date)" "shellcheck bazel...."
      shellcheck_bazel

      # Collect paths one per line instead of via arr=($(...)), so a path
      # containing whitespace or glob characters stays a single element.
      local shell_files=() non_shell_files=() path
      while IFS= read -r path; do
        non_shell_files+=("$path")
      done < <(git ls-files -- ':(exclude)*.sh')
      while IFS= read -r path; do
        shell_files+=("$path")
      done < <(git ls-files -- '*.sh')
      if [ 0 -lt "${#non_shell_files[@]}" ]; then
        # Shell scripts without a .sh suffix are found by their shebang line.
        # git grep exits non-zero when nothing matches; that is not an error.
        while IFS= read -r path; do
          shell_files+=("$path")
        done < <(git --no-pager grep -l -- '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' "${non_shell_files[@]}" || true)
      fi
      if [ 0 -lt "${#shell_files[@]}" ]; then
        echo "$(date)" "shellcheck scripts...."
        shellcheck_scripts "${shell_files[@]}"
      fi
    fi
    echo "$(date)" "done!"
}
|
||||
|
||||
# Format files that differ from main branch. Ignores dirs that are not slated
|
||||
# for autoformat yet.
|
||||
format_changed() {
|
||||
@@ -178,17 +219,14 @@ format_changed() {
|
||||
yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
|
||||
if which flake8 >/dev/null; then
|
||||
git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \
|
||||
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES,rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
|
||||
# Ignore F821 for rllib flake8 checking (produces errors for type annotations using quotes (non-imported classes)).
|
||||
git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \
|
||||
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --filename="rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605,F821
|
||||
flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_IGNORES"
|
||||
fi
|
||||
fi
|
||||
|
||||
if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' &>/dev/null; then
|
||||
if which flake8 >/dev/null; then
|
||||
git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' | xargs -P 5 \
|
||||
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605
|
||||
flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_PYX_IGNORES"
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -216,35 +254,25 @@ format_changed() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Format all files, and print the diff to stdout for travis.
|
||||
format_all() {
|
||||
# Ignore F821 for rllib flake8 checking (produces errors for type annotations using quotes (non-imported classes)).
|
||||
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES,rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
|
||||
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --filename="rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605,F821
|
||||
|
||||
yapf --diff "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" test python
|
||||
|
||||
local shell_files
|
||||
# shellcheck disable=SC2207
|
||||
shell_files=($(
|
||||
git -C "${ROOT}" ls-files --exclude-standard HEAD -- "*.sh" &&
|
||||
{ git -C "${ROOT}" --no-pager grep -l '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' ":(exclude)*.sh" || true; }
|
||||
))
|
||||
if [ 0 -lt "${#shell_files[@]}" ]; then
|
||||
shellcheck_scripts "${shell_files[@]}"
|
||||
fi
|
||||
shellcheck_bazel
|
||||
}
|
||||
|
||||
# This flag formats individual files. --files *must* be the first command line
|
||||
# arg to use this option.
|
||||
if [ "${1-}" == '--files' ]; then
|
||||
format "${@:2}"
|
||||
format_files "${@:2}"
|
||||
# If `--all` is passed, then any further arguments are ignored and the
|
||||
# entire python directory is formatted.
|
||||
elif [ "${1-}" == '--all' ]; then
|
||||
format_all
|
||||
format_all "${@}"
|
||||
if [ -n "${FORMAT_SH_PRINT_DIFF-}" ]; then git --no-pager diff; fi
|
||||
else
|
||||
# Add the upstream remote if it doesn't exist
|
||||
if ! git remote -v | grep -q upstream; then
|
||||
git remote add 'upstream' 'https://github.com/ray-project/ray.git'
|
||||
fi
|
||||
|
||||
# Only fetch master since that's the branch we're diffing against.
|
||||
git fetch upstream master || true
|
||||
|
||||
# Format only the files that changed in last commit.
|
||||
format_changed
|
||||
fi
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <ray/api/generated/actor_funcs.generated.h>
|
||||
#include <ray/api/generated/create_funcs.generated.h>
|
||||
#include <ray/api/generated/funcs.generated.h>
|
||||
#include <ray/api/ray_runtime.h>
|
||||
|
||||
#include <memory>
|
||||
#include <msgpack.hpp>
|
||||
|
||||
#include "ray/core.h"
|
||||
namespace ray {
|
||||
namespace api {
|
||||
@@ -232,13 +233,10 @@ inline ActorTaskCaller<ReturnType> Ray::CallActorInternal(FuncType &actor_func,
|
||||
return ActorTaskCaller<ReturnType>(runtime_, actor.ID(), ptr, buffer);
|
||||
}
|
||||
|
||||
#include <ray/api/generated/exec_funcs.generated.h>
|
||||
|
||||
#include <ray/api/generated/call_funcs_impl.generated.h>
|
||||
|
||||
#include <ray/api/generated/create_actors_impl.generated.h>
|
||||
|
||||
#include <ray/api/generated/call_actors_impl.generated.h>
|
||||
#include <ray/api/generated/call_funcs_impl.generated.h>
|
||||
#include <ray/api/generated/create_actors_impl.generated.h>
|
||||
#include <ray/api/generated/exec_funcs.generated.h>
|
||||
|
||||
} // namespace api
|
||||
} // namespace ray
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <ray/api/serializer.h>
|
||||
|
||||
#include <msgpack.hpp>
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -2,9 +2,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include <msgpack.hpp>
|
||||
#include <utility>
|
||||
|
||||
#include "ray/core.h"
|
||||
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ray/api/wait_result.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <msgpack.hpp>
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
#include <ray/api/wait_result.h>
|
||||
#include "ray/core.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
|
||||
#include <msgpack.hpp>
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ray/core.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
|
||||
#include <ray/api.h>
|
||||
|
||||
#include <ray/api/ray_config.h>
|
||||
|
||||
#include "runtime/abstract_ray_runtime.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
|
||||
#include "abstract_ray_runtime.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <ray/api.h>
|
||||
#include <ray/api/ray_config.h>
|
||||
#include <ray/api/ray_exception.h>
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "../util/address_helper.h"
|
||||
#include "../util/process_helper.h"
|
||||
#include "local_mode_ray_runtime.h"
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <ray/api/ray_config.h>
|
||||
#include <ray/api/ray_runtime.h>
|
||||
|
||||
#include <msgpack.hpp>
|
||||
#include <mutex>
|
||||
|
||||
#include "./object/object_store.h"
|
||||
#include "./task/task_executor.h"
|
||||
#include "./task/task_submitter.h"
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include "local_mode_ray_runtime.h"
|
||||
|
||||
#include <ray/api.h>
|
||||
|
||||
#include "../util/address_helper.h"
|
||||
#include "./object/local_mode_object_store.h"
|
||||
#include "./object/object_store.h"
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "abstract_ray_runtime.h"
|
||||
#include "ray/core.h"
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include "native_ray_runtime.h"
|
||||
|
||||
#include <ray/api.h>
|
||||
|
||||
#include "../util/address_helper.h"
|
||||
#include "./object/native_object_store.h"
|
||||
#include "./object/object_store.h"
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "abstract_ray_runtime.h"
|
||||
#include "ray/core.h"
|
||||
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
|
||||
#include "local_mode_object_store.h"
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <thread>
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
#include "../abstract_ray_runtime.h"
|
||||
#include "local_mode_object_store.h"
|
||||
|
||||
namespace ray {
|
||||
namespace api {
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include "ray/core.h"
|
||||
|
||||
#include "../local_mode_ray_runtime.h"
|
||||
#include "object_store.h"
|
||||
#include "ray/core.h"
|
||||
|
||||
namespace ray {
|
||||
namespace api {
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
|
||||
#include "native_object_store.h"
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <thread>
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
#include "../abstract_ray_runtime.h"
|
||||
#include "native_object_store.h"
|
||||
|
||||
namespace ray {
|
||||
namespace api {
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include "ray/core.h"
|
||||
|
||||
#include "../native_ray_runtime.h"
|
||||
#include "object_store.h"
|
||||
#include "ray/core.h"
|
||||
|
||||
namespace ray {
|
||||
namespace api {
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <ray/api/wait_result.h>
|
||||
|
||||
#include <memory>
|
||||
#include <msgpack.hpp>
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <msgpack.hpp>
|
||||
|
||||
#include "ray/core.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
|
||||
#include "local_mode_task_submitter.h"
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
|
||||
#include <boost/asio/post.hpp>
|
||||
#include <memory>
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
#include "../../util/address_helper.h"
|
||||
#include "../abstract_ray_runtime.h"
|
||||
#include "local_mode_task_submitter.h"
|
||||
|
||||
namespace ray {
|
||||
namespace api {
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <boost/asio/thread_pool.hpp>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include "../local_mode_ray_runtime.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "invocation_spec.h"
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#include "native_task_submitter.h"
|
||||
|
||||
#include <ray/api/ray_exception.h>
|
||||
|
||||
#include "../../util/address_helper.h"
|
||||
#include "../abstract_ray_runtime.h"
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <boost/asio/thread_pool.hpp>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include "../native_ray_runtime.h"
|
||||
#include "invocation_spec.h"
|
||||
#include "ray/core.h"
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
|
||||
#include "task_executor.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "../../util/address_helper.h"
|
||||
#include "../abstract_ray_runtime.h"
|
||||
#include "task_executor.h"
|
||||
|
||||
namespace ray {
|
||||
namespace api {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "invocation_spec.h"
|
||||
#include "ray/core.h"
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <ray/api/ray_runtime.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <ray/api/ray_runtime.h>
|
||||
#include "invocation_spec.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <ray/api.h>
|
||||
|
||||
#include <future>
|
||||
#include <thread>
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <ray/api.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@ import ray
|
||||
|
||||
@ray.remote
|
||||
class NewsServer(object):
|
||||
|
||||
def __init__(self):
|
||||
self.conn = sqlite3.connect("newsreader.db")
|
||||
c = self.conn.cursor()
|
||||
@@ -25,29 +24,36 @@ class NewsServer(object):
|
||||
items = []
|
||||
c = self.conn.cursor()
|
||||
for item in feed.items:
|
||||
items.append({"title": item.title,
|
||||
"link": item.link,
|
||||
"description": item.description,
|
||||
"description_text": item.description,
|
||||
"pubDate": str(item.pub_date)})
|
||||
c.execute("""INSERT INTO news (title, link, description,
|
||||
items.append({
|
||||
"title": item.title,
|
||||
"link": item.link,
|
||||
"description": item.description,
|
||||
"description_text": item.description,
|
||||
"pubDate": str(item.pub_date)
|
||||
})
|
||||
c.execute(
|
||||
"""INSERT INTO news (title, link, description,
|
||||
published, feed, liked) values
|
||||
(?, ?, ?, ?, ?, ?)""", (
|
||||
item.title, item.link, item.description,
|
||||
item.pub_date, feed.link, False))
|
||||
(?, ?, ?, ?, ?, ?)""",
|
||||
(item.title, item.link, item.description, item.pub_date,
|
||||
feed.link, False))
|
||||
self.conn.commit()
|
||||
|
||||
return {"channel": {"title": feed.title,
|
||||
"link": feed.link,
|
||||
"url": feed.link},
|
||||
"items": items}
|
||||
return {
|
||||
"channel": {
|
||||
"title": feed.title,
|
||||
"link": feed.link,
|
||||
"url": feed.link
|
||||
},
|
||||
"items": items
|
||||
}
|
||||
|
||||
def like_item(self, url, is_faved):
|
||||
c = self.conn.cursor()
|
||||
if is_faved:
|
||||
c.execute("UPDATE news SET liked = 1 WHERE link = ?", (url,))
|
||||
c.execute("UPDATE news SET liked = 1 WHERE link = ?", (url, ))
|
||||
else:
|
||||
c.execute("UPDATE news SET liked = 0 WHERE link = ?", (url,))
|
||||
c.execute("UPDATE news SET liked = 0 WHERE link = ?", (url, ))
|
||||
self.conn.commit()
|
||||
|
||||
|
||||
@@ -71,8 +77,9 @@ def dispatcher():
|
||||
result = ray.get(method.remote(*method_args))
|
||||
return jsonify(result)
|
||||
else:
|
||||
return jsonify(
|
||||
{"error": "method_name '" + method_name + "' not found"})
|
||||
return jsonify({
|
||||
"error": "method_name '" + method_name + "' not found"
|
||||
})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -7,10 +7,13 @@ import ray
|
||||
import wikipedia
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--num-mappers",
|
||||
help="number of mapper actors used", default=3, type=int)
|
||||
parser.add_argument("--num-reducers",
|
||||
help="number of reducer actors used", default=4, type=int)
|
||||
parser.add_argument(
|
||||
"--num-mappers", help="number of mapper actors used", default=3, type=int)
|
||||
parser.add_argument(
|
||||
"--num-reducers",
|
||||
help="number of reducer actors used",
|
||||
default=4,
|
||||
type=int)
|
||||
|
||||
|
||||
@ray.remote
|
||||
@@ -47,8 +50,10 @@ class Reducer(object):
|
||||
word_count_sum = defaultdict(lambda: 0)
|
||||
# Get the word counts for this Reducer's keys from all of the Mappers
|
||||
# and aggregate the results.
|
||||
count_ids = [mapper.get_range.remote(article_index, self.keys)
|
||||
for mapper in self.mappers]
|
||||
count_ids = [
|
||||
mapper.get_range.remote(article_index, self.keys)
|
||||
for mapper in self.mappers
|
||||
]
|
||||
# TODO(rkn): We should process these out of order using ray.wait.
|
||||
for count_id in count_ids:
|
||||
for k, v in ray.get(count_id):
|
||||
@@ -78,8 +83,9 @@ if __name__ == "__main__":
|
||||
streams.append(Stream([line.strip() for line in f.readlines()]))
|
||||
|
||||
# Partition the keys among the reducers.
|
||||
chunks = np.array_split([chr(i) for i in range(ord("a"), ord("z") + 1)],
|
||||
args.num_reducers)
|
||||
chunks = np.array_split([chr(i)
|
||||
for i in range(ord("a"),
|
||||
ord("z") + 1)], args.num_reducers)
|
||||
keys = [[chunk[0], chunk[-1]] for chunk in chunks]
|
||||
|
||||
# Create a number of mappers.
|
||||
@@ -93,12 +99,14 @@ if __name__ == "__main__":
|
||||
while True:
|
||||
print("article index = {}".format(article_index))
|
||||
wordcounts = {}
|
||||
counts = ray.get([reducer.next_reduce_result.remote(article_index)
|
||||
for reducer in reducers])
|
||||
counts = ray.get([
|
||||
reducer.next_reduce_result.remote(article_index)
|
||||
for reducer in reducers
|
||||
])
|
||||
for count in counts:
|
||||
wordcounts.update(count)
|
||||
most_frequent_words = heapq.nlargest(10, wordcounts,
|
||||
key=wordcounts.get)
|
||||
most_frequent_words = heapq.nlargest(
|
||||
10, wordcounts, key=wordcounts.get)
|
||||
for word in most_frequent_words:
|
||||
print(" ", word, wordcounts[word])
|
||||
article_index += 1
|
||||
|
||||
@@ -68,10 +68,7 @@ parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
|
||||
# As you can see, the setup here is exactly how you would do it for Scikit-Learn. Now, let's try fitting a model.
|
||||
|
||||
tune_search = TuneGridSearchCV(
|
||||
SGDClassifier(),
|
||||
parameter_grid,
|
||||
early_stopping=True,
|
||||
max_iters=10)
|
||||
SGDClassifier(), parameter_grid, early_stopping=True, max_iters=10)
|
||||
|
||||
import time # Just to compare fit times
|
||||
start = time.time()
|
||||
|
||||
@@ -11,13 +11,13 @@ cdef extern from "opencensus/tags/tag_key.h" nogil:
|
||||
cdef extern from "ray/stats/metric.h" nogil:
|
||||
cdef cppclass CMetric "ray::stats::Metric":
|
||||
CMetric(const c_string &name,
|
||||
const c_string &description,
|
||||
const c_string &unit,
|
||||
const c_vector[CTagKey] &tag_keys)
|
||||
const c_string &description,
|
||||
const c_string &unit,
|
||||
const c_vector[CTagKey] &tag_keys)
|
||||
c_string GetName() const
|
||||
void Record(double value)
|
||||
void Record(double value,
|
||||
unordered_map[c_string, c_string] &tags)
|
||||
void Record(double value,
|
||||
unordered_map[c_string, c_string] &tags)
|
||||
|
||||
cdef cppclass CGauge "ray::stats::Gauge":
|
||||
CGauge(const c_string &name,
|
||||
@@ -42,4 +42,4 @@ cdef extern from "ray/stats/metric.h" nogil:
|
||||
const c_string &description,
|
||||
const c_string &unit,
|
||||
const c_vector[double] &boundaries,
|
||||
const c_vector[CTagKey] &tag_keys)
|
||||
const c_vector[CTagKey] &tag_keys)
|
||||
|
||||
@@ -126,9 +126,8 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
|
||||
target_noise_clip = policy.config["target_noise_clip"]
|
||||
clipped_normal_sample = tf.clip_by_value(
|
||||
tf.random.normal(
|
||||
tf.shape(policy_tp1),
|
||||
stddev=policy.config["target_noise"]), -target_noise_clip,
|
||||
target_noise_clip)
|
||||
tf.shape(policy_tp1), stddev=policy.config["target_noise"]),
|
||||
-target_noise_clip, target_noise_clip)
|
||||
policy_tp1_smoothed = tf.clip_by_value(
|
||||
policy_tp1 + clipped_normal_sample,
|
||||
policy.action_space.low * tf.ones_like(policy_tp1),
|
||||
@@ -146,8 +145,8 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
|
||||
q_t_det_policy = model.get_q_values(model_out_t, policy_t)
|
||||
|
||||
if twin_q:
|
||||
twin_q_t = model.get_twin_q_values(
|
||||
model_out_t, train_batch[SampleBatch.ACTIONS])
|
||||
twin_q_t = model.get_twin_q_values(model_out_t,
|
||||
train_batch[SampleBatch.ACTIONS])
|
||||
|
||||
# Target q-net(s) evaluation.
|
||||
q_tp1 = policy.target_model.get_q_values(target_model_out_tp1,
|
||||
@@ -278,11 +277,11 @@ def gradients_fn(policy, optimizer, loss):
|
||||
if policy.config["framework"] in ["tf2", "tfe"]:
|
||||
tape = optimizer.tape
|
||||
pol_weights = policy.model.policy_variables()
|
||||
actor_grads_and_vars = list(zip(tape.gradient(
|
||||
policy.actor_loss, pol_weights), pol_weights))
|
||||
actor_grads_and_vars = list(
|
||||
zip(tape.gradient(policy.actor_loss, pol_weights), pol_weights))
|
||||
q_weights = policy.model.q_variables()
|
||||
critic_grads_and_vars = list(zip(tape.gradient(
|
||||
policy.critic_loss, q_weights), q_weights))
|
||||
critic_grads_and_vars = list(
|
||||
zip(tape.gradient(policy.critic_loss, q_weights), q_weights))
|
||||
else:
|
||||
actor_grads_and_vars = policy._actor_optimizer.compute_gradients(
|
||||
policy.actor_loss, var_list=policy.model.policy_variables())
|
||||
@@ -296,10 +295,12 @@ def gradients_fn(policy, optimizer, loss):
|
||||
clip_func = tf.identity
|
||||
|
||||
# Save grads and vars for later use in `build_apply_op`.
|
||||
policy._actor_grads_and_vars = [
|
||||
(clip_func(g), v) for (g, v) in actor_grads_and_vars if g is not None]
|
||||
policy._critic_grads_and_vars = [
|
||||
(clip_func(g), v) for (g, v) in critic_grads_and_vars if g is not None]
|
||||
policy._actor_grads_and_vars = [(clip_func(g), v)
|
||||
for (g, v) in actor_grads_and_vars
|
||||
if g is not None]
|
||||
policy._critic_grads_and_vars = [(clip_func(g), v)
|
||||
for (g, v) in critic_grads_and_vars
|
||||
if g is not None]
|
||||
|
||||
grads_and_vars = policy._actor_grads_and_vars + \
|
||||
policy._critic_grads_and_vars
|
||||
|
||||
@@ -65,8 +65,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
|
||||
torch.normal(
|
||||
mean=torch.zeros(policy_tp1.size()),
|
||||
std=policy.config["target_noise"]).to(policy_tp1.device),
|
||||
-target_noise_clip,
|
||||
target_noise_clip)
|
||||
-target_noise_clip, target_noise_clip)
|
||||
|
||||
policy_tp1_smoothed = torch.min(
|
||||
torch.max(
|
||||
|
||||
@@ -405,7 +405,9 @@ class TestDDPG(unittest.TestCase):
|
||||
policy_t = sigmoid(2.0 * fc(
|
||||
relu(
|
||||
fc(model_out_t, weights[ks[1]], weights[ks[0]], framework=fw)),
|
||||
weights[ks[5]], weights[ks[4]], framework=fw))
|
||||
weights[ks[5]],
|
||||
weights[ks[4]],
|
||||
framework=fw))
|
||||
# Get policy output for t+1 (target model).
|
||||
policy_tp1 = sigmoid(2.0 * fc(
|
||||
relu(
|
||||
@@ -413,7 +415,9 @@ class TestDDPG(unittest.TestCase):
|
||||
weights[ks[3]],
|
||||
weights[ks[2]],
|
||||
framework=fw)),
|
||||
weights[ks[7]], weights[ks[6]], framework=fw))
|
||||
weights[ks[7]],
|
||||
weights[ks[6]],
|
||||
framework=fw))
|
||||
# Assume no smooth target policy.
|
||||
policy_tp1_smoothed = policy_tp1
|
||||
|
||||
|
||||
@@ -74,8 +74,7 @@ class DistributionalQTFModel(TFModelV2):
|
||||
for i in range(len(q_hiddens)):
|
||||
if use_noisy:
|
||||
action_out = NoisyLayer(
|
||||
"{}hidden_{}".format(prefix, i),
|
||||
q_hiddens[i],
|
||||
"{}hidden_{}".format(prefix, i), q_hiddens[i],
|
||||
sigma0)(action_out)
|
||||
elif add_layer_norm:
|
||||
action_out = tf.keras.layers.Dense(
|
||||
@@ -135,8 +134,7 @@ class DistributionalQTFModel(TFModelV2):
|
||||
for i in range(len(q_hiddens)):
|
||||
if use_noisy:
|
||||
state_out = NoisyLayer(
|
||||
"{}dueling_hidden_{}".format(prefix, i),
|
||||
q_hiddens[i],
|
||||
"{}dueling_hidden_{}".format(prefix, i), q_hiddens[i],
|
||||
sigma0)(state_out)
|
||||
else:
|
||||
state_out = tf.keras.layers.Dense(
|
||||
@@ -160,8 +158,8 @@ class DistributionalQTFModel(TFModelV2):
|
||||
self.register_variables(self.q_value_head.variables)
|
||||
|
||||
if dueling:
|
||||
state_out = build_state_score(
|
||||
name + "/state_value/", self.model_out)
|
||||
state_out = build_state_score(name + "/state_value/",
|
||||
self.model_out)
|
||||
self.state_value_head = tf.keras.Model(self.model_out, state_out)
|
||||
self.register_variables(self.state_value_head.variables)
|
||||
|
||||
|
||||
@@ -231,8 +231,8 @@ def build_q_losses(policy, model, _, train_batch):
|
||||
train_batch[SampleBatch.NEXT_OBS],
|
||||
explore=False)
|
||||
q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1)
|
||||
q_tp1_best_one_hot_selection = tf.one_hot(
|
||||
q_tp1_best_using_online_net, policy.action_space.n)
|
||||
q_tp1_best_one_hot_selection = tf.one_hot(q_tp1_best_using_online_net,
|
||||
policy.action_space.n)
|
||||
q_tp1_best = tf.reduce_sum(q_tp1 * q_tp1_best_one_hot_selection, 1)
|
||||
q_dist_tp1_best = tf.reduce_sum(
|
||||
q_dist_tp1 * tf.expand_dims(q_tp1_best_one_hot_selection, -1), 1)
|
||||
@@ -246,9 +246,9 @@ def build_q_losses(policy, model, _, train_batch):
|
||||
policy.q_loss = QLoss(
|
||||
q_t_selected, q_logits_t_selected, q_tp1_best, q_dist_tp1_best,
|
||||
train_batch[PRIO_WEIGHTS], train_batch[SampleBatch.REWARDS],
|
||||
tf.cast(train_batch[SampleBatch.DONES], tf.float32), config["gamma"],
|
||||
config["n_step"], config["num_atoms"],
|
||||
config["v_min"], config["v_max"])
|
||||
tf.cast(train_batch[SampleBatch.DONES],
|
||||
tf.float32), config["gamma"], config["n_step"],
|
||||
config["num_atoms"], config["v_min"], config["v_max"])
|
||||
|
||||
return policy.q_loss.loss
|
||||
|
||||
@@ -378,9 +378,8 @@ def postprocess_nstep_and_prio(policy, batch, other_agent=None, episode=None):
|
||||
batch[SampleBatch.CUR_OBS], batch[SampleBatch.ACTIONS],
|
||||
batch[SampleBatch.REWARDS], batch[SampleBatch.NEXT_OBS],
|
||||
batch[SampleBatch.DONES], batch[PRIO_WEIGHTS])
|
||||
new_priorities = (
|
||||
np.abs(convert_to_numpy(td_errors)) +
|
||||
policy.config["prioritized_replay_eps"])
|
||||
new_priorities = (np.abs(convert_to_numpy(td_errors)) +
|
||||
policy.config["prioritized_replay_eps"])
|
||||
batch.data[PRIO_WEIGHTS] = new_priorities
|
||||
|
||||
return batch
|
||||
|
||||
@@ -72,12 +72,16 @@ class DQNTorchModel(TorchModelV2, nn.Module):
|
||||
advantage_module.add_module(
|
||||
"dueling_A_{}".format(i),
|
||||
NoisyLayer(
|
||||
ins, n, sigma0=self.sigma0,
|
||||
ins,
|
||||
n,
|
||||
sigma0=self.sigma0,
|
||||
activation=dueling_activation))
|
||||
value_module.add_module(
|
||||
"dueling_V_{}".format(i),
|
||||
NoisyLayer(
|
||||
ins, n, sigma0=self.sigma0,
|
||||
ins,
|
||||
n,
|
||||
sigma0=self.sigma0,
|
||||
activation=dueling_activation))
|
||||
else:
|
||||
advantage_module.add_module(
|
||||
@@ -88,25 +92,26 @@ class DQNTorchModel(TorchModelV2, nn.Module):
|
||||
SlimFC(ins, n, activation_fn=dueling_activation))
|
||||
# Add LayerNorm after each Dense.
|
||||
if add_layer_norm:
|
||||
advantage_module.add_module(
|
||||
"LayerNorm_A_{}".format(i), nn.LayerNorm(n))
|
||||
value_module.add_module(
|
||||
"LayerNorm_V_{}".format(i), nn.LayerNorm(n))
|
||||
advantage_module.add_module("LayerNorm_A_{}".format(i),
|
||||
nn.LayerNorm(n))
|
||||
value_module.add_module("LayerNorm_V_{}".format(i),
|
||||
nn.LayerNorm(n))
|
||||
ins = n
|
||||
|
||||
# Actual Advantages layer (nodes=num-actions).
|
||||
if use_noisy:
|
||||
advantage_module.add_module("A", NoisyLayer(
|
||||
ins,
|
||||
self.action_space.n * self.num_atoms,
|
||||
sigma0,
|
||||
activation=None))
|
||||
advantage_module.add_module(
|
||||
"A",
|
||||
NoisyLayer(
|
||||
ins,
|
||||
self.action_space.n * self.num_atoms,
|
||||
sigma0,
|
||||
activation=None))
|
||||
elif q_hiddens:
|
||||
advantage_module.add_module(
|
||||
"A",
|
||||
SlimFC(
|
||||
ins, action_space.n * self.num_atoms,
|
||||
activation_fn=None))
|
||||
ins, action_space.n * self.num_atoms, activation_fn=None))
|
||||
|
||||
self.advantage_module = advantage_module
|
||||
|
||||
|
||||
@@ -212,8 +212,8 @@ def build_q_losses(policy, model, _, train_batch):
|
||||
is_training=True)
|
||||
|
||||
# Q scores for actions which we know were selected in the given state.
|
||||
one_hot_selection = F.one_hot(
|
||||
train_batch[SampleBatch.ACTIONS], policy.action_space.n)
|
||||
one_hot_selection = F.one_hot(train_batch[SampleBatch.ACTIONS],
|
||||
policy.action_space.n)
|
||||
q_t_selected = torch.sum(
|
||||
torch.where(q_t > -float("inf"), q_t, torch.tensor(0.0)) *
|
||||
one_hot_selection, 1)
|
||||
@@ -230,8 +230,8 @@ def build_q_losses(policy, model, _, train_batch):
|
||||
explore=False,
|
||||
is_training=True)
|
||||
q_tp1_best_using_online_net = torch.argmax(q_tp1_using_online_net, 1)
|
||||
q_tp1_best_one_hot_selection = F.one_hot(
|
||||
q_tp1_best_using_online_net, policy.action_space.n)
|
||||
q_tp1_best_one_hot_selection = F.one_hot(q_tp1_best_using_online_net,
|
||||
policy.action_space.n)
|
||||
q_tp1_best = torch.sum(
|
||||
torch.where(q_tp1 > -float("inf"), q_tp1, torch.tensor(0.0)) *
|
||||
q_tp1_best_one_hot_selection, 1)
|
||||
@@ -250,8 +250,8 @@ def build_q_losses(policy, model, _, train_batch):
|
||||
q_t_selected, q_logits_t_selected, q_tp1_best, q_probs_tp1_best,
|
||||
train_batch[PRIO_WEIGHTS], train_batch[SampleBatch.REWARDS],
|
||||
train_batch[SampleBatch.DONES].float(), config["gamma"],
|
||||
config["n_step"], config["num_atoms"],
|
||||
config["v_min"], config["v_max"])
|
||||
config["n_step"], config["num_atoms"], config["v_min"],
|
||||
config["v_max"])
|
||||
|
||||
return policy.q_loss.loss
|
||||
|
||||
|
||||
@@ -222,10 +222,12 @@ def multi_from_logits(behaviour_policy_logits,
|
||||
behaviour_policy_logits[i].shape.assert_has_rank(3)
|
||||
target_policy_logits[i].shape.assert_has_rank(3)
|
||||
|
||||
with tf1.name_scope(name, values=[
|
||||
behaviour_policy_logits, target_policy_logits, actions,
|
||||
discounts, rewards, values, bootstrap_value
|
||||
]):
|
||||
with tf1.name_scope(
|
||||
name,
|
||||
values=[
|
||||
behaviour_policy_logits, target_policy_logits, actions,
|
||||
discounts, rewards, values, bootstrap_value
|
||||
]):
|
||||
target_action_log_probs = multi_log_probs_from_logits_and_actions(
|
||||
target_policy_logits, actions, dist_class, model)
|
||||
|
||||
@@ -330,16 +332,16 @@ def from_importance_weights(log_rhos,
|
||||
if clip_pg_rho_threshold is not None:
|
||||
clip_pg_rho_threshold.shape.assert_has_rank(0)
|
||||
|
||||
with tf1.name_scope(name, values=[
|
||||
log_rhos, discounts, rewards, values, bootstrap_value
|
||||
]):
|
||||
with tf1.name_scope(
|
||||
name,
|
||||
values=[log_rhos, discounts, rewards, values, bootstrap_value]):
|
||||
rhos = tf.math.exp(log_rhos)
|
||||
if clip_rho_threshold is not None:
|
||||
clipped_rhos = tf.minimum(
|
||||
clip_rho_threshold, rhos, name="clipped_rhos")
|
||||
|
||||
tf1.summary.histogram(
|
||||
"clipped_rhos_1000", tf.minimum(1000.0, rhos))
|
||||
tf1.summary.histogram("clipped_rhos_1000", tf.minimum(
|
||||
1000.0, rhos))
|
||||
tf1.summary.scalar(
|
||||
"num_of_clipped_rhos",
|
||||
tf.reduce_sum(
|
||||
|
||||
@@ -259,13 +259,13 @@ def choose_optimizer(policy, config):
|
||||
return tf1.train.AdamOptimizer(policy.cur_lr)
|
||||
else:
|
||||
if tfv == 2:
|
||||
return tf.keras.optimizers.RMSprop(
|
||||
policy.cur_lr, config["decay"], config["momentum"],
|
||||
config["epsilon"])
|
||||
return tf.keras.optimizers.RMSprop(policy.cur_lr, config["decay"],
|
||||
config["momentum"],
|
||||
config["epsilon"])
|
||||
else:
|
||||
return tf1.train.RMSPropOptimizer(
|
||||
policy.cur_lr, config["decay"], config["momentum"],
|
||||
config["epsilon"])
|
||||
return tf1.train.RMSPropOptimizer(policy.cur_lr, config["decay"],
|
||||
config["momentum"],
|
||||
config["epsilon"])
|
||||
|
||||
|
||||
def clip_gradients(policy, optimizer, loss):
|
||||
|
||||
@@ -40,23 +40,21 @@ class ReweightedImitationLoss:
|
||||
# update averaged advantage norm
|
||||
if policy.config["framework"] in ["tf2", "tfe"]:
|
||||
policy._ma_adv_norm.assign_add(
|
||||
1e-6 * (tf.reduce_mean(
|
||||
tf.math.square(adv)) - policy._ma_adv_norm))
|
||||
1e-6 *
|
||||
(tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
|
||||
# Exponentially weighted advantages.
|
||||
exp_advs = tf.math.exp(
|
||||
beta * tf.math.divide(
|
||||
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
|
||||
exp_advs = tf.math.exp(beta * tf.math.divide(
|
||||
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
|
||||
else:
|
||||
update_adv_norm = tf1.assign_add(
|
||||
ref=policy._ma_adv_norm,
|
||||
value=1e-6 * (
|
||||
tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
|
||||
value=1e-6 *
|
||||
(tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
|
||||
|
||||
# exponentially weighted advantages
|
||||
with tf1.control_dependencies([update_adv_norm]):
|
||||
exp_advs = tf.math.exp(
|
||||
beta * tf.math.divide(
|
||||
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
|
||||
exp_advs = tf.math.exp(beta * tf.math.divide(
|
||||
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
|
||||
|
||||
# log\pi_\theta(a|s)
|
||||
logprobs = action_dist.logp(actions)
|
||||
|
||||
@@ -28,8 +28,8 @@ class TestMARWIL(unittest.TestCase):
|
||||
rllib_dir = Path(__file__).parent.parent.parent.parent
|
||||
print("rllib dir={}".format(rllib_dir))
|
||||
data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json")
|
||||
print("data_file={} exists={}".format(
|
||||
data_file, os.path.isfile(data_file)))
|
||||
print("data_file={} exists={}".format(data_file,
|
||||
os.path.isfile(data_file)))
|
||||
|
||||
config = marwil.DEFAULT_CONFIG.copy()
|
||||
config["num_workers"] = 0 # Run locally.
|
||||
|
||||
@@ -59,6 +59,7 @@ class TDModel(nn.Module):
|
||||
|
||||
|
||||
if torch:
|
||||
|
||||
class TDDataset(torch.utils.data.Dataset):
|
||||
def __init__(self, dataset: SampleBatchType, norms):
|
||||
self.count = dataset.count
|
||||
|
||||
@@ -26,8 +26,8 @@ def pg_tf_loss(policy, model, dist_class, train_batch):
|
||||
logits, _ = model.from_batch(train_batch)
|
||||
action_dist = dist_class(logits, model)
|
||||
return -tf.reduce_mean(
|
||||
action_dist.logp(train_batch[SampleBatch.ACTIONS]) *
|
||||
tf.cast(train_batch[Postprocessing.ADVANTAGES], dtype=tf.float32))
|
||||
action_dist.logp(train_batch[SampleBatch.ACTIONS]) * tf.cast(
|
||||
train_batch[Postprocessing.ADVANTAGES], dtype=tf.float32))
|
||||
|
||||
|
||||
PGTFPolicy = build_tf_policy(
|
||||
|
||||
@@ -77,13 +77,12 @@ class TestPG(unittest.TestCase):
|
||||
feed_dict=policy._get_loss_inputs_dict(
|
||||
train_batch, shuffle=False))
|
||||
else:
|
||||
results = (
|
||||
pg.pg_tf_loss if fw in ["tf2", "tfe"] else pg.pg_torch_loss
|
||||
)(
|
||||
policy,
|
||||
policy.model,
|
||||
dist_class=dist_cls,
|
||||
train_batch=train_batch)
|
||||
results = (pg.pg_tf_loss
|
||||
if fw in ["tf2", "tfe"] else pg.pg_torch_loss)(
|
||||
policy,
|
||||
policy.model,
|
||||
dist_class=dist_cls,
|
||||
train_batch=train_batch)
|
||||
|
||||
# Calculate expected results.
|
||||
if fw != "torch":
|
||||
|
||||
@@ -17,7 +17,6 @@ from ray.rllib.utils.numpy import fc
|
||||
from ray.rllib.utils.test_utils import check, framework_iterator, \
|
||||
check_compute_single_action
|
||||
|
||||
|
||||
# Fake CartPole episode of n time steps.
|
||||
FAKE_BATCH = {
|
||||
SampleBatch.CUR_OBS: np.array(
|
||||
|
||||
@@ -280,14 +280,14 @@ class QMixTorchPolicy(Policy):
|
||||
masked_q_values = q_values.clone()
|
||||
masked_q_values[avail == 0.0] = -float("inf")
|
||||
masked_q_values_folded = torch.reshape(
|
||||
masked_q_values,
|
||||
[-1] + list(masked_q_values.shape)[2:])
|
||||
masked_q_values, [-1] + list(masked_q_values.shape)[2:])
|
||||
actions, _ = self.exploration.get_exploration_action(
|
||||
action_distribution=TorchCategorical(masked_q_values_folded),
|
||||
timestep=timestep,
|
||||
explore=explore)
|
||||
actions = torch.reshape(
|
||||
actions, list(masked_q_values.shape)[:-1]).cpu().numpy()
|
||||
actions,
|
||||
list(masked_q_values.shape)[:-1]).cpu().numpy()
|
||||
hiddens = [s.cpu().numpy() for s in hiddens]
|
||||
|
||||
return tuple(actions.transpose([1, 0])), hiddens, {}
|
||||
|
||||
@@ -231,10 +231,8 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
|
||||
y_true=q_t_selected_target, y_pred=q_t_selected)
|
||||
]
|
||||
if policy.config["twin_q"]:
|
||||
critic_loss.append(
|
||||
0.5 * tf.keras.losses.MSE(
|
||||
y_true=q_t_selected_target,
|
||||
y_pred=twin_q_t_selected))
|
||||
critic_loss.append(0.5 * tf.keras.losses.MSE(
|
||||
y_true=q_t_selected_target, y_pred=twin_q_t_selected))
|
||||
|
||||
# Alpha- and actor losses.
|
||||
# Note: In the papers, alpha is used directly, here we take the log.
|
||||
@@ -281,25 +279,27 @@ def gradients_fn(policy, optimizer, loss):
|
||||
if policy.config["framework"] in ["tf2", "tfe"]:
|
||||
tape = optimizer.tape
|
||||
pol_weights = policy.model.policy_variables()
|
||||
actor_grads_and_vars = list(zip(tape.gradient(
|
||||
policy.actor_loss, pol_weights), pol_weights))
|
||||
actor_grads_and_vars = list(
|
||||
zip(tape.gradient(policy.actor_loss, pol_weights), pol_weights))
|
||||
q_weights = policy.model.q_variables()
|
||||
if policy.config["twin_q"]:
|
||||
half_cutoff = len(q_weights) // 2
|
||||
grads_1 = tape.gradient(
|
||||
policy.critic_loss[0], q_weights[:half_cutoff])
|
||||
grads_2 = tape.gradient(
|
||||
policy.critic_loss[1], q_weights[half_cutoff:])
|
||||
grads_1 = tape.gradient(policy.critic_loss[0],
|
||||
q_weights[:half_cutoff])
|
||||
grads_2 = tape.gradient(policy.critic_loss[1],
|
||||
q_weights[half_cutoff:])
|
||||
critic_grads_and_vars = \
|
||||
list(zip(grads_1, q_weights[:half_cutoff])) + \
|
||||
list(zip(grads_2, q_weights[half_cutoff:]))
|
||||
else:
|
||||
critic_grads_and_vars = list(zip(tape.gradient(
|
||||
policy.critic_loss[0], q_weights), q_weights))
|
||||
critic_grads_and_vars = list(
|
||||
zip(
|
||||
tape.gradient(policy.critic_loss[0], q_weights),
|
||||
q_weights))
|
||||
|
||||
alpha_vars = [policy.model.log_alpha]
|
||||
alpha_grads_and_vars = list(zip(tape.gradient(
|
||||
policy.alpha_loss, alpha_vars), alpha_vars))
|
||||
alpha_grads_and_vars = list(
|
||||
zip(tape.gradient(policy.alpha_loss, alpha_vars), alpha_vars))
|
||||
# Tf1.x: Use optimizer.compute_gradients()
|
||||
else:
|
||||
actor_grads_and_vars = policy._actor_optimizer.compute_gradients(
|
||||
@@ -327,12 +327,15 @@ def gradients_fn(policy, optimizer, loss):
|
||||
clip_func = tf.identity
|
||||
|
||||
# Save grads and vars for later use in `build_apply_op`.
|
||||
policy._actor_grads_and_vars = [
|
||||
(clip_func(g), v) for (g, v) in actor_grads_and_vars if g is not None]
|
||||
policy._critic_grads_and_vars = [
|
||||
(clip_func(g), v) for (g, v) in critic_grads_and_vars if g is not None]
|
||||
policy._alpha_grads_and_vars = [
|
||||
(clip_func(g), v) for (g, v) in alpha_grads_and_vars if g is not None]
|
||||
policy._actor_grads_and_vars = [(clip_func(g), v)
|
||||
for (g, v) in actor_grads_and_vars
|
||||
if g is not None]
|
||||
policy._critic_grads_and_vars = [(clip_func(g), v)
|
||||
for (g, v) in critic_grads_and_vars
|
||||
if g is not None]
|
||||
policy._alpha_grads_and_vars = [(clip_func(g), v)
|
||||
for (g, v) in alpha_grads_and_vars
|
||||
if g is not None]
|
||||
|
||||
grads_and_vars = (
|
||||
policy._actor_grads_and_vars + policy._critic_grads_and_vars +
|
||||
@@ -391,15 +394,13 @@ class ActorCriticOptimizerMixin:
|
||||
self._actor_optimizer = tf.keras.optimizers.Adam(
|
||||
learning_rate=config["optimization"]["actor_learning_rate"])
|
||||
self._critic_optimizer = [
|
||||
tf.keras.optimizers.Adam(
|
||||
learning_rate=config["optimization"][
|
||||
"critic_learning_rate"])
|
||||
tf.keras.optimizers.Adam(learning_rate=config["optimization"][
|
||||
"critic_learning_rate"])
|
||||
]
|
||||
if config["twin_q"]:
|
||||
self._critic_optimizer.append(
|
||||
tf.keras.optimizers.Adam(
|
||||
learning_rate=config["optimization"][
|
||||
"critic_learning_rate"]))
|
||||
tf.keras.optimizers.Adam(learning_rate=config[
|
||||
"optimization"]["critic_learning_rate"]))
|
||||
self._alpha_optimizer = tf.keras.optimizers.Adam(
|
||||
learning_rate=config["optimization"]["entropy_learning_rate"])
|
||||
else:
|
||||
@@ -407,15 +408,13 @@ class ActorCriticOptimizerMixin:
|
||||
self._actor_optimizer = tf1.train.AdamOptimizer(
|
||||
learning_rate=config["optimization"]["actor_learning_rate"])
|
||||
self._critic_optimizer = [
|
||||
tf1.train.AdamOptimizer(
|
||||
learning_rate=config["optimization"][
|
||||
"critic_learning_rate"])
|
||||
tf1.train.AdamOptimizer(learning_rate=config["optimization"][
|
||||
"critic_learning_rate"])
|
||||
]
|
||||
if config["twin_q"]:
|
||||
self._critic_optimizer.append(
|
||||
tf1.train.AdamOptimizer(
|
||||
learning_rate=config["optimization"][
|
||||
"critic_learning_rate"]))
|
||||
tf1.train.AdamOptimizer(learning_rate=config[
|
||||
"optimization"]["critic_learning_rate"]))
|
||||
self._alpha_optimizer = tf1.train.AdamOptimizer(
|
||||
learning_rate=config["optimization"]["entropy_learning_rate"])
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ def to_float_array(v: List[Any]) -> np.ndarray:
|
||||
|
||||
# TODO(sven): Remove the following class once we switch to trajectory view API.
|
||||
|
||||
|
||||
@PublicAPI
|
||||
class SampleBatchBuilder:
|
||||
"""Util to build a SampleBatch incrementally.
|
||||
@@ -76,6 +77,7 @@ class SampleBatchBuilder:
|
||||
|
||||
# TODO(sven): Remove the following class once we switch to trajectory view API.
|
||||
|
||||
|
||||
@DeveloperAPI
|
||||
class MultiAgentSampleBatchBuilder:
|
||||
"""Util to build SampleBatches for each policy in a multi-agent env.
|
||||
|
||||
@@ -60,12 +60,9 @@ class _SampleCollector(metaclass=ABCMeta):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def add_action_reward_next_obs(
|
||||
self,
|
||||
episode_id: EpisodeID,
|
||||
agent_id: AgentID,
|
||||
policy_id: PolicyID,
|
||||
values: Dict[str, TensorType]) -> None:
|
||||
def add_action_reward_next_obs(self, episode_id: EpisodeID,
|
||||
agent_id: AgentID, policy_id: PolicyID,
|
||||
values: Dict[str, TensorType]) -> None:
|
||||
"""Add the given dictionary (row) of values to this collector.
|
||||
|
||||
The incoming data (`values`) must include action, reward, done, and
|
||||
|
||||
+32
-27
@@ -373,26 +373,26 @@ class AsyncSampler(threading.Thread, SamplerInput):
|
||||
return extra
|
||||
|
||||
|
||||
def _env_runner(worker: "RolloutWorker",
|
||||
base_env: BaseEnv,
|
||||
extra_batch_callback: Callable[[SampleBatchType], None],
|
||||
policies: Dict[PolicyID, Policy],
|
||||
policy_mapping_fn: Callable[[AgentID], PolicyID],
|
||||
rollout_fragment_length: int,
|
||||
horizon: int,
|
||||
preprocessors: Dict[PolicyID, Preprocessor],
|
||||
obs_filters: Dict[PolicyID, Filter],
|
||||
clip_rewards: bool,
|
||||
clip_actions: bool,
|
||||
pack_multiple_episodes_in_batch: bool,
|
||||
callbacks: "DefaultCallbacks",
|
||||
tf_sess: Optional["tf.Session"],
|
||||
perf_stats: _PerfStats,
|
||||
soft_horizon: bool,
|
||||
no_done_at_end: bool,
|
||||
observation_fn: "ObservationFunction",
|
||||
_use_trajectory_view_api: bool = False
|
||||
) -> Iterable[SampleBatchType]:
|
||||
def _env_runner(
|
||||
worker: "RolloutWorker",
|
||||
base_env: BaseEnv,
|
||||
extra_batch_callback: Callable[[SampleBatchType], None],
|
||||
policies: Dict[PolicyID, Policy],
|
||||
policy_mapping_fn: Callable[[AgentID], PolicyID],
|
||||
rollout_fragment_length: int,
|
||||
horizon: int,
|
||||
preprocessors: Dict[PolicyID, Preprocessor],
|
||||
obs_filters: Dict[PolicyID, Filter],
|
||||
clip_rewards: bool,
|
||||
clip_actions: bool,
|
||||
pack_multiple_episodes_in_batch: bool,
|
||||
callbacks: "DefaultCallbacks",
|
||||
tf_sess: Optional["tf.Session"],
|
||||
perf_stats: _PerfStats,
|
||||
soft_horizon: bool,
|
||||
no_done_at_end: bool,
|
||||
observation_fn: "ObservationFunction",
|
||||
_use_trajectory_view_api: bool = False) -> Iterable[SampleBatchType]:
|
||||
"""This implements the common experience collection logic.
|
||||
|
||||
Args:
|
||||
@@ -571,18 +571,23 @@ def _env_runner(worker: "RolloutWorker",
|
||||
|
||||
|
||||
def _process_observations(
|
||||
worker: "RolloutWorker", base_env: BaseEnv,
|
||||
worker: "RolloutWorker",
|
||||
base_env: BaseEnv,
|
||||
policies: Dict[PolicyID, Policy],
|
||||
batch_builder_pool: List[MultiAgentSampleBatchBuilder],
|
||||
active_episodes: Dict[str, MultiAgentEpisode],
|
||||
unfiltered_obs: Dict[EnvID, Dict[AgentID, EnvObsType]],
|
||||
rewards: Dict[EnvID, Dict[AgentID, float]],
|
||||
dones: Dict[EnvID, Dict[AgentID, bool]],
|
||||
infos: Dict[EnvID, Dict[AgentID, EnvInfoDict]], horizon: int,
|
||||
infos: Dict[EnvID, Dict[AgentID, EnvInfoDict]],
|
||||
horizon: int,
|
||||
preprocessors: Dict[PolicyID, Preprocessor],
|
||||
obs_filters: Dict[PolicyID, Filter], rollout_fragment_length: int,
|
||||
pack_multiple_episodes_in_batch: bool, callbacks: "DefaultCallbacks",
|
||||
soft_horizon: bool, no_done_at_end: bool,
|
||||
obs_filters: Dict[PolicyID, Filter],
|
||||
rollout_fragment_length: int,
|
||||
pack_multiple_episodes_in_batch: bool,
|
||||
callbacks: "DefaultCallbacks",
|
||||
soft_horizon: bool,
|
||||
no_done_at_end: bool,
|
||||
observation_fn: "ObservationFunction",
|
||||
_use_trajectory_view_api: bool = False
|
||||
) -> Tuple[Set[EnvID], Dict[PolicyID, List[PolicyEvalData]], List[Union[
|
||||
@@ -931,8 +936,8 @@ def _do_policy_eval(
|
||||
def _process_policy_eval_results(
|
||||
*,
|
||||
to_eval: Dict[PolicyID, List[PolicyEvalData]],
|
||||
eval_results: Dict[PolicyID, Tuple[
|
||||
TensorStructType, StateBatch, dict]],
|
||||
eval_results: Dict[PolicyID, Tuple[TensorStructType, StateBatch,
|
||||
dict]],
|
||||
active_episodes: Dict[str, MultiAgentEpisode],
|
||||
active_envs: Set[int],
|
||||
off_policy_actions: MultiEnvDict,
|
||||
|
||||
Vendored
+3
-3
@@ -52,9 +52,9 @@ class RandomEnv(gym.Env):
|
||||
done = True
|
||||
# Max not reached yet -> Sample done via p_done.
|
||||
else:
|
||||
done = bool(np.random.choice(
|
||||
[True, False], p=[self.p_done, 1.0 - self.p_done]
|
||||
))
|
||||
done = bool(
|
||||
np.random.choice(
|
||||
[True, False], p=[self.p_done, 1.0 - self.p_done]))
|
||||
|
||||
return self.observation_space.sample(), \
|
||||
float(self.reward_space.sample()), done, {}
|
||||
|
||||
@@ -18,8 +18,10 @@ class RandomPolicy(Policy):
|
||||
if self.config.get("ignore_action_bounds", False) and \
|
||||
isinstance(self.action_space, Box):
|
||||
self.action_space_for_sampling = Box(
|
||||
-float("inf"), float("inf"),
|
||||
shape=self.action_space.shape, dtype=self.action_space.dtype)
|
||||
-float("inf"),
|
||||
float("inf"),
|
||||
shape=self.action_space.shape,
|
||||
dtype=self.action_space.dtype)
|
||||
else:
|
||||
self.action_space_for_sampling = self.action_space
|
||||
|
||||
|
||||
@@ -44,8 +44,8 @@ class CustomPolicy(Policy):
|
||||
episodes=None,
|
||||
**kwargs):
|
||||
# return random actions
|
||||
return np.array([self.action_space.sample()
|
||||
for _ in obs_batch]), [], {}
|
||||
return np.array(
|
||||
[self.action_space.sample() for _ in obs_batch]), [], {}
|
||||
|
||||
def learn_on_batch(self, samples):
|
||||
# implement your learning code here
|
||||
|
||||
@@ -138,10 +138,8 @@ class TrainTFMultiGPU:
|
||||
with self.workers.local_worker().tf_sess.graph.as_default():
|
||||
with self.workers.local_worker().tf_sess.as_default():
|
||||
for policy_id in self.policies:
|
||||
policy = self.workers.local_worker().get_policy(
|
||||
policy_id)
|
||||
with tf1.variable_scope(
|
||||
policy_id, reuse=tf1.AUTO_REUSE):
|
||||
policy = self.workers.local_worker().get_policy(policy_id)
|
||||
with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
|
||||
if policy._state_inputs:
|
||||
rnn_inputs = policy._state_inputs + [
|
||||
policy._seq_lens
|
||||
@@ -150,12 +148,10 @@ class TrainTFMultiGPU:
|
||||
rnn_inputs = []
|
||||
self.optimizers[policy_id] = (
|
||||
LocalSyncParallelOptimizer(
|
||||
policy._optimizer,
|
||||
self.devices,
|
||||
[v for _, v in policy._loss_inputs],
|
||||
rnn_inputs,
|
||||
self.per_device_batch_size,
|
||||
policy.copy))
|
||||
policy._optimizer, self.devices,
|
||||
[v
|
||||
for _, v in policy._loss_inputs], rnn_inputs,
|
||||
self.per_device_batch_size, policy.copy))
|
||||
|
||||
self.sess = self.workers.local_worker().tf_sess
|
||||
self.sess.run(tf1.global_variables_initializer())
|
||||
|
||||
@@ -6,9 +6,6 @@ from ray.rllib.models.tf.layers.skip_connection import SkipConnection
|
||||
from ray.rllib.models.tf.layers.multi_head_attention import MultiHeadAttention
|
||||
|
||||
__all__ = [
|
||||
"GRUGate",
|
||||
"MultiHeadAttention",
|
||||
"NoisyLayer",
|
||||
"RelativeMultiHeadAttention",
|
||||
"SkipConnection"
|
||||
"GRUGate", "MultiHeadAttention", "NoisyLayer",
|
||||
"RelativeMultiHeadAttention", "SkipConnection"
|
||||
]
|
||||
|
||||
@@ -16,11 +16,7 @@ class NoisyLayer(tf.keras.layers.Layer if tf else object):
|
||||
vanish along the training procedure
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
prefix,
|
||||
out_size,
|
||||
sigma0,
|
||||
activation="relu"):
|
||||
def __init__(self, prefix, out_size, sigma0, activation="relu"):
|
||||
"""Initializes a NoisyLayer object.
|
||||
|
||||
Args:
|
||||
@@ -53,8 +49,7 @@ class NoisyLayer(tf.keras.layers.Layer if tf else object):
|
||||
trainable=True,
|
||||
tf_name=self.prefix + "_sigma_w",
|
||||
shape=[in_size, self.out_size],
|
||||
dtype=tf.float32
|
||||
)
|
||||
dtype=tf.float32)
|
||||
|
||||
self.sigma_b = get_variable(
|
||||
value=tf.keras.initializers.Constant(
|
||||
|
||||
@@ -81,9 +81,9 @@ class VisionNetwork(TFModelV2):
|
||||
"Given `conv_filters` ({}) do not result in a [B, 1, "
|
||||
"1, {} (`num_outputs`)] shape (but in {})! Please "
|
||||
"adjust your Conv2D stack such that the dims 1 and 2 "
|
||||
"are both 1.".format(
|
||||
self.model_config["conv_filters"],
|
||||
self.num_outputs, list(conv_out.shape)))
|
||||
"are both 1.".format(self.model_config["conv_filters"],
|
||||
self.num_outputs,
|
||||
list(conv_out.shape)))
|
||||
|
||||
# num_outputs not known -> Flatten, then set self.num_outputs
|
||||
# to the resulting number of nodes.
|
||||
|
||||
@@ -67,20 +67,22 @@ class NoisyLayer(nn.Module):
|
||||
trainable=True)
|
||||
|
||||
def forward(self, inputs):
|
||||
epsilon_in = self._f_epsilon(torch.normal(
|
||||
mean=torch.zeros([self.in_size]),
|
||||
std=torch.ones([self.in_size])))
|
||||
epsilon_out = self._f_epsilon(torch.normal(
|
||||
mean=torch.zeros([self.out_size]),
|
||||
std=torch.ones([self.out_size])))
|
||||
epsilon_in = self._f_epsilon(
|
||||
torch.normal(
|
||||
mean=torch.zeros([self.in_size]),
|
||||
std=torch.ones([self.in_size])))
|
||||
epsilon_out = self._f_epsilon(
|
||||
torch.normal(
|
||||
mean=torch.zeros([self.out_size]),
|
||||
std=torch.ones([self.out_size])))
|
||||
epsilon_w = torch.matmul(
|
||||
torch.unsqueeze(epsilon_in, -1),
|
||||
other=torch.unsqueeze(epsilon_out, 0))
|
||||
epsilon_b = epsilon_out
|
||||
|
||||
action_activation = torch.matmul(
|
||||
inputs, self.w + self.sigma_w * epsilon_w
|
||||
) + self.b + self.sigma_b * epsilon_b
|
||||
inputs, self.w +
|
||||
self.sigma_w * epsilon_w) + self.b + self.sigma_b * epsilon_b
|
||||
|
||||
if self.activation is not None:
|
||||
action_activation = self.activation(action_activation)
|
||||
|
||||
@@ -158,9 +158,8 @@ class VisionNetwork(TorchModelV2, nn.Module):
|
||||
"Given `conv_filters` ({}) do not result in a [B, {} "
|
||||
"(`num_outputs`), 1, 1] shape (but in {})! Please adjust "
|
||||
"your Conv2D stack such that the last 2 dims are both "
|
||||
"1.".format(
|
||||
self.model_config["conv_filters"], self.num_outputs,
|
||||
list(conv_out.shape)))
|
||||
"1.".format(self.model_config["conv_filters"],
|
||||
self.num_outputs, list(conv_out.shape)))
|
||||
logits = conv_out.squeeze(3)
|
||||
logits = logits.squeeze(2)
|
||||
|
||||
|
||||
@@ -47,36 +47,36 @@ class DynamicTFPolicy(TFPolicy):
|
||||
"""
|
||||
|
||||
@DeveloperAPI
|
||||
def __init__(self,
|
||||
obs_space: gym.spaces.Space,
|
||||
action_space: gym.spaces.Space,
|
||||
config: TrainerConfigDict,
|
||||
loss_fn: Callable[
|
||||
[Policy, ModelV2, type, SampleBatch], TensorType],
|
||||
*,
|
||||
stats_fn: Optional[Callable[[Policy, SampleBatch],
|
||||
Dict[str, TensorType]]] = None,
|
||||
grad_stats_fn: Optional[Callable[
|
||||
[Policy, SampleBatch, ModelGradients],
|
||||
Dict[str, TensorType]]] = None,
|
||||
before_loss_init: Optional[Callable[
|
||||
[Policy, gym.spaces.Space, gym.spaces.Space,
|
||||
TrainerConfigDict], None]] = None,
|
||||
make_model: Optional[Callable[
|
||||
[Policy, gym.spaces.Space, gym.spaces.Space,
|
||||
TrainerConfigDict], ModelV2]] = None,
|
||||
action_sampler_fn: Optional[Callable[
|
||||
[TensorType, List[TensorType]], Tuple[
|
||||
TensorType, TensorType]]] = None,
|
||||
action_distribution_fn: Optional[Callable[
|
||||
[Policy, ModelV2, TensorType, TensorType, TensorType],
|
||||
Tuple[TensorType, type, List[TensorType]]]] = None,
|
||||
existing_inputs: Optional[Dict[
|
||||
str, "tf1.placeholder"]] = None,
|
||||
existing_model: Optional[ModelV2] = None,
|
||||
get_batch_divisibility_req: Optional[Callable[
|
||||
[Policy], int]] = None,
|
||||
obs_include_prev_action_reward: bool = True):
|
||||
def __init__(
|
||||
self,
|
||||
obs_space: gym.spaces.Space,
|
||||
action_space: gym.spaces.Space,
|
||||
config: TrainerConfigDict,
|
||||
loss_fn: Callable[[Policy, ModelV2, type, SampleBatch],
|
||||
TensorType],
|
||||
*,
|
||||
stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
|
||||
str, TensorType]]] = None,
|
||||
grad_stats_fn: Optional[Callable[[
|
||||
Policy, SampleBatch, ModelGradients
|
||||
], Dict[str, TensorType]]] = None,
|
||||
before_loss_init: Optional[Callable[[
|
||||
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
|
||||
], None]] = None,
|
||||
make_model: Optional[Callable[[
|
||||
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
|
||||
], ModelV2]] = None,
|
||||
action_sampler_fn: Optional[Callable[[
|
||||
TensorType, List[TensorType]
|
||||
], Tuple[TensorType, TensorType]]] = None,
|
||||
action_distribution_fn: Optional[Callable[[
|
||||
Policy, ModelV2, TensorType, TensorType, TensorType
|
||||
], Tuple[TensorType, type, List[TensorType]]]] = None,
|
||||
existing_inputs: Optional[Dict[str, "tf1.placeholder"]] = None,
|
||||
existing_model: Optional[ModelV2] = None,
|
||||
get_batch_divisibility_req: Optional[Callable[[Policy],
|
||||
int]] = None,
|
||||
obs_include_prev_action_reward: bool = True):
|
||||
"""Initialize a dynamic TF policy.
|
||||
|
||||
Arguments:
|
||||
|
||||
@@ -641,8 +641,8 @@ def build_eager_tf_policy(name,
|
||||
dummy_batch["seq_lens"] = np.array([1], dtype=np.int32)
|
||||
|
||||
# Convert everything to tensors.
|
||||
dummy_batch = tf.nest.map_structure(
|
||||
tf1.convert_to_tensor, dummy_batch)
|
||||
dummy_batch = tf.nest.map_structure(tf1.convert_to_tensor,
|
||||
dummy_batch)
|
||||
|
||||
# for IMPALA which expects a certain sample batch size.
|
||||
def tile_to(tensor, n):
|
||||
|
||||
+11
-14
@@ -46,11 +46,8 @@ class Policy(metaclass=ABCMeta):
|
||||
"""
|
||||
|
||||
@DeveloperAPI
|
||||
def __init__(
|
||||
self,
|
||||
observation_space: gym.spaces.Space,
|
||||
action_space: gym.spaces.Space,
|
||||
config: TrainerConfigDict):
|
||||
def __init__(self, observation_space: gym.spaces.Space,
|
||||
action_space: gym.spaces.Space, config: TrainerConfigDict):
|
||||
"""Initialize the graph.
|
||||
|
||||
This is the standard constructor for policies. The policy
|
||||
@@ -181,9 +178,9 @@ class Policy(metaclass=ABCMeta):
|
||||
episodes = [episode]
|
||||
if state is not None:
|
||||
state_batch = [
|
||||
s.unsqueeze(0) if torch and isinstance(s, torch.Tensor) else
|
||||
np.expand_dims(s, 0)
|
||||
for s in state
|
||||
s.unsqueeze(0)
|
||||
if torch and isinstance(s, torch.Tensor) else np.expand_dims(
|
||||
s, 0) for s in state
|
||||
]
|
||||
|
||||
out = self.compute_actions(
|
||||
@@ -261,10 +258,10 @@ class Policy(metaclass=ABCMeta):
|
||||
actions: Union[List[TensorType], TensorType],
|
||||
obs_batch: Union[List[TensorType], TensorType],
|
||||
state_batches: Optional[List[TensorType]] = None,
|
||||
prev_action_batch: Optional[
|
||||
Union[List[TensorType], TensorType]] = None,
|
||||
prev_reward_batch: Optional[
|
||||
Union[List[TensorType], TensorType]] = None) -> TensorType:
|
||||
prev_action_batch: Optional[Union[List[TensorType],
|
||||
TensorType]] = None,
|
||||
prev_reward_batch: Optional[Union[List[
|
||||
TensorType], TensorType]] = None) -> TensorType:
|
||||
"""Computes the log-prob/likelihood for a given action and observation.
|
||||
|
||||
Args:
|
||||
@@ -309,8 +306,8 @@ class Policy(metaclass=ABCMeta):
|
||||
def postprocess_trajectory(
|
||||
self,
|
||||
sample_batch: SampleBatch,
|
||||
other_agent_batches: Optional[
|
||||
Dict[AgentID, Tuple["Policy", SampleBatch]]] = None,
|
||||
other_agent_batches: Optional[Dict[AgentID, Tuple[
|
||||
"Policy", SampleBatch]]] = None,
|
||||
episode: Optional["MultiAgentEpisode"] = None) -> SampleBatch:
|
||||
"""Implements algorithm-specific trajectory postprocessing.
|
||||
|
||||
|
||||
@@ -305,10 +305,9 @@ class SampleBatch:
|
||||
self.data[key] = item
|
||||
|
||||
@DeveloperAPI
|
||||
def compress(
|
||||
self,
|
||||
bulk: bool = False,
|
||||
columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
|
||||
def compress(self,
|
||||
bulk: bool = False,
|
||||
columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
|
||||
"""Compresses the data buffers (by column) in place.
|
||||
|
||||
Args:
|
||||
@@ -327,10 +326,9 @@ class SampleBatch:
|
||||
[pack(o) for o in self.data[key]])
|
||||
|
||||
@DeveloperAPI
|
||||
def decompress_if_needed(
|
||||
self,
|
||||
columns: Set[str] = frozenset(
|
||||
["obs", "new_obs"])) -> "SampleBatch":
|
||||
def decompress_if_needed(self,
|
||||
columns: Set[str] = frozenset(
|
||||
["obs", "new_obs"])) -> "SampleBatch":
|
||||
"""Decompresses data buffers (per column if not compressed) in place.
|
||||
|
||||
Args:
|
||||
@@ -374,8 +372,7 @@ class MultiAgentBatch:
|
||||
"""
|
||||
|
||||
@PublicAPI
|
||||
def __init__(self,
|
||||
policy_batches: Dict[PolicyID, SampleBatch],
|
||||
def __init__(self, policy_batches: Dict[PolicyID, SampleBatch],
|
||||
env_steps: int):
|
||||
"""Initialize a MultiAgentBatch object.
|
||||
|
||||
@@ -541,11 +538,9 @@ class MultiAgentBatch:
|
||||
return sum(b.size_bytes() for b in self.policy_batches.values())
|
||||
|
||||
@DeveloperAPI
|
||||
def compress(
|
||||
self,
|
||||
bulk: bool = False,
|
||||
columns: Set[str] = frozenset(
|
||||
["obs", "new_obs"])) -> None:
|
||||
def compress(self,
|
||||
bulk: bool = False,
|
||||
columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
|
||||
"""Compresses each policy batch (per column) in place.
|
||||
|
||||
Args:
|
||||
@@ -558,10 +553,9 @@ class MultiAgentBatch:
|
||||
batch.compress(bulk=bulk, columns=columns)
|
||||
|
||||
@DeveloperAPI
|
||||
def decompress_if_needed(
|
||||
self,
|
||||
columns: Set[str] = frozenset(
|
||||
["obs", "new_obs"])) -> "MultiAgentBatch":
|
||||
def decompress_if_needed(self,
|
||||
columns: Set[str] = frozenset(
|
||||
["obs", "new_obs"])) -> "MultiAgentBatch":
|
||||
"""Decompresses each policy batch (per column), if already compressed.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -25,8 +25,9 @@ class TestTrajectoryViewAPI(unittest.TestCase):
|
||||
assert len(view_req_model) == 1
|
||||
assert len(view_req_policy) == 6
|
||||
for key in [
|
||||
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
|
||||
SampleBatch.DONES, SampleBatch.NEXT_OBS, SampleBatch.VF_PREDS
|
||||
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
|
||||
SampleBatch.DONES, SampleBatch.NEXT_OBS,
|
||||
SampleBatch.VF_PREDS
|
||||
]:
|
||||
assert key in view_req_policy
|
||||
# None of the view cols has a special underlying data_col,
|
||||
@@ -53,9 +54,10 @@ class TestTrajectoryViewAPI(unittest.TestCase):
|
||||
assert len(view_req_model) == 3 # obs, prev_a, prev_r
|
||||
assert len(view_req_policy) == 8
|
||||
for key in [
|
||||
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
|
||||
SampleBatch.DONES, SampleBatch.NEXT_OBS, SampleBatch.VF_PREDS,
|
||||
SampleBatch.PREV_ACTIONS, SampleBatch.PREV_REWARDS
|
||||
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
|
||||
SampleBatch.DONES, SampleBatch.NEXT_OBS,
|
||||
SampleBatch.VF_PREDS, SampleBatch.PREV_ACTIONS,
|
||||
SampleBatch.PREV_REWARDS
|
||||
]:
|
||||
assert key in view_req_policy
|
||||
|
||||
@@ -65,9 +67,10 @@ class TestTrajectoryViewAPI(unittest.TestCase):
|
||||
elif key == SampleBatch.PREV_REWARDS:
|
||||
assert view_req_policy[key].data_col == SampleBatch.REWARDS
|
||||
assert view_req_policy[key].shift == -1
|
||||
elif key not in [SampleBatch.NEXT_OBS,
|
||||
SampleBatch.PREV_ACTIONS,
|
||||
SampleBatch.PREV_REWARDS]:
|
||||
elif key not in [
|
||||
SampleBatch.NEXT_OBS, SampleBatch.PREV_ACTIONS,
|
||||
SampleBatch.PREV_REWARDS
|
||||
]:
|
||||
assert view_req_policy[key].data_col is None
|
||||
else:
|
||||
assert view_req_policy[key].data_col == SampleBatch.OBS
|
||||
|
||||
+18
-16
@@ -239,8 +239,7 @@ class TFPolicy(Policy):
|
||||
"""Returns whether the loss function has been initialized."""
|
||||
return self._loss is not None
|
||||
|
||||
def _initialize_loss(self,
|
||||
loss: TensorType,
|
||||
def _initialize_loss(self, loss: TensorType,
|
||||
loss_inputs: List[Tuple[str, TensorType]]) -> None:
|
||||
"""Initializes the loss op from given loss tensor and placeholders.
|
||||
|
||||
@@ -264,8 +263,10 @@ class TFPolicy(Policy):
|
||||
self._loss = loss
|
||||
|
||||
self._optimizer = self.optimizer()
|
||||
self._grads_and_vars = [(g, v) for (g, v) in self.gradients(
|
||||
self._optimizer, self._loss) if g is not None]
|
||||
self._grads_and_vars = [
|
||||
(g, v) for (g, v) in self.gradients(self._optimizer, self._loss)
|
||||
if g is not None
|
||||
]
|
||||
self._grads = [g for (g, v) in self._grads_and_vars]
|
||||
|
||||
# TODO(sven/ekl): Deprecate support for v1 models.
|
||||
@@ -336,10 +337,10 @@ class TFPolicy(Policy):
|
||||
actions: Union[List[TensorType], TensorType],
|
||||
obs_batch: Union[List[TensorType], TensorType],
|
||||
state_batches: Optional[List[TensorType]] = None,
|
||||
prev_action_batch: Optional[
|
||||
Union[List[TensorType], TensorType]] = None,
|
||||
prev_reward_batch: Optional[
|
||||
Union[List[TensorType], TensorType]] = None) -> TensorType:
|
||||
prev_action_batch: Optional[Union[List[TensorType],
|
||||
TensorType]] = None,
|
||||
prev_reward_batch: Optional[Union[List[
|
||||
TensorType], TensorType]] = None) -> TensorType:
|
||||
|
||||
if self._log_likelihood is None:
|
||||
raise ValueError("Cannot compute log-prob/likelihood w/o a "
|
||||
@@ -378,8 +379,8 @@ class TFPolicy(Policy):
|
||||
|
||||
@override(Policy)
|
||||
@DeveloperAPI
|
||||
def learn_on_batch(self, postprocessed_batch: SampleBatch) -> Dict[
|
||||
str, TensorType]:
|
||||
def learn_on_batch(
|
||||
self, postprocessed_batch: SampleBatch) -> Dict[str, TensorType]:
|
||||
assert self.loss_initialized()
|
||||
builder = TFRunBuilder(self._sess, "learn_on_batch")
|
||||
fetches = self._build_learn_on_batch(builder, postprocessed_batch)
|
||||
@@ -457,7 +458,8 @@ class TFPolicy(Policy):
|
||||
|
||||
@override(Policy)
|
||||
@DeveloperAPI
|
||||
def export_checkpoint(self, export_dir: str,
|
||||
def export_checkpoint(self,
|
||||
export_dir: str,
|
||||
filename_prefix: str = "model") -> None:
|
||||
"""Export tensorflow checkpoint to export_dir."""
|
||||
try:
|
||||
@@ -573,8 +575,7 @@ class TFPolicy(Policy):
|
||||
return tf1.train.AdamOptimizer()
|
||||
|
||||
@DeveloperAPI
|
||||
def gradients(self,
|
||||
optimizer: "tf.keras.optimizers.Optimizer",
|
||||
def gradients(self, optimizer: "tf.keras.optimizers.Optimizer",
|
||||
loss: TensorType) -> List[Tuple[TensorType, TensorType]]:
|
||||
"""Override this for a custom gradient computation behavior.
|
||||
|
||||
@@ -816,8 +817,7 @@ class LearningRateSchedule:
|
||||
|
||||
@DeveloperAPI
|
||||
def __init__(self, lr, lr_schedule):
|
||||
self.cur_lr = tf1.get_variable(
|
||||
"lr", initializer=lr, trainable=False)
|
||||
self.cur_lr = tf1.get_variable("lr", initializer=lr, trainable=False)
|
||||
if lr_schedule is None:
|
||||
self.lr_schedule = ConstantSchedule(lr, framework=None)
|
||||
else:
|
||||
@@ -843,7 +843,9 @@ class EntropyCoeffSchedule:
|
||||
@DeveloperAPI
|
||||
def __init__(self, entropy_coeff, entropy_coeff_schedule):
|
||||
self.entropy_coeff = get_variable(
|
||||
entropy_coeff, framework="tf", tf_name="entropy_coeff",
|
||||
entropy_coeff,
|
||||
framework="tf",
|
||||
tf_name="entropy_coeff",
|
||||
trainable=False)
|
||||
|
||||
if entropy_coeff_schedule is None:
|
||||
|
||||
@@ -13,58 +13,52 @@ from ray.rllib.utils.types import ModelGradients, TensorType, TrainerConfigDict
|
||||
|
||||
|
||||
@DeveloperAPI
|
||||
def build_tf_policy(name: str,
|
||||
*,
|
||||
loss_fn: Callable[
|
||||
[Policy, ModelV2, type, SampleBatch], TensorType],
|
||||
get_default_config: Optional[
|
||||
Callable[[None], TrainerConfigDict]] = None,
|
||||
postprocess_fn: Optional[Callable[
|
||||
[Policy, SampleBatch, List[SampleBatch],
|
||||
"MultiAgentEpisode"], None]] = None,
|
||||
stats_fn: Optional[Callable[
|
||||
[Policy, SampleBatch], Dict[str, TensorType]]] = None,
|
||||
optimizer_fn: Optional[Callable[
|
||||
[Policy, TrainerConfigDict],
|
||||
"tf.keras.optimizers.Optimizer"]] = None,
|
||||
gradients_fn: Optional[Callable[
|
||||
[Policy, "tf.keras.optimizers.Optimizer",
|
||||
TensorType], ModelGradients]] = None,
|
||||
apply_gradients_fn: Optional[Callable[
|
||||
[Policy, "tf.keras.optimizers.Optimizer",
|
||||
ModelGradients], "tf.Operation"]] = None,
|
||||
grad_stats_fn: Optional[Callable[
|
||||
[Policy, SampleBatch, ModelGradients],
|
||||
Dict[str, TensorType]]] = None,
|
||||
extra_action_fetches_fn: Optional[Callable[
|
||||
[Policy], Dict[str, TensorType]]] = None,
|
||||
extra_learn_fetches_fn: Optional[Callable[
|
||||
[Policy], Dict[str, TensorType]]] = None,
|
||||
validate_spaces: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict],
|
||||
None]] = None,
|
||||
before_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict],
|
||||
None]] = None,
|
||||
before_loss_init: Optional[Callable[
|
||||
[Policy, gym.spaces.Space, gym.spaces.Space,
|
||||
TrainerConfigDict], None]] = None,
|
||||
after_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict],
|
||||
None]] = None,
|
||||
make_model: Optional[Callable[
|
||||
[Policy, gym.spaces.Space, gym.spaces.Space,
|
||||
TrainerConfigDict], ModelV2]] = None,
|
||||
action_sampler_fn: Optional[Callable[
|
||||
[TensorType, List[TensorType]], Tuple[
|
||||
TensorType, TensorType]]] = None,
|
||||
action_distribution_fn: Optional[Callable[
|
||||
[Policy, ModelV2, TensorType, TensorType, TensorType],
|
||||
Tuple[TensorType, type, List[TensorType]]]] = None,
|
||||
mixins: Optional[List[type]] = None,
|
||||
get_batch_divisibility_req: Optional[Callable[
|
||||
[Policy], int]] = None,
|
||||
obs_include_prev_action_reward: bool = True):
|
||||
def build_tf_policy(
|
||||
name: str,
|
||||
*,
|
||||
loss_fn: Callable[[Policy, ModelV2, type, SampleBatch], TensorType],
|
||||
get_default_config: Optional[Callable[[None],
|
||||
TrainerConfigDict]] = None,
|
||||
postprocess_fn: Optional[Callable[[
|
||||
Policy, SampleBatch, List[SampleBatch], "MultiAgentEpisode"
|
||||
], None]] = None,
|
||||
stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
|
||||
str, TensorType]]] = None,
|
||||
optimizer_fn: Optional[Callable[[
|
||||
Policy, TrainerConfigDict
|
||||
], "tf.keras.optimizers.Optimizer"]] = None,
|
||||
gradients_fn: Optional[Callable[[
|
||||
Policy, "tf.keras.optimizers.Optimizer", TensorType
|
||||
], ModelGradients]] = None,
|
||||
apply_gradients_fn: Optional[Callable[[
|
||||
Policy, "tf.keras.optimizers.Optimizer", ModelGradients
|
||||
], "tf.Operation"]] = None,
|
||||
grad_stats_fn: Optional[Callable[[Policy, SampleBatch, ModelGradients],
|
||||
Dict[str, TensorType]]] = None,
|
||||
extra_action_fetches_fn: Optional[Callable[[Policy], Dict[
|
||||
str, TensorType]]] = None,
|
||||
extra_learn_fetches_fn: Optional[Callable[[Policy], Dict[
|
||||
str, TensorType]]] = None,
|
||||
validate_spaces: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
|
||||
before_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
|
||||
before_loss_init: Optional[Callable[[
|
||||
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
|
||||
], None]] = None,
|
||||
after_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
|
||||
make_model: Optional[Callable[[
|
||||
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
|
||||
], ModelV2]] = None,
|
||||
action_sampler_fn: Optional[Callable[[TensorType, List[
|
||||
TensorType]], Tuple[TensorType, TensorType]]] = None,
|
||||
action_distribution_fn: Optional[Callable[[
|
||||
Policy, ModelV2, TensorType, TensorType, TensorType
|
||||
], Tuple[TensorType, type, List[TensorType]]]] = None,
|
||||
mixins: Optional[List[type]] = None,
|
||||
get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None,
|
||||
obs_include_prev_action_reward: bool = True):
|
||||
"""Helper function for creating a dynamic tf policy at runtime.
|
||||
|
||||
Functions will be run in this order to initialize the policy:
|
||||
|
||||
@@ -19,62 +19,51 @@ torch, _ = try_import_torch()
|
||||
|
||||
|
||||
@DeveloperAPI
|
||||
def build_torch_policy(name: str,
|
||||
*,
|
||||
loss_fn: Callable[
|
||||
[Policy, ModelV2, type, SampleBatch], TensorType],
|
||||
get_default_config: Optional[Callable[
|
||||
[], TrainerConfigDict]] = None,
|
||||
stats_fn: Optional[Callable[
|
||||
[Policy, SampleBatch],
|
||||
Dict[str, TensorType]]] = None,
|
||||
postprocess_fn: Optional[Callable[
|
||||
[Policy, SampleBatch, List[SampleBatch],
|
||||
"MultiAgentEpisode"], None]] = None,
|
||||
extra_action_out_fn: Optional[Callable[
|
||||
[Policy, Dict[str, TensorType], List[TensorType],
|
||||
ModelV2, TorchDistributionWrapper],
|
||||
Dict[str, TensorType]]] = None,
|
||||
extra_grad_process_fn: Optional[Callable[
|
||||
[Policy, "torch.optim.Optimizer", TensorType],
|
||||
Dict[str, TensorType]]] = None,
|
||||
# TODO: (sven) Replace "fetches" with "process".
|
||||
extra_learn_fetches_fn: Optional[Callable[
|
||||
[Policy], Dict[str, TensorType]]] = None,
|
||||
optimizer_fn: Optional[Callable[
|
||||
[Policy, TrainerConfigDict],
|
||||
"torch.optim.Optimizer"]] = None,
|
||||
validate_spaces: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict],
|
||||
None]] = None,
|
||||
before_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict],
|
||||
None]] = None,
|
||||
after_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict],
|
||||
None]] = None,
|
||||
action_sampler_fn: Optional[Callable[
|
||||
[TensorType, List[TensorType]], Tuple[
|
||||
TensorType, TensorType]]] = None,
|
||||
action_distribution_fn: Optional[Callable[
|
||||
[Policy, ModelV2, TensorType, TensorType,
|
||||
TensorType],
|
||||
Tuple[TensorType, type, List[TensorType]]]] = None,
|
||||
make_model: Optional[Callable[
|
||||
[Policy, gym.spaces.Space, gym.spaces.Space,
|
||||
TrainerConfigDict], ModelV2]] = None,
|
||||
make_model_and_action_dist: Optional[Callable[
|
||||
[Policy, gym.spaces.Space, gym.spaces.Space,
|
||||
TrainerConfigDict],
|
||||
Tuple[ModelV2, TorchDistributionWrapper]]] = None,
|
||||
apply_gradients_fn: Optional[Callable[
|
||||
[Policy, "torch.optim.Optimizer"], None]] = None,
|
||||
mixins: Optional[List[type]] = None,
|
||||
training_view_requirements_fn: Optional[Callable[
|
||||
[], Dict[str, ViewRequirement]]] = None,
|
||||
get_batch_divisibility_req: Optional[Callable[
|
||||
[Policy], int]] = None
|
||||
):
|
||||
def build_torch_policy(
|
||||
name: str,
|
||||
*,
|
||||
loss_fn: Callable[[Policy, ModelV2, type, SampleBatch], TensorType],
|
||||
get_default_config: Optional[Callable[[], TrainerConfigDict]] = None,
|
||||
stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
|
||||
str, TensorType]]] = None,
|
||||
postprocess_fn: Optional[Callable[[
|
||||
Policy, SampleBatch, List[SampleBatch], "MultiAgentEpisode"
|
||||
], None]] = None,
|
||||
extra_action_out_fn: Optional[Callable[[
|
||||
Policy, Dict[str, TensorType], List[TensorType], ModelV2,
|
||||
TorchDistributionWrapper
|
||||
], Dict[str, TensorType]]] = None,
|
||||
extra_grad_process_fn: Optional[Callable[[
|
||||
Policy, "torch.optim.Optimizer", TensorType
|
||||
], Dict[str, TensorType]]] = None,
|
||||
# TODO: (sven) Replace "fetches" with "process".
|
||||
extra_learn_fetches_fn: Optional[Callable[[Policy], Dict[
|
||||
str, TensorType]]] = None,
|
||||
optimizer_fn: Optional[Callable[[Policy, TrainerConfigDict],
|
||||
"torch.optim.Optimizer"]] = None,
|
||||
validate_spaces: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
|
||||
before_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
|
||||
after_init: Optional[Callable[
|
||||
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
|
||||
action_sampler_fn: Optional[Callable[[TensorType, List[
|
||||
TensorType]], Tuple[TensorType, TensorType]]] = None,
|
||||
action_distribution_fn: Optional[Callable[[
|
||||
Policy, ModelV2, TensorType, TensorType, TensorType
|
||||
], Tuple[TensorType, type, List[TensorType]]]] = None,
|
||||
make_model: Optional[Callable[[
|
||||
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
|
||||
], ModelV2]] = None,
|
||||
make_model_and_action_dist: Optional[Callable[[
|
||||
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
|
||||
], Tuple[ModelV2, TorchDistributionWrapper]]] = None,
|
||||
apply_gradients_fn: Optional[Callable[
|
||||
[Policy, "torch.optim.Optimizer"], None]] = None,
|
||||
mixins: Optional[List[type]] = None,
|
||||
training_view_requirements_fn: Optional[Callable[[], Dict[
|
||||
str, ViewRequirement]]] = None,
|
||||
get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None):
|
||||
"""Helper function for creating a torch policy class at runtime.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -147,7 +147,7 @@ class ModelCatalogTest(unittest.TestCase):
|
||||
self.assertEqual(param_shape, action_space.shape)
|
||||
|
||||
# test the class works as a distribution
|
||||
dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
|
||||
dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
|
||||
model = Model()
|
||||
model.model_config = model_config
|
||||
dist = dist_cls(dist_input, model=model)
|
||||
@@ -161,7 +161,7 @@ class ModelCatalogTest(unittest.TestCase):
|
||||
dist_cls, param_shape = ModelCatalog.get_action_dist(
|
||||
action_space, model_config)
|
||||
self.assertEqual(param_shape, (3, ))
|
||||
dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
|
||||
dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
|
||||
model.model_config = model_config
|
||||
dist = dist_cls(dist_input, model=model)
|
||||
self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
|
||||
|
||||
@@ -76,7 +76,10 @@ class TestEagerSupportPG(unittest.TestCase):
|
||||
|
||||
def test_impala(self):
|
||||
check_support(
|
||||
"IMPALA", {"num_workers": 1, "num_gpus": 0}, test_eager=True)
|
||||
"IMPALA", {
|
||||
"num_workers": 1,
|
||||
"num_gpus": 0
|
||||
}, test_eager=True)
|
||||
|
||||
|
||||
class TestEagerSupportOffPolicy(unittest.TestCase):
|
||||
@@ -130,5 +133,6 @@ if __name__ == "__main__":
|
||||
# None for all unittest.TestCase classes in this file.
|
||||
import pytest
|
||||
class_ = sys.argv[1] if len(sys.argv) > 1 else None
|
||||
sys.exit(pytest.main(
|
||||
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
|
||||
sys.exit(
|
||||
pytest.main(
|
||||
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
|
||||
|
||||
@@ -21,32 +21,34 @@ class TestMultiAgentPendulum(unittest.TestCase):
|
||||
|
||||
# Test for both torch and tf.
|
||||
for fw in framework_iterator(frameworks=["torch", "tf"]):
|
||||
trials = run_experiments({
|
||||
"test": {
|
||||
"run": "PPO",
|
||||
"env": "multi_agent_pendulum",
|
||||
"stop": {
|
||||
"timesteps_total": 500000,
|
||||
"episode_reward_mean": -300.0,
|
||||
},
|
||||
"config": {
|
||||
"train_batch_size": 2048,
|
||||
"vf_clip_param": 10.0,
|
||||
"num_workers": 0,
|
||||
"num_envs_per_worker": 10,
|
||||
"lambda": 0.1,
|
||||
"gamma": 0.95,
|
||||
"lr": 0.0003,
|
||||
"sgd_minibatch_size": 64,
|
||||
"num_sgd_iter": 10,
|
||||
"model": {
|
||||
"fcnet_hiddens": [128, 128],
|
||||
trials = run_experiments(
|
||||
{
|
||||
"test": {
|
||||
"run": "PPO",
|
||||
"env": "multi_agent_pendulum",
|
||||
"stop": {
|
||||
"timesteps_total": 500000,
|
||||
"episode_reward_mean": -300.0,
|
||||
},
|
||||
"batch_mode": "complete_episodes",
|
||||
"framework": fw,
|
||||
},
|
||||
}
|
||||
}, verbose=1)
|
||||
"config": {
|
||||
"train_batch_size": 2048,
|
||||
"vf_clip_param": 10.0,
|
||||
"num_workers": 0,
|
||||
"num_envs_per_worker": 10,
|
||||
"lambda": 0.1,
|
||||
"gamma": 0.95,
|
||||
"lr": 0.0003,
|
||||
"sgd_minibatch_size": 64,
|
||||
"num_sgd_iter": 10,
|
||||
"model": {
|
||||
"fcnet_hiddens": [128, 128],
|
||||
},
|
||||
"batch_mode": "complete_episodes",
|
||||
"framework": fw,
|
||||
},
|
||||
}
|
||||
},
|
||||
verbose=1)
|
||||
if trials[0].last_result["episode_reward_mean"] < -300.0:
|
||||
raise ValueError("Did not get to -200 reward",
|
||||
trials[0].last_result)
|
||||
|
||||
@@ -278,7 +278,7 @@ class TestRolloutWorker(unittest.TestCase):
|
||||
|
||||
def test_action_clipping(self):
|
||||
from ray.rllib.examples.env.random_env import RandomEnv
|
||||
action_space = gym.spaces.Box(-2.0, 1.0, (3,))
|
||||
action_space = gym.spaces.Box(-2.0, 1.0, (3, ))
|
||||
|
||||
# Clipping: True (clip between Policy's action_space.low/high),
|
||||
ev = RolloutWorker(
|
||||
|
||||
@@ -125,5 +125,6 @@ if __name__ == "__main__":
|
||||
# One can specify the specific TestCase class to run.
|
||||
# None for all unittest.TestCase classes in this file.
|
||||
class_ = sys.argv[1] if len(sys.argv) > 1 else None
|
||||
sys.exit(pytest.main(
|
||||
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
|
||||
sys.exit(
|
||||
pytest.main(
|
||||
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
|
||||
|
||||
@@ -203,5 +203,6 @@ if __name__ == "__main__":
|
||||
# One can specify the specific TestCase class to run.
|
||||
# None for all unittest.TestCase classes in this file.
|
||||
class_ = sys.argv[1] if len(sys.argv) > 1 else None
|
||||
sys.exit(pytest.main(
|
||||
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
|
||||
sys.exit(
|
||||
pytest.main(
|
||||
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
|
||||
|
||||
@@ -99,8 +99,8 @@ class EpsilonGreedy(Exploration):
|
||||
tf.random.categorical(random_valid_action_logits, 1), axis=1)
|
||||
|
||||
chose_random = tf.random.uniform(
|
||||
tf.stack([batch_size]),
|
||||
minval=0, maxval=1, dtype=tf.float32) < epsilon
|
||||
tf.stack([batch_size]), minval=0, maxval=1,
|
||||
dtype=tf.float32) < epsilon
|
||||
|
||||
action = tf.cond(
|
||||
pred=tf.constant(explore, dtype=tf.bool)
|
||||
|
||||
@@ -154,7 +154,7 @@ class GaussianNoise(Exploration):
|
||||
scale = self.scale_schedule(self.last_timestep)
|
||||
gaussian_sample = scale * torch.normal(
|
||||
mean=torch.zeros(det_actions.size()), std=self.stddev).to(
|
||||
self.device)
|
||||
self.device)
|
||||
action = torch.min(
|
||||
torch.max(
|
||||
det_actions + gaussian_sample,
|
||||
|
||||
@@ -9,23 +9,18 @@ from ray.rllib.utils.test_utils import check, framework_iterator
|
||||
class TestParameterNoise(unittest.TestCase):
|
||||
def test_ddpg_parameter_noise(self):
|
||||
self.do_test_parameter_noise_exploration(
|
||||
ddpg.DDPGTrainer,
|
||||
ddpg.DEFAULT_CONFIG,
|
||||
"Pendulum-v0", {},
|
||||
ddpg.DDPGTrainer, ddpg.DEFAULT_CONFIG, "Pendulum-v0", {},
|
||||
np.array([1.0, 0.0, -1.0]))
|
||||
|
||||
def test_dqn_parameter_noise(self):
|
||||
self.do_test_parameter_noise_exploration(
|
||||
dqn.DQNTrainer,
|
||||
dqn.DEFAULT_CONFIG,
|
||||
"FrozenLake-v0", {
|
||||
dqn.DQNTrainer, dqn.DEFAULT_CONFIG, "FrozenLake-v0", {
|
||||
"is_slippery": False,
|
||||
"map_name": "4x4"
|
||||
},
|
||||
np.array(0))
|
||||
}, np.array(0))
|
||||
|
||||
def do_test_parameter_noise_exploration(
|
||||
self, trainer_cls, config, env, env_config, obs):
|
||||
def do_test_parameter_noise_exploration(self, trainer_cls, config, env,
|
||||
env_config, obs):
|
||||
"""Tests, whether an Agent works with ParameterNoise."""
|
||||
core_config = config.copy()
|
||||
core_config["num_workers"] = 0 # Run locally.
|
||||
|
||||
@@ -200,9 +200,13 @@ def get_variable(value,
|
||||
if isinstance(value, float) else tf.int32
|
||||
if isinstance(value, int) else None)
|
||||
return tf.compat.v1.get_variable(
|
||||
tf_name, initializer=value, dtype=dtype, trainable=trainable,
|
||||
**({} if shape is None else {"shape": shape})
|
||||
)
|
||||
tf_name,
|
||||
initializer=value,
|
||||
dtype=dtype,
|
||||
trainable=trainable,
|
||||
**({} if shape is None else {
|
||||
"shape": shape
|
||||
}))
|
||||
elif framework == "torch" and torch_tensor is True:
|
||||
torch, _ = try_import_torch()
|
||||
var_ = torch.from_numpy(value)
|
||||
|
||||
@@ -53,7 +53,8 @@ class TestSchedules(unittest.TestCase):
|
||||
def test_polynomial_schedule(self):
|
||||
ts = [0, 5, 10, 100, 90, 2, 1, 99, 23, 1000]
|
||||
expected = [
|
||||
0.5 + (2.0 - 0.5) * (1.0 - min(t, 100) / 100)**2 for t in ts]
|
||||
0.5 + (2.0 - 0.5) * (1.0 - min(t, 100) / 100)**2 for t in ts
|
||||
]
|
||||
config = dict(
|
||||
type="ray.rllib.utils.schedules.polynomial_schedule."
|
||||
"PolynomialSchedule",
|
||||
|
||||
@@ -12,6 +12,7 @@ class FlexDict(gym.spaces.Dict):
|
||||
space['key'] = spaces.Box(4,)
|
||||
See also: documentation for gym.spaces.Dict
|
||||
"""
|
||||
|
||||
def __init__(self, spaces=None, **spaces_kwargs):
|
||||
err = "Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)"
|
||||
assert (spaces is None) or (not spaces_kwargs), err
|
||||
|
||||
@@ -288,8 +288,8 @@ def check_compute_single_action(trainer,
|
||||
method_to_test = trainer.compute_action
|
||||
# Get the obs-space from Workers.env (not Policy) due to possible
|
||||
# pre-processor up front.
|
||||
worker_set = getattr(
|
||||
trainer, "workers", getattr(trainer, "_workers", None))
|
||||
worker_set = getattr(trainer, "workers",
|
||||
getattr(trainer, "_workers", None))
|
||||
assert worker_set
|
||||
if isinstance(worker_set, list):
|
||||
obs_space = trainer.get_policy().observation_space
|
||||
|
||||
@@ -34,8 +34,8 @@ def minimize_and_clip(optimizer, objective, var_list, clip_val=10.0):
|
||||
|
||||
if tf.executing_eagerly():
|
||||
tape = optimizer.tape
|
||||
grads_and_vars = list(zip(list(
|
||||
tape.gradient(objective, var_list)), var_list))
|
||||
grads_and_vars = list(
|
||||
zip(list(tape.gradient(objective, var_list)), var_list))
|
||||
else:
|
||||
grads_and_vars = optimizer.compute_gradients(
|
||||
objective, var_list=var_list)
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <flatbuffers/flatbuffers.h>
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include "ray/common/id.h"
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "ray/common/common_protocol.h"
|
||||
#include "ray/common/task/task_spec.h"
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
|
||||
#include "ray/common/bundle_spec.h"
|
||||
#include "ray/util/logging.h"
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "ray/common/task/task_execution_spec.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace ray {
|
||||
|
||||
size_t TaskExecutionSpecification::NumForwards() const {
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ray/common/client_connection.h"
|
||||
|
||||
#include <boost/asio.hpp>
|
||||
#include <boost/asio/error.hpp>
|
||||
#include <list>
|
||||
@@ -20,8 +22,6 @@
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "ray/common/client_connection.h"
|
||||
|
||||
namespace ray {
|
||||
namespace raylet {
|
||||
|
||||
|
||||
@@ -16,11 +16,11 @@
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include "jni_utils.h"
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/core_worker/actor_handle.h"
|
||||
#include "ray/core_worker/common.h"
|
||||
#include "ray/core_worker/core_worker.h"
|
||||
#include "jni_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
@@ -13,11 +13,13 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "io_ray_runtime_context_NativeWorkerContext.h"
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include "jni_utils.h"
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/core_worker/context.h"
|
||||
#include "ray/core_worker/core_worker.h"
|
||||
#include "jni_utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include "ray/core_worker/common.h"
|
||||
#include "jni_utils.h"
|
||||
#include "ray/core_worker/common.h"
|
||||
#include "ray/gcs/gcs_client/global_state_accessor.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -13,13 +13,14 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "io_ray_runtime_metric_NativeMetric.h"
|
||||
#include "jni_utils.h"
|
||||
#include "ray/stats/metric.h"
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "jni_utils.h"
|
||||
#include "opencensus/tags/tag_key.h"
|
||||
#include "ray/stats/metric.h"
|
||||
|
||||
using TagKeyType = opencensus::tags::TagKey;
|
||||
using TagsType = std::vector<std::pair<opencensus::tags::TagKey, std::string>>;
|
||||
|
||||
@@ -13,7 +13,9 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "io_ray_runtime_object_NativeObjectStore.h"
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include "jni_utils.h"
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/core_worker/common.h"
|
||||
|
||||
@@ -13,11 +13,13 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "io_ray_runtime_task_NativeTaskExecutor.h"
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include "jni_utils.h"
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/core_worker/common.h"
|
||||
#include "ray/core_worker/core_worker.h"
|
||||
#include "jni_utils.h"
|
||||
#include "ray/raylet_client/raylet_client.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <jni.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "ray/common/buffer.h"
|
||||
@@ -346,7 +347,7 @@ inline jobject NativeVectorToJavaList(
|
||||
env->NewObject(java_array_list_class, java_array_list_init_with_capacity,
|
||||
(jint)native_vector.size());
|
||||
RAY_CHECK_JAVA_EXCEPTION(env);
|
||||
for (auto it = native_vector.begin(); it != native_vector.end(); ++it){
|
||||
for (auto it = native_vector.begin(); it != native_vector.end(); ++it) {
|
||||
auto element = element_converter(env, *it);
|
||||
env->CallVoidMethod(java_list, java_list_add, element);
|
||||
RAY_CHECK_JAVA_EXCEPTION(env);
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include <thread>
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/core_worker/transport/direct_actor_transport.h"
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user