ci: Redo format.sh --all script & backfill lint fixes (#9956)

This commit is contained in:
Barak Michener
2020-08-07 16:49:49 -07:00
committed by GitHub
parent 1d01c668f0
commit 8e76796fd0
147 changed files with 702 additions and 636 deletions
-1
View File
@@ -1,6 +1,5 @@
import ray
ray.init()
-1
View File
@@ -1,6 +1,5 @@
import ray
ray.init()
+1 -1
View File
@@ -313,7 +313,7 @@ lint_readme() {
}
lint_scripts() {
"${ROOT_DIR}"/format.sh --all
FORMAT_SH_PRINT_DIFF=1 "${ROOT_DIR}"/format.sh --all
}
lint_bazel() {
+64 -36
View File
@@ -46,11 +46,6 @@ builtin cd "$(dirname "${BASH_SOURCE:-$0}")"
ROOT="$(git rev-parse --show-toplevel)"
builtin cd "$ROOT" || exit 1
# Add the upstream remote if it doesn't exist
if ! git remote -v | grep -q upstream; then
git remote add 'upstream' 'https://github.com/ray-project/ray.git'
fi
FLAKE8_VERSION=$(flake8 --version | awk '{print $1}')
YAPF_VERSION=$(yapf --version | awk '{print $2}')
SHELLCHECK_VERSION=$(shellcheck --version | awk '/^version:/ {print $2}')
@@ -73,9 +68,6 @@ else
echo "WARNING: clang-format is not installed!"
fi
# Only fetch master since that's the branch we're diffing against.
git fetch upstream master || true
SHELLCHECK_FLAGS=(
--exclude=1090 # "Can't follow non-constant source. Use a directive to specify location."
--exclude=1091 # "Not following {file} due to some error"
@@ -99,7 +91,16 @@ YAPF_EXCLUDES=(
'--exclude' 'python/ray/thirdparty_files/*'
)
FLAKE8_EXCLUDES="python/ray/core/generated/,streaming/python/generated,doc/source/conf.py,python/ray/cloudpickle/,python/ray/thirdparty_files/"
GIT_LS_EXCLUDES=(
':(exclude)python/ray/cloudpickle/'
)
# TODO(barakmich): This should be cleaned up. I've at least excised the copies
# of these arguments to this location, but the long-term answer is to actually
# make a flake8 config file
FLAKE8_EXCLUDE="--exclude=python/ray/core/generated/,streaming/python/generated,doc/source/conf.py,python/ray/cloudpickle/,python/ray/thirdparty_files/,python/build/,python/.eggs/"
FLAKE8_IGNORES="--ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605"
FLAKE8_PYX_IGNORES="--ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605"
shellcheck_scripts() {
shellcheck "${SHELLCHECK_FLAGS[@]}" "$@"
@@ -110,7 +111,7 @@ shellcheck_bazel() {
}
# Format specified files
format() {
format_files() {
local shell_files=() python_files=() bazel_files=()
local name
@@ -162,6 +163,46 @@ format() {
fi
}
# Format and lint every git-tracked source file in the repository.
#
# Steps, in order:
#   1. yapf rewrites all tracked '*.py' files in place.
#   2. flake8 lints '*.py' and Cython ('*.pyx', '*.pxd', '*.pxi') files, but
#      only when flake8 is actually on PATH.
#   3. clang-format rewrites tracked '*.cc' / '*.h' files in place, when
#      installed.
#   4. shellcheck lints the bazel scripts (via shellcheck_bazel) and every
#      tracked shell script, when installed. A shell script is any '*.sh' file
#      plus any other tracked file containing a sh/bash shebang line.
#
# Globals (read): GIT_LS_EXCLUDES, YAPF_EXCLUDES, YAPF_FLAGS, FLAKE8_EXCLUDE,
#                 FLAKE8_IGNORES, FLAKE8_PYX_IGNORES
# Arguments:      none are read; anything passed by the caller is ignored.
# Outputs:        timestamped progress lines on stdout, plus whatever the
#                 individual tools print.
# Note: this function does not print a diff itself; files are modified in
#       place and it is up to the caller to show the resulting changes.
format_all() {
  local file listing
  local shell_files=() non_shell_files=()

  echo "$(date)" "YAPF...."
  git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 10 \
    yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"

  # Test the lookup directly. Storing $? in a variable and checking it with
  # `[ $var ]` is always true, because both "0" and "1" are non-empty strings.
  if command -v flake8 >/dev/null 2>&1; then
    echo "$(date)" "Flake8...."
    git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 \
      flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_IGNORES"

    git ls-files -- '*.pyx' '*.pxd' '*.pxi' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 \
      flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_PYX_IGNORES"
  fi

  echo "$(date)" "clang-format...."
  if command -v clang-format >/dev/null; then
    git ls-files -- '*.cc' '*.h' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 clang-format -i
  fi

  if command -v shellcheck >/dev/null; then
    echo "$(date)" "shellcheck bazel...."
    shellcheck_bazel

    # Collect file lists one line at a time so that paths containing spaces or
    # glob characters stay intact (an unquoted $(...) inside an array literal
    # would word-split and glob-expand them).
    listing=$(git ls-files -- ':(exclude)*.sh')
    while IFS= read -r file; do
      if [ -n "$file" ]; then non_shell_files+=("$file"); fi
    done <<< "$listing"

    listing=$(git ls-files -- '*.sh')
    while IFS= read -r file; do
      if [ -n "$file" ]; then shell_files+=("$file"); fi
    done <<< "$listing"

    if [ 0 -lt "${#non_shell_files[@]}" ]; then
      # Files without a .sh suffix still count as shell scripts when they
      # start with a sh/bash shebang. git grep exits non-zero when nothing
      # matches, which is not an error here.
      listing=$(git --no-pager grep -l -- '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' "${non_shell_files[@]}" || true)
      while IFS= read -r file; do
        if [ -n "$file" ]; then shell_files+=("$file"); fi
      done <<< "$listing"
    fi

    if [ 0 -lt "${#shell_files[@]}" ]; then
      echo "$(date)" "shellcheck scripts...."
      shellcheck_scripts "${shell_files[@]}"
    fi
  fi

  echo "$(date)" "done!"
}
# Format files that differ from main branch. Ignores dirs that are not slated
# for autoformat yet.
format_changed() {
@@ -178,17 +219,14 @@ format_changed() {
yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
if which flake8 >/dev/null; then
git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES,rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
# Ignore F821 for rllib flake8 checking (produces errors for type annotations using quotes (non-imported classes)).
git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --filename="rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605,F821
flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_IGNORES"
fi
fi
if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' &>/dev/null; then
if which flake8 >/dev/null; then
git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' | xargs -P 5 \
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605
flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_PYX_IGNORES"
fi
fi
@@ -216,35 +254,25 @@ format_changed() {
fi
}
# Format all files, and print the diff to stdout for travis.
format_all() {
# Ignore F821 for rllib flake8 checking (produces errors for type annotations using quotes (non-imported classes)).
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES,rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --filename="rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605,F821
yapf --diff "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" test python
local shell_files
# shellcheck disable=SC2207
shell_files=($(
git -C "${ROOT}" ls-files --exclude-standard HEAD -- "*.sh" &&
{ git -C "${ROOT}" --no-pager grep -l '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' ":(exclude)*.sh" || true; }
))
if [ 0 -lt "${#shell_files[@]}" ]; then
shellcheck_scripts "${shell_files[@]}"
fi
shellcheck_bazel
}
# This flag formats individual files. --files *must* be the first command line
# arg to use this option.
if [ "${1-}" == '--files' ]; then
format "${@:2}"
format_files "${@:2}"
# If `--all` is passed, then any further arguments are ignored and every
# git-tracked source file in the repository is formatted and linted.
elif [ "${1-}" == '--all' ]; then
format_all
format_all "${@}"
if [ -n "${FORMAT_SH_PRINT_DIFF-}" ]; then git --no-pager diff; fi
else
# Add the upstream remote if it doesn't exist
if ! git remote -v | grep -q upstream; then
git remote add 'upstream' 'https://github.com/ray-project/ray.git'
fi
# Only fetch master since that's the branch we're diffing against.
git fetch upstream master || true
# Format only the files that changed in last commit.
format_changed
fi
+6 -8
View File
@@ -1,13 +1,14 @@
#pragma once
#include <memory>
#include <ray/api/generated/actor_funcs.generated.h>
#include <ray/api/generated/create_funcs.generated.h>
#include <ray/api/generated/funcs.generated.h>
#include <ray/api/ray_runtime.h>
#include <memory>
#include <msgpack.hpp>
#include "ray/core.h"
namespace ray {
namespace api {
@@ -232,13 +233,10 @@ inline ActorTaskCaller<ReturnType> Ray::CallActorInternal(FuncType &actor_func,
return ActorTaskCaller<ReturnType>(runtime_, actor.ID(), ptr, buffer);
}
#include <ray/api/generated/exec_funcs.generated.h>
#include <ray/api/generated/call_funcs_impl.generated.h>
#include <ray/api/generated/create_actors_impl.generated.h>
#include <ray/api/generated/call_actors_impl.generated.h>
#include <ray/api/generated/call_funcs_impl.generated.h>
#include <ray/api/generated/create_actors_impl.generated.h>
#include <ray/api/generated/exec_funcs.generated.h>
} // namespace api
} // namespace ray
+1
View File
@@ -2,6 +2,7 @@
#pragma once
#include <ray/api/serializer.h>
#include <msgpack.hpp>
namespace ray {
+1 -2
View File
@@ -2,9 +2,8 @@
#pragma once
#include <memory>
#include <utility>
#include <msgpack.hpp>
#include <utility>
#include "ray/core.h"
+2 -1
View File
@@ -1,13 +1,14 @@
#pragma once
#include <ray/api/wait_result.h>
#include <cstdint>
#include <memory>
#include <msgpack.hpp>
#include <typeinfo>
#include <vector>
#include <ray/api/wait_result.h>
#include "ray/core.h"
namespace ray {
+1
View File
@@ -2,6 +2,7 @@
#pragma once
#include <ray/api/ray_exception.h>
#include <msgpack.hpp>
namespace ray {
+1
View File
@@ -2,6 +2,7 @@
#pragma once
#include <vector>
#include "ray/core.h"
namespace ray {
+1 -1
View File
@@ -1,7 +1,7 @@
#include <ray/api.h>
#include <ray/api/ray_config.h>
#include "runtime/abstract_ray_runtime.h"
namespace ray {
+3 -2
View File
@@ -1,11 +1,12 @@
#include "abstract_ray_runtime.h"
#include <cassert>
#include <ray/api.h>
#include <ray/api/ray_config.h>
#include <ray/api/ray_exception.h>
#include <cassert>
#include "../util/address_helper.h"
#include "../util/process_helper.h"
#include "local_mode_ray_runtime.h"
+3 -2
View File
@@ -1,11 +1,12 @@
#pragma once
#include <mutex>
#include <ray/api/ray_config.h>
#include <ray/api/ray_runtime.h>
#include <msgpack.hpp>
#include <mutex>
#include "./object/object_store.h"
#include "./task/task_executor.h"
#include "./task/task_submitter.h"
@@ -2,6 +2,7 @@
#include "local_mode_ray_runtime.h"
#include <ray/api.h>
#include "../util/address_helper.h"
#include "./object/local_mode_object_store.h"
#include "./object/object_store.h"
@@ -2,6 +2,7 @@
#pragma once
#include <unordered_map>
#include "abstract_ray_runtime.h"
#include "ray/core.h"
@@ -2,6 +2,7 @@
#include "native_ray_runtime.h"
#include <ray/api.h>
#include "../util/address_helper.h"
#include "./object/native_object_store.h"
#include "./object/object_store.h"
+1
View File
@@ -2,6 +2,7 @@
#pragma once
#include <unordered_map>
#include "abstract_ray_runtime.h"
#include "ray/core.h"
@@ -1,12 +1,14 @@
#include "local_mode_object_store.h"
#include <ray/api/ray_exception.h>
#include <algorithm>
#include <chrono>
#include <list>
#include <thread>
#include <ray/api/ray_exception.h>
#include "../abstract_ray_runtime.h"
#include "local_mode_object_store.h"
namespace ray {
namespace api {
@@ -2,10 +2,10 @@
#pragma once
#include <unordered_map>
#include "ray/core.h"
#include "../local_mode_ray_runtime.h"
#include "object_store.h"
#include "ray/core.h"
namespace ray {
namespace api {
@@ -1,12 +1,14 @@
#include "native_object_store.h"
#include <ray/api/ray_exception.h>
#include <algorithm>
#include <chrono>
#include <list>
#include <thread>
#include <ray/api/ray_exception.h>
#include "../abstract_ray_runtime.h"
#include "native_object_store.h"
namespace ray {
namespace api {
@@ -2,10 +2,10 @@
#pragma once
#include <unordered_map>
#include "ray/core.h"
#include "../native_ray_runtime.h"
#include "object_store.h"
#include "ray/core.h"
namespace ray {
namespace api {
+2 -2
View File
@@ -1,9 +1,9 @@
#pragma once
#include <memory>
#include <ray/api/wait_result.h>
#include <memory>
#include <msgpack.hpp>
namespace ray {
@@ -2,6 +2,7 @@
#pragma once
#include <msgpack.hpp>
#include "ray/core.h"
namespace ray {
@@ -1,11 +1,13 @@
#include "local_mode_task_submitter.h"
#include <ray/api/ray_exception.h>
#include <boost/asio/post.hpp>
#include <memory>
#include <ray/api/ray_exception.h>
#include "../../util/address_helper.h"
#include "../abstract_ray_runtime.h"
#include "local_mode_task_submitter.h"
namespace ray {
namespace api {
@@ -3,6 +3,7 @@
#include <boost/asio/thread_pool.hpp>
#include <memory>
#include <queue>
#include "../local_mode_ray_runtime.h"
#include "absl/synchronization/mutex.h"
#include "invocation_spec.h"
@@ -1,5 +1,7 @@
#include "native_task_submitter.h"
#include <ray/api/ray_exception.h>
#include "../../util/address_helper.h"
#include "../abstract_ray_runtime.h"
@@ -3,6 +3,7 @@
#include <boost/asio/thread_pool.hpp>
#include <memory>
#include <queue>
#include "../native_ray_runtime.h"
#include "invocation_spec.h"
#include "ray/core.h"
+2 -1
View File
@@ -1,9 +1,10 @@
#include "task_executor.h"
#include <memory>
#include "../../util/address_helper.h"
#include "../abstract_ray_runtime.h"
#include "task_executor.h"
namespace ray {
namespace api {
+1
View File
@@ -1,6 +1,7 @@
#pragma once
#include <memory>
#include "absl/synchronization/mutex.h"
#include "invocation_spec.h"
#include "ray/core.h"
+2 -1
View File
@@ -1,8 +1,9 @@
#pragma once
#include <ray/api/ray_runtime.h>
#include <memory>
#include <ray/api/ray_runtime.h>
#include "invocation_spec.h"
namespace ray {
+1
View File
@@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include <ray/api.h>
#include <future>
#include <thread>
+1
View File
@@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include <ray/api.h>
#include <chrono>
#include <thread>
+25 -18
View File
@@ -9,7 +9,6 @@ import ray
@ray.remote
class NewsServer(object):
def __init__(self):
self.conn = sqlite3.connect("newsreader.db")
c = self.conn.cursor()
@@ -25,29 +24,36 @@ class NewsServer(object):
items = []
c = self.conn.cursor()
for item in feed.items:
items.append({"title": item.title,
"link": item.link,
"description": item.description,
"description_text": item.description,
"pubDate": str(item.pub_date)})
c.execute("""INSERT INTO news (title, link, description,
items.append({
"title": item.title,
"link": item.link,
"description": item.description,
"description_text": item.description,
"pubDate": str(item.pub_date)
})
c.execute(
"""INSERT INTO news (title, link, description,
published, feed, liked) values
(?, ?, ?, ?, ?, ?)""", (
item.title, item.link, item.description,
item.pub_date, feed.link, False))
(?, ?, ?, ?, ?, ?)""",
(item.title, item.link, item.description, item.pub_date,
feed.link, False))
self.conn.commit()
return {"channel": {"title": feed.title,
"link": feed.link,
"url": feed.link},
"items": items}
return {
"channel": {
"title": feed.title,
"link": feed.link,
"url": feed.link
},
"items": items
}
def like_item(self, url, is_faved):
c = self.conn.cursor()
if is_faved:
c.execute("UPDATE news SET liked = 1 WHERE link = ?", (url,))
c.execute("UPDATE news SET liked = 1 WHERE link = ?", (url, ))
else:
c.execute("UPDATE news SET liked = 0 WHERE link = ?", (url,))
c.execute("UPDATE news SET liked = 0 WHERE link = ?", (url, ))
self.conn.commit()
@@ -71,8 +77,9 @@ def dispatcher():
result = ray.get(method.remote(*method_args))
return jsonify(result)
else:
return jsonify(
{"error": "method_name '" + method_name + "' not found"})
return jsonify({
"error": "method_name '" + method_name + "' not found"
})
if __name__ == "__main__":
+20 -12
View File
@@ -7,10 +7,13 @@ import ray
import wikipedia
parser = argparse.ArgumentParser()
parser.add_argument("--num-mappers",
help="number of mapper actors used", default=3, type=int)
parser.add_argument("--num-reducers",
help="number of reducer actors used", default=4, type=int)
parser.add_argument(
"--num-mappers", help="number of mapper actors used", default=3, type=int)
parser.add_argument(
"--num-reducers",
help="number of reducer actors used",
default=4,
type=int)
@ray.remote
@@ -47,8 +50,10 @@ class Reducer(object):
word_count_sum = defaultdict(lambda: 0)
# Get the word counts for this Reducer's keys from all of the Mappers
# and aggregate the results.
count_ids = [mapper.get_range.remote(article_index, self.keys)
for mapper in self.mappers]
count_ids = [
mapper.get_range.remote(article_index, self.keys)
for mapper in self.mappers
]
# TODO(rkn): We should process these out of order using ray.wait.
for count_id in count_ids:
for k, v in ray.get(count_id):
@@ -78,8 +83,9 @@ if __name__ == "__main__":
streams.append(Stream([line.strip() for line in f.readlines()]))
# Partition the keys among the reducers.
chunks = np.array_split([chr(i) for i in range(ord("a"), ord("z") + 1)],
args.num_reducers)
chunks = np.array_split([chr(i)
for i in range(ord("a"),
ord("z") + 1)], args.num_reducers)
keys = [[chunk[0], chunk[-1]] for chunk in chunks]
# Create a number of mappers.
@@ -93,12 +99,14 @@ if __name__ == "__main__":
while True:
print("article index = {}".format(article_index))
wordcounts = {}
counts = ray.get([reducer.next_reduce_result.remote(article_index)
for reducer in reducers])
counts = ray.get([
reducer.next_reduce_result.remote(article_index)
for reducer in reducers
])
for count in counts:
wordcounts.update(count)
most_frequent_words = heapq.nlargest(10, wordcounts,
key=wordcounts.get)
most_frequent_words = heapq.nlargest(
10, wordcounts, key=wordcounts.get)
for word in most_frequent_words:
print(" ", word, wordcounts[word])
article_index += 1
+1 -4
View File
@@ -68,10 +68,7 @@ parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
# As you can see, the setup here is exactly how you would do it for Scikit-Learn. Now, let's try fitting a model.
tune_search = TuneGridSearchCV(
SGDClassifier(),
parameter_grid,
early_stopping=True,
max_iters=10)
SGDClassifier(), parameter_grid, early_stopping=True, max_iters=10)
import time # Just to compare fit times
start = time.time()
+6 -6
View File
@@ -11,13 +11,13 @@ cdef extern from "opencensus/tags/tag_key.h" nogil:
cdef extern from "ray/stats/metric.h" nogil:
cdef cppclass CMetric "ray::stats::Metric":
CMetric(const c_string &name,
const c_string &description,
const c_string &unit,
const c_vector[CTagKey] &tag_keys)
const c_string &description,
const c_string &unit,
const c_vector[CTagKey] &tag_keys)
c_string GetName() const
void Record(double value)
void Record(double value,
unordered_map[c_string, c_string] &tags)
void Record(double value,
unordered_map[c_string, c_string] &tags)
cdef cppclass CGauge "ray::stats::Gauge":
CGauge(const c_string &name,
@@ -42,4 +42,4 @@ cdef extern from "ray/stats/metric.h" nogil:
const c_string &description,
const c_string &unit,
const c_vector[double] &boundaries,
const c_vector[CTagKey] &tag_keys)
const c_vector[CTagKey] &tag_keys)
+14 -13
View File
@@ -126,9 +126,8 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
target_noise_clip = policy.config["target_noise_clip"]
clipped_normal_sample = tf.clip_by_value(
tf.random.normal(
tf.shape(policy_tp1),
stddev=policy.config["target_noise"]), -target_noise_clip,
target_noise_clip)
tf.shape(policy_tp1), stddev=policy.config["target_noise"]),
-target_noise_clip, target_noise_clip)
policy_tp1_smoothed = tf.clip_by_value(
policy_tp1 + clipped_normal_sample,
policy.action_space.low * tf.ones_like(policy_tp1),
@@ -146,8 +145,8 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
q_t_det_policy = model.get_q_values(model_out_t, policy_t)
if twin_q:
twin_q_t = model.get_twin_q_values(
model_out_t, train_batch[SampleBatch.ACTIONS])
twin_q_t = model.get_twin_q_values(model_out_t,
train_batch[SampleBatch.ACTIONS])
# Target q-net(s) evaluation.
q_tp1 = policy.target_model.get_q_values(target_model_out_tp1,
@@ -278,11 +277,11 @@ def gradients_fn(policy, optimizer, loss):
if policy.config["framework"] in ["tf2", "tfe"]:
tape = optimizer.tape
pol_weights = policy.model.policy_variables()
actor_grads_and_vars = list(zip(tape.gradient(
policy.actor_loss, pol_weights), pol_weights))
actor_grads_and_vars = list(
zip(tape.gradient(policy.actor_loss, pol_weights), pol_weights))
q_weights = policy.model.q_variables()
critic_grads_and_vars = list(zip(tape.gradient(
policy.critic_loss, q_weights), q_weights))
critic_grads_and_vars = list(
zip(tape.gradient(policy.critic_loss, q_weights), q_weights))
else:
actor_grads_and_vars = policy._actor_optimizer.compute_gradients(
policy.actor_loss, var_list=policy.model.policy_variables())
@@ -296,10 +295,12 @@ def gradients_fn(policy, optimizer, loss):
clip_func = tf.identity
# Save grads and vars for later use in `build_apply_op`.
policy._actor_grads_and_vars = [
(clip_func(g), v) for (g, v) in actor_grads_and_vars if g is not None]
policy._critic_grads_and_vars = [
(clip_func(g), v) for (g, v) in critic_grads_and_vars if g is not None]
policy._actor_grads_and_vars = [(clip_func(g), v)
for (g, v) in actor_grads_and_vars
if g is not None]
policy._critic_grads_and_vars = [(clip_func(g), v)
for (g, v) in critic_grads_and_vars
if g is not None]
grads_and_vars = policy._actor_grads_and_vars + \
policy._critic_grads_and_vars
+1 -2
View File
@@ -65,8 +65,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
torch.normal(
mean=torch.zeros(policy_tp1.size()),
std=policy.config["target_noise"]).to(policy_tp1.device),
-target_noise_clip,
target_noise_clip)
-target_noise_clip, target_noise_clip)
policy_tp1_smoothed = torch.min(
torch.max(
+6 -2
View File
@@ -405,7 +405,9 @@ class TestDDPG(unittest.TestCase):
policy_t = sigmoid(2.0 * fc(
relu(
fc(model_out_t, weights[ks[1]], weights[ks[0]], framework=fw)),
weights[ks[5]], weights[ks[4]], framework=fw))
weights[ks[5]],
weights[ks[4]],
framework=fw))
# Get policy output for t+1 (target model).
policy_tp1 = sigmoid(2.0 * fc(
relu(
@@ -413,7 +415,9 @@ class TestDDPG(unittest.TestCase):
weights[ks[3]],
weights[ks[2]],
framework=fw)),
weights[ks[7]], weights[ks[6]], framework=fw))
weights[ks[7]],
weights[ks[6]],
framework=fw))
# Assume no smooth target policy.
policy_tp1_smoothed = policy_tp1
@@ -74,8 +74,7 @@ class DistributionalQTFModel(TFModelV2):
for i in range(len(q_hiddens)):
if use_noisy:
action_out = NoisyLayer(
"{}hidden_{}".format(prefix, i),
q_hiddens[i],
"{}hidden_{}".format(prefix, i), q_hiddens[i],
sigma0)(action_out)
elif add_layer_norm:
action_out = tf.keras.layers.Dense(
@@ -135,8 +134,7 @@ class DistributionalQTFModel(TFModelV2):
for i in range(len(q_hiddens)):
if use_noisy:
state_out = NoisyLayer(
"{}dueling_hidden_{}".format(prefix, i),
q_hiddens[i],
"{}dueling_hidden_{}".format(prefix, i), q_hiddens[i],
sigma0)(state_out)
else:
state_out = tf.keras.layers.Dense(
@@ -160,8 +158,8 @@ class DistributionalQTFModel(TFModelV2):
self.register_variables(self.q_value_head.variables)
if dueling:
state_out = build_state_score(
name + "/state_value/", self.model_out)
state_out = build_state_score(name + "/state_value/",
self.model_out)
self.state_value_head = tf.keras.Model(self.model_out, state_out)
self.register_variables(self.state_value_head.variables)
+7 -8
View File
@@ -231,8 +231,8 @@ def build_q_losses(policy, model, _, train_batch):
train_batch[SampleBatch.NEXT_OBS],
explore=False)
q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1)
q_tp1_best_one_hot_selection = tf.one_hot(
q_tp1_best_using_online_net, policy.action_space.n)
q_tp1_best_one_hot_selection = tf.one_hot(q_tp1_best_using_online_net,
policy.action_space.n)
q_tp1_best = tf.reduce_sum(q_tp1 * q_tp1_best_one_hot_selection, 1)
q_dist_tp1_best = tf.reduce_sum(
q_dist_tp1 * tf.expand_dims(q_tp1_best_one_hot_selection, -1), 1)
@@ -246,9 +246,9 @@ def build_q_losses(policy, model, _, train_batch):
policy.q_loss = QLoss(
q_t_selected, q_logits_t_selected, q_tp1_best, q_dist_tp1_best,
train_batch[PRIO_WEIGHTS], train_batch[SampleBatch.REWARDS],
tf.cast(train_batch[SampleBatch.DONES], tf.float32), config["gamma"],
config["n_step"], config["num_atoms"],
config["v_min"], config["v_max"])
tf.cast(train_batch[SampleBatch.DONES],
tf.float32), config["gamma"], config["n_step"],
config["num_atoms"], config["v_min"], config["v_max"])
return policy.q_loss.loss
@@ -378,9 +378,8 @@ def postprocess_nstep_and_prio(policy, batch, other_agent=None, episode=None):
batch[SampleBatch.CUR_OBS], batch[SampleBatch.ACTIONS],
batch[SampleBatch.REWARDS], batch[SampleBatch.NEXT_OBS],
batch[SampleBatch.DONES], batch[PRIO_WEIGHTS])
new_priorities = (
np.abs(convert_to_numpy(td_errors)) +
policy.config["prioritized_replay_eps"])
new_priorities = (np.abs(convert_to_numpy(td_errors)) +
policy.config["prioritized_replay_eps"])
batch.data[PRIO_WEIGHTS] = new_priorities
return batch
+18 -13
View File
@@ -72,12 +72,16 @@ class DQNTorchModel(TorchModelV2, nn.Module):
advantage_module.add_module(
"dueling_A_{}".format(i),
NoisyLayer(
ins, n, sigma0=self.sigma0,
ins,
n,
sigma0=self.sigma0,
activation=dueling_activation))
value_module.add_module(
"dueling_V_{}".format(i),
NoisyLayer(
ins, n, sigma0=self.sigma0,
ins,
n,
sigma0=self.sigma0,
activation=dueling_activation))
else:
advantage_module.add_module(
@@ -88,25 +92,26 @@ class DQNTorchModel(TorchModelV2, nn.Module):
SlimFC(ins, n, activation_fn=dueling_activation))
# Add LayerNorm after each Dense.
if add_layer_norm:
advantage_module.add_module(
"LayerNorm_A_{}".format(i), nn.LayerNorm(n))
value_module.add_module(
"LayerNorm_V_{}".format(i), nn.LayerNorm(n))
advantage_module.add_module("LayerNorm_A_{}".format(i),
nn.LayerNorm(n))
value_module.add_module("LayerNorm_V_{}".format(i),
nn.LayerNorm(n))
ins = n
# Actual Advantages layer (nodes=num-actions).
if use_noisy:
advantage_module.add_module("A", NoisyLayer(
ins,
self.action_space.n * self.num_atoms,
sigma0,
activation=None))
advantage_module.add_module(
"A",
NoisyLayer(
ins,
self.action_space.n * self.num_atoms,
sigma0,
activation=None))
elif q_hiddens:
advantage_module.add_module(
"A",
SlimFC(
ins, action_space.n * self.num_atoms,
activation_fn=None))
ins, action_space.n * self.num_atoms, activation_fn=None))
self.advantage_module = advantage_module
+6 -6
View File
@@ -212,8 +212,8 @@ def build_q_losses(policy, model, _, train_batch):
is_training=True)
# Q scores for actions which we know were selected in the given state.
one_hot_selection = F.one_hot(
train_batch[SampleBatch.ACTIONS], policy.action_space.n)
one_hot_selection = F.one_hot(train_batch[SampleBatch.ACTIONS],
policy.action_space.n)
q_t_selected = torch.sum(
torch.where(q_t > -float("inf"), q_t, torch.tensor(0.0)) *
one_hot_selection, 1)
@@ -230,8 +230,8 @@ def build_q_losses(policy, model, _, train_batch):
explore=False,
is_training=True)
q_tp1_best_using_online_net = torch.argmax(q_tp1_using_online_net, 1)
q_tp1_best_one_hot_selection = F.one_hot(
q_tp1_best_using_online_net, policy.action_space.n)
q_tp1_best_one_hot_selection = F.one_hot(q_tp1_best_using_online_net,
policy.action_space.n)
q_tp1_best = torch.sum(
torch.where(q_tp1 > -float("inf"), q_tp1, torch.tensor(0.0)) *
q_tp1_best_one_hot_selection, 1)
@@ -250,8 +250,8 @@ def build_q_losses(policy, model, _, train_batch):
q_t_selected, q_logits_t_selected, q_tp1_best, q_probs_tp1_best,
train_batch[PRIO_WEIGHTS], train_batch[SampleBatch.REWARDS],
train_batch[SampleBatch.DONES].float(), config["gamma"],
config["n_step"], config["num_atoms"],
config["v_min"], config["v_max"])
config["n_step"], config["num_atoms"], config["v_min"],
config["v_max"])
return policy.q_loss.loss
+11 -9
View File
@@ -222,10 +222,12 @@ def multi_from_logits(behaviour_policy_logits,
behaviour_policy_logits[i].shape.assert_has_rank(3)
target_policy_logits[i].shape.assert_has_rank(3)
with tf1.name_scope(name, values=[
behaviour_policy_logits, target_policy_logits, actions,
discounts, rewards, values, bootstrap_value
]):
with tf1.name_scope(
name,
values=[
behaviour_policy_logits, target_policy_logits, actions,
discounts, rewards, values, bootstrap_value
]):
target_action_log_probs = multi_log_probs_from_logits_and_actions(
target_policy_logits, actions, dist_class, model)
@@ -330,16 +332,16 @@ def from_importance_weights(log_rhos,
if clip_pg_rho_threshold is not None:
clip_pg_rho_threshold.shape.assert_has_rank(0)
with tf1.name_scope(name, values=[
log_rhos, discounts, rewards, values, bootstrap_value
]):
with tf1.name_scope(
name,
values=[log_rhos, discounts, rewards, values, bootstrap_value]):
rhos = tf.math.exp(log_rhos)
if clip_rho_threshold is not None:
clipped_rhos = tf.minimum(
clip_rho_threshold, rhos, name="clipped_rhos")
tf1.summary.histogram(
"clipped_rhos_1000", tf.minimum(1000.0, rhos))
tf1.summary.histogram("clipped_rhos_1000", tf.minimum(
1000.0, rhos))
tf1.summary.scalar(
"num_of_clipped_rhos",
tf.reduce_sum(
+6 -6
View File
@@ -259,13 +259,13 @@ def choose_optimizer(policy, config):
return tf1.train.AdamOptimizer(policy.cur_lr)
else:
if tfv == 2:
return tf.keras.optimizers.RMSprop(
policy.cur_lr, config["decay"], config["momentum"],
config["epsilon"])
return tf.keras.optimizers.RMSprop(policy.cur_lr, config["decay"],
config["momentum"],
config["epsilon"])
else:
return tf1.train.RMSPropOptimizer(
policy.cur_lr, config["decay"], config["momentum"],
config["epsilon"])
return tf1.train.RMSPropOptimizer(policy.cur_lr, config["decay"],
config["momentum"],
config["epsilon"])
def clip_gradients(policy, optimizer, loss):
+8 -10
View File
@@ -40,23 +40,21 @@ class ReweightedImitationLoss:
# update averaged advantage norm
if policy.config["framework"] in ["tf2", "tfe"]:
policy._ma_adv_norm.assign_add(
1e-6 * (tf.reduce_mean(
tf.math.square(adv)) - policy._ma_adv_norm))
1e-6 *
(tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
# Exponentially weighted advantages.
exp_advs = tf.math.exp(
beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
exp_advs = tf.math.exp(beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
else:
update_adv_norm = tf1.assign_add(
ref=policy._ma_adv_norm,
value=1e-6 * (
tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
value=1e-6 *
(tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
# exponentially weighted advantages
with tf1.control_dependencies([update_adv_norm]):
exp_advs = tf.math.exp(
beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
exp_advs = tf.math.exp(beta * tf.math.divide(
adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
# log\pi_\theta(a|s)
logprobs = action_dist.logp(actions)
+2 -2
View File
@@ -28,8 +28,8 @@ class TestMARWIL(unittest.TestCase):
rllib_dir = Path(__file__).parent.parent.parent.parent
print("rllib dir={}".format(rllib_dir))
data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json")
print("data_file={} exists={}".format(
data_file, os.path.isfile(data_file)))
print("data_file={} exists={}".format(data_file,
os.path.isfile(data_file)))
config = marwil.DEFAULT_CONFIG.copy()
config["num_workers"] = 0 # Run locally.
+1
View File
@@ -59,6 +59,7 @@ class TDModel(nn.Module):
if torch:
class TDDataset(torch.utils.data.Dataset):
def __init__(self, dataset: SampleBatchType, norms):
self.count = dataset.count
+2 -2
View File
@@ -26,8 +26,8 @@ def pg_tf_loss(policy, model, dist_class, train_batch):
logits, _ = model.from_batch(train_batch)
action_dist = dist_class(logits, model)
return -tf.reduce_mean(
action_dist.logp(train_batch[SampleBatch.ACTIONS]) *
tf.cast(train_batch[Postprocessing.ADVANTAGES], dtype=tf.float32))
action_dist.logp(train_batch[SampleBatch.ACTIONS]) * tf.cast(
train_batch[Postprocessing.ADVANTAGES], dtype=tf.float32))
PGTFPolicy = build_tf_policy(
+6 -7
View File
@@ -77,13 +77,12 @@ class TestPG(unittest.TestCase):
feed_dict=policy._get_loss_inputs_dict(
train_batch, shuffle=False))
else:
results = (
pg.pg_tf_loss if fw in ["tf2", "tfe"] else pg.pg_torch_loss
)(
policy,
policy.model,
dist_class=dist_cls,
train_batch=train_batch)
results = (pg.pg_tf_loss
if fw in ["tf2", "tfe"] else pg.pg_torch_loss)(
policy,
policy.model,
dist_class=dist_cls,
train_batch=train_batch)
# Calculate expected results.
if fw != "torch":
-1
View File
@@ -17,7 +17,6 @@ from ray.rllib.utils.numpy import fc
from ray.rllib.utils.test_utils import check, framework_iterator, \
check_compute_single_action
# Fake CartPole episode of n time steps.
FAKE_BATCH = {
SampleBatch.CUR_OBS: np.array(
+3 -3
View File
@@ -280,14 +280,14 @@ class QMixTorchPolicy(Policy):
masked_q_values = q_values.clone()
masked_q_values[avail == 0.0] = -float("inf")
masked_q_values_folded = torch.reshape(
masked_q_values,
[-1] + list(masked_q_values.shape)[2:])
masked_q_values, [-1] + list(masked_q_values.shape)[2:])
actions, _ = self.exploration.get_exploration_action(
action_distribution=TorchCategorical(masked_q_values_folded),
timestep=timestep,
explore=explore)
actions = torch.reshape(
actions, list(masked_q_values.shape)[:-1]).cpu().numpy()
actions,
list(masked_q_values.shape)[:-1]).cpu().numpy()
hiddens = [s.cpu().numpy() for s in hiddens]
return tuple(actions.transpose([1, 0])), hiddens, {}
+31 -32
View File
@@ -231,10 +231,8 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
y_true=q_t_selected_target, y_pred=q_t_selected)
]
if policy.config["twin_q"]:
critic_loss.append(
0.5 * tf.keras.losses.MSE(
y_true=q_t_selected_target,
y_pred=twin_q_t_selected))
critic_loss.append(0.5 * tf.keras.losses.MSE(
y_true=q_t_selected_target, y_pred=twin_q_t_selected))
# Alpha- and actor losses.
# Note: In the papers, alpha is used directly, here we take the log.
@@ -281,25 +279,27 @@ def gradients_fn(policy, optimizer, loss):
if policy.config["framework"] in ["tf2", "tfe"]:
tape = optimizer.tape
pol_weights = policy.model.policy_variables()
actor_grads_and_vars = list(zip(tape.gradient(
policy.actor_loss, pol_weights), pol_weights))
actor_grads_and_vars = list(
zip(tape.gradient(policy.actor_loss, pol_weights), pol_weights))
q_weights = policy.model.q_variables()
if policy.config["twin_q"]:
half_cutoff = len(q_weights) // 2
grads_1 = tape.gradient(
policy.critic_loss[0], q_weights[:half_cutoff])
grads_2 = tape.gradient(
policy.critic_loss[1], q_weights[half_cutoff:])
grads_1 = tape.gradient(policy.critic_loss[0],
q_weights[:half_cutoff])
grads_2 = tape.gradient(policy.critic_loss[1],
q_weights[half_cutoff:])
critic_grads_and_vars = \
list(zip(grads_1, q_weights[:half_cutoff])) + \
list(zip(grads_2, q_weights[half_cutoff:]))
else:
critic_grads_and_vars = list(zip(tape.gradient(
policy.critic_loss[0], q_weights), q_weights))
critic_grads_and_vars = list(
zip(
tape.gradient(policy.critic_loss[0], q_weights),
q_weights))
alpha_vars = [policy.model.log_alpha]
alpha_grads_and_vars = list(zip(tape.gradient(
policy.alpha_loss, alpha_vars), alpha_vars))
alpha_grads_and_vars = list(
zip(tape.gradient(policy.alpha_loss, alpha_vars), alpha_vars))
# Tf1.x: Use optimizer.compute_gradients()
else:
actor_grads_and_vars = policy._actor_optimizer.compute_gradients(
@@ -327,12 +327,15 @@ def gradients_fn(policy, optimizer, loss):
clip_func = tf.identity
# Save grads and vars for later use in `build_apply_op`.
policy._actor_grads_and_vars = [
(clip_func(g), v) for (g, v) in actor_grads_and_vars if g is not None]
policy._critic_grads_and_vars = [
(clip_func(g), v) for (g, v) in critic_grads_and_vars if g is not None]
policy._alpha_grads_and_vars = [
(clip_func(g), v) for (g, v) in alpha_grads_and_vars if g is not None]
policy._actor_grads_and_vars = [(clip_func(g), v)
for (g, v) in actor_grads_and_vars
if g is not None]
policy._critic_grads_and_vars = [(clip_func(g), v)
for (g, v) in critic_grads_and_vars
if g is not None]
policy._alpha_grads_and_vars = [(clip_func(g), v)
for (g, v) in alpha_grads_and_vars
if g is not None]
grads_and_vars = (
policy._actor_grads_and_vars + policy._critic_grads_and_vars +
@@ -391,15 +394,13 @@ class ActorCriticOptimizerMixin:
self._actor_optimizer = tf.keras.optimizers.Adam(
learning_rate=config["optimization"]["actor_learning_rate"])
self._critic_optimizer = [
tf.keras.optimizers.Adam(
learning_rate=config["optimization"][
"critic_learning_rate"])
tf.keras.optimizers.Adam(learning_rate=config["optimization"][
"critic_learning_rate"])
]
if config["twin_q"]:
self._critic_optimizer.append(
tf.keras.optimizers.Adam(
learning_rate=config["optimization"][
"critic_learning_rate"]))
tf.keras.optimizers.Adam(learning_rate=config[
"optimization"]["critic_learning_rate"]))
self._alpha_optimizer = tf.keras.optimizers.Adam(
learning_rate=config["optimization"]["entropy_learning_rate"])
else:
@@ -407,15 +408,13 @@ class ActorCriticOptimizerMixin:
self._actor_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["actor_learning_rate"])
self._critic_optimizer = [
tf1.train.AdamOptimizer(
learning_rate=config["optimization"][
"critic_learning_rate"])
tf1.train.AdamOptimizer(learning_rate=config["optimization"][
"critic_learning_rate"])
]
if config["twin_q"]:
self._critic_optimizer.append(
tf1.train.AdamOptimizer(
learning_rate=config["optimization"][
"critic_learning_rate"]))
tf1.train.AdamOptimizer(learning_rate=config[
"optimization"]["critic_learning_rate"]))
self._alpha_optimizer = tf1.train.AdamOptimizer(
learning_rate=config["optimization"]["entropy_learning_rate"])
+2
View File
@@ -27,6 +27,7 @@ def to_float_array(v: List[Any]) -> np.ndarray:
# TODO(sven): Remove the following class once we switch to trajectory view API.
@PublicAPI
class SampleBatchBuilder:
"""Util to build a SampleBatch incrementally.
@@ -76,6 +77,7 @@ class SampleBatchBuilder:
# TODO(sven): Remove the following class once we switch to trajectory view API.
@DeveloperAPI
class MultiAgentSampleBatchBuilder:
"""Util to build SampleBatches for each policy in a multi-agent env.
+3 -6
View File
@@ -60,12 +60,9 @@ class _SampleCollector(metaclass=ABCMeta):
raise NotImplementedError
@abstractmethod
def add_action_reward_next_obs(
self,
episode_id: EpisodeID,
agent_id: AgentID,
policy_id: PolicyID,
values: Dict[str, TensorType]) -> None:
def add_action_reward_next_obs(self, episode_id: EpisodeID,
agent_id: AgentID, policy_id: PolicyID,
values: Dict[str, TensorType]) -> None:
"""Add the given dictionary (row) of values to this collector.
The incoming data (`values`) must include action, reward, done, and
+32 -27
View File
@@ -373,26 +373,26 @@ class AsyncSampler(threading.Thread, SamplerInput):
return extra
def _env_runner(worker: "RolloutWorker",
base_env: BaseEnv,
extra_batch_callback: Callable[[SampleBatchType], None],
policies: Dict[PolicyID, Policy],
policy_mapping_fn: Callable[[AgentID], PolicyID],
rollout_fragment_length: int,
horizon: int,
preprocessors: Dict[PolicyID, Preprocessor],
obs_filters: Dict[PolicyID, Filter],
clip_rewards: bool,
clip_actions: bool,
pack_multiple_episodes_in_batch: bool,
callbacks: "DefaultCallbacks",
tf_sess: Optional["tf.Session"],
perf_stats: _PerfStats,
soft_horizon: bool,
no_done_at_end: bool,
observation_fn: "ObservationFunction",
_use_trajectory_view_api: bool = False
) -> Iterable[SampleBatchType]:
def _env_runner(
worker: "RolloutWorker",
base_env: BaseEnv,
extra_batch_callback: Callable[[SampleBatchType], None],
policies: Dict[PolicyID, Policy],
policy_mapping_fn: Callable[[AgentID], PolicyID],
rollout_fragment_length: int,
horizon: int,
preprocessors: Dict[PolicyID, Preprocessor],
obs_filters: Dict[PolicyID, Filter],
clip_rewards: bool,
clip_actions: bool,
pack_multiple_episodes_in_batch: bool,
callbacks: "DefaultCallbacks",
tf_sess: Optional["tf.Session"],
perf_stats: _PerfStats,
soft_horizon: bool,
no_done_at_end: bool,
observation_fn: "ObservationFunction",
_use_trajectory_view_api: bool = False) -> Iterable[SampleBatchType]:
"""This implements the common experience collection logic.
Args:
@@ -571,18 +571,23 @@ def _env_runner(worker: "RolloutWorker",
def _process_observations(
worker: "RolloutWorker", base_env: BaseEnv,
worker: "RolloutWorker",
base_env: BaseEnv,
policies: Dict[PolicyID, Policy],
batch_builder_pool: List[MultiAgentSampleBatchBuilder],
active_episodes: Dict[str, MultiAgentEpisode],
unfiltered_obs: Dict[EnvID, Dict[AgentID, EnvObsType]],
rewards: Dict[EnvID, Dict[AgentID, float]],
dones: Dict[EnvID, Dict[AgentID, bool]],
infos: Dict[EnvID, Dict[AgentID, EnvInfoDict]], horizon: int,
infos: Dict[EnvID, Dict[AgentID, EnvInfoDict]],
horizon: int,
preprocessors: Dict[PolicyID, Preprocessor],
obs_filters: Dict[PolicyID, Filter], rollout_fragment_length: int,
pack_multiple_episodes_in_batch: bool, callbacks: "DefaultCallbacks",
soft_horizon: bool, no_done_at_end: bool,
obs_filters: Dict[PolicyID, Filter],
rollout_fragment_length: int,
pack_multiple_episodes_in_batch: bool,
callbacks: "DefaultCallbacks",
soft_horizon: bool,
no_done_at_end: bool,
observation_fn: "ObservationFunction",
_use_trajectory_view_api: bool = False
) -> Tuple[Set[EnvID], Dict[PolicyID, List[PolicyEvalData]], List[Union[
@@ -931,8 +936,8 @@ def _do_policy_eval(
def _process_policy_eval_results(
*,
to_eval: Dict[PolicyID, List[PolicyEvalData]],
eval_results: Dict[PolicyID, Tuple[
TensorStructType, StateBatch, dict]],
eval_results: Dict[PolicyID, Tuple[TensorStructType, StateBatch,
dict]],
active_episodes: Dict[str, MultiAgentEpisode],
active_envs: Set[int],
off_policy_actions: MultiEnvDict,
+3 -3
View File
@@ -52,9 +52,9 @@ class RandomEnv(gym.Env):
done = True
# Max not reached yet -> Sample done via p_done.
else:
done = bool(np.random.choice(
[True, False], p=[self.p_done, 1.0 - self.p_done]
))
done = bool(
np.random.choice(
[True, False], p=[self.p_done, 1.0 - self.p_done]))
return self.observation_space.sample(), \
float(self.reward_space.sample()), done, {}
+4 -2
View File
@@ -18,8 +18,10 @@ class RandomPolicy(Policy):
if self.config.get("ignore_action_bounds", False) and \
isinstance(self.action_space, Box):
self.action_space_for_sampling = Box(
-float("inf"), float("inf"),
shape=self.action_space.shape, dtype=self.action_space.dtype)
-float("inf"),
float("inf"),
shape=self.action_space.shape,
dtype=self.action_space.dtype)
else:
self.action_space_for_sampling = self.action_space
@@ -44,8 +44,8 @@ class CustomPolicy(Policy):
episodes=None,
**kwargs):
# return random actions
return np.array([self.action_space.sample()
for _ in obs_batch]), [], {}
return np.array(
[self.action_space.sample() for _ in obs_batch]), [], {}
def learn_on_batch(self, samples):
# implement your learning code here
+6 -10
View File
@@ -138,10 +138,8 @@ class TrainTFMultiGPU:
with self.workers.local_worker().tf_sess.graph.as_default():
with self.workers.local_worker().tf_sess.as_default():
for policy_id in self.policies:
policy = self.workers.local_worker().get_policy(
policy_id)
with tf1.variable_scope(
policy_id, reuse=tf1.AUTO_REUSE):
policy = self.workers.local_worker().get_policy(policy_id)
with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
if policy._state_inputs:
rnn_inputs = policy._state_inputs + [
policy._seq_lens
@@ -150,12 +148,10 @@ class TrainTFMultiGPU:
rnn_inputs = []
self.optimizers[policy_id] = (
LocalSyncParallelOptimizer(
policy._optimizer,
self.devices,
[v for _, v in policy._loss_inputs],
rnn_inputs,
self.per_device_batch_size,
policy.copy))
policy._optimizer, self.devices,
[v
for _, v in policy._loss_inputs], rnn_inputs,
self.per_device_batch_size, policy.copy))
self.sess = self.workers.local_worker().tf_sess
self.sess.run(tf1.global_variables_initializer())
+2 -5
View File
@@ -6,9 +6,6 @@ from ray.rllib.models.tf.layers.skip_connection import SkipConnection
from ray.rllib.models.tf.layers.multi_head_attention import MultiHeadAttention
__all__ = [
"GRUGate",
"MultiHeadAttention",
"NoisyLayer",
"RelativeMultiHeadAttention",
"SkipConnection"
"GRUGate", "MultiHeadAttention", "NoisyLayer",
"RelativeMultiHeadAttention", "SkipConnection"
]
+2 -7
View File
@@ -16,11 +16,7 @@ class NoisyLayer(tf.keras.layers.Layer if tf else object):
vanish along the training procedure
"""
def __init__(self,
prefix,
out_size,
sigma0,
activation="relu"):
def __init__(self, prefix, out_size, sigma0, activation="relu"):
"""Initializes a NoisyLayer object.
Args:
@@ -53,8 +49,7 @@ class NoisyLayer(tf.keras.layers.Layer if tf else object):
trainable=True,
tf_name=self.prefix + "_sigma_w",
shape=[in_size, self.out_size],
dtype=tf.float32
)
dtype=tf.float32)
self.sigma_b = get_variable(
value=tf.keras.initializers.Constant(
+3 -3
View File
@@ -81,9 +81,9 @@ class VisionNetwork(TFModelV2):
"Given `conv_filters` ({}) do not result in a [B, 1, "
"1, {} (`num_outputs`)] shape (but in {})! Please "
"adjust your Conv2D stack such that the dims 1 and 2 "
"are both 1.".format(
self.model_config["conv_filters"],
self.num_outputs, list(conv_out.shape)))
"are both 1.".format(self.model_config["conv_filters"],
self.num_outputs,
list(conv_out.shape)))
# num_outputs not known -> Flatten, then set self.num_outputs
# to the resulting number of nodes.
+10 -8
View File
@@ -67,20 +67,22 @@ class NoisyLayer(nn.Module):
trainable=True)
def forward(self, inputs):
epsilon_in = self._f_epsilon(torch.normal(
mean=torch.zeros([self.in_size]),
std=torch.ones([self.in_size])))
epsilon_out = self._f_epsilon(torch.normal(
mean=torch.zeros([self.out_size]),
std=torch.ones([self.out_size])))
epsilon_in = self._f_epsilon(
torch.normal(
mean=torch.zeros([self.in_size]),
std=torch.ones([self.in_size])))
epsilon_out = self._f_epsilon(
torch.normal(
mean=torch.zeros([self.out_size]),
std=torch.ones([self.out_size])))
epsilon_w = torch.matmul(
torch.unsqueeze(epsilon_in, -1),
other=torch.unsqueeze(epsilon_out, 0))
epsilon_b = epsilon_out
action_activation = torch.matmul(
inputs, self.w + self.sigma_w * epsilon_w
) + self.b + self.sigma_b * epsilon_b
inputs, self.w +
self.sigma_w * epsilon_w) + self.b + self.sigma_b * epsilon_b
if self.activation is not None:
action_activation = self.activation(action_activation)
+2 -3
View File
@@ -158,9 +158,8 @@ class VisionNetwork(TorchModelV2, nn.Module):
"Given `conv_filters` ({}) do not result in a [B, {} "
"(`num_outputs`), 1, 1] shape (but in {})! Please adjust "
"your Conv2D stack such that the last 2 dims are both "
"1.".format(
self.model_config["conv_filters"], self.num_outputs,
list(conv_out.shape)))
"1.".format(self.model_config["conv_filters"],
self.num_outputs, list(conv_out.shape)))
logits = conv_out.squeeze(3)
logits = logits.squeeze(2)
+30 -30
View File
@@ -47,36 +47,36 @@ class DynamicTFPolicy(TFPolicy):
"""
@DeveloperAPI
def __init__(self,
obs_space: gym.spaces.Space,
action_space: gym.spaces.Space,
config: TrainerConfigDict,
loss_fn: Callable[
[Policy, ModelV2, type, SampleBatch], TensorType],
*,
stats_fn: Optional[Callable[[Policy, SampleBatch],
Dict[str, TensorType]]] = None,
grad_stats_fn: Optional[Callable[
[Policy, SampleBatch, ModelGradients],
Dict[str, TensorType]]] = None,
before_loss_init: Optional[Callable[
[Policy, gym.spaces.Space, gym.spaces.Space,
TrainerConfigDict], None]] = None,
make_model: Optional[Callable[
[Policy, gym.spaces.Space, gym.spaces.Space,
TrainerConfigDict], ModelV2]] = None,
action_sampler_fn: Optional[Callable[
[TensorType, List[TensorType]], Tuple[
TensorType, TensorType]]] = None,
action_distribution_fn: Optional[Callable[
[Policy, ModelV2, TensorType, TensorType, TensorType],
Tuple[TensorType, type, List[TensorType]]]] = None,
existing_inputs: Optional[Dict[
str, "tf1.placeholder"]] = None,
existing_model: Optional[ModelV2] = None,
get_batch_divisibility_req: Optional[Callable[
[Policy], int]] = None,
obs_include_prev_action_reward: bool = True):
def __init__(
self,
obs_space: gym.spaces.Space,
action_space: gym.spaces.Space,
config: TrainerConfigDict,
loss_fn: Callable[[Policy, ModelV2, type, SampleBatch],
TensorType],
*,
stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
str, TensorType]]] = None,
grad_stats_fn: Optional[Callable[[
Policy, SampleBatch, ModelGradients
], Dict[str, TensorType]]] = None,
before_loss_init: Optional[Callable[[
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
], None]] = None,
make_model: Optional[Callable[[
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
], ModelV2]] = None,
action_sampler_fn: Optional[Callable[[
TensorType, List[TensorType]
], Tuple[TensorType, TensorType]]] = None,
action_distribution_fn: Optional[Callable[[
Policy, ModelV2, TensorType, TensorType, TensorType
], Tuple[TensorType, type, List[TensorType]]]] = None,
existing_inputs: Optional[Dict[str, "tf1.placeholder"]] = None,
existing_model: Optional[ModelV2] = None,
get_batch_divisibility_req: Optional[Callable[[Policy],
int]] = None,
obs_include_prev_action_reward: bool = True):
"""Initialize a dynamic TF policy.
Arguments:
+2 -2
View File
@@ -641,8 +641,8 @@ def build_eager_tf_policy(name,
dummy_batch["seq_lens"] = np.array([1], dtype=np.int32)
# Convert everything to tensors.
dummy_batch = tf.nest.map_structure(
tf1.convert_to_tensor, dummy_batch)
dummy_batch = tf.nest.map_structure(tf1.convert_to_tensor,
dummy_batch)
# for IMPALA which expects a certain sample batch size.
def tile_to(tensor, n):
+11 -14
View File
@@ -46,11 +46,8 @@ class Policy(metaclass=ABCMeta):
"""
@DeveloperAPI
def __init__(
self,
observation_space: gym.spaces.Space,
action_space: gym.spaces.Space,
config: TrainerConfigDict):
def __init__(self, observation_space: gym.spaces.Space,
action_space: gym.spaces.Space, config: TrainerConfigDict):
"""Initialize the graph.
This is the standard constructor for policies. The policy
@@ -181,9 +178,9 @@ class Policy(metaclass=ABCMeta):
episodes = [episode]
if state is not None:
state_batch = [
s.unsqueeze(0) if torch and isinstance(s, torch.Tensor) else
np.expand_dims(s, 0)
for s in state
s.unsqueeze(0)
if torch and isinstance(s, torch.Tensor) else np.expand_dims(
s, 0) for s in state
]
out = self.compute_actions(
@@ -261,10 +258,10 @@ class Policy(metaclass=ABCMeta):
actions: Union[List[TensorType], TensorType],
obs_batch: Union[List[TensorType], TensorType],
state_batches: Optional[List[TensorType]] = None,
prev_action_batch: Optional[
Union[List[TensorType], TensorType]] = None,
prev_reward_batch: Optional[
Union[List[TensorType], TensorType]] = None) -> TensorType:
prev_action_batch: Optional[Union[List[TensorType],
TensorType]] = None,
prev_reward_batch: Optional[Union[List[
TensorType], TensorType]] = None) -> TensorType:
"""Computes the log-prob/likelihood for a given action and observation.
Args:
@@ -309,8 +306,8 @@ class Policy(metaclass=ABCMeta):
def postprocess_trajectory(
self,
sample_batch: SampleBatch,
other_agent_batches: Optional[
Dict[AgentID, Tuple["Policy", SampleBatch]]] = None,
other_agent_batches: Optional[Dict[AgentID, Tuple[
"Policy", SampleBatch]]] = None,
episode: Optional["MultiAgentEpisode"] = None) -> SampleBatch:
"""Implements algorithm-specific trajectory postprocessing.
+13 -19
View File
@@ -305,10 +305,9 @@ class SampleBatch:
self.data[key] = item
@DeveloperAPI
def compress(
self,
bulk: bool = False,
columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
def compress(self,
bulk: bool = False,
columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
"""Compresses the data buffers (by column) in place.
Args:
@@ -327,10 +326,9 @@ class SampleBatch:
[pack(o) for o in self.data[key]])
@DeveloperAPI
def decompress_if_needed(
self,
columns: Set[str] = frozenset(
["obs", "new_obs"])) -> "SampleBatch":
def decompress_if_needed(self,
columns: Set[str] = frozenset(
["obs", "new_obs"])) -> "SampleBatch":
"""Decompresses data buffers (per column if not compressed) in place.
Args:
@@ -374,8 +372,7 @@ class MultiAgentBatch:
"""
@PublicAPI
def __init__(self,
policy_batches: Dict[PolicyID, SampleBatch],
def __init__(self, policy_batches: Dict[PolicyID, SampleBatch],
env_steps: int):
"""Initialize a MultiAgentBatch object.
@@ -541,11 +538,9 @@ class MultiAgentBatch:
return sum(b.size_bytes() for b in self.policy_batches.values())
@DeveloperAPI
def compress(
self,
bulk: bool = False,
columns: Set[str] = frozenset(
["obs", "new_obs"])) -> None:
def compress(self,
bulk: bool = False,
columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
"""Compresses each policy batch (per column) in place.
Args:
@@ -558,10 +553,9 @@ class MultiAgentBatch:
batch.compress(bulk=bulk, columns=columns)
@DeveloperAPI
def decompress_if_needed(
self,
columns: Set[str] = frozenset(
["obs", "new_obs"])) -> "MultiAgentBatch":
def decompress_if_needed(self,
columns: Set[str] = frozenset(
["obs", "new_obs"])) -> "MultiAgentBatch":
"""Decompresses each policy batch (per column), if already compressed.
Args:
+11 -8
View File
@@ -25,8 +25,9 @@ class TestTrajectoryViewAPI(unittest.TestCase):
assert len(view_req_model) == 1
assert len(view_req_policy) == 6
for key in [
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
SampleBatch.DONES, SampleBatch.NEXT_OBS, SampleBatch.VF_PREDS
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
SampleBatch.DONES, SampleBatch.NEXT_OBS,
SampleBatch.VF_PREDS
]:
assert key in view_req_policy
# None of the view cols has a special underlying data_col,
@@ -53,9 +54,10 @@ class TestTrajectoryViewAPI(unittest.TestCase):
assert len(view_req_model) == 3 # obs, prev_a, prev_r
assert len(view_req_policy) == 8
for key in [
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
SampleBatch.DONES, SampleBatch.NEXT_OBS, SampleBatch.VF_PREDS,
SampleBatch.PREV_ACTIONS, SampleBatch.PREV_REWARDS
SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
SampleBatch.DONES, SampleBatch.NEXT_OBS,
SampleBatch.VF_PREDS, SampleBatch.PREV_ACTIONS,
SampleBatch.PREV_REWARDS
]:
assert key in view_req_policy
@@ -65,9 +67,10 @@ class TestTrajectoryViewAPI(unittest.TestCase):
elif key == SampleBatch.PREV_REWARDS:
assert view_req_policy[key].data_col == SampleBatch.REWARDS
assert view_req_policy[key].shift == -1
elif key not in [SampleBatch.NEXT_OBS,
SampleBatch.PREV_ACTIONS,
SampleBatch.PREV_REWARDS]:
elif key not in [
SampleBatch.NEXT_OBS, SampleBatch.PREV_ACTIONS,
SampleBatch.PREV_REWARDS
]:
assert view_req_policy[key].data_col is None
else:
assert view_req_policy[key].data_col == SampleBatch.OBS
+18 -16
View File
@@ -239,8 +239,7 @@ class TFPolicy(Policy):
"""Returns whether the loss function has been initialized."""
return self._loss is not None
def _initialize_loss(self,
loss: TensorType,
def _initialize_loss(self, loss: TensorType,
loss_inputs: List[Tuple[str, TensorType]]) -> None:
"""Initializes the loss op from given loss tensor and placeholders.
@@ -264,8 +263,10 @@ class TFPolicy(Policy):
self._loss = loss
self._optimizer = self.optimizer()
self._grads_and_vars = [(g, v) for (g, v) in self.gradients(
self._optimizer, self._loss) if g is not None]
self._grads_and_vars = [
(g, v) for (g, v) in self.gradients(self._optimizer, self._loss)
if g is not None
]
self._grads = [g for (g, v) in self._grads_and_vars]
# TODO(sven/ekl): Deprecate support for v1 models.
@@ -336,10 +337,10 @@ class TFPolicy(Policy):
actions: Union[List[TensorType], TensorType],
obs_batch: Union[List[TensorType], TensorType],
state_batches: Optional[List[TensorType]] = None,
prev_action_batch: Optional[
Union[List[TensorType], TensorType]] = None,
prev_reward_batch: Optional[
Union[List[TensorType], TensorType]] = None) -> TensorType:
prev_action_batch: Optional[Union[List[TensorType],
TensorType]] = None,
prev_reward_batch: Optional[Union[List[
TensorType], TensorType]] = None) -> TensorType:
if self._log_likelihood is None:
raise ValueError("Cannot compute log-prob/likelihood w/o a "
@@ -378,8 +379,8 @@ class TFPolicy(Policy):
@override(Policy)
@DeveloperAPI
def learn_on_batch(self, postprocessed_batch: SampleBatch) -> Dict[
str, TensorType]:
def learn_on_batch(
self, postprocessed_batch: SampleBatch) -> Dict[str, TensorType]:
assert self.loss_initialized()
builder = TFRunBuilder(self._sess, "learn_on_batch")
fetches = self._build_learn_on_batch(builder, postprocessed_batch)
@@ -457,7 +458,8 @@ class TFPolicy(Policy):
@override(Policy)
@DeveloperAPI
def export_checkpoint(self, export_dir: str,
def export_checkpoint(self,
export_dir: str,
filename_prefix: str = "model") -> None:
"""Export tensorflow checkpoint to export_dir."""
try:
@@ -573,8 +575,7 @@ class TFPolicy(Policy):
return tf1.train.AdamOptimizer()
@DeveloperAPI
def gradients(self,
optimizer: "tf.keras.optimizers.Optimizer",
def gradients(self, optimizer: "tf.keras.optimizers.Optimizer",
loss: TensorType) -> List[Tuple[TensorType, TensorType]]:
"""Override this for a custom gradient computation behavior.
@@ -816,8 +817,7 @@ class LearningRateSchedule:
@DeveloperAPI
def __init__(self, lr, lr_schedule):
self.cur_lr = tf1.get_variable(
"lr", initializer=lr, trainable=False)
self.cur_lr = tf1.get_variable("lr", initializer=lr, trainable=False)
if lr_schedule is None:
self.lr_schedule = ConstantSchedule(lr, framework=None)
else:
@@ -843,7 +843,9 @@ class EntropyCoeffSchedule:
@DeveloperAPI
def __init__(self, entropy_coeff, entropy_coeff_schedule):
self.entropy_coeff = get_variable(
entropy_coeff, framework="tf", tf_name="entropy_coeff",
entropy_coeff,
framework="tf",
tf_name="entropy_coeff",
trainable=False)
if entropy_coeff_schedule is None:
+46 -52
View File
@@ -13,58 +13,52 @@ from ray.rllib.utils.types import ModelGradients, TensorType, TrainerConfigDict
@DeveloperAPI
def build_tf_policy(name: str,
*,
loss_fn: Callable[
[Policy, ModelV2, type, SampleBatch], TensorType],
get_default_config: Optional[
Callable[[None], TrainerConfigDict]] = None,
postprocess_fn: Optional[Callable[
[Policy, SampleBatch, List[SampleBatch],
"MultiAgentEpisode"], None]] = None,
stats_fn: Optional[Callable[
[Policy, SampleBatch], Dict[str, TensorType]]] = None,
optimizer_fn: Optional[Callable[
[Policy, TrainerConfigDict],
"tf.keras.optimizers.Optimizer"]] = None,
gradients_fn: Optional[Callable[
[Policy, "tf.keras.optimizers.Optimizer",
TensorType], ModelGradients]] = None,
apply_gradients_fn: Optional[Callable[
[Policy, "tf.keras.optimizers.Optimizer",
ModelGradients], "tf.Operation"]] = None,
grad_stats_fn: Optional[Callable[
[Policy, SampleBatch, ModelGradients],
Dict[str, TensorType]]] = None,
extra_action_fetches_fn: Optional[Callable[
[Policy], Dict[str, TensorType]]] = None,
extra_learn_fetches_fn: Optional[Callable[
[Policy], Dict[str, TensorType]]] = None,
validate_spaces: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict],
None]] = None,
before_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict],
None]] = None,
before_loss_init: Optional[Callable[
[Policy, gym.spaces.Space, gym.spaces.Space,
TrainerConfigDict], None]] = None,
after_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict],
None]] = None,
make_model: Optional[Callable[
[Policy, gym.spaces.Space, gym.spaces.Space,
TrainerConfigDict], ModelV2]] = None,
action_sampler_fn: Optional[Callable[
[TensorType, List[TensorType]], Tuple[
TensorType, TensorType]]] = None,
action_distribution_fn: Optional[Callable[
[Policy, ModelV2, TensorType, TensorType, TensorType],
Tuple[TensorType, type, List[TensorType]]]] = None,
mixins: Optional[List[type]] = None,
get_batch_divisibility_req: Optional[Callable[
[Policy], int]] = None,
obs_include_prev_action_reward: bool = True):
def build_tf_policy(
name: str,
*,
loss_fn: Callable[[Policy, ModelV2, type, SampleBatch], TensorType],
get_default_config: Optional[Callable[[None],
TrainerConfigDict]] = None,
postprocess_fn: Optional[Callable[[
Policy, SampleBatch, List[SampleBatch], "MultiAgentEpisode"
], None]] = None,
stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
str, TensorType]]] = None,
optimizer_fn: Optional[Callable[[
Policy, TrainerConfigDict
], "tf.keras.optimizers.Optimizer"]] = None,
gradients_fn: Optional[Callable[[
Policy, "tf.keras.optimizers.Optimizer", TensorType
], ModelGradients]] = None,
apply_gradients_fn: Optional[Callable[[
Policy, "tf.keras.optimizers.Optimizer", ModelGradients
], "tf.Operation"]] = None,
grad_stats_fn: Optional[Callable[[Policy, SampleBatch, ModelGradients],
Dict[str, TensorType]]] = None,
extra_action_fetches_fn: Optional[Callable[[Policy], Dict[
str, TensorType]]] = None,
extra_learn_fetches_fn: Optional[Callable[[Policy], Dict[
str, TensorType]]] = None,
validate_spaces: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
before_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
before_loss_init: Optional[Callable[[
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
], None]] = None,
after_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
make_model: Optional[Callable[[
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
], ModelV2]] = None,
action_sampler_fn: Optional[Callable[[TensorType, List[
TensorType]], Tuple[TensorType, TensorType]]] = None,
action_distribution_fn: Optional[Callable[[
Policy, ModelV2, TensorType, TensorType, TensorType
], Tuple[TensorType, type, List[TensorType]]]] = None,
mixins: Optional[List[type]] = None,
get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None,
obs_include_prev_action_reward: bool = True):
"""Helper function for creating a dynamic tf policy at runtime.
Functions will be run in this order to initialize the policy:
+45 -56
View File
@@ -19,62 +19,51 @@ torch, _ = try_import_torch()
@DeveloperAPI
def build_torch_policy(name: str,
*,
loss_fn: Callable[
[Policy, ModelV2, type, SampleBatch], TensorType],
get_default_config: Optional[Callable[
[], TrainerConfigDict]] = None,
stats_fn: Optional[Callable[
[Policy, SampleBatch],
Dict[str, TensorType]]] = None,
postprocess_fn: Optional[Callable[
[Policy, SampleBatch, List[SampleBatch],
"MultiAgentEpisode"], None]] = None,
extra_action_out_fn: Optional[Callable[
[Policy, Dict[str, TensorType], List[TensorType],
ModelV2, TorchDistributionWrapper],
Dict[str, TensorType]]] = None,
extra_grad_process_fn: Optional[Callable[
[Policy, "torch.optim.Optimizer", TensorType],
Dict[str, TensorType]]] = None,
# TODO: (sven) Replace "fetches" with "process".
extra_learn_fetches_fn: Optional[Callable[
[Policy], Dict[str, TensorType]]] = None,
optimizer_fn: Optional[Callable[
[Policy, TrainerConfigDict],
"torch.optim.Optimizer"]] = None,
validate_spaces: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict],
None]] = None,
before_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict],
None]] = None,
after_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict],
None]] = None,
action_sampler_fn: Optional[Callable[
[TensorType, List[TensorType]], Tuple[
TensorType, TensorType]]] = None,
action_distribution_fn: Optional[Callable[
[Policy, ModelV2, TensorType, TensorType,
TensorType],
Tuple[TensorType, type, List[TensorType]]]] = None,
make_model: Optional[Callable[
[Policy, gym.spaces.Space, gym.spaces.Space,
TrainerConfigDict], ModelV2]] = None,
make_model_and_action_dist: Optional[Callable[
[Policy, gym.spaces.Space, gym.spaces.Space,
TrainerConfigDict],
Tuple[ModelV2, TorchDistributionWrapper]]] = None,
apply_gradients_fn: Optional[Callable[
[Policy, "torch.optim.Optimizer"], None]] = None,
mixins: Optional[List[type]] = None,
training_view_requirements_fn: Optional[Callable[
[], Dict[str, ViewRequirement]]] = None,
get_batch_divisibility_req: Optional[Callable[
[Policy], int]] = None
):
def build_torch_policy(
name: str,
*,
loss_fn: Callable[[Policy, ModelV2, type, SampleBatch], TensorType],
get_default_config: Optional[Callable[[], TrainerConfigDict]] = None,
stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
str, TensorType]]] = None,
postprocess_fn: Optional[Callable[[
Policy, SampleBatch, List[SampleBatch], "MultiAgentEpisode"
], None]] = None,
extra_action_out_fn: Optional[Callable[[
Policy, Dict[str, TensorType], List[TensorType], ModelV2,
TorchDistributionWrapper
], Dict[str, TensorType]]] = None,
extra_grad_process_fn: Optional[Callable[[
Policy, "torch.optim.Optimizer", TensorType
], Dict[str, TensorType]]] = None,
# TODO: (sven) Replace "fetches" with "process".
extra_learn_fetches_fn: Optional[Callable[[Policy], Dict[
str, TensorType]]] = None,
optimizer_fn: Optional[Callable[[Policy, TrainerConfigDict],
"torch.optim.Optimizer"]] = None,
validate_spaces: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
before_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
after_init: Optional[Callable[
[Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
action_sampler_fn: Optional[Callable[[TensorType, List[
TensorType]], Tuple[TensorType, TensorType]]] = None,
action_distribution_fn: Optional[Callable[[
Policy, ModelV2, TensorType, TensorType, TensorType
], Tuple[TensorType, type, List[TensorType]]]] = None,
make_model: Optional[Callable[[
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
], ModelV2]] = None,
make_model_and_action_dist: Optional[Callable[[
Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
], Tuple[ModelV2, TorchDistributionWrapper]]] = None,
apply_gradients_fn: Optional[Callable[
[Policy, "torch.optim.Optimizer"], None]] = None,
mixins: Optional[List[type]] = None,
training_view_requirements_fn: Optional[Callable[[], Dict[
str, ViewRequirement]]] = None,
get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None):
"""Helper function for creating a torch policy class at runtime.
Args:
+2 -2
View File
@@ -147,7 +147,7 @@ class ModelCatalogTest(unittest.TestCase):
self.assertEqual(param_shape, action_space.shape)
# test the class works as a distribution
dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
model = Model()
model.model_config = model_config
dist = dist_cls(dist_input, model=model)
@@ -161,7 +161,7 @@ class ModelCatalogTest(unittest.TestCase):
dist_cls, param_shape = ModelCatalog.get_action_dist(
action_space, model_config)
self.assertEqual(param_shape, (3, ))
dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
model.model_config = model_config
dist = dist_cls(dist_input, model=model)
self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
+7 -3
View File
@@ -76,7 +76,10 @@ class TestEagerSupportPG(unittest.TestCase):
def test_impala(self):
check_support(
"IMPALA", {"num_workers": 1, "num_gpus": 0}, test_eager=True)
"IMPALA", {
"num_workers": 1,
"num_gpus": 0
}, test_eager=True)
class TestEagerSupportOffPolicy(unittest.TestCase):
@@ -130,5 +133,6 @@ if __name__ == "__main__":
# None for all unittest.TestCase classes in this file.
import pytest
class_ = sys.argv[1] if len(sys.argv) > 1 else None
sys.exit(pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
sys.exit(
pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+27 -25
View File
@@ -21,32 +21,34 @@ class TestMultiAgentPendulum(unittest.TestCase):
# Test for both torch and tf.
for fw in framework_iterator(frameworks=["torch", "tf"]):
trials = run_experiments({
"test": {
"run": "PPO",
"env": "multi_agent_pendulum",
"stop": {
"timesteps_total": 500000,
"episode_reward_mean": -300.0,
},
"config": {
"train_batch_size": 2048,
"vf_clip_param": 10.0,
"num_workers": 0,
"num_envs_per_worker": 10,
"lambda": 0.1,
"gamma": 0.95,
"lr": 0.0003,
"sgd_minibatch_size": 64,
"num_sgd_iter": 10,
"model": {
"fcnet_hiddens": [128, 128],
trials = run_experiments(
{
"test": {
"run": "PPO",
"env": "multi_agent_pendulum",
"stop": {
"timesteps_total": 500000,
"episode_reward_mean": -300.0,
},
"batch_mode": "complete_episodes",
"framework": fw,
},
}
}, verbose=1)
"config": {
"train_batch_size": 2048,
"vf_clip_param": 10.0,
"num_workers": 0,
"num_envs_per_worker": 10,
"lambda": 0.1,
"gamma": 0.95,
"lr": 0.0003,
"sgd_minibatch_size": 64,
"num_sgd_iter": 10,
"model": {
"fcnet_hiddens": [128, 128],
},
"batch_mode": "complete_episodes",
"framework": fw,
},
}
},
verbose=1)
if trials[0].last_result["episode_reward_mean"] < -300.0:
raise ValueError("Did not get to -200 reward",
trials[0].last_result)
+1 -1
View File
@@ -278,7 +278,7 @@ class TestRolloutWorker(unittest.TestCase):
def test_action_clipping(self):
from ray.rllib.examples.env.random_env import RandomEnv
action_space = gym.spaces.Box(-2.0, 1.0, (3,))
action_space = gym.spaces.Box(-2.0, 1.0, (3, ))
# Clipping: True (clip between Policy's action_space.low/high),
ev = RolloutWorker(
+3 -2
View File
@@ -125,5 +125,6 @@ if __name__ == "__main__":
# One can specify the specific TestCase class to run.
# None for all unittest.TestCase classes in this file.
class_ = sys.argv[1] if len(sys.argv) > 1 else None
sys.exit(pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
sys.exit(
pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+3 -2
View File
@@ -203,5 +203,6 @@ if __name__ == "__main__":
# One can specify the specific TestCase class to run.
# None for all unittest.TestCase classes in this file.
class_ = sys.argv[1] if len(sys.argv) > 1 else None
sys.exit(pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
sys.exit(
pytest.main(
["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+2 -2
View File
@@ -99,8 +99,8 @@ class EpsilonGreedy(Exploration):
tf.random.categorical(random_valid_action_logits, 1), axis=1)
chose_random = tf.random.uniform(
tf.stack([batch_size]),
minval=0, maxval=1, dtype=tf.float32) < epsilon
tf.stack([batch_size]), minval=0, maxval=1,
dtype=tf.float32) < epsilon
action = tf.cond(
pred=tf.constant(explore, dtype=tf.bool)
+1 -1
View File
@@ -154,7 +154,7 @@ class GaussianNoise(Exploration):
scale = self.scale_schedule(self.last_timestep)
gaussian_sample = scale * torch.normal(
mean=torch.zeros(det_actions.size()), std=self.stddev).to(
self.device)
self.device)
action = torch.min(
torch.max(
det_actions + gaussian_sample,
@@ -9,23 +9,18 @@ from ray.rllib.utils.test_utils import check, framework_iterator
class TestParameterNoise(unittest.TestCase):
def test_ddpg_parameter_noise(self):
self.do_test_parameter_noise_exploration(
ddpg.DDPGTrainer,
ddpg.DEFAULT_CONFIG,
"Pendulum-v0", {},
ddpg.DDPGTrainer, ddpg.DEFAULT_CONFIG, "Pendulum-v0", {},
np.array([1.0, 0.0, -1.0]))
def test_dqn_parameter_noise(self):
self.do_test_parameter_noise_exploration(
dqn.DQNTrainer,
dqn.DEFAULT_CONFIG,
"FrozenLake-v0", {
dqn.DQNTrainer, dqn.DEFAULT_CONFIG, "FrozenLake-v0", {
"is_slippery": False,
"map_name": "4x4"
},
np.array(0))
}, np.array(0))
def do_test_parameter_noise_exploration(
self, trainer_cls, config, env, env_config, obs):
def do_test_parameter_noise_exploration(self, trainer_cls, config, env,
env_config, obs):
"""Tests, whether an Agent works with ParameterNoise."""
core_config = config.copy()
core_config["num_workers"] = 0 # Run locally.
+7 -3
View File
@@ -200,9 +200,13 @@ def get_variable(value,
if isinstance(value, float) else tf.int32
if isinstance(value, int) else None)
return tf.compat.v1.get_variable(
tf_name, initializer=value, dtype=dtype, trainable=trainable,
**({} if shape is None else {"shape": shape})
)
tf_name,
initializer=value,
dtype=dtype,
trainable=trainable,
**({} if shape is None else {
"shape": shape
}))
elif framework == "torch" and torch_tensor is True:
torch, _ = try_import_torch()
var_ = torch.from_numpy(value)
@@ -53,7 +53,8 @@ class TestSchedules(unittest.TestCase):
def test_polynomial_schedule(self):
ts = [0, 5, 10, 100, 90, 2, 1, 99, 23, 1000]
expected = [
0.5 + (2.0 - 0.5) * (1.0 - min(t, 100) / 100)**2 for t in ts]
0.5 + (2.0 - 0.5) * (1.0 - min(t, 100) / 100)**2 for t in ts
]
config = dict(
type="ray.rllib.utils.schedules.polynomial_schedule."
"PolynomialSchedule",
+1
View File
@@ -12,6 +12,7 @@ class FlexDict(gym.spaces.Dict):
space['key'] = spaces.Box(4,)
See also: documentation for gym.spaces.Dict
"""
def __init__(self, spaces=None, **spaces_kwargs):
err = "Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)"
assert (spaces is None) or (not spaces_kwargs), err
+2 -2
View File
@@ -288,8 +288,8 @@ def check_compute_single_action(trainer,
method_to_test = trainer.compute_action
# Get the obs-space from Workers.env (not Policy) due to possible
# pre-processor up front.
worker_set = getattr(
trainer, "workers", getattr(trainer, "_workers", None))
worker_set = getattr(trainer, "workers",
getattr(trainer, "_workers", None))
assert worker_set
if isinstance(worker_set, list):
obs_space = trainer.get_policy().observation_space
+2 -2
View File
@@ -34,8 +34,8 @@ def minimize_and_clip(optimizer, objective, var_list, clip_val=10.0):
if tf.executing_eagerly():
tape = optimizer.tape
grads_and_vars = list(zip(list(
tape.gradient(objective, var_list)), var_list))
grads_and_vars = list(
zip(list(tape.gradient(objective, var_list)), var_list))
else:
grads_and_vars = optimizer.compute_gradients(
objective, var_list=var_list)
+1
View File
@@ -15,6 +15,7 @@
#pragma once
#include <flatbuffers/flatbuffers.h>
#include <unordered_set>
#include "ray/common/id.h"
-1
View File
@@ -13,7 +13,6 @@
// limitations under the License.
#include "gtest/gtest.h"
#include "ray/common/common_protocol.h"
#include "ray/common/task/task_spec.h"
@@ -4,7 +4,6 @@
#include <sstream>
#include "absl/container/flat_hash_map.h"
#include "ray/common/bundle_spec.h"
#include "ray/util/logging.h"
+2 -2
View File
@@ -1,7 +1,7 @@
#include <sstream>
#include "ray/common/task/task_execution_spec.h"
#include <sstream>
namespace ray {
size_t TaskExecutionSpecification::NumForwards() const {
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ray/common/client_connection.h"
#include <boost/asio.hpp>
#include <boost/asio/error.hpp>
#include <list>
@@ -20,8 +22,6 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "ray/common/client_connection.h"
namespace ray {
namespace raylet {
@@ -16,11 +16,11 @@
#include <jni.h>
#include "jni_utils.h"
#include "ray/common/id.h"
#include "ray/core_worker/actor_handle.h"
#include "ray/core_worker/common.h"
#include "ray/core_worker/core_worker.h"
#include "jni_utils.h"
#ifdef __cplusplus
extern "C" {
@@ -13,11 +13,13 @@
// limitations under the License.
#include "io_ray_runtime_context_NativeWorkerContext.h"
#include <jni.h>
#include "jni_utils.h"
#include "ray/common/id.h"
#include "ray/core_worker/context.h"
#include "ray/core_worker/core_worker.h"
#include "jni_utils.h"
#ifdef __cplusplus
extern "C" {
@@ -16,8 +16,8 @@
#include <jni.h>
#include "ray/core_worker/common.h"
#include "jni_utils.h"
#include "ray/core_worker/common.h"
#include "ray/gcs/gcs_client/global_state_accessor.h"
#ifdef __cplusplus
@@ -13,13 +13,14 @@
// limitations under the License.
#include "io_ray_runtime_metric_NativeMetric.h"
#include "jni_utils.h"
#include "ray/stats/metric.h"
#include <jni.h>
#include <algorithm>
#include "jni_utils.h"
#include "opencensus/tags/tag_key.h"
#include "ray/stats/metric.h"
using TagKeyType = opencensus::tags::TagKey;
using TagsType = std::vector<std::pair<opencensus::tags::TagKey, std::string>>;
@@ -13,7 +13,9 @@
// limitations under the License.
#include "io_ray_runtime_object_NativeObjectStore.h"
#include <jni.h>
#include "jni_utils.h"
#include "ray/common/id.h"
#include "ray/core_worker/common.h"
@@ -13,11 +13,13 @@
// limitations under the License.
#include "io_ray_runtime_task_NativeTaskExecutor.h"
#include <jni.h>
#include "jni_utils.h"
#include "ray/common/id.h"
#include "ray/core_worker/common.h"
#include "ray/core_worker/core_worker.h"
#include "jni_utils.h"
#include "ray/raylet_client/raylet_client.h"
#ifdef __cplusplus
+2 -1
View File
@@ -15,6 +15,7 @@
#pragma once
#include <jni.h>
#include <algorithm>
#include "ray/common/buffer.h"
@@ -346,7 +347,7 @@ inline jobject NativeVectorToJavaList(
env->NewObject(java_array_list_class, java_array_list_init_with_capacity,
(jint)native_vector.size());
RAY_CHECK_JAVA_EXCEPTION(env);
for (auto it = native_vector.begin(); it != native_vector.end(); ++it){
for (auto it = native_vector.begin(); it != native_vector.end(); ++it) {
auto element = element_converter(env, *it);
env->CallVoidMethod(java_list, java_list_add, element);
RAY_CHECK_JAVA_EXCEPTION(env);
@@ -13,8 +13,8 @@
// limitations under the License.
#include <thread>
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "ray/common/test_util.h"
#include "ray/core_worker/transport/direct_actor_transport.h"

Some files were not shown because too many files have changed in this diff Show More