[rllib] Include config dicts in the sphinx docs (#3064)

This commit is contained in:
Eric Liang
2018-10-16 15:55:11 -07:00
committed by GitHub
parent 64e5eb305e
commit a9e454f6fd
26 changed files with 236 additions and 137 deletions
+3 -23
View File
@@ -10,6 +10,7 @@ from ray.rllib.optimizers import AsyncGradientsOptimizer
from ray.rllib.utils import merge_dicts
from ray.tune.trial import Resources
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# Size of rollout batch
"sample_batch_size": 10,
@@ -34,31 +35,10 @@ DEFAULT_CONFIG = with_common_config({
# Workers sample async. Note that this increases the effective
# sample_batch_size by up to 5x due to async buffering of batches.
"sample_async": True,
# Model and preprocessor options
"model": {
# Use LSTM model. Requires TF.
"use_lstm": False,
# Max seq length for LSTM training.
"max_seq_len": 20,
# (Image statespace) - Converts image to Channels = 1
"grayscale": True,
# (Image statespace) - Each pixel
"zero_mean": False,
# (Image statespace) - Converts image to (dim, dim, C)
"dim": 84,
# (Image statespace) - Converts image shape to (C, dim, dim)
"channel_major": False,
},
# Configure TF for single-process operation
"tf_session_args": {
"intra_op_parallelism_threads": 1,
"inter_op_parallelism_threads": 1,
"gpu_options": {
"allow_growth": True,
},
},
})
# __sphinx_doc_end__
class A3CAgent(Agent):
"""A3C implementations in TensorFlow and PyTorch."""
+9 -7
View File
@@ -10,6 +10,7 @@ from datetime import datetime
import tensorflow as tf
import ray
from ray.rllib.models import MODEL_DEFAULTS
from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer
from ray.rllib.utils import FilterManager, deep_update, merge_dicts
@@ -18,10 +19,11 @@ from ray.tune.trainable import Trainable
from ray.tune.logger import UnifiedLogger
from ray.tune.result import DEFAULT_RESULTS_DIR
# __sphinx_doc_begin__
COMMON_CONFIG = {
# Discount factor of the MDP
"gamma": 0.99,
# Number of steps after which the rollout gets cut
# Number of steps after which the episode is forced to terminate
"horizon": None,
# Number of environments to evaluate vectorwise per worker.
"num_envs_per_worker": 1,
@@ -36,7 +38,7 @@ COMMON_CONFIG = {
"batch_mode": "truncate_episodes",
# Whether to use a background thread for sampling (slightly off-policy)
"sample_async": False,
# Which observation filter to apply to the observation
# Element-wise observation filter, either "NoFilter" or "MeanStdFilter"
"observation_filter": "NoFilter",
# Whether to synchronize the statistics of remote filters.
"synchronize_filters": True,
@@ -50,14 +52,12 @@ COMMON_CONFIG = {
# Environment name can also be passed via config
"env": None,
# Arguments to pass to model
"model": {
"use_lstm": False,
"max_seq_len": 20,
},
# Arguments to pass to the rllib optimizer
"model": MODEL_DEFAULTS,
# Arguments to pass to the policy optimizer. These vary by optimizer.
"optimizer": {},
# Configure TF for single-process operation by default
"tf_session_args": {
# note: parallelism_threads is set to auto for the local evaluator
"intra_op_parallelism_threads": 1,
"inter_op_parallelism_threads": 1,
"gpu_options": {
@@ -88,6 +88,8 @@ COMMON_CONFIG = {
},
}
# __sphinx_doc_end__
def with_common_config(extra_config):
"""Returns the given config dict merged with common agent confs."""
+2 -1
View File
@@ -24,6 +24,7 @@ Result = namedtuple("Result", [
"eval_returns", "eval_lengths"
])
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
"noise_stdev": 0.02, # std deviation of parameter noise
"num_rollouts": 32, # number of perturbs to try
@@ -34,9 +35,9 @@ DEFAULT_CONFIG = with_common_config({
"noise_size": 250000000,
"eval_prob": 0.03, # probability of evaluating the parameter rewards
"report_length": 10, # how many of the last rewards we average over
"env_config": {},
"offset": 0,
})
# __sphinx_doc_end__
@ray.remote
+1 -1
View File
@@ -7,7 +7,7 @@ from ray.rllib.utils import merge_dicts
from ray.tune.trial import Resources
APEX_DDPG_DEFAULT_CONFIG = merge_dicts(
DDPG_CONFIG,
DDPG_CONFIG, # see also the options in ddpg.py, which are also supported
{
"optimizer_class": "AsyncReplayOptimizer",
"optimizer": merge_dicts(
+3
View File
@@ -13,6 +13,7 @@ OPTIMIZER_SHARED_CONFIGS = [
"train_batch_size", "learning_starts"
]
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# === Model ===
# Hidden layer sizes of the policy network
@@ -108,6 +109,8 @@ DEFAULT_CONFIG = with_common_config({
"min_iter_time_s": 1,
})
# __sphinx_doc_end__
class DDPGAgent(DQNAgent):
"""DDPG implementation in TensorFlow."""
+4 -1
View File
@@ -6,8 +6,9 @@ from ray.rllib.agents.dqn.dqn import DQNAgent, DEFAULT_CONFIG as DQN_CONFIG
from ray.rllib.utils import merge_dicts
from ray.tune.trial import Resources
# __sphinx_doc_begin__
APEX_DEFAULT_CONFIG = merge_dicts(
DQN_CONFIG,
DQN_CONFIG, # see also the options in dqn.py, which are also supported
{
"optimizer_class": "AsyncReplayOptimizer",
"optimizer": merge_dicts(
@@ -31,6 +32,8 @@ APEX_DEFAULT_CONFIG = merge_dicts(
},
)
# __sphinx_doc_end__
class ApexAgent(DQNAgent):
"""DQN variant that uses the Ape-X distributed policy optimizer.
@@ -13,7 +13,7 @@ def wrap_dqn(env, options):
# Override atari default to use the deepmind wrappers.
# TODO(ekl) this logic should be pushed to the catalog.
if is_atari and "custom_preprocessor" not in options:
if is_atari and not options.get("custom_preprocessor"):
return wrap_deepmind(env, dim=options.get("dim", 84))
return ModelCatalog.get_preprocessor_as_wrapper(env, options)
+3
View File
@@ -20,6 +20,7 @@ OPTIMIZER_SHARED_CONFIGS = [
"learning_starts"
]
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# === Model ===
# Number of atoms for representing the distribution of return. When
@@ -116,6 +117,8 @@ DEFAULT_CONFIG = with_common_config({
"min_iter_time_s": 1,
})
# __sphinx_doc_end__
class DQNAgent(Agent):
"""DQN implementation in TensorFlow."""
+4 -4
View File
@@ -24,6 +24,7 @@ Result = namedtuple("Result", [
"eval_returns", "eval_lengths"
])
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
"l2_coeff": 0.005,
"noise_stdev": 0.02,
@@ -36,10 +37,8 @@ DEFAULT_CONFIG = with_common_config({
"observation_filter": "MeanStdFilter",
"noise_size": 250000000,
"report_length": 10,
"env": None,
"env_config": {},
"model": {},
})
# __sphinx_doc_end__
@ray.remote
@@ -77,7 +76,8 @@ class Worker(object):
self.env = env_creator(config["env_config"])
from ray.rllib import models
self.preprocessor = models.ModelCatalog.get_preprocessor(self.env)
self.preprocessor = models.ModelCatalog.get_preprocessor(
self.env, config["model"])
self.sess = utils.make_session(single_threaded=True)
self.policy = policies.GenericPolicy(
+3 -7
View File
@@ -23,6 +23,7 @@ OPTIMIZER_SHARED_CONFIGS = [
"max_sample_requests_in_flight_per_worker",
]
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# V-trace params (see vtrace.py).
"vtrace": True,
@@ -63,15 +64,10 @@ DEFAULT_CONFIG = with_common_config({
# balancing the three losses
"vf_loss_coeff": 0.5,
"entropy_coeff": -0.01,
# Model and preprocessor options.
"model": {
"use_lstm": False,
"max_seq_len": 20,
"dim": 84,
},
})
# __sphinx_doc_end__
class ImpalaAgent(Agent):
"""IMPALA implementation using DeepMind's V-trace."""
+3 -7
View File
@@ -8,20 +8,16 @@ from ray.rllib.optimizers import SyncSamplesOptimizer
from ray.rllib.utils import merge_dicts
from ray.tune.trial import Resources
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# No remote workers by default
"num_workers": 0,
# Learning rate
"lr": 0.0004,
# Override model config
"model": {
# Use LSTM model.
"use_lstm": False,
# Max seq length for LSTM training.
"max_seq_len": 20,
},
})
# __sphinx_doc_end__
class PGAgent(Agent):
"""Simple policy gradient agent.
+3 -7
View File
@@ -8,6 +8,7 @@ from ray.rllib.utils import merge_dicts
from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
from ray.tune.trial import Resources
# __sphinx_doc_begin__
DEFAULT_CONFIG = with_common_config({
# If true, use the Generalized Advantage Estimator (GAE)
# with a value function, see https://arxiv.org/pdf/1506.02438.pdf.
@@ -53,15 +54,10 @@ DEFAULT_CONFIG = with_common_config({
"observation_filter": "MeanStdFilter",
# Use the sync samples optimizer instead of the multi-gpu one
"simple_optimizer": False,
# Override model config
"model": {
# Whether to use LSTM model
"use_lstm": False,
# Max seq length for LSTM training.
"max_seq_len": 20,
},
})
# __sphinx_doc_end__
class PPOAgent(Agent):
"""Multi-GPU optimized implementation of PPO in TensorFlow."""
@@ -187,7 +187,7 @@ class PolicyEvaluator(EvaluatorInterface):
def wrap(env):
return env # we can't auto-wrap these env types
elif is_atari(self.env) and \
"custom_preprocessor" not in model_config and \
not model_config.get("custom_preprocessor") and \
preprocessor_pref == "deepmind":
if clip_rewards is None:
@@ -196,9 +196,8 @@ class PolicyEvaluator(EvaluatorInterface):
def wrap(env):
env = wrap_deepmind(
env,
dim=model_config.get("dim", 84),
framestack=not model_config.get("use_lstm")
and not model_config.get("no_framestack"))
dim=model_config.get("dim"),
framestack=model_config.get("framestack"))
if monitor_path:
env = _monitor(env, monitor_path)
return env
+11 -3
View File
@@ -1,4 +1,4 @@
from ray.rllib.models.catalog import ModelCatalog
from ray.rllib.models.catalog import ModelCatalog, MODEL_DEFAULTS
from ray.rllib.models.action_dist import (ActionDistribution, Categorical,
DiagGaussian, Deterministic)
from ray.rllib.models.model import Model
@@ -7,6 +7,14 @@ from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.models.lstm import LSTM
__all__ = [
"ActionDistribution", "Categorical", "DiagGaussian", "Deterministic",
"ModelCatalog", "Model", "Preprocessor", "FullyConnectedNetwork", "LSTM"
"ActionDistribution",
"Categorical",
"DiagGaussian",
"Deterministic",
"ModelCatalog",
"Model",
"Preprocessor",
"FullyConnectedNetwork",
"LSTM",
"MODEL_DEFAULTS",
]
+56 -32
View File
@@ -18,29 +18,52 @@ from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.models.visionnet import VisionNetwork
from ray.rllib.models.lstm import LSTM
MODEL_CONFIGS = [
# __sphinx_doc_begin__
MODEL_DEFAULTS = {
# === Built-in options ===
# Filter config. List of [out_channels, kernel, stride] for each filter
"conv_filters",
"conv_activation", # Nonlinearity for built-in convnet
"fcnet_activation", # Nonlinearity for fully connected net (tanh, relu)
"fcnet_hiddens", # Number of hidden layers for fully connected net
"dim", # Dimension for ATARI
"grayscale", # Converts ATARI frame to 1 Channel Grayscale image
"zero_mean", # Changes frame to range from [-1, 1] if true
"extra_frameskip", # (int) for number of frames to skip
"free_log_std", # Documented in ray.rllib.models.Model
"channel_major", # Pytorch conv requires images to be channel-major
"squash_to_range", # Whether to squash the action output to space range
"use_lstm", # Whether to wrap the model with a LSTM
"max_seq_len", # Max seq len for training the LSTM, defaults to 20
"lstm_cell_size", # Size of the LSTM cell
"conv_filters": None,
# Nonlinearity for built-in convnet
"conv_activation": "relu",
# Nonlinearity for fully connected net (tanh, relu)
"fcnet_activation": "tanh",
# Number of hidden layers for fully connected net
"fcnet_hiddens": [256, 256],
# For control envs, documented in ray.rllib.models.Model
"free_log_std": False,
# Whether to squash the action output to space range
"squash_to_range": False,
# == LSTM ==
# Whether to wrap the model with a LSTM
"use_lstm": False,
# Max seq len for training the LSTM, defaults to 20
"max_seq_len": 20,
# Size of the LSTM cell
"lstm_cell_size": 256,
# == Atari ==
# Whether to enable framestack for Atari envs
"framestack": True,
# Final resized frame dimension
"dim": 84,
# Pytorch conv requires images to be channel-major
"channel_major": False,
# (deprecated) Converts ATARI frame to 1 Channel Grayscale image
"grayscale": False,
# (deprecated) Changes frame to range from [-1, 1] if true
"zero_mean": True,
# === Options for custom models ===
"custom_preprocessor", # Name of a custom preprocessor to use
"custom_model", # Name of a custom model to use
"custom_options", # Extra options to pass to the custom classes
]
# Name of a custom preprocessor to use
"custom_preprocessor": None,
# Name of a custom model to use
"custom_model": None,
# Extra options to pass to the custom classes
"custom_options": {},
}
# __sphinx_doc_end__
class ModelCatalog(object):
@@ -71,10 +94,7 @@ class ModelCatalog(object):
dist_dim (int): The size of the input vector to the distribution.
"""
# TODO(ekl) are list spaces valid?
if isinstance(action_space, list):
action_space = gym.spaces.Tuple(action_space)
config = config or {}
config = config or MODEL_DEFAULTS
if isinstance(action_space, gym.spaces.Box):
if dist_type is None:
dist = DiagGaussian
@@ -82,7 +102,7 @@ class ModelCatalog(object):
dist = squash_to_range(dist, action_space.low,
action_space.high)
return dist, action_space.shape[0] * 2
elif dist_type == 'deterministic':
elif dist_type == "deterministic":
return Deterministic, action_space.shape[0]
elif isinstance(action_space, gym.spaces.Discrete):
return Categorical, action_space.n
@@ -154,6 +174,7 @@ class ModelCatalog(object):
model (Model): Neural network model.
"""
options = options or MODEL_DEFAULTS
model = ModelCatalog._get_model(inputs, num_outputs, options, state_in,
seq_lens)
@@ -165,7 +186,7 @@ class ModelCatalog(object):
@staticmethod
def _get_model(inputs, num_outputs, options, state_in, seq_lens):
if "custom_model" in options:
if options.get("custom_model"):
model = options["custom_model"]
print("Using custom model {}".format(model))
return _global_registry.get(RLLIB_MODEL, model)(
@@ -183,7 +204,7 @@ class ModelCatalog(object):
return FullyConnectedNetwork(inputs, num_outputs, options)
@staticmethod
def get_torch_model(input_shape, num_outputs, options={}):
def get_torch_model(input_shape, num_outputs, options=None):
"""Returns a PyTorch suitable model. This is currently only supported
in A3C.
@@ -200,7 +221,8 @@ class ModelCatalog(object):
from ray.rllib.models.pytorch.visionnet import (VisionNetwork as
PyTorchVisionNet)
if "custom_model" in options:
options = options or MODEL_DEFAULTS
if options.get("custom_model"):
model = options["custom_model"]
print("Using custom torch model {}".format(model))
return _global_registry.get(RLLIB_MODEL, model)(
@@ -217,7 +239,7 @@ class ModelCatalog(object):
return PyTorchFCNet(input_shape[0], num_outputs, options)
@staticmethod
def get_preprocessor(env, options={}):
def get_preprocessor(env, options=None):
"""Returns a suitable processor for the given environment.
Args:
@@ -227,12 +249,13 @@ class ModelCatalog(object):
Returns:
preprocessor (Preprocessor): Preprocessor for the env observations.
"""
options = options or MODEL_DEFAULTS
for k in options.keys():
if k not in MODEL_CONFIGS:
if k not in MODEL_DEFAULTS:
raise Exception("Unknown config key `{}`, all keys: {}".format(
k, MODEL_CONFIGS))
k, list(MODEL_DEFAULTS)))
if "custom_preprocessor" in options:
if options.get("custom_preprocessor"):
preprocessor = options["custom_preprocessor"]
print("Using custom preprocessor {}".format(preprocessor))
return _global_registry.get(RLLIB_PREPROCESSOR, preprocessor)(
@@ -242,7 +265,7 @@ class ModelCatalog(object):
return preprocessor(env.observation_space, options)
@staticmethod
def get_preprocessor_as_wrapper(env, options={}):
def get_preprocessor_as_wrapper(env, options=None):
"""Returns a preprocessor as a gym observation wrapper.
Args:
@@ -253,6 +276,7 @@ class ModelCatalog(object):
wrapper (gym.ObservationWrapper): Preprocessor in wrapper form.
"""
options = options or MODEL_DEFAULTS
preprocessor = ModelCatalog.get_preprocessor(env, options)
return _RLlibPreprocessorWrapper(env, preprocessor)
+2 -2
View File
@@ -13,8 +13,8 @@ class FullyConnectedNetwork(Model):
"""Generic fully connected network."""
def _build_layers(self, inputs, num_outputs, options):
hiddens = options.get("fcnet_hiddens", [256, 256])
activation = get_activation_fn(options.get("fcnet_activation", "tanh"))
hiddens = options.get("fcnet_hiddens")
activation = get_activation_fn(options.get("fcnet_activation"))
with tf.name_scope("fc_net"):
i = 1
+1 -1
View File
@@ -135,7 +135,7 @@ class LSTM(Model):
"""
def _build_layers(self, inputs, num_outputs, options):
cell_size = options.get("lstm_cell_size", 256)
cell_size = options.get("lstm_cell_size")
last_layer = add_time_dimension(inputs, self.seq_lens)
# Setup the LSTM cell
+1 -1
View File
@@ -55,7 +55,7 @@ class Model(object):
self.seq_lens = tf.placeholder(
dtype=tf.int32, shape=[None], name="seq_lens")
if options.get("free_log_std", False):
if options.get("free_log_std"):
assert num_outputs % 2 == 0
num_outputs = num_outputs // 2
self.outputs, self.last_layer = self._build_layers(
+12 -6
View File
@@ -30,12 +30,18 @@ class Preprocessor(object):
raise NotImplementedError
class AtariPixelPreprocessor(Preprocessor):
class GenericPixelPreprocessor(Preprocessor):
"""Generic image preprocessor.
Note: for Atari games, use config {"preprocessor_pref": "deepmind"}
instead for deepmind-style Atari preprocessing.
"""
def _init(self):
self._grayscale = self._options.get("grayscale", False)
self._zero_mean = self._options.get("zero_mean", True)
self._dim = self._options.get("dim", 84)
self._channel_major = self._options.get("channel_major", False)
self._grayscale = self._options.get("grayscale")
self._zero_mean = self._options.get("zero_mean")
self._dim = self._options.get("dim")
self._channel_major = self._options.get("channel_major")
if self._grayscale:
self.shape = (self._dim, self._dim, 1)
else:
@@ -130,7 +136,7 @@ def get_preprocessor(space):
if isinstance(space, gym.spaces.Discrete):
preprocessor = OneHotPreprocessor
elif obs_shape == ATARI_OBS_SHAPE:
preprocessor = AtariPixelPreprocessor
preprocessor = GenericPixelPreprocessor
elif obs_shape == ATARI_RAM_OBS_SHAPE:
preprocessor = AtariRamPreprocessor
elif isinstance(space, gym.spaces.Tuple):
+2 -2
View File
@@ -18,11 +18,11 @@ class VisionNetwork(Model):
inputs (tuple): (channels, rows/height, cols/width)
num_outputs (int): logits size
"""
filters = options.get("conv_filters", [
filters = options.get("conv_filters") or [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [11, 11], 1],
])
]
layers = []
in_channels, in_size = inputs[0], inputs[1:]
+2 -2
View File
@@ -17,7 +17,7 @@ class VisionNetwork(Model):
if not filters:
filters = get_filter_config(options)
activation = get_activation_fn(options.get("conv_activation", "relu"))
activation = get_activation_fn(options.get("conv_activation"))
with tf.name_scope("vision_net"):
for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
@@ -57,7 +57,7 @@ def get_filter_config(options):
[32, [4, 4], 2],
[256, [11, 11], 1],
]
dim = options.get("dim", 84)
dim = options.get("dim")
if dim == 84:
return filters_84x84
elif dim == 42:
@@ -19,10 +19,6 @@ ACTION_SPACES_TO_TEST = {
Box(0.0, 1.0, (5, ), dtype=np.float32),
Box(0.0, 1.0, (5, ), dtype=np.float32)
]),
"implicit_tuple": [
Box(0.0, 1.0, (5, ), dtype=np.float32),
Box(0.0, 1.0, (5, ), dtype=np.float32)
],
"mixed_tuple": Tuple(
[Discrete(2),
Discrete(3),