mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 22:53:20 +08:00
[rllib] Update Docs for RLLib (#1248)
* init_changes * last_changes * addressing comments * fix comments * update * nit
This commit is contained in:
@@ -28,53 +28,4 @@ The available algorithms are:
|
||||
`A3C <https://arxiv.org/abs/1602.01783>`__ based on `the OpenAI
|
||||
starter agent <https://github.com/openai/universe-starter-agent>`__.
|
||||
|
||||
Storing logs
|
||||
------------
|
||||
|
||||
You can store the algorithm configuration (including hyperparameters) and
|
||||
training results on a filesystem with the ``--upload-dir`` flag. Two protocols
|
||||
are supported at the moment:
|
||||
|
||||
- ``--upload-dir file:///tmp/ray/`` will store the logs on the local filesystem
|
||||
in a subdirectory of /tmp/ray which is named after the algorithm name, the
|
||||
environment and the current date. This is the default.
|
||||
|
||||
- ``--upload-dir s3://bucketname/`` will store the logs in S3. Note that if you
|
||||
store the logs in S3, TensorFlow files will not currently be stored because
|
||||
TensorFlow doesn't support directly uploading files to S3 at the moment.
|
||||
|
||||
Querying logs with Athena
|
||||
-------------------------
|
||||
|
||||
If you stored the logs in S3 or uploaded them there from the local file system,
|
||||
they can be queried with Athena. First create tables containing the
|
||||
experimental results with
|
||||
|
||||
.. code:: sql
|
||||
|
||||
CREATE EXTERNAL TABLE IF NOT EXISTS experiments (
|
||||
experiment_id STRING,
|
||||
env_name STRING,
|
||||
alg STRING,
|
||||
-- result.json
|
||||
training_iteration INT,
|
||||
episode_reward_mean FLOAT,
|
||||
episode_len_mean FLOAT
|
||||
) ROW FORMAT serde 'org.apache.hive.hcatalog.data.JsonSerDe'
|
||||
LOCATION 's3://bucketname/'
|
||||
|
||||
and then you can for example visualize the results with
|
||||
|
||||
.. code:: sql
|
||||
|
||||
SELECT c.experiment_id, c.env_name, c.alg, a.episode_reward_mean, a.episode_len_mean
|
||||
FROM experiments a
|
||||
LEFT OUTER JOIN experiments b
|
||||
ON a.experiment_id = b.experiment_id AND a.training_iteration < b.training_iteration
|
||||
INNER JOIN experiments c
|
||||
ON a.experiment_id = c.experiment_id
|
||||
WHERE b.experiment_id IS NULL AND a.training_iteration IS NOT NULL AND c.alg is NOT NULL;
|
||||
|
||||
This query selects the last iteration from each experiment (see `this
|
||||
stackoverflow
|
||||
post <https://stackoverflow.com/questions/7745609/sql-select-only-rows-with-max-value-on-a-column>`__).
|
||||
Documentation can be `found here <http://ray.readthedocs.io/en/latest/rllib.html>`__.
|
||||
|
||||
@@ -27,14 +27,7 @@ MODEL_CONFIGS = [
|
||||
|
||||
|
||||
class ModelCatalog(object):
|
||||
"""Registry of default models and action distributions for envs.
|
||||
|
||||
Example:
|
||||
dist_class, dist_dim = ModelCatalog.get_action_dist(env.action_space)
|
||||
model = ModelCatalog.get_model(inputs, dist_dim)
|
||||
dist = dist_class(model.outputs)
|
||||
action_op = dist.sample()
|
||||
"""
|
||||
"""Registry of default models and action distributions for envs."""
|
||||
|
||||
ATARI_OBS_SHAPE = (210, 160, 3)
|
||||
ATARI_RAM_OBS_SHAPE = (128,)
|
||||
@@ -47,7 +40,7 @@ class ModelCatalog(object):
|
||||
|
||||
Args:
|
||||
action_space (Space): Action space of the target gym env.
|
||||
dist_type (Optional[str]): Identifier of the action distribution.
|
||||
dist_type (str): Optional identifier of the action distribution.
|
||||
|
||||
Returns:
|
||||
dist_class (ActionDistribution): Python class of the distribution.
|
||||
@@ -87,10 +80,11 @@ class ModelCatalog(object):
|
||||
|
||||
@staticmethod
|
||||
def get_torch_model(input_shape, num_outputs, options=dict()):
|
||||
"""Returns a PyTorch suitable model.
|
||||
"""Returns a PyTorch suitable model. This is currently only supported
|
||||
in A3C.
|
||||
|
||||
Args:
|
||||
input_shape (tup): The input shape to the model.
|
||||
input_shape (tuple): The input shape to the model.
|
||||
num_outputs (int): The size of the output vector of the model.
|
||||
options (dict): Optional args to pass to the model constructor.
|
||||
|
||||
|
||||
@@ -13,6 +13,9 @@ from ray.rllib.models.model import Model
|
||||
|
||||
|
||||
class LSTM(Model):
|
||||
"""Vision LSTM network based here:
|
||||
https://github.com/openai/universe-starter-agent"""
|
||||
|
||||
# TODO(rliaw): Add LSTM code for other algorithms
|
||||
def _init(self, inputs, num_outputs, options):
|
||||
use_tf100_api = (distutils.version.LooseVersion(tf.VERSION) >=
|
||||
|
||||
@@ -15,7 +15,7 @@ class Model(object):
|
||||
The last layer of the network can also be retrieved if the algorithm
|
||||
needs to do further post-processing (e.g. Actor and Critic networks in A3C).
|
||||
|
||||
If options["free_log_std"] is True, the last half of the
|
||||
If `options["free_log_std"]` is True, the last half of the
|
||||
output layer will be free variables that are not dependent on
|
||||
inputs. This is often used if the output of the network is used
|
||||
to parametrize a probability distribution. In this case, the
|
||||
|
||||
Reference in New Issue
Block a user