[rllib] Update Docs for RLLib (#1248)

* init_changes * last_changes * addressing comments * fix comments * update * nit
2026-06-27 22:53:20 +08:00 · 2017-11-24 10:36:57 -08:00
parent 7af5292646
commit f34d705178
5 changed files with 97 additions and 81 deletions
@@ -28,53 +28,4 @@ The available algorithms are:
   `A3C <https://arxiv.org/abs/1602.01783>`__ based on `the OpenAI
   starter agent <https://github.com/openai/universe-starter-agent>`__.

-Storing logs
------------
-
-You can store the algorithm configuration (including hyperparameters) and
-training results on a filesystem with the ``--upload-dir`` flag. Two protocols
-are supported at the moment:
-
- ``--upload-dir file:///tmp/ray/`` will store the logs on the local filesystem
-  in a subdirectory of /tmp/ray which is named after the algorithm name, the
-  environment and the current date. This is the default.
-
- ``--upload-dir s3://bucketname/`` will store the logs in S3. Note that if you
-  store the logs in S3, TensorFlow files will not currently be stored because
-  TensorFlow doesn't support directly uploading files to S3 at the moment.
-
-Querying logs with Athena
-------------------------
-
-If you stored the logs in S3 or uploaded them there from the local file system,
-they can be queried with Athena. First create tables containing the
-experimental results with
-
-.. code:: sql
-
-    CREATE EXTERNAL TABLE IF NOT EXISTS experiments (
-      experiment_id STRING,
-      env_name STRING,
-      alg STRING,
-      -- result.json
-      training_iteration INT,
-      episode_reward_mean FLOAT,
-      episode_len_mean FLOAT
-    ) ROW FORMAT serde 'org.apache.hive.hcatalog.data.JsonSerDe'
-    LOCATION 's3://bucketname/'
-
-and then you can for example visualize the results with
-
-.. code:: sql
-
-    SELECT c.experiment_id, c.env_name, c.alg, a.episode_reward_mean, a.episode_len_mean
-    FROM experiments a
-    LEFT OUTER JOIN experiments b
-        ON a.experiment_id = b.experiment_id AND a.training_iteration < b.training_iteration
-    INNER JOIN experiments c
-        ON a.experiment_id = c.experiment_id
-    WHERE b.experiment_id IS NULL AND a.training_iteration IS NOT NULL AND c.alg is NOT NULL;
-
-This query selects last iteration from each experiment (see `this
-stackoverflow
-post <https://stackoverflow.com/questions/7745609/sql-select-only-rows-with-max-value-on-a-column>`__).
+Documentation can be `found here <http://ray.readthedocs.io/en/latest/rllib.html>`__.
@@ -27,14 +27,7 @@ MODEL_CONFIGS = [


 class ModelCatalog(object):
-    """Registry of default models and action distributions for envs.
-
-    Example:
-        dist_class, dist_dim = ModelCatalog.get_action_dist(env.action_space)
-        model = ModelCatalog.get_model(inputs, dist_dim)
-        dist = dist_class(model.outputs)
-        action_op = dist.sample()
-    """
+    """Registry of default models and action distributions for envs."""

    ATARI_OBS_SHAPE = (210, 160, 3)
    ATARI_RAM_OBS_SHAPE = (128,)
@@ -47,7 +40,7 @@ class ModelCatalog(object):

        Args:
            action_space (Space): Action space of the target gym env.
-            dist_type (Optional[str]): Identifier of the action distribution.
+            dist_type (str): Optional identifier of the action distribution.

        Returns:
            dist_class (ActionDistribution): Python class of the distribution.
@@ -87,10 +80,11 @@ class ModelCatalog(object):

    @staticmethod
    def get_torch_model(input_shape, num_outputs, options=dict()):
-        """Returns a PyTorch suitable model.
+        """Returns a suitable PyTorch model. This is currently only supported
+        in A3C.

        Args:
-            input_shape (tup): The input shape to the model.
+            input_shape (tuple): The input shape to the model.
            num_outputs (int): The size of the output vector of the model.
            options (dict): Optional args to pass to the model constructor.

@@ -13,6 +13,9 @@ from ray.rllib.models.model import Model


 class LSTM(Model):
+    """Vision LSTM network based on the implementation here:
+    https://github.com/openai/universe-starter-agent"""
+
    # TODO(rliaw): Add LSTM code for other algorithms
    def _init(self, inputs, num_outputs, options):
        use_tf100_api = (distutils.version.LooseVersion(tf.VERSION) >=
@@ -15,7 +15,7 @@ class Model(object):
    The last layer of the network can also be retrieved if the algorithm
    needs further post-processing (e.g. Actor and Critic networks in A3C).

-    If options["free_log_std"] is True, the last half of the
+    If `options["free_log_std"]` is True, the last half of the
    output layer will be free variables that are not dependent on
    inputs. This is often used if the output of the network is used
    to parametrize a probability distribution. In this case, the