diff --git a/doc/source/conf.py b/doc/source/conf.py index a4f9aebd4..bf8db51e5 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -19,36 +19,21 @@ import shlex # These lines added to enable Sphinx to work without installing Ray. import mock MOCK_MODULES = [ - "gym", - "gym.spaces", - "scipy", - "scipy.signal", - "tensorflow", - "tensorflow.contrib", - "tensorflow.contrib.all_reduce", - "tensorflow.contrib.all_reduce.python", - "tensorflow.contrib.layers", - "tensorflow.contrib.slim", - "tensorflow.contrib.rnn", - "tensorflow.core", - "tensorflow.core.util", - "tensorflow.python", - "tensorflow.python.client", - "tensorflow.python.util", - "ray.core.generated", + "gym", "gym.spaces", "scipy", "scipy.signal", "tensorflow", + "tensorflow.contrib", "tensorflow.contrib.all_reduce", + "tensorflow.contrib.all_reduce.python", "tensorflow.contrib.layers", + "tensorflow.contrib.slim", "tensorflow.contrib.rnn", "tensorflow.core", + "tensorflow.core.util", "tensorflow.python", "tensorflow.python.client", + "tensorflow.python.util", "ray.core.generated", "ray.core.generated.ActorCheckpointIdData", - "ray.core.generated.ClientTableData", - "ray.core.generated.GcsTableEntry", + "ray.core.generated.ClientTableData", "ray.core.generated.GcsTableEntry", "ray.core.generated.HeartbeatTableData", "ray.core.generated.HeartbeatBatchTableData", - "ray.core.generated.DriverTableData", - "ray.core.generated.ErrorTableData", + "ray.core.generated.DriverTableData", "ray.core.generated.ErrorTableData", "ray.core.generated.ProfileTableData", "ray.core.generated.ObjectTableData", - "ray.core.generated.ray.protocol.Task", - "ray.core.generated.TablePrefix", - "ray.core.generated.TablePubsub", - "ray.core.generated.Language", + "ray.core.generated.ray.protocol.Task", "ray.core.generated.TablePrefix", + "ray.core.generated.TablePubsub", "ray.core.generated.Language", "ray._raylet" ] for mod_name in MOCK_MODULES: diff --git a/doc/source/offline-q.png b/doc/source/offline-q.png index 
324288b41..dbafeef0b 100644 Binary files a/doc/source/offline-q.png and b/doc/source/offline-q.png differ diff --git a/doc/source/rllib-offline.rst b/doc/source/rllib-offline.rst index 74d25e74a..e3e935e97 100644 --- a/doc/source/rllib-offline.rst +++ b/doc/source/rllib-offline.rst @@ -44,14 +44,46 @@ Then, we can tell DQN to train using these previously generated experiences with --env=CartPole-v0 \ --config='{ "input": "/tmp/cartpole-out", + "input_evaluation": [], "exploration_final_eps": 0, "exploration_fraction": 0}' -Since the input experiences are not from running simulations, RLlib cannot report the true policy performance during training. However, you can use ``tensorboard --logdir=~/ray_results`` to monitor training progress via other metrics such as estimated Q-value: +**Off-policy estimation:** Since the input experiences are not from running simulations, RLlib cannot report the true policy performance during training. However, you can use ``tensorboard --logdir=~/ray_results`` to monitor training progress via other metrics such as estimated Q-value. Alternatively, `off-policy estimation <https://arxiv.org/pdf/1511.03722.pdf>`__ can be used, which requires both the source and target action probabilities to be available (i.e., the ``action_prob`` batch key). For DQN, this means enabling soft Q learning so that actions are sampled from a probability distribution: + +.. code-block:: bash + + $ rllib train \ + --run=DQN \ + --env=CartPole-v0 \ + --config='{ + "input": "/tmp/cartpole-out", + "input_evaluation": ["is", "wis"], + "soft_q": true, + "softmax_temp": 1.0}' + +This example plot shows the Q-value metric in addition to importance sampling (IS) and weighted importance sampling (WIS) gain estimates (>1.0 means there is an estimated improvement over the original policy): .. image:: offline-q.png -In offline input mode, no simulations are run, though you still need to specify the environment in order to define the action and observation spaces. 
If true simulation is also possible (i.e., your env supports ``step()``), you can also set ``"input_evaluation": "simulation"`` to tell RLlib to run background simulations to estimate current policy performance. The output of these simulations will not be used for learning. +**Estimator Python API:** For greater control over the evaluation process, you can create off-policy estimators in your Python code and call ``estimator.estimate(episode_batch)`` to perform counterfactual estimation as needed. The estimators take in a policy graph object and gamma value for the environment: + +.. code-block:: python + + agent = DQNAgent(...) + ... # train agent offline + + from ray.rllib.offline.json_reader import JsonReader + from ray.rllib.offline.wis_estimator import WeightedImportanceSamplingEstimator + + estimator = WeightedImportanceSamplingEstimator(agent.get_policy(), gamma=0.99) + reader = JsonReader("/path/to/data") + for _ in range(1000): + batch = reader.next() + for episode in batch.split_by_episode(): + print(estimator.estimate(episode)) + + +**Simulation-based estimation:** If true simulation is also possible (i.e., your env supports ``step()``), you can also set ``"input_evaluation": ["simulation"]`` to tell RLlib to run background simulations to estimate current policy performance. The output of these simulations will not be used for learning. Note that in all cases you still need to specify an environment object to define the action and observation spaces. However, you don't need to implement functions like reset() and step(). 
Example: Converting external experiences to batch format ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py b/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py index 90ba87dae..105996e38 100644 --- a/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py +++ b/python/ray/rllib/agents/a3c/a3c_tf_policy_graph.py @@ -97,6 +97,7 @@ class A3CPolicyGraph(LearningRateSchedule, TFPolicyGraph): self.sess, obs_input=self.observations, action_sampler=action_dist.sample(), + action_prob=action_dist.sampled_action_prob(), loss=self.model.loss() + self.loss.total_loss, loss_inputs=loss_in, state_inputs=self.model.state_in, @@ -153,7 +154,9 @@ class A3CPolicyGraph(LearningRateSchedule, TFPolicyGraph): @override(TFPolicyGraph) def extra_compute_action_fetches(self): - return {"vf_preds": self.vf} + return dict( + TFPolicyGraph.extra_compute_action_fetches(self), + **{"vf_preds": self.vf}) def _value(self, ob, *args): feed_dict = {self.observations: [ob], self.model.seq_lens: [1]} diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py index 4b56eb8e3..4e918a7c2 100644 --- a/python/ray/rllib/agents/agent.py +++ b/python/ray/rllib/agents/agent.py @@ -13,7 +13,8 @@ import tensorflow as tf from types import FunctionType import ray -from ray.rllib.offline import NoopOutput, JsonReader, MixedInput, JsonWriter +from ray.rllib.offline import NoopOutput, JsonReader, MixedInput, JsonWriter, \ + ShuffledInput from ray.rllib.models import MODEL_DEFAULTS from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator from ray.rllib.evaluation.sample_batch import DEFAULT_POLICY_ID @@ -145,18 +146,22 @@ COMMON_CONFIG = { # {"sampler": 0.4, "/tmp/*.json": 0.4, "s3://bucket/expert.json": 0.2}). # - a function that returns a rllib.offline.InputReader "input": "sampler", - # Specify how to evaluate the current policy. 
This only makes sense to set - # when the input is not already generating simulation data: - # - None: don't evaluate the policy. The episode reward and other - # metrics will be NaN if using offline data. + # Specify how to evaluate the current policy. This only has an effect when + # reading offline experiences. Available options: + # - "wis": the weighted step-wise importance sampling estimator. + # - "is": the step-wise importance sampling estimator. # - "simulation": run the environment in the background, but use # this data for evaluation only and not for learning. - "input_evaluation": None, + "input_evaluation": ["is", "wis"], # Whether to run postprocess_trajectory() on the trajectory fragments from # offline inputs. Note that postprocessing will be done using the *current* # policy, not the *behaviour* policy, which is typically undesirable for # on-policy algorithms. "postprocess_inputs": False, + # If positive, input batches will be shuffled via a sliding window buffer + # of this number of batches. Use this if the input data is not in random + # enough order. Input is delayed until the shuffle buffer is filled. 
+ "shuffle_buffer_size": 0, # __sphinx_doc_input_end__ # __sphinx_doc_output_begin__ # Specify where experiences should be saved: @@ -552,10 +557,10 @@ class Agent(Trainable): raise ValueError( "The `use_gpu_for_workers` config is deprecated, please use " "`num_gpus_per_worker=1` instead.") - if (config["input"] == "sampler" - and config["input_evaluation"] is not None): + if type(config["input_evaluation"]) != list: raise ValueError( - "`input_evaluation` should not be set when input=sampler") + "`input_evaluation` must be a list of strings, got {}".format( + config["input_evaluation"])) def _make_evaluator(self, cls, @@ -575,9 +580,13 @@ class Agent(Trainable): elif config["input"] == "sampler": input_creator = (lambda ioctx: ioctx.default_sampler_input()) elif isinstance(config["input"], dict): - input_creator = (lambda ioctx: MixedInput(config["input"], ioctx)) + input_creator = (lambda ioctx: ShuffledInput( + MixedInput(config["input"], ioctx), + config["shuffle_buffer_size"])) else: - input_creator = (lambda ioctx: JsonReader(config["input"], ioctx)) + input_creator = (lambda ioctx: ShuffledInput( + JsonReader(config["input"], ioctx), + config["shuffle_buffer_size"])) if isinstance(config["output"], FunctionType): output_creator = config["output"] @@ -596,6 +605,11 @@ class Agent(Trainable): max_file_size=config["output_max_file_size"], compress_columns=config["output_compress_columns"])) + if config["input"] == "sampler": + input_evaluation = [] + else: + input_evaluation = config["input_evaluation"] + return cls( env_creator, self.config["multiagent"]["policy_graphs"] or policy_graph, @@ -622,7 +636,7 @@ class Agent(Trainable): log_level=config["log_level"], callbacks=config["callbacks"], input_creator=input_creator, - input_evaluation_method=config["input_evaluation"], + input_evaluation=input_evaluation, output_creator=output_creator, remote_worker_envs=remote_worker_envs) diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py 
b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py index ca3fae559..24276d6b7 100644 --- a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py +++ b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py @@ -269,6 +269,11 @@ class DDPGPolicyGraph(TFPolicyGraph): q_t, self.q_model = self._build_q_network( self.obs_t, observation_space, self.act_t) self.q_func_vars = _scope_vars(scope.name) + self.stats = { + "mean_q": tf.reduce_mean(q_t), + "max_q": tf.reduce_max(q_t), + "min_q": tf.reduce_min(q_t), + } with tf.variable_scope(Q_SCOPE, reuse=True): q_tp0, _ = self._build_q_network(self.obs_t, observation_space, output_actions) @@ -416,6 +421,7 @@ class DDPGPolicyGraph(TFPolicyGraph): def extra_compute_grad_fetches(self): return { "td_error": self.loss.td_error, + "stats": self.stats, } @override(PolicyGraph) diff --git a/python/ray/rllib/agents/dqn/dqn.py b/python/ray/rllib/agents/dqn/dqn.py index c48f3dd69..d36f72034 100644 --- a/python/ray/rllib/agents/dqn/dqn.py +++ b/python/ray/rllib/agents/dqn/dqn.py @@ -68,6 +68,11 @@ DEFAULT_CONFIG = with_common_config({ "exploration_final_eps": 0.02, # Update the target network every `target_network_update_freq` steps. "target_network_update_freq": 500, + # Use softmax for sampling actions. + "soft_q": False, + # Softmax temperature. Q values are divided by this value prior to softmax. + # Softmax approaches argmax as the temperature drops to zero. + "softmax_temp": 1.0, # === Replay buffer === # Size of the replay buffer. 
Note that if async_updates is set, then diff --git a/python/ray/rllib/agents/dqn/dqn_policy_graph.py b/python/ray/rllib/agents/dqn/dqn_policy_graph.py index f9e15299a..57f629f7a 100644 --- a/python/ray/rllib/agents/dqn/dqn_policy_graph.py +++ b/python/ray/rllib/agents/dqn/dqn_policy_graph.py @@ -8,8 +8,7 @@ import tensorflow as tf import tensorflow.contrib.layers as layers import ray -from ray.rllib.models import ModelCatalog -from ray.rllib.evaluation.sample_batch import SampleBatch +from ray.rllib.models import ModelCatalog, Categorical from ray.rllib.utils.annotations import override from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.evaluation.policy_graph import PolicyGraph @@ -182,7 +181,14 @@ class QNetwork(object): class QValuePolicy(object): - def __init__(self, q_values, observations, num_actions, stochastic, eps): + def __init__(self, q_values, observations, num_actions, stochastic, eps, + softmax, softmax_temp): + if softmax: + action_dist = Categorical(q_values / softmax_temp) + self.action = action_dist.sample() + self.action_prob = action_dist.sampled_action_prob() + return + deterministic_actions = tf.argmax(q_values, axis=1) batch_size = tf.shape(observations)[0] @@ -200,6 +206,7 @@ class QValuePolicy(object): deterministic_actions) self.action = tf.cond(stochastic, lambda: stochastic_actions, lambda: deterministic_actions) + self.action_prob = None class QLoss(object): @@ -300,10 +307,12 @@ class DQNPolicyGraph(TFPolicyGraph): with tf.variable_scope(Q_SCOPE) as scope: q_values, q_logits, q_dist, _ = self._build_q_network( self.cur_observations, observation_space) + self.q_values = q_values self.q_func_vars = _scope_vars(scope.name) # Action outputs - self.output_actions = self._build_q_value_policy(q_values) + self.output_actions, self.action_prob = self._build_q_value_policy( + q_values) # Replay inputs self.obs_t = tf.placeholder( @@ -387,6 +396,7 @@ class DQNPolicyGraph(TFPolicyGraph): self.sess, 
obs_input=self.cur_observations, action_sampler=self.output_actions, + action_prob=self.action_prob, loss=model.loss() + self.loss.loss, loss_inputs=self.loss_inputs, update_ops=q_batchnorm_update_ops) @@ -412,6 +422,13 @@ class DQNPolicyGraph(TFPolicyGraph): grads_and_vars = [(g, v) for (g, v) in grads_and_vars if g is not None] return grads_and_vars + @override(TFPolicyGraph) + def extra_compute_action_fetches(self): + return dict( + TFPolicyGraph.extra_compute_action_fetches(self), **{ + "q_values": self.q_values, + }) + @override(TFPolicyGraph) def extra_compute_action_feed_dict(self): return { @@ -474,8 +491,10 @@ class DQNPolicyGraph(TFPolicyGraph): return qnet.value, qnet.logits, qnet.dist, qnet.model def _build_q_value_policy(self, q_values): - return QValuePolicy(q_values, self.cur_observations, self.num_actions, - self.stochastic, self.eps).action + policy = QValuePolicy( + q_values, self.cur_observations, self.num_actions, self.stochastic, + self.eps, self.config["soft_q"], self.config["softmax_temp"]) + return policy.action, policy.action_prob def _build_q_loss(self, q_t_selected, q_logits_t_selected, q_tp1_best, q_dist_tp1_best): @@ -511,26 +530,16 @@ def _adjust_nstep(n_step, gamma, obs, actions, rewards, new_obs, dones): rewards[i] += gamma**j * rewards[i + j] -def _postprocess_dqn(policy_graph, sample_batch): - obs, actions, rewards, new_obs, dones = [ - list(x) for x in sample_batch.columns( - ["obs", "actions", "rewards", "new_obs", "dones"]) - ] - +def _postprocess_dqn(policy_graph, batch): # N-step Q adjustments if policy_graph.config["n_step"] > 1: _adjust_nstep(policy_graph.config["n_step"], - policy_graph.config["gamma"], obs, actions, rewards, - new_obs, dones) + policy_graph.config["gamma"], batch["obs"], + batch["actions"], batch["rewards"], batch["new_obs"], + batch["dones"]) - batch = SampleBatch({ - "obs": obs, - "actions": actions, - "rewards": rewards, - "new_obs": new_obs, - "dones": dones, - "weights": np.ones_like(rewards) - }) + 
if "weights" not in batch: + batch["weights"] = np.ones_like(batch["rewards"]) # Prioritize on the worker side if batch.count > 0 and policy_graph.config["worker_side_prioritization"]: diff --git a/python/ray/rllib/agents/impala/vtrace_policy_graph.py b/python/ray/rllib/agents/impala/vtrace_policy_graph.py index 127c3f9c5..af9f0397f 100644 --- a/python/ray/rllib/agents/impala/vtrace_policy_graph.py +++ b/python/ray/rllib/agents/impala/vtrace_policy_graph.py @@ -215,6 +215,7 @@ class VTracePolicyGraph(LearningRateSchedule, TFPolicyGraph): self.sess, obs_input=observations, action_sampler=action_dist.sample(), + action_prob=action_dist.sampled_action_prob(), loss=self.model.loss() + self.loss.total_loss, loss_inputs=loss_in, state_inputs=self.model.state_in, @@ -270,7 +271,9 @@ class VTracePolicyGraph(LearningRateSchedule, TFPolicyGraph): @override(TFPolicyGraph) def extra_compute_action_fetches(self): - return {"behaviour_logits": self.model.outputs} + return dict( + TFPolicyGraph.extra_compute_action_fetches(self), + **{"behaviour_logits": self.model.outputs}) @override(TFPolicyGraph) def extra_compute_grad_fetches(self): diff --git a/python/ray/rllib/agents/marwil/marwil.py b/python/ray/rllib/agents/marwil/marwil.py index 2c1230fb1..d0be3fbaa 100644 --- a/python/ray/rllib/agents/marwil/marwil.py +++ b/python/ray/rllib/agents/marwil/marwil.py @@ -19,8 +19,8 @@ DEFAULT_CONFIG = with_common_config({ "postprocess_inputs": True, # Whether to rollout "complete_episodes" or "truncate_episodes" "batch_mode": "complete_episodes", - # Read data from historic data and evaluate by a sampler - "input_evaluation": "simulation", + # Use importance sampling estimators for reward + "input_evaluation": ["is", "wis"], # Learning rate for adam optimizer "lr": 1e-4, # Number of timesteps collected for each SGD round diff --git a/python/ray/rllib/agents/marwil/marwil_policy_graph.py b/python/ray/rllib/agents/marwil/marwil_policy_graph.py index fafd9f657..03f87ca2a 100644 --- 
a/python/ray/rllib/agents/marwil/marwil_policy_graph.py +++ b/python/ray/rllib/agents/marwil/marwil_policy_graph.py @@ -107,6 +107,7 @@ class MARWILPolicyGraph(TFPolicyGraph): self.sess, obs_input=self.obs_t, action_sampler=self.output_actions, + action_prob=action_dist.sampled_action_prob(), loss=self.model.loss() + objective, loss_inputs=self.loss_inputs, state_inputs=self.model.state_in, diff --git a/python/ray/rllib/agents/pg/pg_policy_graph.py b/python/ray/rllib/agents/pg/pg_policy_graph.py index 1594a4934..c8f9a1185 100644 --- a/python/ray/rllib/agents/pg/pg_policy_graph.py +++ b/python/ray/rllib/agents/pg/pg_policy_graph.py @@ -67,6 +67,7 @@ class PGPolicyGraph(TFPolicyGraph): sess, obs_input=obs, action_sampler=action_dist.sample(), + action_prob=action_dist.sampled_action_prob(), loss=self.model.loss() + loss, loss_inputs=loss_in, state_inputs=self.model.state_in, diff --git a/python/ray/rllib/agents/ppo/appo_policy_graph.py b/python/ray/rllib/agents/ppo/appo_policy_graph.py index f5533f137..ace8f39be 100644 --- a/python/ray/rllib/agents/ppo/appo_policy_graph.py +++ b/python/ray/rllib/agents/ppo/appo_policy_graph.py @@ -320,6 +320,7 @@ class AsyncPPOPolicyGraph(LearningRateSchedule, TFPolicyGraph): self.sess, obs_input=observations, action_sampler=action_dist.sample(), + action_prob=action_dist.sampled_action_prob(), loss=self.model.loss() + self.loss.total_loss, loss_inputs=loss_in, state_inputs=self.model.state_in, @@ -373,7 +374,7 @@ class AsyncPPOPolicyGraph(LearningRateSchedule, TFPolicyGraph): out = {"behaviour_logits": self.model.outputs} if not self.config["vtrace"]: out["vf_preds"] = self.value_function - return out + return dict(TFPolicyGraph.extra_compute_action_fetches(self), **out) def extra_compute_grad_fetches(self): return self.stats_fetches diff --git a/python/ray/rllib/agents/ppo/ppo_policy_graph.py b/python/ray/rllib/agents/ppo/ppo_policy_graph.py index d3ba03555..611ba299f 100644 --- a/python/ray/rllib/agents/ppo/ppo_policy_graph.py +++ 
b/python/ray/rllib/agents/ppo/ppo_policy_graph.py @@ -234,6 +234,7 @@ class PPOPolicyGraph(LearningRateSchedule, TFPolicyGraph): self.sess, obs_input=obs_ph, action_sampler=self.sampler, + action_prob=curr_action_dist.sampled_action_prob(), loss=self.model.loss() + self.loss_obj.loss, loss_inputs=self.loss_in, state_inputs=self.model.state_in, @@ -307,7 +308,11 @@ class PPOPolicyGraph(LearningRateSchedule, TFPolicyGraph): @override(TFPolicyGraph) def extra_compute_action_fetches(self): - return {"vf_preds": self.value_function, "logits": self.logits} + return dict( + TFPolicyGraph.extra_compute_action_fetches(self), **{ + "vf_preds": self.value_function, + "logits": self.logits + }) @override(TFPolicyGraph) def extra_compute_grad_fetches(self): diff --git a/python/ray/rllib/evaluation/metrics.py b/python/ray/rllib/evaluation/metrics.py index 84e8cb750..511e96937 100644 --- a/python/ray/rllib/evaluation/metrics.py +++ b/python/ray/rllib/evaluation/metrics.py @@ -8,6 +8,8 @@ import collections import ray from ray.rllib.evaluation.sample_batch import DEFAULT_POLICY_ID +from ray.rllib.evaluation.sampler import RolloutMetrics +from ray.rllib.offline.off_policy_estimator import OffPolicyEstimate from ray.rllib.utils.annotations import DeveloperAPI logger = logging.getLogger(__name__) @@ -31,15 +33,14 @@ def collect_episodes(local_evaluator, """Gathers new episodes metrics tuples from the given evaluators.""" pending = [ - a.apply.remote(lambda ev: ev.sampler.get_metrics()) - for a in remote_evaluators + a.apply.remote(lambda ev: ev.get_metrics()) for a in remote_evaluators ] collected, _ = ray.wait( pending, num_returns=len(pending), timeout=timeout_seconds * 1.0) num_metric_batches_dropped = len(pending) - len(collected) metric_lists = ray.get(collected) - metric_lists.append(local_evaluator.sampler.get_metrics()) + metric_lists.append(local_evaluator.get_metrics()) episodes = [] for metrics in metric_lists: episodes.extend(metrics) @@ -60,6 +61,9 @@ def 
summarize_episodes(episodes, new_episodes, num_dropped): logger.warning("WARNING: {} workers have NOT returned metrics".format( num_dropped)) + episodes, estimates = _partition(episodes) + new_episodes, _ = _partition(new_episodes) + episode_rewards = [] episode_lengths = [] policy_rewards = collections.defaultdict(list) @@ -95,6 +99,16 @@ def summarize_episodes(episodes, new_episodes, num_dropped): custom_metrics[k + "_max"] = float("nan") del custom_metrics[k] + estimators = collections.defaultdict(lambda: collections.defaultdict(list)) + for e in estimates: + acc = estimators[e.estimator_name] + for k, v in e.metrics.items(): + acc[k].append(v) + for name, metrics in estimators.items(): + for k, v_list in metrics.items(): + metrics[k] = np.mean(v_list) + estimators[name] = dict(metrics) + return dict( episode_reward_max=max_reward, episode_reward_min=min_reward, @@ -103,4 +117,19 @@ def summarize_episodes(episodes, new_episodes, num_dropped): episodes_this_iter=len(new_episodes), policy_reward_mean=dict(policy_rewards), custom_metrics=dict(custom_metrics), + off_policy_estimator=dict(estimators), num_metric_batches_dropped=num_dropped) + + +def _partition(episodes): + """Divides metrics data into true rollouts vs off-policy estimates.""" + + rollouts, estimates = [], [] + for e in episodes: + if isinstance(e, RolloutMetrics): + rollouts.append(e) + elif isinstance(e, OffPolicyEstimate): + estimates.append(e) + else: + raise ValueError("Unknown metric type: {}".format(e)) + return rollouts, estimates diff --git a/python/ray/rllib/evaluation/policy_evaluator.py b/python/ray/rllib/evaluation/policy_evaluator.py index bcc9ee94c..39e395cee 100644 --- a/python/ray/rllib/evaluation/policy_evaluator.py +++ b/python/ray/rllib/evaluation/policy_evaluator.py @@ -19,6 +19,8 @@ from ray.rllib.evaluation.sampler import AsyncSampler, SyncSampler from ray.rllib.evaluation.policy_graph import PolicyGraph from ray.rllib.evaluation.tf_policy_graph import TFPolicyGraph from 
ray.rllib.offline import NoopOutput, IOContext, OutputWriter, InputReader +from ray.rllib.offline.is_estimator import ImportanceSamplingEstimator +from ray.rllib.offline.wis_estimator import WeightedImportanceSamplingEstimator from ray.rllib.models import ModelCatalog from ray.rllib.models.preprocessors import NoPreprocessor from ray.rllib.utils import merge_dicts @@ -116,7 +118,7 @@ class PolicyEvaluator(EvaluatorInterface): log_level=None, callbacks=None, input_creator=lambda ioctx: ioctx.default_sampler_input(), - input_evaluation_method=None, + input_evaluation=frozenset([]), output_creator=lambda ioctx: NoopOutput(), remote_worker_envs=False): """Initialize a policy evaluator. @@ -184,11 +186,11 @@ class PolicyEvaluator(EvaluatorInterface): callbacks (dict): Dict of custom debug callbacks. input_creator (func): Function that returns an InputReader object for loading previous generated experiences. - input_evaluation_method (str): How to evaluate the current policy. - This only applies when the input is reading offline data. - Options are: - - None: don't evaluate the policy. The episode reward and - other metrics will be NaN. + input_evaluation (list): How to evaluate the policy performance. + This only makes sense to set when the input is reading offline + data. The possible values include: + - "is": the step-wise importance sampling estimator. + - "wis": the weighted step-wise is estimator. - "simulation": run the environment in the background, but use this data for evaluation only and never for learning. 
output_creator (func): Function that returns an OutputWriter object @@ -316,16 +318,24 @@ class PolicyEvaluator(EvaluatorInterface): raise ValueError("Unsupported batch mode: {}".format( self.batch_mode)) - if input_evaluation_method == "simulation": - logger.warning( - "Requested 'simulation' input evaluation method: " - "will discard all sampler outputs and keep only metrics.") - sample_async = True - elif input_evaluation_method is None: - pass - else: - raise ValueError("Unknown evaluation method: {}".format( - input_evaluation_method)) + self.io_context = IOContext(log_dir, policy_config, worker_index, self) + self.reward_estimators = [] + for method in input_evaluation: + if method == "simulation": + logger.warning( + "Requested 'simulation' input evaluation method: " + "will discard all sampler outputs and keep only metrics.") + sample_async = True + elif method == "is": + ise = ImportanceSamplingEstimator.create(self.io_context) + self.reward_estimators.append(ise) + elif method == "wis": + wise = WeightedImportanceSamplingEstimator.create( + self.io_context) + self.reward_estimators.append(wise) + else: + raise ValueError( + "Unknown evaluation method: {}".format(method)) if sample_async: self.sampler = AsyncSampler( @@ -341,7 +351,7 @@ class PolicyEvaluator(EvaluatorInterface): pack=pack_episodes, tf_sess=self.tf_sess, clip_actions=clip_actions, - blackhole_outputs=input_evaluation_method == "simulation") + blackhole_outputs="simulation" in input_evaluation) self.sampler.start() else: self.sampler = SyncSampler( @@ -358,7 +368,6 @@ class PolicyEvaluator(EvaluatorInterface): tf_sess=self.tf_sess, clip_actions=clip_actions) - self.io_context = IOContext(log_dir, policy_config, worker_index, self) self.input_reader = input_creator(self.io_context) assert isinstance(self.input_reader, InputReader), self.input_reader self.output_writer = output_creator(self.io_context) @@ -402,6 +411,12 @@ class PolicyEvaluator(EvaluatorInterface): # for better compression 
inside the writer. self.output_writer.write(batch) + # Do off-policy estimation if needed + if self.reward_estimators: + for sub_batch in batch.split_by_episode(): + for estimator in self.reward_estimators: + estimator.process(sub_batch) + if self.compress_observations: if isinstance(batch, MultiAgentBatch): for data in batch.policy_batches.values(): @@ -504,6 +519,15 @@ class PolicyEvaluator(EvaluatorInterface): self.policy_map[DEFAULT_POLICY_ID].learn_on_batch(samples)) return grad_fetch + @DeveloperAPI + def get_metrics(self): + """Returns a list of new RolloutMetric objects from evaluation.""" + + out = self.sampler.get_metrics() + for m in self.reward_estimators: + out.extend(m.get_metrics()) + return out + @DeveloperAPI def foreach_env(self, func): """Apply the given function to each underlying env instance.""" diff --git a/python/ray/rllib/evaluation/sampler.py b/python/ray/rllib/evaluation/sampler.py index 11ad4c8e0..5a40fe8a5 100644 --- a/python/ray/rllib/evaluation/sampler.py +++ b/python/ray/rllib/evaluation/sampler.py @@ -16,6 +16,8 @@ from ray.rllib.evaluation.tf_policy_graph import TFPolicyGraph from ray.rllib.env.base_env import BaseEnv from ray.rllib.env.atari_wrappers import get_wrapper_by_cls, MonitorEnv from ray.rllib.models.action_dist import TupleActions +from ray.rllib.offline import InputReader +from ray.rllib.utils.annotations import override from ray.rllib.utils.tf_run_builder import TFRunBuilder logger = logging.getLogger(__name__) @@ -31,7 +33,20 @@ PolicyEvalData = namedtuple("PolicyEvalData", [ ]) -class SyncSampler(object): +class SamplerInput(InputReader): + """Reads input experiences from an existing sampler.""" + + @override(InputReader) + def next(self): + batches = [self.get_data()] + batches.extend(self.get_extra_batches()) + if len(batches) > 1: + return batches[0].concat_samples(batches) + else: + return batches[0] + + +class SyncSampler(SamplerInput): def __init__(self, env, policies, @@ -87,7 +102,7 @@ class 
SyncSampler(object): return extra -class AsyncSampler(threading.Thread): +class AsyncSampler(threading.Thread, SamplerInput): def __init__(self, env, policies, diff --git a/python/ray/rllib/evaluation/tf_policy_graph.py b/python/ray/rllib/evaluation/tf_policy_graph.py index 9946bdcd9..3271b49e6 100644 --- a/python/ray/rllib/evaluation/tf_policy_graph.py +++ b/python/ray/rllib/evaluation/tf_policy_graph.py @@ -52,6 +52,7 @@ class TFPolicyGraph(PolicyGraph): action_sampler, loss, loss_inputs, + action_prob=None, state_inputs=None, state_outputs=None, prev_action_input=None, @@ -77,6 +78,7 @@ class TFPolicyGraph(PolicyGraph): and has shape [BATCH_SIZE, data...]. These keys will be read from postprocessed sample batches and fed into the specified placeholders during loss computation. + action_prob (Tensor): probability of the sampled action. state_inputs (list): list of RNN state input Tensors. state_outputs (list): list of RNN state output Tensors. prev_action_input (Tensor): placeholder for previous actions @@ -104,6 +106,7 @@ class TFPolicyGraph(PolicyGraph): self._loss_inputs = loss_inputs self._loss_input_dict = dict(self._loss_inputs) self._is_training = self._get_is_training_placeholder() + self._action_prob = action_prob self._state_inputs = state_inputs or [] self._state_outputs = state_outputs or [] for i, ph in enumerate(self._state_inputs): @@ -231,8 +234,14 @@ class TFPolicyGraph(PolicyGraph): @DeveloperAPI def extra_compute_action_fetches(self): - """Extra values to fetch and return from compute_actions().""" - return {} # e.g, value function + """Extra values to fetch and return from compute_actions(). + + By default we only return action probability info (if present). 
+ """ + if self._action_prob is not None: + return {"action_prob": self._action_prob} + else: + return {} @DeveloperAPI def extra_compute_grad_feed_dict(self): diff --git a/python/ray/rllib/examples/saving_experiences.py b/python/ray/rllib/examples/saving_experiences.py index e866a8186..7a29b0fe7 100644 --- a/python/ray/rllib/examples/saving_experiences.py +++ b/python/ray/rllib/examples/saving_experiences.py @@ -33,6 +33,7 @@ if __name__ == "__main__": agent_index=0, obs=obs, actions=action, + action_prob=1.0, # put the true action probability here rewards=rew, prev_actions=prev_action, prev_rewards=prev_reward, diff --git a/python/ray/rllib/models/action_dist.py b/python/ray/rllib/models/action_dist.py index cad979201..5de4ea225 100644 --- a/python/ray/rllib/models/action_dist.py +++ b/python/ray/rllib/models/action_dist.py @@ -24,6 +24,7 @@ class ActionDistribution(object): @DeveloperAPI def __init__(self, inputs): self.inputs = inputs + self.sample_op = self._build_sample_op() @DeveloperAPI def logp(self, x): @@ -37,13 +38,27 @@ class ActionDistribution(object): @DeveloperAPI def entropy(self): - """The entroy of the action distribution.""" + """The entropy of the action distribution.""" + raise NotImplementedError + + @DeveloperAPI + def _build_sample_op(self): + """Implement this instead of sample(), to enable op reuse. + + This is needed since the sample op is non-deterministic and is shared + between sample() and sampled_action_prob(). 
+ """ raise NotImplementedError @DeveloperAPI def sample(self): """Draw a sample from the action distribution.""" - raise NotImplementedError + return self.sample_op + + @DeveloperAPI + def sampled_action_prob(self): + """Returns the probability of the sampled action.""" + return tf.exp(self.logp(self.sample_op)) class Categorical(ActionDistribution): @@ -95,7 +110,7 @@ class Categorical(ActionDistribution): p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), reduction_indices=[1]) @override(ActionDistribution) - def sample(self): + def _build_sample_op(self): return tf.squeeze(tf.multinomial(self.inputs, 1), axis=1) @@ -107,11 +122,11 @@ class DiagGaussian(ActionDistribution): """ def __init__(self, inputs): - ActionDistribution.__init__(self, inputs) mean, log_std = tf.split(inputs, 2, axis=1) self.mean = mean self.log_std = log_std self.std = tf.exp(log_std) + ActionDistribution.__init__(self, inputs) @override(ActionDistribution) def logp(self, x): @@ -136,7 +151,7 @@ class DiagGaussian(ActionDistribution): reduction_indices=[1]) @override(ActionDistribution) - def sample(self): + def _build_sample_op(self): return self.mean + self.std * tf.random_normal(tf.shape(self.mean)) @@ -147,7 +162,11 @@ class Deterministic(ActionDistribution): """ @override(ActionDistribution) - def sample(self): + def sampled_action_prob(self): + return 1.0 + + @override(ActionDistribution) + def _build_sample_op(self): return self.inputs @@ -205,5 +224,12 @@ class MultiActionDistribution(ActionDistribution): def sample(self): return TupleActions([s.sample() for s in self.child_distributions]) + @override(ActionDistribution) + def sampled_action_prob(self): + p = self.child_distributions[0].sampled_action_prob() + for c in self.child_distributions[1:]: + p *= c.sampled_action_prob() + return p + TupleActions = namedtuple("TupleActions", ["batches"]) diff --git a/python/ray/rllib/offline/__init__.py b/python/ray/rllib/offline/__init__.py index 195d9e776..a40da6c5b 100644 --- 
a/python/ray/rllib/offline/__init__.py +++ b/python/ray/rllib/offline/__init__.py @@ -8,6 +8,7 @@ from ray.rllib.offline.json_writer import JsonWriter from ray.rllib.offline.output_writer import OutputWriter, NoopOutput from ray.rllib.offline.input_reader import InputReader from ray.rllib.offline.mixed_input import MixedInput +from ray.rllib.offline.shuffled_input import ShuffledInput __all__ = [ "IOContext", @@ -17,4 +18,5 @@ __all__ = [ "OutputWriter", "InputReader", "MixedInput", + "ShuffledInput", ] diff --git a/python/ray/rllib/offline/input_reader.py b/python/ray/rllib/offline/input_reader.py index 10b66a842..073cf3148 100644 --- a/python/ray/rllib/offline/input_reader.py +++ b/python/ray/rllib/offline/input_reader.py @@ -2,7 +2,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from ray.rllib.utils.annotations import override from ray.rllib.utils.annotations import PublicAPI @@ -18,19 +17,3 @@ class InputReader(object): SampleBatch or MultiAgentBatch read. 
""" raise NotImplementedError - - -class SamplerInput(InputReader): - """Reads input experiences from an existing sampler.""" - - def __init__(self, sampler): - self.sampler = sampler - - @override(InputReader) - def next(self): - batches = [self.sampler.get_data()] - batches.extend(self.sampler.get_extra_batches()) - if len(batches) > 1: - return batches[0].concat_samples(batches) - else: - return batches[0] diff --git a/python/ray/rllib/offline/io_context.py b/python/ray/rllib/offline/io_context.py index 082696176..187c02f9c 100644 --- a/python/ray/rllib/offline/io_context.py +++ b/python/ray/rllib/offline/io_context.py @@ -4,7 +4,6 @@ from __future__ import print_function import os -from ray.rllib.offline.input_reader import SamplerInput from ray.rllib.utils.annotations import PublicAPI @@ -35,4 +34,4 @@ class IOContext(object): @PublicAPI def default_sampler_input(self): - return SamplerInput(self.evaluator.sampler) + return self.evaluator.sampler diff --git a/python/ray/rllib/offline/is_estimator.py b/python/ray/rllib/offline/is_estimator.py new file mode 100644 index 000000000..55678c951 --- /dev/null +++ b/python/ray/rllib/offline/is_estimator.py @@ -0,0 +1,46 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.rllib.offline.off_policy_estimator import OffPolicyEstimator, \ + OffPolicyEstimate +from ray.rllib.utils.annotations import override + + +class ImportanceSamplingEstimator(OffPolicyEstimator): + """The step-wise IS estimator. 
+ + Step-wise IS estimator described in https://arxiv.org/pdf/1511.03722.pdf""" + + def __init__(self, policy, gamma): + OffPolicyEstimator.__init__(self, policy, gamma) + + @override(OffPolicyEstimator) + def estimate(self, batch): + self.check_can_estimate_for(batch) + + rewards, old_prob = batch["rewards"], batch["action_prob"] + new_prob = self.action_prob(batch) + + # calculate importance ratios + p = [] + for t in range(batch.count - 1): + if t == 0: + pt_prev = 1.0 + else: + pt_prev = p[t - 1] + p.append(pt_prev * new_prob[t] / old_prob[t]) + + # calculate stepwise IS estimate + V_prev, V_step_IS = 0.0, 0.0 + for t in range(batch.count - 1): + V_prev += rewards[t] * self.gamma**t + V_step_IS += p[t] * rewards[t] * self.gamma**t + + estimation = OffPolicyEstimate( + "is", { + "V_prev": V_prev, + "V_step_IS": V_step_IS, + "V_gain_est": V_step_IS / max(1e-8, V_prev), + }) + return estimation diff --git a/python/ray/rllib/offline/json_reader.py b/python/ray/rllib/offline/json_reader.py index 7349c53fd..e9568e75c 100644 --- a/python/ray/rllib/offline/json_reader.py +++ b/python/ray/rllib/offline/json_reader.py @@ -44,6 +44,7 @@ class JsonReader(InputReader): self.ioctx = ioctx or IOContext() if isinstance(inputs, six.string_types): + inputs = os.path.abspath(os.path.expanduser(inputs)) if os.path.isdir(inputs): inputs = os.path.join(inputs, "*.json") logger.warning( diff --git a/python/ray/rllib/offline/json_writer.py b/python/ray/rllib/offline/json_writer.py index 41b264523..5613d1f67 100644 --- a/python/ray/rllib/offline/json_writer.py +++ b/python/ray/rllib/offline/json_writer.py @@ -43,13 +43,13 @@ class JsonWriter(OutputWriter): compress_columns (list): list of sample batch columns to compress. 
""" - self.path = path self.ioctx = ioctx or IOContext() self.max_file_size = max_file_size self.compress_columns = compress_columns if urlparse(path).scheme: self.path_is_uri = True else: + path = os.path.abspath(os.path.expanduser(path)) # Try to create local dirs if they don't exist try: os.makedirs(path) @@ -57,6 +57,7 @@ class JsonWriter(OutputWriter): pass # already exists assert os.path.exists(path), "Failed to create {}".format(path) self.path_is_uri = False + self.path = path self.file_index = 0 self.bytes_written = 0 self.cur_file = None diff --git a/python/ray/rllib/offline/mixed_input.py b/python/ray/rllib/offline/mixed_input.py index e6fc8fa26..1cdf75a84 100644 --- a/python/ray/rllib/offline/mixed_input.py +++ b/python/ray/rllib/offline/mixed_input.py @@ -6,10 +6,10 @@ import numpy as np from ray.rllib.offline.input_reader import InputReader from ray.rllib.offline.json_reader import JsonReader -from ray.rllib.utils.annotations import override, PublicAPI +from ray.rllib.utils.annotations import override, DeveloperAPI -@PublicAPI +@DeveloperAPI class MixedInput(InputReader): """Mixes input from a number of other input sources. @@ -21,7 +21,7 @@ class MixedInput(InputReader): }, ioctx) """ - @PublicAPI + @DeveloperAPI def __init__(self, dist, ioctx): """Initialize a MixedInput. 
diff --git a/python/ray/rllib/offline/off_policy_estimator.py b/python/ray/rllib/offline/off_policy_estimator.py new file mode 100644 index 000000000..dba85df7a --- /dev/null +++ b/python/ray/rllib/offline/off_policy_estimator.py @@ -0,0 +1,107 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import namedtuple +import logging + +from ray.rllib.evaluation.sample_batch import MultiAgentBatch +from ray.rllib.utils.annotations import DeveloperAPI + +logger = logging.getLogger(__name__) + +OffPolicyEstimate = namedtuple("OffPolicyEstimate", + ["estimator_name", "metrics"]) + + +@DeveloperAPI +class OffPolicyEstimator(object): + """Interface for an off policy reward estimator.""" + + @DeveloperAPI + def __init__(self, policy, gamma): + """Creates an off-policy estimator. + + Arguments: + policy (PolicyGraph): Policy graph to evaluate. + gamma (float): Discount of the MDP. + """ + self.policy = policy + self.gamma = gamma + self.new_estimates = [] + + @classmethod + def create(cls, ioctx): + """Create an off-policy estimator from an IOContext.""" + gamma = ioctx.evaluator.policy_config["gamma"] + # Grab a reference to the current model + keys = list(ioctx.evaluator.policy_map.keys()) + if len(keys) > 1: + raise NotImplementedError( + "Off-policy estimation is not implemented for multi-agent. " + "You can set `input_evaluation: []` to resolve this.") + policy = ioctx.evaluator.get_policy(keys[0]) + return cls(policy, gamma) + + @DeveloperAPI + def estimate(self, batch): + """Returns an estimate for the given batch of experiences. + + The batch will only contain data from one episode, but it may only be + a fragment of an episode. 
+ """ + raise NotImplementedError + + @DeveloperAPI + def action_prob(self, batch): + """Returns the probs for the batch actions for the current policy.""" + + num_state_inputs = 0 + for k in batch.keys(): + if k.startswith("state_in_"): + num_state_inputs += 1 + state_keys = ["state_in_{}".format(i) for i in range(num_state_inputs)] + _, _, info = self.policy.compute_actions( + obs_batch=batch["obs"], + state_batches=[batch[k] for k in state_keys], + prev_action_batch=batch.data.get("prev_action"), + prev_reward_batch=batch.data.get("prev_reward"), + info_batch=batch.data.get("info")) + if "action_prob" not in info: + raise ValueError( + "Off-policy estimation is not possible unless the policy " + "returns action probabilities when computing actions (i.e., " + "the 'action_prob' key is output by the policy graph). You " + "can set `input_evaluation: []` to resolve this.") + return info["action_prob"] + + @DeveloperAPI + def process(self, batch): + self.new_estimates.append(self.estimate(batch)) + + @DeveloperAPI + def check_can_estimate_for(self, batch): + """Raises ValueError if OPE cannot be supported for this batch.""" + + if isinstance(batch, MultiAgentBatch): + raise ValueError( + "IS-estimation is not implemented for multi-agent batches. " + "You can set `input_evaluation: []` to resolve this.") + + if "action_prob" not in batch: + raise ValueError( + "Off-policy estimation is not possible unless the inputs " + "include action probabilities (i.e., the policy is stochastic " + "and emits the 'action_prob' key). You can set " + "`input_evaluation: []` to resolve this.") + + @DeveloperAPI + def get_metrics(self): + """Return a list of new episode metric estimates since the last call. + + Returns: + list of OffPolicyEstimate objects. 
+ """ + out = self.new_estimates + self.new_estimates = [] + return out diff --git a/python/ray/rllib/offline/shuffled_input.py b/python/ray/rllib/offline/shuffled_input.py new file mode 100644 index 000000000..22b4d6941 --- /dev/null +++ b/python/ray/rllib/offline/shuffled_input.py @@ -0,0 +1,45 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import random + +from ray.rllib.offline.input_reader import InputReader +from ray.rllib.utils.annotations import override, DeveloperAPI + +logger = logging.getLogger(__name__) + + +@DeveloperAPI +class ShuffledInput(InputReader): + """Randomizes data over a sliding window buffer of N batches. + + This increases the randomization of the data, which is useful if the + batches were not in random order to start with. + """ + + @DeveloperAPI + def __init__(self, child, n=0): + """Initialize a ShuffledInput. + + Arguments: + child (InputReader): child input reader to shuffle. + n (int): if positive, shuffle input over this many batches. 
+ """ + self.n = n + self.child = child + self.buffer = [] + + @override(InputReader) + def next(self): + if self.n <= 1: + return self.child.next() + if len(self.buffer) < self.n: + logger.info("Filling shuffle buffer to {} batches".format(self.n)) + while len(self.buffer) < self.n: + self.buffer.append(self.child.next()) + logger.info("Shuffle buffer filled") + i = random.randint(0, len(self.buffer) - 1) + self.buffer[i] = self.child.next() + return random.choice(self.buffer) diff --git a/python/ray/rllib/offline/wis_estimator.py b/python/ray/rllib/offline/wis_estimator.py new file mode 100644 index 000000000..f538d3871 --- /dev/null +++ b/python/ray/rllib/offline/wis_estimator.py @@ -0,0 +1,56 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.rllib.offline.off_policy_estimator import OffPolicyEstimator, \ + OffPolicyEstimate +from ray.rllib.utils.annotations import override + + +class WeightedImportanceSamplingEstimator(OffPolicyEstimator): + """The weighted step-wise IS estimator. 
+ + Step-wise WIS estimator in https://arxiv.org/pdf/1511.03722.pdf""" + + def __init__(self, policy, gamma): + OffPolicyEstimator.__init__(self, policy, gamma) + self.filter_values = [] + self.filter_counts = [] + + @override(OffPolicyEstimator) + def estimate(self, batch): + self.check_can_estimate_for(batch) + + rewards, old_prob = batch["rewards"], batch["action_prob"] + new_prob = self.action_prob(batch) + + # calculate importance ratios + p = [] + for t in range(batch.count - 1): + if t == 0: + pt_prev = 1.0 + else: + pt_prev = p[t - 1] + p.append(pt_prev * new_prob[t] / old_prob[t]) + for t, v in enumerate(p): + if t >= len(self.filter_values): + self.filter_values.append(v) + self.filter_counts.append(1.0) + else: + self.filter_values[t] += v + self.filter_counts[t] += 1.0 + + # calculate stepwise weighted IS estimate + V_prev, V_step_WIS = 0.0, 0.0 + for t in range(batch.count - 1): + V_prev += rewards[t] * self.gamma**t + w_t = self.filter_values[t] / self.filter_counts[t] + V_step_WIS += p[t] / w_t * rewards[t] * self.gamma**t + + estimation = OffPolicyEstimate( + "wis", { + "V_prev": V_prev, + "V_step_WIS": V_step_WIS, + "V_gain_est": V_step_WIS / max(1e-8, V_prev), + }) + return estimation diff --git a/python/ray/rllib/test/data/cartpole_small/output-2019-01-14_16-37-16_worker-1_0.json b/python/ray/rllib/test/data/cartpole_small/output-2019-01-14_16-37-16_worker-1_0.json deleted file mode 100644 index f375d1178..000000000 --- a/python/ray/rllib/test/data/cartpole_small/output-2019-01-14_16-37-16_worker-1_0.json +++ /dev/null @@ -1,3 +0,0 @@ -{"prev_actions": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "type": "SampleBatch", "obs": "BCJNGGhAwA8AAAAAAADQKQ4AAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEQoPkBBQIAMQ4AKIgBEQy8ARIOGABjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAIAYwBRCAAQAAiGAQIQAAgoAAACABEILAICfAIATAAQyhUACQIAAGYCMQYABhQAD0YBI/H///////////////+T1QE1v/IENT9z/zQ/8wQ1v2Kckz/v/38/wsaTv5v+f79r1LU/7s8butdTtb8PchA8zBKpPyzVdL+62qi/4YR1PyqJhD9XCbe/+qCFv4ymtj9vzNQ+qt7Lv0JA6b5zbcs/R7E7v4nj0r+vBiU/c73SPz/I6L9P126/oejgP+uQbT8azfm/HrKmv8Hn9T//4qc/MTAHwMwpxb8hygZA7a3HPxWDDsAVxnG/fAUPQC3sej/oGwjAsTvUvksBCUA/r/c+jCL8vyBKY79m5/4/qJl8P6s2+785QqW/C2b/P9exsz8CdAHAgBM9v9AvBEDpCGk/2dj8v5ctWL526gFALuDwPjoI8L8qPjq/cD/5P4D/gT9QUe+/8/Q3vrCk+j9j6BU/PelAP8CGjD/NVpO/vcSBv/0GPT9sRMQ/QXKLv4A8uL/1VEw/+ORxP4QZjb9Gu2i/LHpIP26cxj648Ia/rVXRvhYkMj/w2Wk/lzRzv5SNab8iVDA/6MmxP2MnbL854q6/+2xCPyXW4z9uQne/8BTfv6J8Zz8Dxpk/bI6Jv7awm79tKXc/swbOP1uIjr82V86/M7CMP0fbiD9V0pu/IWiQv1YHkz98Og8/jmCgv57SLr+ljI8/4mZjPd5Cnb+xOIC+y/2CP0wq5b7FRZO/eMI4PhJRWz8/vpk9EdKCv/IlrL54RUY/cFAYP0x3eL/0IVe/tQNGP3pUjD+gsn2/4SSov/HoWT+nzsY/BxuKvynA3b+Pf4A/GlP4PxlMnb+6/QTAzLmcP9yBD0DMW7e/PPoVwJwCwD/j6dc/arDWv6m+7r+mEdY/AhWZP+gv67/ME7y/cmzgPwqKOz84ifa/NWaPv2BnZj92Jio9Ryi3PixNfbvke14/ScH9Pu1GuT5yVcS+ShBpP97RcD8/l54+ARRCv3O0gj8UX+8+Vq9NPv3nr75xQ4c/shhSu4+SHD7IkYY9P6iCP5Wz7j4Qwik+AZisvvxPhz9CPVq8CI3yPXXGoT24zoI/LkDuPlktCD6Fs6q+y4uHPyLxurzuza89ixO3PYIhgz/hgQK/3S3QPWBYAj9+xWs/suV8v9oPOT6DLmw/29Y/Pz/k+L5GrKU+HBP8PpkxJz/OR3i/Eg/OPnNnaz8ogPg+CmXuvlgDDD9d0/0+DtPIPh11dL9JmSA/vaBvPzRvZj6FaeW+Q0FGP0vtBD/f8wc+EF9xvzmCWz/dz
ng/ZqARvSLf3b778YA/hCYQPzELA77X+26/J/OLP0x+gz9VbZi+wfO0v5Jqnz+Wo7w/+V8Lv1SW7b+Dqbo/1b3xPziNXb+7wKq/eZfcPyaTvj97C42/LctVv5XM9D/CLJA/Go+gv9BasL4hIwJAcJ5JPwJamD9/pL4+7wIRvuYIUL4RJpc/gj5dP7YNBr61wR6/EoqfPzn9tj5cqDi+7zdDvpUfnj9zolo/a9ksvuNTHb+uOKY/hPeqP1G7X74U9oO/skW3PyxFUj+Fw6i+lMUYv7Aqvj8/gqA+zAnCvh4sLr5Hj7s/w1RGvoz4u74mG4I+YAiwP2h8oj6Dopa+W/VEvt8srj+nEVM/SFGSviFXIr9ndLU/T8CcPozrrr4FlEi+wXKzP/WRWL7xTKu+t2F1PvelqD80s54+74iHvjoCXb7TPqc/8uBTPwZ7hb5Yoyq/6smuPzWAqj+xDqW+wwGOv4vEvj9v7Ew/OE7mvkz0KL8PPsU/d4aMPgQxA79NcmC+Es3CPxfiSz/B3gK/5Z4vv3UMyT9Ujog+4AsUv8Ater6sicY/9lSGvgjUFL/ol0Y+D8y7P3jiSb8uYwW/JGYhPzMvqT8qbH++KXHLvpX6Fj5ttZ8/EdVHv+EOsL4JtBg/xSWOP0N5pr8+O2a+RV6FPzIiaT9sZ0G/i5chvX5cED/A20c/1yZcvvwMnT3PycM9Dh04PxFMpj4Prf49oB28viq1OT/m2GC+md7UPeeC7T1FDyo/uKamPpwfHj6YerO+edUrP8TwXT8Qjgs+XOFPv3TmPj/rpbI/KdZKPajtob8YCGM/ZgFXP2rW2b0cGUa//UZ1P/M1rz845kK+BXSev1Q2jD/Cru8/UOawviX217+L5aU/g3YVQJJ3Er+0MwfAFxnHP6vu4j/onV6/OInRv31U3j92tw5AYgeMv8jBBMDZR/w/6E3YP6WOsb+cCtG/fv8HQDYLCUDthM2/6QcFwGDXFEDXHCNAS5zxv7tcH8ASByRAQbQ5QPxCDsA24jbAhZwMvmrySr0/686+qtsJvU/zDr5LGf2+ms/RvrGxsj46406+0X5vv2+is77tyzo/EEymvm4Hr786nmi+n76NP43/Ab9gHWm/P4wbvRnGMD/aBSG/2eWrv7lrpj3FZ4o/11pPv5Ah4b+ah4k+Yq27PwBKhr+Zhaa/jU0GPxIriD+fj5y/i4Pbv445Nj8iPrs/0965v3rcob+I+nc/MSqKPw+Lzr8EoNa/xBiUP1bTvj+mu+m/R9edv+b5tD8BOpA/ARX8v8aLTL9CCs0/NpZHPwANA8BhnJy/C77cP675mz/TQQvARkrRv/wd9T/h+NI/+0QWwLmCmb87pQpA1reoP8O3Jb/xQkQ9K24PPZGVSb5g4ie/NSzyPlExzDzwYBS/Iuwbvxo4ZT+ZLky9JKd1v4HaAb9Kxqc/vyhCvsLoqr+6QLO+eD/bPxwuyr57C9q/mX0MvoBUoz/efiq/6DSov7HIoDw0LFk/mPpev+B7cb/PV/Y9YpChP1d5gb9N+qu/kzOOPvT/1D8jIpy/eyDev9LZ+z4HAANAYSC/v69DB8BMEkM/mdAZQOvQ6b+iNB7AGv+KP6uELkAdjg3ADX4zwKohDj7CAu48dgT+PF9/Kz28fhI+qWLXPm44Ez0TU5++jGBRPi9j1Dwnvbe84/w7PamBVT7iLNc+zsqKvBx3n76ER4o+SxFQP0W6mr1z5yq/ek7HPm+mmT/RL0u+W+SCv0XiED81qkw/c6HFvvTaKr/7Gy8/uxOYPzGRAr9QUIS/fvJbP7/MST9oTjO/j3Awv1mUeT8CxZY/og1UvxB8iL8fzJI/wnVHP5oDg78Przu/lRqhP5m4lT+yH5S/MG2Pv+1ttj8ApEU/6Nmtv7W4TL8Y+sM/7CbBPs3Mv7/afvq+l/vJP9RxRj9RVcq/y4Blv5H/1j+ymJU/zJndv759pr9Fpuo/YNzGP78Z+b+Ybtm/1JZyPmtBnrvrq
Rm+OfAWPns1gD6IH8c+JnsSvsViXr5096M+T/pHP50JTb4gthS/FJTkPvyrlT9EtqS+NLtxvwP4ID8aVMY/q/sBvxg7p78t1V0/LBOTP7MtQr9KT3e/AtuFPwIhQT/vyHK/rmYjvxlVlT8ngbk+gR2Kv8xyo76ecp0/jqpBPzl+k7/vLje/AZisP/Eiuj7PlaW/LpXOvm9ftD/pAuq8cnKwvwOByL3k+bQ/fRnXviJftL/ajlA+hpiuP+JaT7+Pn7G/J8oAP1BioT+ZDJm/rmaov9kzTT9QbI0/RkBKvxLPmL8wtcU+mN2AP1Kclr/Lb5K/4Z8xP8/aWj8LIMe//nGFvygggD+NKyY/z/uSv1+sY7/rQRc/MhYAP33zPr+FeU6/teY+PtUL0T400bC+o/FKv+yEXL7F7L4+TQFhPRzbWL/quR2/fdPJPiYUrr6pCXi/UYSTviQPuD4/jIA91WiEv+M8Mr92e8M+DdaqvsZ9lb+d5L++IySyPsmkkj3Ue5+/M2hKvwQpvj4YC/Q+Py2yv5I9mr9Oj+c+0vadPZBPzb83TmW/QAP0PhAt+D7LR+G/+O2ov5JZOz4czwC+7BAzP/wPQD7WCTk+ZlQIvzMPMz/sHhI/pgb3PfBZAL6XUkM/TpFePvgH8j3zvAi/A5xEP0vHGj+v1Gs9JlkAvmpJVj9FUoE+Js5gPVn4kD7cEVk/9HPFvRC74T1A0zA/bS1NP1ii475mE2U+8D6NPqayMj86p0u94IaOPth8Lz/z6Cg/2p/PviltyD4mz4s/vjEQP4b8Qr+SGBA/0MksP9vx0D6Rfb6+4L0sP2mfij8jSaI+EUk9v7FCWD/IgCo/owUpPvAmt752Z3Q/XlaAPtRSmz3A8ZI8oKOAP6EiKj89nkY9qXu7vlWEjj9l+X0+DTAtvaHL3Dsyv5Q/efIpP6Kwkr2UYcS+9V+iPxPaiT+gZim+uhhGv5o5tz/A+L0/9N6lvlcFlb/7BdM/zMfwPzg0DL9+4Ma/bFf1P/PPEEBJN1a/YVH4vxjEDkBZ5ydA82eYvxZyFMA5VSVAFVQ9QPqtzb/yASzAAAAAAAAA0QwMAgBQAAAAAAAAAAAA", "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "vf_preds": [0.0, -0.00487118773162365, -0.0018986212089657784, 
0.0028677929658442736, 0.005240548402070999, 0.005457673221826553, 0.005698265973478556, 0.0038879704661667347, 0.0011190500808879733, 0.0019698755349963903, 0.0022739642299711704, 0.0010677454993128777, -0.0010650367476046085, 0.0012116925790905952, 0.0021752738393843174, 0.0008449580054730177, -0.0012952529359608889, 0.0012519401498138905, -0.0008352645672857761, -0.0029235705733299255, -0.003919134847819805, -0.0024472344666719437, -0.0003346663434058428, -0.0025739537086337805, -0.003718443214893341, -0.004324411973357201, -0.002983825746923685, -0.003760548308491707, -0.0024112325627356768, -0.0008347660768777132, 0.0014185444451868534, 0.003469552379101515, 0.0005347575061023235, -0.0020258589647710323, -0.0033851801417768, -0.004155046306550503, -0.00459117628633976, -0.004748952109366655, -0.003307944629341364, -0.002527016680687666, -0.0016687412280589342, 0.003317778231576085, 0.001444548019208014, -0.0003428780473768711, 0.0020549846813082695, 0.0035416982136666775, 0.002046281471848488, 0.0035331614781171083, 0.0020307975355535746, 0.0035208670888096094, 0.004445224069058895, 0.005048881284892559, 0.004507682751864195, 0.005087967962026596, 0.00434712041169405, 0.004935638979077339, 0.003924364224076271, 0.004689022433012724, 0.003332592314109206, 0.0043931929394602776, 0.004899259656667709, 0.005244260188192129, 0.004016464576125145, 0.0026906426064670086, 0.0008456592913717031, 0.002525786869227886, 0.0008650395320728421, 0.002598958555608988, 0.0009742589900270104, -0.0004426852101460099, 0.0012120272731408477, 0.0026703188195824623, 0.0037083355709910393, 0.0025998265482485294, 0.001024225726723671, 0.0025726454332470894, 0.0036626593209803104, 0.002498065819963813, 0.0008406571578234434, -0.0006118541350588202, 0.0009266565321013331, 0.002421534853056073, 0.0008016590727493167, 0.0023035150952637196, 0.0035252771340310574, 0.004193081520497799, 0.0033619976602494717, 0.004202482756227255, 0.0046736495569348335, 0.004308420233428478, 
0.003088015131652355, 0.0009314963244833052, 0.0030956340488046408, 0.0008079272229224443, -0.0014290213584899902, -0.0026682952884584665, -0.0005425104172900319, -0.001979182241484523, -0.002800151938572526, -0.0033438843674957752, -0.0021698642522096634, -0.0028907316736876965, -0.0017615329707041383, -0.0023519338574260473, -0.0027652650605887175, -0.0029656486585736275, -0.0009676689514890313, 0.001679705805145204, 0.0032263367902487516, 0.0037603513337671757, 0.001616372144781053, 0.0025137888733297586, 0.002981772180646658, 0.0015898161800578237, 0.002418223302811384, 0.001256283838301897, 0.002054948825389147, 0.0012158966856077313, 0.00029009790159761906, 0.001428661635145545, 0.0020064038690179586, 0.0017069533932954073, -0.003172996686771512, -0.004483560100197792, -0.005149888806045055, -0.005361895076930523, -0.005426064133644104, -0.005010651890188456, -0.004377369768917561, -0.004861254245042801, -0.005238750483840704, -0.005581873469054699, -0.005827872082591057, -0.005852051079273224, 0.000852237397339195, -0.0013425062643364072, 0.0011106824968010187, -0.0010679324623197317, -0.0024497921112924814, -0.0031280789989978075, -0.00152774213347584, -0.002492697676643729, -0.0010899901390075684, -0.0021704095415771008, -0.0009247022680938244, -0.0019996080081909895, -0.0009293071925640106, 0.0005665442440658808, -0.0011444611009210348, -0.0021483497694134712, -0.0026740499306470156, 0.0017427883576601744, -0.0004151382017880678, -0.0019196700304746628, -0.002759127411991358, -0.0032823209185153246, -0.0020891970489174128, -0.0007471528369933367, 0.0008949560578912497, -0.000829189782962203, 0.0007344535551965237, 0.0024862182326614857, 0.003801158629357815, 0.004443793091922998, 0.004708093591034412, 0.004105967469513416, 0.004569336771965027, 0.004882926121354103, 0.004332147538661957, 0.002846303628757596, 0.00028358493000268936, -0.0021803961135447025, -0.00018524634651839733, -0.00262784818187356, -0.0007005062652751803, -0.0030652908608317375, 
-0.004242834635078907, -0.003025660291314125, -0.004231693223118782, 0.002240358851850033, 0.0039157080464065075, 0.0021749918814748526, 0.0038856121245771646, 0.0021294518373906612, -0.0004168582381680608, -0.0025820452719926834, 0.0006671231240034103, -0.001618842827156186, -0.0029013026505708694, 4.8932997742667794e-05, -0.0013398578157648444, 0.0012839692644774914, 0.0031888303346931934, 0.0016653237398713827, 0.0032414207234978676, 0.0018515897681936622, 0.00043198035564273596, -0.0006342222914099693, -0.001385447452776134, -0.0019433284178376198, -0.0023486767895519733, -0.002568600233644247], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, true], "eps_id": [393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 393516090, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 1236877951, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 928702045, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 365304931, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1291452630, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1279680958, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1441028559, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 
1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 1769014140, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745, 154326745], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "value_targets": [17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 
12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 33.77179718017578, 33.10282516479492, 32.427093505859375, 31.74454116821289, 31.055091857910156, 30.358678817749023, 29.655229568481445, 28.944677352905273, 28.226945877075195, 27.50196647644043, 26.769662857055664, 26.02996253967285, 25.282791137695312, 24.5280704498291, 23.765727996826172, 22.995685577392578, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 24.5280704498291, 23.765727996826172, 22.995685577392578, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 
19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0], "new_obs": "BCJNGGhAwA8AAAAAAADQNA4AAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEQoPkBBQIAMQ4AKIgBEQy8ARIOGABjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAIAYwBRCAAQAAiGAQIQAAgoAAACABEILAICfAIATAAQyhUACQIAAGYCMQYABhQAD0YBE/H///////////////+d1QE1v/IENT9z/zQ/8wQ1v2Kckz/v/38/wsaTv5v+f79r1LU/7s8butdTtb8PchA8zBKpPyzVdL+62qi/4YR1PyqJhD9XCbe/+qCFv4ymtj9vzNQ+qt7Lv0JA6b5zbcs/R7E7v4nj0r+vBiU/c73SPz/I6L9P126/oejgP+uQbT8azfm/HrKmv8Hn9T//4qc/MTAHwMwpxb8hygZA7a3HPxWDDsAVxnG/fAUPQC3sej/oGwjAsTvUvksBCUA/r/c+jCL8vyBKY79m5/4/qJl8P6s2+785QqW/C2b/P9exsz8CdAHAgBM9v9AvBEDpCGk/2dj8v5ctWL526gFALuDwPjoI8L8qPjq/cD/5P4D/gT9QUe+/8/Q3vrCk+j9j6BU/sP/mv9I3Ob/rOfU/L8GSP/0GPT9sRMQ/QXKLv4A8uL/1VEw/+ORxP4QZjb9Gu2i/LHpIP26cxj648Ia/rVXRvhYkMj/w2Wk/lzRzv5SNab8iVDA/6MmxP2MnbL854q6/+2xCPyXW4z9uQne/8BTfv6J8Zz8Dxpk/bI6Jv7awm79tKXc/swbOP1uIjr82V86/M7CMP0fbiD9V0pu/IWiQv1YHkz98Og8/jmCgv
57SLr+ljI8/4mZjPd5Cnb+xOIC+y/2CP0wq5b7FRZO/eMI4PhJRWz8/vpk9EdKCv/IlrL54RUY/cFAYP0x3eL/0IVe/tQNGP3pUjD+gsn2/4SSov/HoWT+nzsY/BxuKvynA3b+Pf4A/GlP4PxlMnb+6/QTAzLmcP9yBD0DMW7e/PPoVwJwCwD/j6dc/arDWv6m+7r+mEdY/AhWZP+gv67/ME7y/cmzgPwqKOz84ifa/NWaPvxfn4D8j1pQ/8Y36v12hw7/ke14/ScH9Pu1GuT5yVcS+ShBpP97RcD8/l54+ARRCv3O0gj8UX+8+Vq9NPv3nr75xQ4c/shhSu4+SHD7IkYY9P6iCP5Wz7j4Qwik+AZisvvxPhz9CPVq8CI3yPXXGoT24zoI/LkDuPlktCD6Fs6q+y4uHPyLxurzuza89ixO3PYIhgz/hgQK/3S3QPWBYAj9+xWs/suV8v9oPOT6DLmw/29Y/Pz/k+L5GrKU+HBP8PpkxJz/OR3i/Eg/OPnNnaz8ogPg+CmXuvlgDDD9d0/0+DtPIPh11dL9JmSA/vaBvPzRvZj6FaeW+Q0FGP0vtBD/f8wc+EF9xvzmCWz/dzng/ZqARvSLf3b778YA/hCYQPzELA77X+26/J/OLP0x+gz9VbZi+wfO0v5Jqnz+Wo7w/+V8Lv1SW7b+Dqbo/1b3xPziNXb+7wKq/eZfcPyaTvj97C42/LctVv5XM9D/CLJA/Go+gv9BasL4hIwJAcJ5JP4S4qb8IiFa/ZR0GQPJuoj8RJpc/gj5dP7YNBr61wR6/EoqfPzn9tj5cqDi+7zdDvpUfnj9zolo/a9ksvuNTHb+uOKY/hPeqP1G7X74U9oO/skW3PyxFUj+Fw6i+lMUYv7Aqvj8/gqA+zAnCvh4sLr5Hj7s/w1RGvoz4u74mG4I+YAiwP2h8oj6Dopa+W/VEvt8srj+nEVM/SFGSviFXIr9ndLU/T8CcPozrrr4FlEi+wXKzP/WRWL7xTKu+t2F1PvelqD80s54+74iHvjoCXb7TPqc/8uBTPwZ7hb5Yoyq/6smuPzWAqj+xDqW+wwGOv4vEvj9v7Ew/OE7mvkz0KL8PPsU/d4aMPgQxA79NcmC+Es3CPxfiSz/B3gK/5Z4vv3UMyT9Ujog+4AsUv8Ater6sicY/9lSGvgjUFL/ol0Y+D8y7P3jiSb8uYwW/JGYhPzMvqT8qbH++KXHLvpX6Fj5ttZ8/EdVHv+EOsL4JtBg/xSWOP0N5pr8+O2a+RV6FPzIiaT9sZ0G/i5chvX5cED/A20c/1yZcvvwMnT3PycM9Dh04PxFMpj4Prf49oB28viq1OT/m2GC+md7UPeeC7T1FDyo/uKamPpwfHj6YerO+edUrP8TwXT8Qjgs+XOFPv3TmPj/rpbI/KdZKPajtob8YCGM/ZgFXP2rW2b0cGUa//UZ1P/M1rz845kK+BXSev1Q2jD/Cru8/UOawviX217+L5aU/g3YVQJJ3Er+0MwfAFxnHP6vu4j/onV6/OInRv31U3j92tw5AYgeMv8jBBMDZR/w/6E3YP6WOsb+cCtG/fv8HQDYLCUDthM2/6QcFwGDXFEDXHCNAS5zxv7tcH8ASByRAQbQ5QPxCDsA24jbAa8g0QAl0TEBYLSbA8yRLwE/zDr5LGf2+ms/RvrGxsj46406+0X5vv2+is77tyzo/EEymvm4Hr786nmi+n76NP43/Ab9gHWm/P4wbvRnGMD/aBSG/2eWrv7lrpj3FZ4o/11pPv5Ah4b+ah4k+Yq27PwBKhr+Zhaa/jU0GPxIriD+fj5y/i4Pbv445Nj8iPrs/0965v3rcob+I+nc/MSqKPw+Lzr8EoNa/xBiUP1bTvj+mu+m/R9edv+b5tD8BOpA/ARX8v8aLTL9CCs0/NpZHPwANA8BhnJy/C77cP675mz/TQQvARkrRv/wd9T/h+NI/+0QWwLmCmb87pQpA1reoP1jxHMB09s2/c0IWQAOW4D9g4ie/NSzyP
lExzDzwYBS/Iuwbvxo4ZT+ZLky9JKd1v4HaAb9Kxqc/vyhCvsLoqr+6QLO+eD/bPxwuyr57C9q/mX0MvoBUoz/efiq/6DSov7HIoDw0LFk/mPpev+B7cb/PV/Y9YpChP1d5gb9N+qu/kzOOPvT/1D8jIpy/eyDev9LZ+z4HAANAYSC/v69DB8BMEkM/mdAZQOvQ6b+iNB7AGv+KP6uELkAdjg3ADX4zwELGuj8r+hFA4akowBWUHcC8fhI+qWLXPm44Ez0TU5++jGBRPi9j1Dwnvbe84/w7PamBVT7iLNc+zsqKvBx3n76ER4o+SxFQP0W6mr1z5yq/ek7HPm+mmT/RL0u+W+SCv0XiED81qkw/c6HFvvTaKr/7Gy8/uxOYPzGRAr9QUIS/fvJbP7/MST9oTjO/j3Awv1mUeT8CxZY/og1UvxB8iL8fzJI/wnVHP5oDg78Przu/lRqhP5m4lT+yH5S/MG2Pv+1ttj8ApEU/6Nmtv7W4TL8Y+sM/7CbBPs3Mv7/afvq+l/vJP9RxRj9RVcq/y4Blv5H/1j+ymJU/zJndv759pr9Fpuo/YNzGP78Z+b+Ybtm/pTYCQL9T9j+rDQ7Ao3wFwHs1gD6IH8c+JnsSvsViXr5096M+T/pHP50JTb4gthS/FJTkPvyrlT9EtqS+NLtxvwP4ID8aVMY/q/sBvxg7p78t1V0/LBOTP7MtQr9KT3e/AtuFPwIhQT/vyHK/rmYjvxlVlT8ngbk+gR2Kv8xyo76ecp0/jqpBPzl+k7/vLje/AZisP/Eiuj7PlaW/LpXOvm9ftD/pAuq8cnKwvwOByL3k+bQ/fRnXviJftL/ajlA+hpiuP+JaT7+Pn7G/J8oAP1BioT+ZDJm/rmaov9kzTT9QbI0/RkBKvxLPmL8wtcU+mN2AP1Kclr/Lb5K/4Z8xP8/aWj8LIMe//nGFvygggD+NKyY/z/uSv1+sY7/rQRc/MhYAP33zPr+FeU6/teY+PtUL0T400bC+o/FKv+yEXL7F7L4+TQFhPRzbWL/quR2/fdPJPiYUrr6pCXi/UYSTviQPuD4/jIA91WiEv+M8Mr92e8M+DdaqvsZ9lb+d5L++IySyPsmkkj3Ue5+/M2hKvwQpvj4YC/Q+Py2yv5I9mr9Oj+c+0vadPZBPzb83TmW/QAP0PhAt+D7LR+G/+O2ov379Dj+A9WM/mXb9v5GL3r/WCTk+ZlQIvzMPMz/sHhI/pgb3PfBZAL6XUkM/TpFePvgH8j3zvAi/A5xEP0vHGj+v1Gs9JlkAvmpJVj9FUoE+Js5gPVn4kD7cEVk/9HPFvRC74T1A0zA/bS1NP1ii475mE2U+8D6NPqayMj86p0u94IaOPth8Lz/z6Cg/2p/PviltyD4mz4s/vjEQP4b8Qr+SGBA/0MksP9vx0D6Rfb6+4L0sP2mfij8jSaI+EUk9v7FCWD/IgCo/owUpPvAmt752Z3Q/XlaAPtRSmz3A8ZI8oKOAP6EiKj89nkY9qXu7vlWEjj9l+X0+DTAtvaHL3Dsyv5Q/efIpP6Kwkr2UYcS+9V+iPxPaiT+gZim+uhhGv5o5tz/A+L0/9N6lvlcFlb/7BdM/zMfwPzg0DL9+4Ma/bFf1P/PPEEBJN1a/YVH4vxjEDkBZ5ydA82eYvxZyFMA5VSVAFVQ9QPqtzb/yASzA8b49QNTLUEC2GgXAmoJCwMsMDwIAA1AAAAAAAAAAAAA=", "actions": [1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 
1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], "advantages": [17.383136749267578, 16.553495407104492, 15.707579612731934, 14.85135555267334, 13.988924026489258, 13.11996078491211, 12.242199897766113, 11.357625007629395, 10.465055465698242, 9.559823036193848, 8.646000862121582, 7.724462985992432, 6.794530391693115, 5.850773334503174, 4.898819923400879, 3.93955397605896, 2.971395254135132, 1.9887480735778809, 1.0008352994918823, 19.83986473083496, 19.031131744384766, 18.211753845214844, 17.383472442626953, 16.551197052001953, 15.709399223327637, 14.85854721069336, 13.997148513793945, 13.129179000854492, 12.250308990478516, 11.362347602844238, 10.46475601196289, 9.55832290649414, 8.647740364074707, 7.727556228637695, 6.796850204467773, 5.85614013671875, 4.905586242675781, 3.94514799118042, 2.9734079837799072, 1.9925270080566406, 1.0016686916351318, 21.42886734008789, 20.637126922607422, 19.837284088134766, 19.025157928466797, 18.205764770507812, 17.381092071533203, 16.54509162902832, 15.703649520874023, 14.850702285766602, 13.98971939086914, 13.120369911193848, 12.243390083312988, 11.356425285339355, 10.461827278137207, 9.556857109069824, 8.6443510055542, 7.720841407775879, 6.790132522583008, 5.847591876983643, 4.8960957527160645, 3.935154676437378, 2.966083526611328, 1.9873093366622925, 0.9991543292999268, 33.76927185058594, 33.101959228515625, 32.424495697021484, 31.743566513061523, 31.05553436279297, 30.357465744018555, 29.652559280395508, 28.940969467163086, 28.224346160888672, 27.50094223022461, 26.76708984375, 26.02630043029785, 25.280292510986328, 24.527231216430664, 23.766340255737305, 22.99475860595703, 22.215442657470703, 21.431385040283203, 20.636268615722656, 
19.833415985107422, 19.023019790649414, 18.205944061279297, 17.378934860229492, 16.5439510345459, 15.701372146606445, 14.85113525390625, 13.993232727050781, 13.122323036193848, 12.247089385986328, 11.36294174194336, 10.468842506408691, 9.562335014343262, 8.650254249572754, 7.728330612182617, 6.796809196472168, 5.85415506362915, 4.903885841369629, 3.9421606063842773, 2.97245192527771, 1.9927653074264526, 1.0029656887054443, 14.85519027709961, 13.992485046386719, 13.1221923828125, 12.24413776397705, 11.359896659851074, 10.463661193847656, 9.55881118774414, 8.646685600280762, 7.723112106323242, 6.792209148406982, 5.849930286407471, 4.899779319763184, 3.9401090145111084, 2.9686713218688965, 1.987993597984314, 0.9982930421829224, 11.364686012268066, 10.470658302307129, 9.56694221496582, 8.653636932373047, 7.730956554412842, 6.798475742340088, 5.856362342834473, 4.905856132507324, 3.9456377029418945, 2.975681781768799, 1.9958279132843018, 1.0058521032333374, 15.704828262329102, 14.855565071105957, 13.993053436279297, 13.126486778259277, 12.250347137451172, 11.364641189575195, 10.46770191192627, 9.564285278320312, 8.649365425109863, 7.727701187133789, 6.794389724731445, 5.853984832763672, 4.901924133300781, 3.9398324489593506, 2.9712445735931396, 1.9921483993530273, 1.0026741027832031, 24.526329040527344, 23.766143798828125, 22.997604370117188, 22.220623016357422, 21.435468673706055, 20.640661239624023, 19.837688446044922, 19.026317596435547, 18.210134506225586, 17.382402420043945, 16.5461368560791, 15.701879501342773, 14.84977912902832, 13.989456176757812, 13.121313095092773, 12.243328094482422, 11.356630325317383, 10.46184253692627, 9.55894660949707, 8.647992134094238, 7.727710723876953, 6.7936506271362305, 5.854612827301025, 4.901695728302002, 3.9434642791748047, 2.9743428230285645, 1.9930256605148315, 1.0042316913604736, 20.63633155822754, 19.833024978637695, 19.02503776550293, 18.205421447753906, 17.38100814819336, 16.549039840698242, 15.70826244354248, 
14.853555679321289, 13.995783805847168, 13.12831974029541, 12.247848510742188, 11.362853050231934, 10.464890480041504, 9.558603286743164, 8.646610260009766, 7.722289085388184, 6.791613578796387, 5.851552963256836, 4.901629447937012, 3.941784381866455, 2.97204327583313, 1.9923486709594727, 1.0025686025619507], "logits": [[0.0, 0.0], [-0.0010800379095599055, 0.0007375705754384398], [-0.004435062408447266, 0.0005343838129192591], [7.393816486001015e-05, 9.761075489223003e-05], [-0.0003560207551345229, -6.331061013042927e-05], [0.0003169793635606766, -0.0002939291298389435], [0.003200689796358347, -0.0012865292374044657], [0.006604439578950405, -0.0012162395287305117], [0.0021192205604165792, -0.0006269766017794609], [0.003641175338998437, -0.0006381338462233543], [0.004360188730061054, -0.0005922527052462101], [0.001417054096236825, -0.0007361224852502346], [-3.198068588972092e-05, -0.0008571171201765537], [0.001837056828662753, -0.0008419665973633528], [0.0038398834876716137, -0.0008210362866520882], [0.0012573665007948875, -0.0010016369633376598], [-0.00011849473230540752, -0.0010597014334052801], [0.001786176348105073, -0.0012479787692427635], [3.500527236610651e-05, -0.001278881449252367], [-0.005703244358301163, -0.0003652316518127918], [-0.006992516107857227, 0.00020493927877396345], [-0.005003170110285282, -0.00014105380978435278], [-0.001993823330849409, -0.00032680376898497343], [-0.0052564712241292, -1.789536327123642e-06], [-0.00676480820402503, 0.00047385599464178085], [-0.007028358522802591, 0.0009446442127227783], [-0.0059624044224619865, 0.0004535103216767311], [-0.006786239333450794, 0.0007546060951426625], [-0.0050008161924779415, 0.0006136884912848473], [-0.002260302659124136, 0.0006678428035229445], [-0.0002696269075386226, 0.0004214867949485779], [0.0004195414367131889, -2.048560418188572e-05], [-0.0006894773105159402, 0.0005334699526429176], [-0.0037746415473520756, 0.0009291857713833451], [-0.00602816604077816, 0.0009991814149543643], 
[-0.006788923405110836, 0.001170357340015471], [-0.006952063180506229, 0.001290753367356956], [-0.007007186766713858, 0.0012366960290819407], [-0.006351281888782978, 0.0008891085162758827], [-0.004467473831027746, 0.0011227107606828213], [-0.0023014587350189686, 0.0014136126264929771], [0.0016227291198447347, 0.0012764446437358856], [-0.0010416203876957297, 0.0005669461097568274], [-0.0031295043881982565, 0.00039500254206359386], [-0.0006933086551725864, 0.0003083520568907261], [0.001807844964787364, 0.0010358556173741817], [-0.0007128255674615502, 0.0002681287005543709], [0.0018035663524642587, 0.0010112517047673464], [-0.0007324713515117764, 0.00023809564299881458], [0.0017964036669582129, 0.000994343776255846], [0.0031056005973368883, 0.0016815210692584515], [0.004113082308322191, 0.0014251377433538437], [0.003803423373028636, 0.0018576004076749086], [0.005233174655586481, 0.0016621258109807968], [0.004150517750531435, 0.0022027476225048304], [0.005999088287353516, 0.0018860942218452692], [0.004094031173735857, 0.0023150492925196886], [0.006307140924036503, 0.0018380418187007308], [0.003701298963278532, 0.001959684770554304], [0.006129459943622351, 0.0014877254143357277], [0.007217158563435078, 0.0006848081247881055], [0.007509515155106783, -3.839656710624695e-05], [0.006087173707783222, 0.0003579510375857353], [0.0035247155465185642, -0.000188724254257977], [0.0010830723913386464, -0.0005574983078986406], [5.6536286137998104e-05, 5.478714592754841e-05], [-0.002588421106338501, -0.00021174154244363308], [0.0002230106620118022, 9.627663530409336e-05], [-0.002394695533439517, -0.00021500838920474052], [-0.004300069995224476, -0.00013164640404284], [-0.0016993348253890872, -0.00018428429029881954], [0.0006647265981882811, 0.0003479989245533943], [0.0015459577552974224, 0.001259325072169304], [0.0004974583862349391, 0.0003458969295024872], [-0.0020204749889671803, -4.9651600420475006e-05], [0.000553722376935184, 0.000431679654866457], [0.0016260375268757343, 
0.0013321982696652412], [0.00038990791654214263, 0.000438010785728693], [-0.002279052510857582, 7.313606329262257e-05], [-0.004333174787461758, 9.68386884778738e-05], [-0.0016952950973063707, 0.00016459706239402294], [0.0006255449843592942, 0.0007182159461081028], [-0.001692108460702002, 0.0002983873710036278], [0.0005813814350403845, 0.0008484721183776855], [0.0012512565590441227, 0.0015899240970611572], [0.001244992483407259, 0.0018235805910080671], [0.0015870153438299894, 0.0016183708794414997], [0.001990275690332055, 0.0017771294806152582], [0.002803694922477007, 0.0011717278975993395], [0.003451043041422963, 0.0017926108557730913], [0.002375768031924963, 0.0017600422725081444], [-0.0009131088736467063, 0.001059853471815586], [0.002421694342046976, 0.0017106495797634125], [-0.0009613449219614267, 0.0010171919129788876], [-0.0037584470119327307, 0.0007922837976366282], [-0.005072444677352905, 0.0009023400489240885], [-0.0036915787495672703, 0.0005583206657320261], [-0.005295662675052881, 0.0006634769961237907], [-0.005917491391301155, 0.0008207808714359999], [-0.00614863820374012, 0.0009063952602446079], [-0.005684515926986933, 0.0006037822458893061], [-0.006182287819683552, 0.0006507427897304296], [-0.004925982560962439, 0.0008042142726480961], [-0.005873315501958132, 0.0007443537469953299], [-0.006386878434568644, 0.0006989664398133755], [-0.006722467485815287, 0.0006425075698643923], [-0.0005776492762379348, -0.0010512794833630323], [0.0019863415509462357, -0.0007343639736063778], [0.0035934066399931908, -0.0006665601395070553], [0.004697972908616066, -0.0008066741283982992], [0.004341405816376209, -9.761005640029907e-05], [0.005484138615429401, -0.0004459059564396739], [0.00607579480856657, -0.0007124424446374178], [0.00537487305700779, -0.0001858370378613472], [0.006087019108235836, -0.0004235580563545227], [0.0043447306379675865, -0.0003254897892475128], [0.005473911762237549, -0.000475503271445632], [0.0033071681391447783, -0.000569990137591958], 
[0.0011151796206831932, -0.0007102272938936949], [0.002965803723782301, -0.0007895294111222029], [0.004563276190310717, -0.0007913610897958279], [0.002529766410589218, -0.0009521231986582279], [-0.0012498646974563599, -2.1373620256781578e-05], [-0.002770094433799386, -0.00029310700483620167], [-0.003873873734846711, -0.00012300931848585606], [-0.004923214204609394, 0.00015584012726321816], [-0.00580529123544693, 0.0005344734527170658], [-0.006559757515788078, -0.0004746696213260293], [-0.005923030432313681, -0.0013381675817072392], [-0.00698273116722703, -0.0007083917153067887], [-0.0073970528319478035, -5.815160693600774e-05], [-0.00752799678593874, 0.0003889466170221567], [-0.007589861750602722, 0.0005010159220546484], [-0.0076975589618086815, 0.0002687347587198019], [0.00010613290942274034, -0.0001470879651606083], [-0.0026522167026996613, -0.0002168218488804996], [0.0001289901847485453, -0.00023689866065979004], [-0.002687482861801982, -0.00028737366665154696], [-0.004664585459977388, -2.8059817850589752e-05], [-0.005818864330649376, 0.000376584124751389], [-0.0047286017797887325, -0.00013903493527323008], [-0.006018351763486862, 0.0002856364008039236], [-0.004038622602820396, 5.873129703104496e-05], [-0.0055610667914152145, 0.0003624274395406246], [-0.0031682022381573915, 0.0003654947504401207], [-0.004853295627981424, 0.0005459522362798452], [-0.0024462409783154726, 0.0006552375853061676], [-0.0006114467396400869, 0.0007685029413551092], [-0.002281778957694769, 0.0009000978898257017], [-0.004025255795568228, 0.0009014743845909834], [-0.005303448997437954, 0.0007799137383699417], [0.00022213140618987381, -0.001002668752335012], [-0.0025810461957007647, -0.0009840666316449642], [-0.004786238074302673, -0.0005547075998038054], [-0.006039753556251526, 1.3829907402396202e-05], [-0.00664203567430377, 0.0004847293021157384], [-0.005627049598842859, 3.4638913348317146e-05], [-0.0031584911048412323, -1.0794028639793396e-06], [-0.0009116228320635855, 
0.00012168451212346554], [-0.002764269011095166, 0.000367672648280859], [-0.0007386121433228254, 0.0004937669727951288], [0.00011580751743167639, 0.00046129990369081497], [0.00019147258717566729, 0.00031332368962466717], [8.468996384181082e-05, 0.0003437260165810585], [0.0001211678099934943, 0.0006336728110909462], [0.0005504883010871708, 0.0003328542225062847], [0.0007563657709397376, 0.0005401924718171358], [0.0012433270458132029, 0.0006439455319195986], [0.0018633500440046191, 0.00041398266330361366], [0.001914552180096507, 0.0003963641356676817], [0.0008827910642139614, 0.0007518293568864465], [-0.0013752428349107504, 0.0011742732021957636], [0.000760504393838346, 0.0004015882732346654], [-0.001377050532028079, 0.000858649262227118], [0.000745287979952991, 7.275980897247791e-05], [-0.0012959004379808903, 0.0005324947414919734], [-0.003552869660779834, 0.00047999166417866945], [-0.0009886303450912237, 0.00023985619191080332], [-0.0030798912048339844, 0.0002040746039710939], [0.0020380308851599693, 0.002149072475731373], [0.004552861209958792, 0.002088376320898533], [0.001988939242437482, 0.002082229359075427], [0.004529917147010565, 0.0019764245953410864], [0.0019340565195307136, 0.0019936594180762768], [-0.0002657528384588659, 0.0017942761769518256], [-0.0017278515733778477, 0.0013864226639270782], [-0.00031316117383539677, 0.0013187469448894262], [-0.0020637030247598886, 0.000918589299544692], [-0.0033115462865680456, 0.0007704842137172818], [-0.0023367544636130333, 0.00012074154801666737], [-0.003829509951174259, 0.00019309879280626774], [-0.0020602273289114237, -0.0003878220450133085], [0.0004965046537108719, -0.00022024009376764297], [-0.001727356924675405, -0.0004655481316149235], [0.0007076065521687269, -0.00014657550491392612], [-0.0013724630698561668, -0.00044523016549646854], [-0.0032849465496838093, -0.00031104753725230694], [-0.004507928155362606, -0.00014815782196819782], [-0.005198683124035597, -2.2264430299401283e-05], [-0.005591966677457094, 
8.734408766031265e-05], [-0.00584792485460639, 0.00019399984739720821], [-0.006065152585506439, 0.0003119467291980982]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]} -{"prev_actions": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "type": "SampleBatch", "obs": 
"BCJNGGhAgBAAAAAAAACtBQ8AAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEogA2GADEOACiIAREMvAESDhsAYwOMAAAANPwBBAIACDgAAOgABAABAKwBBAIAABgAACQCAlwCMRAACIYBAhAACCgAAAIAEQgsAgJ8AgBMABDYFQAJAgAAZgIxBgAGFAAPRgET8P////////////////9+TJ4SvzkjPr67EiU/VaBkPlVhEb94bk0+ncYjP+eu/b3D6QG/QyZBvrojEz+x0IA+2dIAv7BrTD6DKRM/3CbMvS/h4r4pYBY/b6wDP1DA5r6Hfqe+m+BHPqhXyT4kP5y9iAOJvuC/FT85eaw+jf7evtlnG767EXk/rZNfPmS0S7/riKg8yCIUPywXGD3YsNu+PIULPq/Wdz97nqW9KhVNv7MTnj4N8xI/hgWHvpOa476+P9k+BYQ5PhgQxb6BJsO9F0X3PrdjEz/dFuS+t6P4vk9DGT9LVTo+btwSv6dhEb5HQig/TmhYvgFlJL/6REk+5qooP0ztPz5M5Sa/mNBSvh3LNz95TVS+Ryk7v1IoBj7FTTg/l/AZv8V7QL8Eye0+dUEqPxBIfr/nDze/jr5LPy+sDT8XBxe/5vkevy3rxT4y8/8+kaV7vxz0GL+8cTk/d0jHPhWWr7/o9wO/zs2HPxGIYj4tfne/7u+/vlPyJz/6T+Q9GX4Qv++jm75Zc4M+4Ph1PczJJr70qJq+vnEPvvVIiT2Cag+/UdK8vlorWz55j4I8yqN0v6WVv75grRA/NiS/vVogDb/3J6O+ZOImPsg9E74ilHK/tU+qvgB0BD+RXIG+i+0Kv5/7kb6nDOw9ytyavizODb6kP52+aAiSvkcXlr4D5Qm/e//LvhIGlT3UZq++YPYHvuAC2741j6q+lk6qvgq2CL9z+Aa/rAixPBZgw74s8m6//LUQv3+cvj5Wlfq+CkcGv7/qCr/hcy29r5EJvzAK7r2tbhe/xYPpvuKHBr9q6AS/yy42v7NC1729oxK/DJNrv3xfRb8yvHY+dNktv7ieqL8sRUW/QYkVP74RWL/3lNq/Hgo2v10CbT/cjYi/8poFwO21F780JqI/c0usv9/9HMD2UNS+nbbNPwfq1r8RDALAzGw0vlg/mD9B+/i/EsDPv4ktF7yphkg/LUcJwHJYnL+IN7M9i77EPs3sEcCZTs2/f0TrPUVWQj+iyx3AvSyav1fzVD7GI7o+b4UlwIcGy79VvW0+AXI+P9pPMMA7w/q/hGulPiC6jz+22T3A83UUwDY09j7VDcA/nr9NwDxR9b9jsTQ/5Z6OP0UKWcCMkBHAm/JcP/+LwD86bGbA7Gvwv5pmiz9oDpE/d01vwKTyDsBf2p8/f3fEP54ZesCEviTAMTS9P5Zy9z+CL4PAYl05wGE64z8h2RRActOJwPKWTMANwQhAIGUtQF68kMDiOl7AuKgjQJYaRUC1iKC+/7mPPDlplT3nqTQ+YEqfvo8xp775d6c9DqgHPxn3s76PqaA8TH4ePkfQJD5gnrK+Iie7PtjmJD5gY1K+ID6bvofgmTwLoM49IY4dPgPsmb6TZbs+g8zYPbrWXL6gfIK+p2s2PyOXMz0uqxW/rs4pvrZbhz+mkq69SE10v+jvCL1WPzU/oFCQvqanGb+BBGQ
9BUG4PuAq1L4U+oG+DcjPPY1qNT9C0fe+D0Ikvy6MQz64abg+LMIfv7F+mr4Vv3I+2NBIPGjGM7/BCwY9aHl1Ppd/uj7COzi/5um7viSikj4Qvnw8yzFPv7KlD71dPJQ+oNC8Po25Vr88/eC+4nOsPve0OD+z5XC/A/5Xvw9L2z4FSYk/fMuOvxHJn7/jXRA/7ao4P+9Prr/ikm2/a9cnP39hvj5PJMa/74Qev0saND8ICTo/aJPWvwuqhb9RtEs/zzWKP59V8L9k/7u/FgedPoHUxb1BU7A+AndFPnndmD6ES4M+GyOyPulHMr6KZqs+LOwbP+dhkj5IZgq/2KTUPrzkdT+nDSI+/3tov7lKCj9B3xo/upoRvQQjC7/W0h4/5TqAPu5BKL7oxT6+ZvAnP8EIGz9Srmq+2G8Sv0N1PD90pXU/mPy5vq9idb9eVFw//MunP34cEb+WUqy/NLqDPxgw1D8BHFe/AQvevyjTnj/Nrv8/rnuXv8vUB8ATTL8/fPUUQGEvzL8QbiDAguS3vubUIb5Av1M/XM6LPvGWvL4+VgG/ZAtWP+zKJT98H9i+SftZvwvsaT/PsII//j0Fv/oKAb8do4c/4QMuP70DE7+CLCG+YECSP8ZzsD4/ZhW/xLwBvxX+lD+8DTs/OUMjv/9BI75etqA/qTrMPs24Jb/CIEA+zqWkP+MzkT39zRy/j0Qoviz4oD/0tu0+e28fv/AxPD5caqY/tf4KPqaqFr+LWi2+PEKkPy/kBz9WeBm/UB44PvY0qz9whk8+FtoQvypLBz+flao/5T78vail+b7TT2A/xYiiP9bM5b7Fv7q+0mecP90fkz/tqka/euJJvlUyyD99qng/GZONvw23ezwURvM/GBU8P3VCuL+ivYs+66oOQEwq4D5SZeO/oU8TP98P7z/dA6E9zlqzv5ruVD9ZfAxAsitbvq9C4b/s45A/SMggQB7DEr/ZrwfAb7i8P3jMCUBF9n+/07njv1wt4j/o9R1ABX6uv4E4CsC7RAZAsG4HQCOY5b+01uy/LjqzvgWIMr7DkcY+AyuDPvNCtb6dDCQ+TBvHPsGgw70ca6C+Sjj9PvaRpj5i4uS+HVlpvugKVD/G2kk+BM1Mvz/rx73irvs+D0F0OxCc5L6yt0i8Qk8fPk6b/73Gx8y9tx/nPM5nOL5wLEK+pLt3PmaQwjzY4CE+whFEvi3xBr6xMYQ9RKs2vnEwhr4GxlU+LQR3PWzBJD55PYq+CWIsvqfTzz0QlzS+fc+xvh52Lj4lsMc90f4nPoF0ub7+lFe+JW8OPgAcMr4W9eS+KNgAPlGxCj7PqCs+TMPwvj3FhL7kxDU+dVACP8JkEL/JESW/x9uHPim+LT6kgjq/J8WgvuqWnT6RKwM/0f5Uv/dQNb+L1co+zbhaP23xgL8rT42/jsoHP/TpmD+TjaC/JCrAv8LhNT8yDcQ/JDrJv+Uf87+Il28/U1qYP824+r9i4cy/s297vR+/Lr43q1U/75t9PvBzfL2ZLgO/V55TPyBqHz88J929fXsvvrMcYz+qfI8+vCTevTWTLT7AnGI/Qjx1vSEdgb2JnAI/ZUlSPwbAzL7tSeg8a6tZP6ArMj9k3D2/mMopPqVWAT/aJQI/h/S7vsiDgz7dtCQ+CDbHPuW7iTqYXpo+Y6g7vi58rD6E7Lo+23SZPlMCJT5EwbM+OrBMOzlRsD5UXQE/xkmZPuxut76l9N4+kk5ZP7cxOj6Vjji/Pq0SP6ZsmD9cNQW7iwWLv3m3QT8QF1g/OYuBvk9mO79xtWQ/0Xr/PpML4b5Sgca+O697P48vWD9sKRC/x45Gv0Q8jz+fzf8+VOhBv+xT4r5zmpo/aJ5YPyn3Y79pJFe/fNarPxBrAD9TPIy/isMEv7kItz/fyiE+KNuev2ocUr7HQ7w/NZc9vuwRqr/midE93py7P00eB79iFK6/YOXPPkkotT8CfDS+4wurvwBPdLzOs7Q/JW8zPtm1sb/H096
+pSy6P4dALb5M78G/KLAHvi7NuT9m2Ts+4A/Lv/gYD7/ZVr8/gi8JP4/B3b/lgHy/kLPKP/QlQz7w0fm/4UsyvxrfUT4oGwS+TrtKPWQ0qz65bVo+n1JkPg40Wj2sMj29Qg2JPtgeEz8GDCm8QWLavrt1vD4Z9Gw/fLoQvrPxTr8ZtwM/sMkSP+Qvrr5rMem+72UdP7zjbD9XAfS+zk5Zv4vbQj/I1hI/U8Iuv28NAr9helw/6jJtP3b6U79csGm/Q+uAP8iJoz8St4W/O++ov0domT8LC9A/mYeqv3hG3b+nmLc/YOmiP+xS2L+icrW/v+rLPd0Kir5DWR4/Ali3Pnxmvj2I+Kc9EAofPw8pZ7uRkAc+oLGKvthiDz+a8L8+xJkAPqBkpz1W4RA/YmQpPM3kKD4vN4u+i+cBP1eGxj7bwSE+GQcgvzQFBD/lN0M//07WPTGmir5jIBc/7zHKPh0AyD3qiao9cJ8ZP9D19jyaUww+GUKLvtSlCz/2E9I+afoEPld3IL+b4Q4/87lJP3zUmz2/zYq+KzkjP9pV1z7DCY09jncgvwz5Jj8LVU0/Typ0PLFGe79j/js/q1mXP1DZr73xyKq/1kBiPxb6xz/sGW++iH3Xv4XgjD9cjvg/xiTbvoKkqb90O7E/K4LLP2lgE7+zbni/X3DNP4lWoD9ZMy2/fWepv5DH4T9XJtQ/5QhTvywl1r8bAf8/S+IDQNdlgr+Yxai/9m4SQBhz3z80OJW/lK13v6F1IUBVSLk/k8gXvi5O170/6eI+FBuyPqHiDL6ZCuu+2zLoPqjpNz8yLDK+T/pPv1dgBz9HRIs/pNiDvhw46r59SCs/hww6P7uAlr4R0U+/LwA/PwxSjT/jWcG+tO3pvsqpYz8LMkA/mhLUvgsa0r0Icng/D6XOPvWvzr6Rv4A+VKZ9P6LTdD39M7G+kcfZvct8cz+bM+c+9xSsvuOJ7b683no/4GZXP31Uv75SK1K/xdOJP1GAnT8Z9Oq+twnuviLgnj9je2M/BFv+vg6sUr+ONqw/aZykP6IYFb9r+5a/yFHCP31t1z9kNje/88VSv1MY4T9ttq0/Vi1NvzYel78yUfg/Q/ThPw5Ub7/mWFO/aBwMQKSNuj+lqIK/PsvxvstjGECE5ZQ/AAAAAA==", "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, 
{}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "vf_preds": [-0.0003796578384935856, -0.002456505550071597, -7.810455281287432e-05, -0.0022041304036974907, -0.003872133791446686, -0.0016416260041296482, -0.003452520351856947, -0.004309226293116808, -0.0024389687459915876, -0.0033460655249655247, -0.001433951430954039, 0.0007769828662276268, -0.001127357012592256, 0.0009260236984118819, 0.002964747603982687, 0.0007195825455710292, 0.0027564240153878927, 0.004196952097117901, 0.0049581690691411495, 0.0037657320499420166, 0.004726911894977093, 0.005166159477084875, 0.004100305959582329, 0.0020867956336587667, -0.000682109734043479, 0.0017088409513235092, 0.003262245561927557, 0.0007282691076397896, 0.0024708444252610207, -0.0002800166548695415, -0.002959177829325199, -0.0007011548150330782, -0.0034132248256355524, -0.0011806667316704988, 0.0009989746613427997, -0.0022421632893383503, -0.004605888854712248, -0.002790383994579315, -0.0004800470778718591, 0.0012704904656857252, 0.0021878511179238558, 0.0023356557358056307, 0.0020151601638644934, -7.66341108828783e-05, -0.001855218317359686, -0.0030063772574067116, -0.0022491086274385452, -0.0029867803677916527, -0.0023802078794687986, -0.0018311773892492056, -0.001313419546931982, -0.00162122689653188, -0.001216343604028225, -0.0012807240709662437, -0.0009556326549500227, -0.0006751327309757471, -0.0003685473930090666, -2.4164444766938686e-05, 0.0003265442792326212, -0.0002451796317473054, 0.0016900580376386642, -0.00042991171358153224, -0.00251213600859046, -0.0003102478221990168, -0.0024427268654108047, -0.003874316345900297, -0.004523758310824633, -0.0033128135837614536, -0.001598500763066113, -0.00312067405320704, -0.0014630573568865657, 0.0008105661254376173, -0.001666256575845182, 0.0005583707243204117, -0.001888505881652236, -0.003350293030962348, -0.004190042149275541, -0.003138748463243246, -0.001549110864289105, -0.0031480544712394476, -0.004015617072582245, 0.0025813437532633543, 0.00023657505516894162, 
-0.0017688622465357184, -0.0028438731096684933, -0.0008201090968213975, 0.0011842836393043399, -0.0006312360055744648, -0.0019441907061263919, -0.002856617560610175, -0.0035659773275256157, -0.00412758206948638, -0.004464372061192989, 0.0006781811825931072, 0.0025189442094415426, 0.0035674022510647774, 0.002267917850986123, 0.00038188789039850235, 0.002304879715666175, 0.00045750057324767113, -0.0016583711840212345, 0.0008476129733026028, -0.0012669251300394535, 0.0012289746664464474, -0.0008647751528769732, -0.0026925511192530394, -0.003941033501178026, -0.0048150960355997086, -0.005407280288636684, -0.0055251759476959705, -0.005018196534365416, -0.0033981138840317726, -0.003355837892740965, -0.0036203498020768166, -0.0028505949303507805, -0.003282367717474699, -0.002267539966851473, 0.0003633172018453479, -0.0018094793194904923, -0.0035942939575761557, -0.0045354338362813, -0.0029659424908459187, -0.0007932413136586547, 0.0015795639483258128, -0.0008720650803297758, 0.0015142972115427256, -0.0009830995695665479, 0.001410903176292777, -0.0011297957971692085, 0.001263000420294702, -0.0013133694883435965, -0.0029958784580230713, -0.0012196344323456287, -0.002936138305813074, -0.0038840214256197214, -0.0044785188511013985, -0.0048728687688708305, -0.004016873426735401, 0.0016567980637773871, 0.003436808940023184, 0.001661231042817235, -0.0007169516757130623, -0.0028382749296724796, -0.004129359032958746, -0.0017163504380732775, 0.0012054480612277985, 0.003404902992770076, 0.0013871202245354652, -0.000757101341150701, -0.0020965083967894316, -0.0027364594861865044, -0.0009868127526715398, 0.0006588523974642158, -0.0008511590422131121, 0.0005403268150985241, -0.0009535744320601225, 0.000304623506963253, 0.001890731044113636, 0.0033780494704842567, 0.0043363431468605995, 0.0029387420509010553, 0.0009145862422883511, 0.002575400983914733, 0.0004518760833889246, -0.0014131104107946157, 0.00012336065992712975, 0.0027789396699517965, 0.0005735747399739921, 
-0.0014150141505524516, -0.002589239738881588, -0.0007624861318618059, -0.002107424894347787, -0.0005912386113777757, -0.0019928649999201298, -0.002935927826911211, -0.0035718048457056284, -0.0027459259144961834, 0.002836595755070448, 0.0005608622450381517, 0.0030119079165160656, 0.0007857187883928418, 0.0031605311669409275, 0.004337075166404247, 0.0030224351212382317, 0.0007966915145516396, 0.003187896218150854, 0.004350027535110712, 0.0030595283024013042, 0.0042669945396482944, 0.004784124903380871, 0.0050933887250721455, 0.005395932588726282, 0.0048818811774253845, 0.004006732255220413, 0.0045940750278532505, 0.004922960884869099, 0.004121568985283375, 0.0032988234888762236, 0.0016924175433814526, 0.003327952465042472, 0.004127899184823036, 0.0030603320337831974, 0.003966538701206446, 0.002864538226276636, 0.001139025203883648, -0.0010812655091285706, 0.001625096658244729, 0.003331562504172325, 0.004207889549434185, 0.0031830770894885063, 0.0041384510695934296, 0.004684389568865299, 0.003873857669532299, 0.004439413547515869, 0.003653423860669136, 0.002465489087626338], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "eps_id": [1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1281850274, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 1734102238, 405581355, 405581355, 405581355, 405581355, 405581355, 405581355, 405581355, 405581355, 405581355, 405581355, 405581355, 405581355, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 619148421, 1984411977, 
1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 1984411977, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 867641018, 250546282, 250546282, 250546282, 250546282, 250546282, 250546282, 250546282, 250546282, 250546282, 250546282, 250546282, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1887268485, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597, 1546656597], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "value_targets": [44.731651306152344, 44.17338562011719, 43.60948181152344, 43.039878845214844, 42.46452331542969, 41.883358001708984, 41.296321868896484, 40.70335388183594, 40.104400634765625, 
39.499393463134766, 38.888275146484375, 38.2709846496582, 37.6474609375, 37.017635345458984, 36.38145065307617, 35.73883819580078, 35.08973693847656, 34.434078216552734, 33.77179718017578, 33.10282516479492, 32.427093505859375, 31.74454116821289, 31.055091857910156, 30.358678817749023, 29.655229568481445, 28.944677352905273, 28.226945877075195, 27.50196647644043, 26.769662857055664, 26.02996253967285, 25.282791137695312, 24.5280704498291, 23.765727996826172, 22.995685577392578, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 
2.970099925994873, 1.9900000095367432, 1.0, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 24.5280704498291, 23.765727996826172, 22.995685577392578, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0], "new_obs": 
"BCJNGGhAgBAAAAAAAACtBQ8AAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEogA2GADEOACiIAREMvAESDhsAYwOMAAAANPwBBAIACDgAAOgABAABAKwBBAIAABgAACQCAlwCMRAACIYBAhAACCgAAAIAEQgsAgJ8AgBMABDYFQAJAgAAZgIxBgAGFAAPRgET8P////////////////9+VWERv3huTT6dxiM/5679vcPpAb9DJkG+uiMTP7HQgD7Z0gC/sGtMPoMpEz/cJsy9L+HivilgFj9vrAM/UMDmvod+p76b4Ec+qFfJPiQ/nL2IA4m+4L8VPzl5rD6N/t6+2WcbvrsReT+tk18+ZLRLv+uIqDzIIhQ/LBcYPdiw2748hQs+r9Z3P3uepb0qFU2/sxOePg3zEj+GBYe+k5rjvr4/2T4FhDk+GBDFvoEmw70XRfc+t2MTP90W5L63o/i+T0MZP0tVOj5u3BK/p2ERvkdCKD9OaFi+AWUkv/pEST7mqig/TO0/PkzlJr+Y0FK+Hcs3P3lNVL5HKTu/UigGPsVNOD+X8Bm/xXtAvwTJ7T51QSo/EEh+v+cPN7+Ovks/L6wNPxcHF7/m+R6/LevFPjLz/z6RpXu/HPQYv7xxOT93SMc+FZavv+j3A7/OzYc/EYhiPi1+d7/u77++U/InP/pP5D0ZfhC/76Obvllzgz7g+HU9zMkmvvSomr6+cQ++9UiJPYJqD79R0ry+WitbPnmPgjzKo3S/pZW/vmCtED82JL+9WiANv/cno75k4iY+yD0TviKUcr+1T6q+AHQEP5Fcgb6L7Qq/n/uRvqcM7D3K3Jq+LM4NvqQ/nb5oCJK+RxeWvgPlCb97/8u+EgaVPdRmr75g9ge+4ALbvjWPqr6WTqq+CrYIv3P4Br+sCLE8FmDDvizybr/8tRC/f5y+PlaV+r4KRwa/v+oKv+FzLb2vkQm/MAruva1uF7/Fg+m+4ocGv2roBL/LLja/s0LXvb2jEr8Mk2u/fF9FvzK8dj502S2/uJ6ovyxFRb9BiRU/vhFYv/eU2r8eCja/XQJtP9yNiL/ymgXA7bUXvzQmoj9zS6y/3/0cwPZQ1L6dts0/B+rWvxEMAsDMbDS+WD+YP0H7+L8SwM+/iS0XvKmGSD8tRwnAclicv4g3sz2LvsQ+zewRwJlOzb9/ROs9RVZCP6LLHcC9LJq/V/NUPsYjuj5vhSXAhwbLv1W9bT4Bcj4/2k8wwDvD+r+Ea6U+ILqPP7bZPcDzdRTANjT2PtUNwD+ev03APFH1v2OxND/lno4/RQpZwIyQEcCb8lw//4vAPzpsZsDsa/C/mmaLP2gOkT93TW/ApPIOwF/anz9/d8Q/nhl6wIS+JMAxNL0/lnL3P4Ivg8BiXTnAYTrjPyHZFEBy04nA8pZMwA3BCEAgZS1AXryQwOI6XsC4qCNAlhpFQJWvl8D+I27A4r9BQKuuW0BgSp++jzGnvvl3pz0OqAc/Gfezvo+poDxMfh4+R9AkPmCesr4iJ7s+2OYkPmBjUr4gPpu+h+CZPAugzj0hjh0+A+yZvpNluz6DzNg9utZcvqB8gr6nazY/I5czPS6rFb+uzim+tluHP6aSrr1ITXS/6O8IvVY/NT+gUJC+pqcZv4EEZD0FQbg+4CrUvhT6gb4NyM8
9jWo1P0LR974PQiS/LoxDPrhpuD4swh+/sX6avhW/cj7Y0Eg8aMYzv8ELBj1oeXU+l3+6PsI7OL/m6bu+JKKSPhC+fDzLMU+/sqUPvV08lD6g0Lw+jblWvzz94L7ic6w+97Q4P7PlcL8D/le/D0vbPgVJiT98y46/Ecmfv+NdED/tqjg/70+uv+KSbb9r1yc/f2G+Pk8kxr/vhB6/Sxo0PwgJOj9ok9a/C6qFv1G0Sz/PNYo/n1Xwv2T/u7/skm4/UfK2P/KICcCKDfK/ed2YPoRLgz4bI7I+6Ucyvopmqz4s7Bs/52GSPkhmCr/YpNQ+vOR1P6cNIj7/e2i/uUoKP0HfGj+6mhG9BCMLv9bSHj/lOoA+7kEovujFPr5m8Cc/wQgbP1Kuar7YbxK/Q3U8P3SldT+Y/Lm+r2J1v15UXD/8y6c/fhwRv5ZSrL80uoM/GDDUPwEcV78BC96/KNOeP82u/z+ue5e/y9QHwBNMvz989RRAYS/MvxBuIMAT4uQ/jD8pQGOdBMBVmDjA8Za8vj5WAb9kC1Y/7MolP3wf2L5J+1m/C+xpP8+wgj/+PQW/+goBvx2jhz/hAy4/vQMTv4IsIb5gQJI/xnOwPj9mFb/EvAG/Ff6UP7wNOz85QyO//0Ejvl62oD+pOsw+zbglv8IgQD7OpaQ/4zORPf3NHL+PRCi+LPigP/S27T57bx+/8DE8Plxqpj+1/go+pqoWv4taLb48QqQ/L+QHP1Z4Gb9QHjg+9jSrP3CGTz4W2hC/KksHP5+Vqj/lPvy9qKX5vtNPYD/FiKI/1szlvsW/ur7SZ5w/3R+TP+2qRr964km+VTLIP32qeD8Zk42/Dbd7PBRG8z8YFTw/dUK4v6K9iz7rqg5ATCrgPlJl47+hTxM/3w/vP90DoT3OWrO/mu5UP1l8DECyK1u+r0Lhv+zjkD9IyCBAHsMSv9mvB8BvuLw/eMwJQEX2f7/TueO/XC3iP+j1HUAFfq6/gTgKwLtEBkCwbgdAI5jlv7TW7L/5ChhAkQnjP3NNCsApKsi/80K1vp0MJD5MG8c+waDDvRxroL5KOP0+9pGmPmLi5L4dWWm+6ApUP8baST4EzUy/P+vHveKu+z4PQXQ7EJzkvrK3SLxCTx8+Tpv/vcbHzL23H+c8zmc4vnAsQr6ku3c+ZpDCPNjgIT7CEUS+LfEGvrExhD1Eqza+cTCGvgbGVT4tBHc9bMEkPnk9ir4JYiy+p9PPPRCXNL59z7G+HnYuPiWwxz3R/ic+gXS5vv6UV74lbw4+ABwyvhb15L4o2AA+UbEKPs+oKz5Mw/C+PcWEvuTENT51UAI/wmQQv8kRJb/H24c+Kb4tPqSCOr8nxaC+6padPpErAz/R/lS/91A1v4vVyj7NuFo/bfGAvytPjb+Oygc/9OmYP5ONoL8kKsC/wuE1PzINxD8kOsm/5R/zv4iXbz9TWpg/zbj6v2LhzL9Mto4//y9aP5goEsCSKam/8HN8vZkuA79XnlM/IGofPzwn3b19ey++sxxjP6p8jz68JN69NZMtPsCcYj9CPHW9IR2BvYmcAj9lSVI/BsDMvu1J6Dxrq1k/oCsyP2TcPb+Yyik+pVYBP9olAj+H9Lu+yIODPt20JD4INsc+5buJOphemj5jqDu+LnysPoTsuj7bdJk+UwIlPkTBsz46sEw7OVGwPlRdAT/GSZk+7G63vqX03j6STlk/tzE6PpWOOL8+rRI/pmyYP1w1BbuLBYu/ebdBPxAXWD85i4G+T2Y7v3G1ZD/Rev8+kwvhvlKBxr47r3s/jy9YP2wpEL/Hjka/RDyPP5/N/z5U6EG/7FPivnOamj9onlg/Kfdjv2kkV7981qs/EGsAP1M8jL+KwwS/uQi3P9/KIT4o256/ahxSvsdDvD81lz2+7BGqv+aJ0T3enLs/TR4Hv2IUrr9g5c8+SSi1PwJ8NL7jC6u/AE90vM6ztD8lbzM+2bWxv8fT3r6lLLo/h0Atvkzvwb8osAe
+Ls25P2bZOz7gD8u/+BgPv9lWvz+CLwk/j8Hdv+WAfL+Qs8o/9CVDPvDR+b/hSzK/Ji7QP32ZHb7GaQfASnHVvrltWj6fUmQ+DjRaPawyPb1CDYk+2B4TPwYMKbxBYtq+u3W8Phn0bD98uhC+s/FOvxm3Az+wyRI/5C+uvmsx6b7vZR0/vONsP1cB9L7OTlm/i9tCP8jWEj9Twi6/bw0Cv2F6XD/qMm0/dvpTv1ywab9D64A/yImjPxK3hb8776i/R2iZPwsL0D+Zh6q/eEbdv6eYtz9g6aI/7FLYv6Jytb8EqM8/enVsP+eF/r+J44+/fGa+PYj4pz0QCh8/Dylnu5GQBz6gsYq+2GIPP5rwvz7EmQA+oGSnPVbhED9iZCk8zeQoPi83i76L5wE/V4bGPtvBIT4ZByC/NAUEP+U3Qz//TtY9MaaKvmMgFz/vMco+HQDIPeqJqj1wnxk/0PX2PJpTDD4ZQou+1KULP/YT0j5p+gQ+V3cgv5vhDj/zuUk/fNSbPb/Nir4rOSM/2lXXPsMJjT2OdyC/DPkmPwtVTT9PKnQ8sUZ7v2P+Oz+rWZc/UNmvvfHIqr/WQGI/FvrHP+wZb76Ifde/heCMP1yO+D/GJNu+gqSpv3Q7sT8rgss/aWATv7NueL9fcM0/iVagP1kzLb99Z6m/kMfhP1cm1D/lCFO/LCXWvxsB/z9L4gNA12WCv5jFqL/2bhJAGHPfPzQ4lb+UrXe/oXUhQFVIuT8KBKK/NVkev/bILEBf05Q/oeIMvpkK677bMug+qOk3PzIsMr5P+k+/V2AHP0dEiz+k2IO+HDjqvn1IKz+HDDo/u4CWvhHRT78vAD8/DFKNP+NZwb607em+yqljPwsyQD+aEtS+CxrSvQhyeD8Ppc4+9a/OvpG/gD5Upn0/otN0Pf0zsb6Rx9m9y3xzP5sz5z73FKy+44ntvrzeej/gZlc/fVS/vlIrUr/F04k/UYCdPxn06r63Ce6+IuCeP2N7Yz8EW/6+DqxSv442rD9pnKQ/ohgVv2v7lr/IUcI/fW3XP2Q2N7/zxVK/UxjhP222rT9WLU2/Nh6XvzJR+D9D9OE/DlRvv+ZYU79oHAxApI26P6Wogr8+y/G+y2MYQITllD8rnIe/XY71vbouIUC0LGE/AAAAAA==", "actions": [1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1], "advantages": [44.732032775878906, 44.17584228515625, 43.60955810546875, 43.042083740234375, 42.4683952331543, 41.8849983215332, 41.299774169921875, 40.707664489746094, 
40.10683822631836, 39.50273895263672, 38.88970947265625, 38.27021026611328, 37.64858627319336, 37.0167121887207, 36.37848663330078, 35.738121032714844, 35.086978912353516, 34.42988204956055, 33.76683807373047, 33.09906005859375, 32.422367095947266, 31.7393741607666, 31.05099105834961, 30.356592178344727, 29.655912399291992, 28.942968368530273, 28.223684310913086, 27.501237869262695, 26.767192840576172, 26.030242919921875, 25.285749435424805, 24.528772354125977, 23.769142150878906, 22.99686622619629, 22.21686553955078, 21.43442726135254, 20.643177032470703, 19.839731216430664, 19.027692794799805, 18.208036422729492, 17.380949020385742, 16.546287536621094, 15.703665733337402, 14.854299545288086, 13.99601936340332, 13.128424644470215, 12.250146865844727, 11.364500045776367, 10.468554496765137, 9.563623428344727, 8.649588584899902, 7.727151870727539, 6.794681549072266, 5.853265762329102, 4.901950836181641, 3.9410741329193115, 2.970468521118164, 1.990024209022522, 0.9996734261512756, 19.837186813354492, 19.025522232055664, 18.209735870361328, 17.385650634765625, 16.548934936523438, 15.708123207092285, 14.858097076416016, 13.998688697814941, 13.128731727600098, 12.249496459960938, 11.364633560180664, 10.46763801574707, 9.560981750488281, 8.649941444396973, 7.724972248077393, 6.795353889465332, 5.855335235595703, 4.905185222625732, 3.943537712097168, 2.971649169921875, 1.9931480884552002, 1.004015564918518, 11.358931541442871, 10.465937614440918, 9.56356143951416, 8.651119232177734, 7.726350784301758, 6.792281150817871, 5.852616310119629, 4.902939319610596, 3.943255662918091, 2.973665952682495, 1.994127631187439, 1.0044643878936768, 21.431507110595703, 20.63605308532715, 19.8333740234375, 19.024946212768555, 18.208925247192383, 17.38083267211914, 16.548166275024414, 15.7073392868042, 14.853375434875488, 13.995431900024414, 13.124189376831055, 12.248762130737305, 11.364205360412598, 10.470115661621094, 9.566607475280762, 8.653682708740234, 7.731055736541748, 
6.798483371734619, 5.855383396148682, 4.904350757598877, 3.944019317626953, 2.9729506969451904, 1.9932823181152344, 1.002267599105835, 19.0268497467041, 18.21111488342285, 17.38673210144043, 16.553159713745117, 15.708646774291992, 14.855015754699707, 13.992585182189941, 13.126290321350098, 12.246383666992188, 11.362496376037598, 10.464763641357422, 9.562922477722168, 8.647012710571289, 7.72684383392334, 6.79646110534668, 5.853204727172852, 4.903931140899658, 3.9442830085754395, 2.974578619003296, 1.9948729276657104, 1.0040168762207031, 24.52641487121582, 23.762290954589844, 22.9940242767334, 22.21858024597168, 21.43502426147461, 20.6427001953125, 19.83865737915039, 19.02600860595703, 18.205902099609375, 17.381750106811523, 16.549381256103516, 15.70777702331543, 14.856959342956543, 13.99515151977539, 13.124759674072266, 12.248748779296875, 11.36097240447998, 10.467127799987793, 9.561488151550293, 8.646384239196777, 7.7221527099609375, 6.78912878036499, 5.849046230316162, 4.90008020401001, 3.937823534011841, 2.9696481227874756, 1.9914131164550781, 0.9998766183853149, 10.463396072387695, 9.561219215393066, 8.649690628051758, 7.728119850158691, 6.794227600097656, 5.854092597961426, 4.901586055755615, 3.942391872406006, 2.9730358123779297, 1.9935717582702637, 1.0027458667755127, 19.024375915527344, 18.2087459564209, 17.380125045776367, 16.54783821105957, 15.702520370483398, 14.849885940551758, 13.991142272949219, 13.124622344970703, 12.244709968566895, 11.357162475585938, 10.463114738464355, 9.557525634765625, 8.6434907913208, 7.720437049865723, 6.788069248199463, 5.847103118896484, 4.896988391876221, 3.935804843902588, 2.965177059173584, 1.9858784675598145, 0.996701180934906, 16.546932220458984, 15.702352523803711, 14.85009479522705, 13.991104125976562, 13.121452331542969, 12.245033264160156, 11.360373497009277, 10.467255592346191, 9.56016731262207, 8.644943237304688, 7.721322536468506, 6.790282249450684, 5.847846508026123, 4.896310806274414, 3.9365251064300537, 
2.965660572052002, 1.9863466024398804, 0.9975345134735107], "logits": [[0.0012965014902874827, 0.0002976544201374054], [-0.0004843074711970985, 0.00048058549873530865], [0.0014425171539187431, 0.0001715684775263071], [-0.0005660843453370035, 0.00036323629319667816], [-0.0018452053191140294, 0.00040441949386149645], [-0.000809538527391851, 0.00012495939154177904], [-0.002545792842283845, 0.00016267423052340746], [-0.003961142152547836, 0.0002815534535329789], [-0.003575292881578207, -0.0004087970592081547], [-0.005176819860935211, -2.0060979295521975e-05], [-0.003926202654838562, -0.0006615647580474615], [-0.0011513752397149801, -0.0008321347995661199], [-0.003789752023294568, -0.00044307182542979717], [-0.0010443609207868576, -0.0005245319334790111], [0.0005548080080188811, -0.0005338710034266114], [-0.0011018383083865047, -0.00020585069432854652], [0.0004390317481011152, -0.00031700124964118004], [0.001045720069669187, -0.00033237109892070293], [0.0015868854243308306, -0.00027427251916378736], [0.0015503682661801577, -0.00017832464072853327], [0.002255434403195977, -0.0002107176696881652], [0.0031027260702103376, -0.0003426398616284132], [0.0034386005718261003, 1.5479978173971176e-05], [0.0027383065316826105, 0.0005604266189038754], [0.000474379223305732, 0.0008314098231494427], [0.0025319054257124662, 0.000482234638184309], [0.0036493963561952114, 0.00012327751028351486], [0.00257390015758574, 0.0007594252238050103], [0.0038392897695302963, 0.0002901744155678898], [0.00242556887678802, 0.0009884107857942581], [-0.0001546032726764679, 0.0009440208086743951], [0.002123122336342931, 0.000747424375731498], [-0.0003184748929925263, 0.0005993768572807312], [0.0018496739212423563, 0.00046953780110925436], [0.0031561239156872034, 0.00015625683590769768], [0.001562154502607882, 0.0006097214063629508], [-0.0006344455759972334, 0.00013418891467154026], [0.001373047474771738, 0.00021405378356575966], [0.002671424765139818, 9.263190440833569e-05], [0.003420306136831641, 
-0.00020083622075617313], [0.003934687934815884, -0.00043653673492372036], [0.004412542097270489, -0.0004550088196992874], [0.004864140413701534, -0.0002698437310755253], [0.004377185367047787, 0.0009765578433871269], [0.0026024833787232637, 0.0016803368926048279], [0.00022990722209215164, 0.0019071630667895079], [0.001560614095069468, 0.0018267505802214146], [-0.0006000853609293699, 0.0018405662849545479], [0.0006546038202941418, 0.0018357753288000822], [0.0017550144111737609, 0.0017695724964141846], [0.002555674873292446, 0.001728384755551815], [0.00038441724609583616, 0.0014803465455770493], [0.0013962103985249996, 0.001342053059488535], [-0.0004660829436033964, 0.001150897005572915], [0.0006058998988009989, 0.0009448796045035124], [0.0016571648884564638, 0.0006941256579011679], [0.002623572014272213, 0.00041404925286769867], [0.0035040469374507666, 0.00013064313679933548], [0.004280067048966885, -0.0001293544191867113], [-0.00022816166165284812, -0.0013697299873456359], [0.0023948224261403084, -0.0013926331885159016], [-0.00015994356363080442, -0.0011068800231441855], [-0.002314866054803133, -0.0007802143227308989], [-0.00019652469200082123, -0.0011491009499877691], [-0.0024627200327813625, -0.0008391068549826741], [-0.003968102857470512, -0.00045200291788205504], [-0.0051105208694934845, -2.457934897392988e-06], [-0.004972095601260662, -0.0010019117034971714], [-0.0031455704011023045, -0.0017055622301995754], [-0.005213512107729912, -0.0010591743048280478], [-0.0030592055991292, -0.001610743347555399], [-0.0008092679781839252, -0.0018667534459382296], [-0.0030619394965469837, -0.0013503512600436807], [-0.000878474791534245, -0.0016843595076352358], [-0.0030367253348231316, -0.0011114581720903516], [-0.005065556149929762, -0.000600624771323055], [-0.006307575851678848, -0.00016289297491312027], [-0.004316988401114941, -0.00047897506738081574], [-0.002061999635770917, -0.0006443207967095077], [-0.0038326869253069162, -0.00032450014259666204], 
[-0.005184384994208813, -0.0002931891940534115], [0.0017252166289836168, 0.0010147152934223413], [-0.0006959029706194997, 0.0009111438412219286], [-0.0027242933865636587, 0.0008455775678157806], [-0.004229119047522545, 0.0009612806607037783], [-0.00324517535045743, 0.0004012659192085266], [-0.0008909080643206835, 8.230446837842464e-05], [-0.003359655849635601, 0.00040650367736816406], [-0.005101955961436033, 0.000781885115429759], [-0.006054887548089027, 0.0010764242615550756], [-0.0065054516308009624, 0.001267771702259779], [-0.006734901107847691, 0.001335951266810298], [-0.00693984841927886, 0.0012522423639893532], [0.0016419203020632267, 0.0009023171151056886], [0.003879566676914692, 0.0003752768971025944], [0.005617161281406879, -3.722170367836952e-05], [0.003465508809313178, 0.00025427667424082756], [0.0013300955761224031, 0.0006486024358309805], [0.003334798151627183, 8.312647696584463e-05], [0.0012486466439440846, 0.00048183707986027], [-7.257447578012943e-06, 0.0011019509984180331], [0.0013528225244954228, 0.0002991758519783616], [-2.3820844944566488e-05, 0.0009729514131322503], [0.0014403988607227802, 0.000163380173034966], [-4.294526297599077e-05, 0.0008797120535746217], [-0.000753451487980783, 0.001364413881674409], [-0.0011178510030731559, 0.0012201578356325626], [-0.0015444171149283648, 0.0008627488277852535], [-0.0022596775088459253, 0.0008058750536292791], [-0.0033209959510713816, 0.000986485742032528], [-0.004480984061956406, 0.0011563698062673211], [-0.005489215720444918, 0.0007078952621668577], [-0.006035325117409229, 0.0008641051826998591], [-0.006329120136797428, 0.000927868066355586], [-0.006398190278559923, 0.0003100468311458826], [-0.0065472787246108055, 0.0004537825006991625], [-0.005697893910109997, 0.0002937363460659981], [0.0015144178178161383, 8.234172128140926e-06], [-0.000671942310873419, 0.0003303820267319679], [-0.0022066531237214804, 0.0004783880431205034], [-0.0035537956282496452, 0.0006172758294269443], [-0.0032921971287578344, 
-0.00017975317314267159], [-0.0014062864938750863, -0.0007672312203794718], [0.0010142629034817219, -0.0009637313196435571], [-0.0015189088881015778, -0.0007476690807379782], [0.0008474384667351842, -0.0009647515835240483], [-0.0016413857229053974, -0.0007262215949594975], [0.0006547424709424376, -0.0009773042984306812], [-0.0017663117032498121, -0.0007006990490481257], [0.0004438023897819221, -0.0010017079766839743], [-0.0018821798730641603, -0.0006714549963362515], [-0.004209268372505903, -0.00028409421793185174], [-0.0018691839650273323, -0.0007582131074741483], [-0.004064375534653664, -0.00029329664539545774], [-0.005629011429846287, 5.912058986723423e-05], [-0.006474962458014488, 0.00033668207470327616], [-0.006881957873702049, 0.00047441304195672274], [-0.005406523123383522, -7.449544500559568e-05], [0.002077733166515827, 0.001916067791171372], [0.00426902761682868, 0.001456680241972208], [0.002033080905675888, 0.0017698974115774035], [0.00020969120669178665, 0.0019702408462762833], [-0.0010492026340216398, 0.0018220284255221486], [-0.0021535803098231554, 0.0015389189356938004], [-0.0017570375930517912, 0.0010443634819239378], [-1.9000028260052204e-05, 0.0006652420852333307], [0.002264512935653329, 0.0005526819732040167], [-1.4626013580709696e-05, 0.0005539753474295139], [-0.0021560771856456995, 0.0004782401956617832], [-0.0038315835408866405, 0.0006008673226460814], [-0.005072793923318386, 0.0008381507359445095], [-0.004258171655237675, 0.0002870431635528803], [-0.0019598929211497307, -2.2542662918567657e-05], [-0.003967150114476681, 0.00043834326788783073], [-0.0015857950784265995, 0.00029674102552235126], [-0.003495241980999708, 0.000700285891070962], [-0.0012717582285404205, 0.00062559824436903], [-3.8218568079173565e-05, 0.0005700704641640186], [0.00020879623480141163, 0.0004300391301512718], [3.342697164043784e-05, 0.00029799388721585274], [0.00023549562320113182, 0.0005834999028593302], [-0.00036195258144289255, 0.0010351315140724182], 
[0.00024324993137270212, 0.0006428898777812719], [-0.00046691589523106813, 0.0011640640441328287], [-0.0020157075487077236, 0.0015065546613186598], [-0.00037464540218934417, 0.0011886016000062227], [0.001433855970390141, -0.0007608237210661173], [-0.0010871038539335132, -0.0006067591602914035], [-0.003450144315138459, -0.0002596177509985864], [-0.005103083793073893, 0.0002553194062784314], [-0.003545298706740141, -0.00038490293081849813], [-0.005337544251233339, 0.00023888039868324995], [-0.0030990634113550186, -0.00011187884956598282], [-0.00494936341419816, 0.0004186874721199274], [-0.0060301548801362514, 0.0007550541777163744], [-0.006577006541192532, 0.0009054960682988167], [-0.0050524440594017506, 0.0006907191127538681], [0.002895893296226859, 0.001612960360944271], [0.0005759287159889936, 0.001687302254140377], [0.0029136580415070057, 0.0014221803285181522], [0.0005537784309126437, 0.0014982185093685985], [0.002915533259510994, 0.0012542096665129066], [0.004806260112673044, 0.0008531981147825718], [0.002905629575252533, 0.0013533963356167078], [0.0005577949341386557, 0.0014700866304337978], [0.002918464597314596, 0.0012048741336911917], [0.004847967065870762, 0.0008088504546321929], [0.002899998566135764, 0.001303751370869577], [0.004920503590255976, 0.0008896776125766337], [0.006118847988545895, 0.0003310288884676993], [0.006716056261211634, -0.00029290179372765124], [0.006982625927776098, -0.0008631169330328703], [0.006814014632254839, 0.00021161604672670364], [0.0052685486152768135, 0.0006249913130886853], [0.006311751902103424, 0.00040376436663791537], [0.006872469559311867, 0.00021896115504205227], [0.005353277083486319, 0.000529623357579112], [0.00357463164255023, 0.00027531664818525314], [0.0015234323218464851, -0.0002795680775307119], [0.0039343321695923805, -0.0005106256576254964], [0.005487929098308086, -0.0007548577850684524], [0.003915231209248304, -0.0002839798107743263], [0.005606237333267927, -0.0005634748958982527], [0.003694342914968729, 
-0.00018445809837430716], [0.0013794554397463799, 0.00027414283249527216], [-0.00023033120669424534, 0.0008964564185589552], [0.0015012831427156925, 0.00020370911806821823], [0.0037722778506577015, -0.00018518458819016814], [0.005491828080266714, -0.00038867839612066746], [0.003403106704354286, -0.00010363676119595766], [0.005160628817975521, -0.0002493042265996337], [0.006208741571754217, -0.0003871325170621276], [0.00443717697635293, -0.00012965570203959942], [0.00561926607042551, -8.17151740193367e-05], [0.003741077147424221, -8.144194725900888e-05], [0.0018088880460709333, -0.00017780030611902475]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 
18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]} -{"prev_actions": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "type": "SampleBatch", "obs": "BCJNGGhAwBAAAAAAAAAcIQ8AAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEokA2GADEOACiIAREMvAESDhsAYwOMAAAANPwBBAIACDgAAOgABAABAKwBBAIAABgAACQCAlwCMRAACIYBAhAACCgAAAIAEQgsAgJ8AgBMABDZFQAJAgAAZgIxBgAGFAAPRgET8f////////////////+OkRkIvheHMr7Kjus+P3VWPnqZCb5uaQi/CVnoPqEVFT+BDDy+Dv9jv6z+Aj/pRHQ/AL6PvtmVn7/wNyI/PrupPyv22b7Yzsy/LONRPxtp2T9mUx6/bKeev3kIiT9Wvaw/4F1Dv4rky7+GXKE/piXePwGYdL/pjfi/jSDCP7fGB0DF75i/gYPKv/U76z+dLOY/PU6xvwELnb+eYwZAeAu/P61uw79i4V+/jIoTQD+6mT8zvYE+FFpQvh2S0z43SWA+Ci55Pjk2D79UjtQ+TUwWP33cPT6a+E++Vd31PsJpbT7OfzM+eVMPvy0K+D5uXho/i/fvPUx2ar9nZg0//TB5P5kbNDwBmKK/YQ8vPy0OrD9dwRK+I5jPv+ILYT/Zo9s/pQyxvhgH/L+OtZE/oKwFQIasGL+ZAc6/Iha7P7qu4D9+dUy/84ygvy3o3D9eMbg/vuFzvz/sZr88dPc/43GRP019h790LQ2/u4MFQMoYWD+O6I6/8aNOvjn4C0B3TA8/WD2Qv273Fj5oPQ9A/LGPPlyGi78p7li+QXUPQMUANT/ZCo2/DjINPhg2FEDzoNw+zc0Lvw4c2b2Nlzc+bMgIPmOnC79+xey+Bj0xPpML/T7F1Be/RXxRvwa0aD5ToVo/1FEwv1m+677+/a4+HjX9PrNtPL9qDtO9XePKPnwHDz4JMDy/VhvsvndnyD7qbwI/sU9Ivy1l072qoeU+6awfPu0TSL/wmuy+LpfkPrlDBz+NOFS/tU/UvQK9AT/+iTQ+fwBUv/BA7b4rHQI/bjINP3UrYL81klK/KpESPxcUbT/prXi/bxiXv40UMz+xi6Y/B7uOvxAVUr8nsmM/J/h0P57gmr8Mney+tcCCP0ISHz9d0qC/jaVSv5xvjD/Kx4E/iOmsv+9Tl7+kep4/HBK0P88Uv79nBcW/NNi4P3tz5j+dOte/AjXyv/l42z9gcAxANTT1v6waxL/9HQNAXJXxP45FWL8MUby869SXvhM0ID4iNFa/WgbCvppplb5X9v4+u1Ngv8EKPL92L2u+C9BWP9WZdr+1GcC+OlDhvTmz7j4sRYC/9lQ7v0fBWL2YmlA/m1CLv3Yvi79ECIE9OwuVPxdlnL8ed7i/eOJzPr0Jwj+ncrO/N1flv1tS8j4cXO8/Z1/QvxzNCMAT6kQ/q
oEOQBMC87/eQeO/tDSQP1Fb8T/yjAfAkLgHwJR6tj9EvhBAdlIYwH9gHcDXzeQ/oNYoQOKmK8DEVAbA2H8NQPd8FUAk/jC/WfWTveB/jD4gjfw8dxAyvx1m2L7Kh4c+HZDFPgsOP79c15O9a76hPkcnKz25HkC/u7/YvjTFnT6kR8w+bh1Nv0AmRr+uKrk++2dBP2ABZr9R1I+/P+nzPsdvjj9FXoW/eVW8v+sQJz97Z7w/LZudvyg/j79gBmQ/OeKRP3XFr79X2US/kSeJP7N5UT/25ru/zP/WvoMrmT84OAE/LA7Cv/lTkr0uP6I/Z2JJPthLwr8Ms40+noqkP1jz3b1SsLy/e3Gfvd8qoD+UGZE+nhG9v6tZ3b7YW6Q/wIIsP1dgw7+5qam9OgexPxm7vD6lzcO/IVSIPnj9tj/yuYo9ZGq+v5IouL0GZbY/X2HuPjz9vr9P+IQ+hmi+P8YqKj7kwrm/vS7Hvc/nvz+6DhE/Rnu6vz+C6L4RA8o/F3x3P98Xwb9rMNS9SKDcP+5LLD/g4MG/7zx8PhnE6D+IxsU+X+e8v9J9GD+Tp+4/9jbVPco5sr8k3nE+PXvuPzS0BD9Qg62/F1P8vQYU9z/Rmm4/aLeuv7YI975kJARAKkisP+3OVz7Z2mu8ISCPPmk4pz3OH1g+IS++vng1kT6qDOM+oYopPk+YdLzwA7I+bnfKPaW+KT4w3r6+/Je1PjJ37T5Q4PU9JLGBvBoo2D6Tqvg9nPT1PR2/v74Drt0+hbn6PtvJlz1MZ4y8ETsBP0ZzGT42bpc9kBGuPsY1BT9Ukz++Iun0PTRdMj/ozvU+YPMFv1clWD5XvoY/Ya3EPuBGXL/NaLI+/hMxP2m7bT5Pb/u+pg7hPmu9qT4btBA+H4sCvhwH+D7AoDA/dLriPYh29b49ShM/PcGoPpymsTz0UfW93bYePwKA+bxdkf276SFyPm9NHj/t2Kg+EKezPFbd+r0muSk/94swPyfeAbxr3vW+yPRAP6QvqD5UBMe9Haz9vYJPTD/TgwS9jmkCvpeIaj7K0Es/aJmoPte3yr0H5Qq+qSxXPxbJMD++Pwa+U4//vvNZbj8uh4Y/VPNkvl8AXb+OpYg/Y1IwP/XXwL6ddgO/9yOUP5Cdpz4fg/G+TRkuviewmT/A3Qm9KH4Cv/S6LT5tU5k/awqpPtWX+74+pFe+oeOeP2mRMT/cPwm/VwoXv0dWqj/rx6k+HNAkv87mgL7e2K8/VSUyP0oAMr8hGSO/7Tm7Pzfvqj6whk+/oBWcvlatwD/57jI/uetev8FMMr8O+ss/u4msPk3gfr+d+b2+plzRP4+WyLzGfIi/Uu9Qvf7m0D+vYq8+7b6Kvz/V5770UdY/yZI1P115lb9zy1q/r4rhP5OdiT8Ioai/igChv/V58j+0Nrg/Gi3Ev1fa1L9igARA2GnmPyEP6L+2eATAtCQPvqirE77Pbdo+7GWBPTTeGr4pFV8+S3/YPj9gkL5GdfC972oUP45NuT6OhyC/v7wcvaPwcD9Zonk+P015v+SpsD0chhM/ATaLPR9EG78DLSk+tSRZPg7dQL05/nu+ulxLPvl4G75Uy9G9vRHhPTblPj7F8wO/oETKvZ9j7T5XwgM+uuVgv9n0FL3vLFE/dkfPPEvGnr/Ruq092P2VP+Zu/b2r1l+/38+HPpB3Tz+OfGi+eFWev4yJxT4BmJY/9ZXAvkt6zL+zRhE/R7zFP8YvEr+xL/q/XJFPP5Q99T+iw0+/OEvLv1Lljj9zTso/EsKAv03dnL8kxq4/OkyhP3y6k7/oeF2/lqzHP9Wqcz8Y0aC/VjqdvxzY2T9qBK4/6Lezv9tty78JgvQ/xljiPzh9lz6TOLW6k+gXvjyhuDy8EJk+drq5vlASHr7q3r4+ijqDPtxJOb8l+tC9pvU4P0XtKz7awYq/NqENPB1jiT+b5Qk9Sqy4v65XNj4ynLY/aPsVvk4Xir/nWdA+USqJP7Kzj76vdTe/T
6QTP4D7OT//6Ly+DAaKv05ZMD9yEo0/Md0AvyJ/N7/7IF0/SJ9EP7WDF78iZba+JJF7P2Ac4j7ybCK/JyB7O3f/hT/CWPY9UZ4hvwIwuj5KVoc/fA1LvuAbFb82Hjk/l+KBP/ooBL8Uzfm+zHmKP41Yaz+c4lW//PWxvjUtNz8nWEU/3Tjjvpnugb60nIk/VhMwP3v8Rb+3dem95nG3P9nADD+/fo2/8iOTPeP05D+6b7Y+nHy4vyGOmz5ZAbY/aKDZPfSziL9wfvo+143jPwhwp73k3bW/x4U4P+lNCEAlnqi+G5Djv/yHfz+Aex5AxzMjv+TrCMAkDqk/If4GQDDvgL/+j+a/6C/MP9cbHUC+8ai/ocILwC3l9D+twDJA0BnZv+xrJMAlnh2/uJDevCAE3r4Jtxo9OoMcv1z3wb7JY+G+VZ2+PtEoJ79g7Tq/IqvHvtbNND8Jiz2/ZRLAvgjdkL6Rf6w+6g1Iv0k2p7x3TnS+2zTqvAW4Rr/AP7++rTmDvqX1oD49KlG/6u45v7bpXL5ZCyg/R11nvwjSvb45iu+9q6qVPleucb9f3X28qtijvdubi71LJ3C/YuStPhCZ1b2Lv9q+1NBivw11e7xRNUK+Gxafvb9OYb+tYa4+1c1cvi7u4b72+FO/cN9vvMJbm74jbMO9A3ZSv6s6vb4JOqq+5/Z8PtS9XL/zBUi85EmbvreW+73DJVu/wkG8vjiIrL6IbmE+clllv2AAILxY7Z++XhMavgKsY7/aNbI+fpCzvsskBb+4JFa/kqg0P41f576L+2O/Ssc8v9cGiD91uR2/3rOhv0CSF79BtjQ/HAFYv8wlbr+VZfy+eyuIP53cgb+Qdai/v9Oxvh/NtT8KEqC/yTfav5TEHr68Oog/UqPGv2y0sr97EBC8Ouy1PwlF5r+nbOa/JMtGvsQdnL1xukM+pIQNPss0SL6P8t2+0y5LPiDe/j7mV3m+YFdKv7iriD4tcFs//Rytvsmb3b5IIss+uDQCPzOzxb5RYUq/wT7vPjEdYD/7Nfa+NMndvvyvGT8LxQg/I24Hv4FFnL3W4Cw/yThLPt3WB78iLN++hG4xP5KKEz+JQRS/EVKhvb5xRj/ynXk+kcAUvyjO4L5z9kw/NSMgPylGIb8mjUy/vxZkP+j0gD8lzzm/RArivgTnhT8/jy4/9WZGv1wvrb0ApJI/d9a5PvsVR79nRIs+o2uYP5TTRT2V4zu/o4a5vfdclz/lCOQ+bsg8v4z657416J4/L4BXP+6/Sb+KBcS9Df+uP0vCBz9gz0q/OruFPiNJuD//x2U+UP8/v9m/0r1777o/J2EhP1lOQb+yK4I+p03GP70LqD7ovTa/IXvivUkdyz9q9jw/PlA4v+1e875YrNg/j/qSP1YBRr8MM/G9OuXuP0D4Wj+Xz0e/gNR0PnGq/j/BIhI/SsU9v0J9Ar6ZHARAnpB+P7sNDwIAE1AAAAAAAAAAAAA=", "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, 
{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "vf_preds": [0.0010402757907286286, 0.0028413026593625546, 0.0037810872308909893, 0.00431163189932704, 0.004723266698420048, 0.003950132988393307, 0.004496464040130377, 0.0048917848616838455, 0.003922652918845415, 0.0032191474456340075, 0.002353084972128272, 0.002578997053205967, 0.004008057992905378, 0.0024400288239121437, 0.003913480788469315, 0.004568010102957487, 0.0048880549147725105, 0.005158786196261644, 0.005476135760545731, 0.004934339784085751, 0.004030210431665182, 0.002991861430928111, 0.0015638573095202446, -0.00026057870127260685, -0.0020338022150099277, 0.0003945217467844486, -0.0014081455301493406, -0.0012248163111507893, 0.0006284500705078244, 0.0019570651929825544, 0.00027051731012761593, -0.0015403867000713944, 0.00023643637541681528, -0.001533828442916274, 0.00023239629808813334, -0.0015009879134595394, 0.00025988102424889803, 0.0016160665545612574, 0.002564151305705309, 0.0014708093367516994, 0.00019068224355578423, 0.0015450208447873592, 0.0024216589517891407, 0.0029999862890690565, 0.0033652870915830135, 0.002799519570544362, -0.0025196727365255356, -0.0008931842166930437, 0.0005520045524463058, -0.001364842988550663, 5.223136395215988e-05, 0.0011318428441882133, 0.0018865184392780066, 0.002468251157552004, 0.00297921197488904, 0.002053499221801758, 0.002467946382239461, 0.002693531569093466, 0.001991966972127557, -0.0020508593879640102, -0.0003327802987769246, -0.0020924946293234825, -0.0004035376477986574, 0.0010247663594782352, 0.0020461201202124357, 0.002782409777864814, 0.001758446218445897, 0.0007419304456561804, -0.0005510586779564619, -0.002104424173012376, 
-0.0034554176963865757, -0.00173275382257998, 9.150570258498192e-05, -0.0014371366705745459, -0.0029199542477726936, -0.0010101005900651217, -0.0025701425038278103, -0.0005597651470452547, 0.0012485274346545339, -0.00021748477593064308, -0.0018897939007729292, -0.003185926005244255, -0.0012474372051656246, 0.0009707727003842592, 0.0026178727857768536, 0.0015407716855406761, 0.0034134353045374155, 0.0014960009139031172, 0.003395048901438713, 0.0014794055605307221, 0.0033944640308618546, 0.0014939424581825733, -0.0009294501505792141, -0.0027731815353035927, -0.0036802045069634914, -0.0014586849138140678, 0.0010445560328662395, -0.0007545860134996474, 0.0015223769005388021, 0.0033049716148525476, 0.0016561050433665514, -5.0942355301231146e-05, 0.001940088695846498, 0.003447389230132103, 0.0020018555223941803, 0.0003961410839110613, -0.0009032513480633497, 0.0006483832839876413, 0.0021297261118888855, 0.003402163740247488, 0.0020210673101246357, 0.0005266758380457759, 0.0018888151971623302, 0.0003898909781128168, 0.0017005500849336386, 0.00018559349700808525, 0.0014589494094252586, 0.0028310103807598352, 0.0011452778708189726, -0.00037928204983472824, -0.001420694519765675, -0.002065181266516447, -0.002464397344738245, 0.00012033700477331877, -0.002239314606413245, -0.0038707905914634466, -0.00457341130822897, -0.0030023190192878246, -0.000868042407091707, 0.001484881853684783, 0.0033179011661559343, 0.004264160990715027, 0.004611663985997438, 0.0034606782719492912, 0.004008416086435318, 0.0043799695558846, 0.004756149835884571, 0.004050952848047018, 0.00309291435405612, 0.002065784065052867, 0.0029899836517870426, 0.0035169932525604963, 0.0012502263998612761, 0.0032083033584058285, 0.0043652914464473724, 0.004818848334252834, 0.004931753035634756, 0.004045845475047827, 0.0028974541928619146, 0.0037152348086237907, 0.002567401621490717, 0.0009512153919786215, -0.0011202520690858364, -0.0030454841908067465, -0.004331763368099928, -0.005087487865239382, 
-0.003779731225222349, -0.004714089445769787, -0.005089610815048218, -0.005009075626730919, -0.003540596691891551, -0.0036344164982438087, -0.0037304959259927273, -0.004005604423582554, -0.0032616485841572285, -0.003687901422381401, -0.003927555400878191, -0.002448589541018009, -0.0004860227054450661, 0.0011287956731393933, -0.0012092490214854479, -0.003053155494853854, -0.0014438638463616371, 0.00013999163638800383, -0.0018741725943982601, -0.0033145032357424498, -0.004304771777242422, -0.0033891506027430296, -0.004432027228176594, -0.0034831983502954245, -0.0019638410303741693, -0.003646444994956255, -0.002206676173955202, -0.0038088811561465263, -0.004800473805516958, -0.005402465350925922, -0.0055862609297037125, -0.005500330124050379, -0.005580060184001923, -0.005614358466118574, -0.005475461948662996, -0.00561920553445816, 0.00015610601985827088, 0.0021242774091660976, 0.0032481946982443333, 0.0017174261156469584, 0.0029272502288222313, 0.001469368813559413, -0.00043509574607014656, 0.0015513841062784195, -0.0003064952325075865, 0.001667223870754242, 0.0029145691078156233, 0.0016162877436727285, -0.00016709254123270512, -0.0021058139391243458, 0.0003349096514284611, 0.0022347411140799522, 0.0005754542071372271, -0.0013534817844629288, 0.0011086654849350452, -0.0007979283109307289, 0.001636212575249374, 0.0032612252980470657, 0.0018361307447776198, 5.398457869887352e-05, 0.002341435756534338], "dones": [false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "eps_id": [865057916, 865057916, 865057916, 865057916, 865057916, 865057916, 865057916, 865057916, 865057916, 865057916, 865057916, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 953835551, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 750799161, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 722735339, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1499626744, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 
1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1689565956, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1727995426, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1767456340, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1378628212, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332, 1860485332], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "value_targets": [10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 22.995685577392578, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 31.74454116821289, 31.055091857910156, 30.358678817749023, 29.655229568481445, 28.944677352905273, 28.226945877075195, 27.50196647644043, 26.769662857055664, 26.02996253967285, 25.282791137695312, 24.5280704498291, 
23.765727996826172, 22.995685577392578, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 
14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0], "new_obs": "BCJNGGhAwBAAAAAAAAAcIQ8AAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEokA2GADEOACiIAREMvAESDhsAYwOMAAAANPwBBAIACDgAAOgABAABAKwBBAIAABgAACQCAlwCMRAACIYBAhAACCgAAAIAEQgsAgJ8AgBMABDZFQAJAgAAZgIxBgAGFAAPRgET8f////////////////+OepkJvm5pCL8JWeg+oRUVP4EMPL4O/2O/rP4CP+lEdD8Avo++2ZWfv/A3Ij8+u6k/K/bZvtjOzL8s41E/G2nZP2ZTHr9sp56/eQiJP1a9rD/gXUO/iuTLv4ZcoT+mJd4/AZh0v+mN+L+NIMI/t8YHQMXvmL+Bg8q/9TvrP50s5j89TrG/AQudv55jBkB4C78/rW7Dv2LhX7+MihNAP7qZP5Bgz78UHAa/HUAdQC2qaz8KLnk+OTYPv1SO1D5NTBY/fdw9Ppr4T75V3fU+wmltPs5/Mz55Uw+/LQr4Pm5eGj+L9+89THZqv2dmDT/9MHk/mRs0PAGYor9hDy8/LQ6sP13BEr4jmM+/4gthP9mj2z+lDLG+GAf8v461kT+grAVAhqwYv5kBzr8iFrs/uq7gP351TL/zjKC/LejcP14xuD++4XO/P+xmvzx09z/jcZE/TX2Hv3QtDb+7gwVAyhhYP47ojr/xo06+OfgLQHdMDz9YPZC/bvcWPmg9D0D8sY8+XIaLvynuWL5BdQ9AxQA1P9kKjb8OMg0+GDYUQPOg3D7WhIi/7ZdjvgPxFUBKSlw/Y6cLv37F7L4GPTE+kwv9PsXUF79FfFG/BrRoPlOhWj/UUTC/Wb7rvv79rj4eNf0+s208v2oO071d48o+fAcPPgkwPL9WG+y+d2fIPupvAj+xT0i/LWXTvaqh5T7prB8+7RNIv/Ca7L4ul+Q+uUMHP404VL+1T9S9Ar0BP/6JND5/AFS/8EDtvisdAj9uMg0/dStgvzWSUr8qkRI/FxRtP+mteL9vGJe/jRQzP7GLpj8Hu46/EBVSvyeyYz8n+HQ/nuCavwyd7L61wII/QhIfP13SoL+NpVK/nG+MP8rHgT+I6ay/71OXv6R6nj8cErQ/zxS/v2cFxb802Lg/e3PmP506178CNfK/+XjbP2BwDEA1NPW/rBrEv/0dA0BclfE/EVgGwJs28b/B8RRAPO4SQCI0Vr9aBsK+mmmVvlf2/j67U2C/wQo8v3Yva74L0FY/1Zl2v7UZwL46UOG9ObPuPixFgL/2VDu/R8FYvZiaUD+bUIu/di+Lv0QIgT07C5U/F2Wcvx53uL944nM+vQnCP6dys783V+W/W1LyPhxc7z9nX9C/HM0IwBPqRD+
qgQ5AEwLzv95B47+0NJA/UVvxP/KMB8CQuAfAlHq2P0S+EEB2UhjAf2AdwNfN5D+g1ihA4qYrwMRUBsDYfw1A93wVQM56O8DJdt+/O+kkQOmyA0B3EDK/HWbYvsqHhz4dkMU+Cw4/v1zXk71rvqE+RycrPbkeQL+7v9i+NMWdPqRHzD5uHU2/QCZGv64quT77Z0E/YAFmv1HUj78/6fM+x2+OP0Vehb95Vby/6xAnP3tnvD8tm52/KD+Pv2AGZD854pE/dcWvv1fZRL+RJ4k/s3lRP/bmu7/M/9a+gyuZPzg4AT8sDsK/+VOSvS4/oj9nYkk+2EvCvwyzjT6eiqQ/WPPdvVKwvL97cZ+93yqgP5QZkT6eEb2/q1ndvthbpD/Agiw/V2DDv7mpqb06B7E/Gbu8PqXNw78hVIg+eP22P/K5ij1kar6/kii4vQZltj9fYe4+PP2+v0/4hD6GaL4/xioqPuTCub+9Lse9z+e/P7oOET9Ge7q/P4LovhEDyj8XfHc/3xfBv2sw1L1IoNw/7kssP+Dgwb/vPHw+GcToP4jGxT5f57y/0n0YP5On7j/2NtU9yjmyvyTecT49e+4/NLQEP1CDrb8XU/y9BhT3P9Gabj9ot66/tgj3vmQkBEAqSKw/pMm1v2BZV7+79hBA1k/hP84fWD4hL76+eDWRPqoM4z6hiik+T5h0vPADsj5ud8o9pb4pPjDevr78l7U+MnftPlDg9T0ksYG8GijYPpOq+D2c9PU9Hb+/vgOu3T6Fufo+28mXPUxnjLwROwE/RnMZPjZulz2QEa4+xjUFP1STP74i6fQ9NF0yP+jO9T5g8wW/VyVYPle+hj9hrcQ+4EZcv81osj7+EzE/abttPk9v+76mDuE+a72pPhu0ED4fiwK+HAf4PsCgMD90uuI9iHb1vj1KEz89wag+nKaxPPRR9b3dth4/AoD5vF2R/bvpIXI+b00eP+3YqD4Qp7M8Vt36vSa5KT/3izA/J94BvGve9b7I9EA/pC+oPlQEx70drP29gk9MP9ODBL2OaQK+l4hqPsrQSz9omag+17fKvQflCr6pLFc/FskwP74/Br5Tj/++81luPy6Hhj9U82S+XwBdv46liD9jUjA/9dfAvp12A7/3I5Q/kJ2nPh+D8b5NGS6+J7CZP8DdCb0ofgK/9LotPm1TmT9rCqk+1Zf7vj6kV76h454/aZExP9w/Cb9XChe/R1aqP+vHqT4c0CS/zuaAvt7Yrz9VJTI/SgAyvyEZI7/tObs/N++qPrCGT7+gFZy+Vq3AP/nuMj+5616/wUwyvw76yz+7iaw+TeB+v535vb6mXNE/j5bIvMZ8iL9S71C9/ubQP69irz7tvoq/P9XnvvRR1j/JkjU/XXmVv3PLWr+viuE/k52JPwihqL+KAKG/9XnyP7Q2uD8aLcS/V9rUv2KABEDYaeY/IQ/ov7Z4BMAcehJA8P8JQOkTCsBhlR7ANN4avikVXz5Lf9g+P2CQvkZ18L3vahQ/jk25Po6HIL+/vBy9o/BwP1mieT4/TXm/5KmwPRyGEz8BNos9H0QbvwMtKT61JFk+Dt1AvTn+e766XEs++XgbvlTL0b29EeE9NuU+PsXzA7+gRMq9n2PtPlfCAz665WC/2fQUve8sUT92R888S8aev9G6rT3Y/ZU/5m79vavWX7/fz4c+kHdPP458aL54VZ6/jInFPgGYlj/1lcC+S3rMv7NGET9HvMU/xi8Sv7Ev+r9ckU8/lD31P6LDT784S8u/UuWOP3NOyj8SwoC/Td2cvyTGrj86TKE/fLqTv+h4Xb+WrMc/1apzPxjRoL9WOp2/HNjZP2oErj/ot7O/223LvwmC9D/GWOI/rlvMvxJhnb+rxQtAA7i9P7wQmT52urm+UBIevurevj6KOoM+3Ek5vyX60L2m9Tg/Re0rPtrBir82oQ08HWOJP5vlCT1KrLi/rlc2PjKctj9o+xW+TheKv+dZ0D5RKok/srOPvq91N79
PpBM/gPs5P//ovL4MBoq/TlkwP3ISjT8x3QC/In83v/sgXT9In0Q/tYMXvyJltr4kkXs/YBziPvJsIr8nIHs7d/+FP8JY9j1RniG/AjC6PkpWhz98DUu+4BsVvzYeOT+X4oE/+igEvxTN+b7MeYo/jVhrP5ziVb/89bG+NS03PydYRT/dOOO+me6BvrSciT9WEzA/e/xFv7d16b3mcbc/2cAMP79+jb/yI5M94/TkP7pvtj6cfLi/IY6bPlkBtj9ooNk99LOIv3B++j7XjeM/CHCnveTdtb/HhTg/6U0IQCWeqL4bkOO//Id/P4B7HkDHMyO/5OsIwCQOqT8h/gZAMO+Av/6P5r/oL8w/1xsdQL7xqL+hwgvALeX0P63AMkDQGdm/7GskwP12EUBOahtAA6cIwMZoEcA6gxy/XPfBvslj4b5Vnb4+0Sgnv2DtOr8iq8e+1s00PwmLPb9lEsC+CN2QvpF/rD7qDUi/STanvHdOdL7bNOq8BbhGv8A/v76tOYO+pfWgPj0qUb/q7jm/tulcvlkLKD9HXWe/CNK9vjmK772rqpU+V65xv1/dfbyq2KO925uLvUsncL9i5K0+EJnVvYu/2r7U0GK/DXV7vFE1Qr4bFp+9v05hv61hrj7VzVy+Lu7hvvb4U79w32+8wlubviNsw70DdlK/qzq9vgk6qr7n9nw+1L1cv/MFSLzkSZu+t5b7vcMlW7/CQby+OIisvohuYT5yWWW/YAAgvFjtn75eExq+Aqxjv9o1sj5+kLO+yyQFv7gkVr+SqDQ/jV/nvov7Y79Kxzy/1waIP3W5Hb/es6G/QJIXv0G2ND8cAVi/zCVuv5Vl/L57K4g/ndyBv5B1qL+/07G+H821PwoSoL/JN9q/lMQevrw6iD9So8a/bLSyv3sQELw67LU/CUXmv6ds5r/Qujw+oJSIPxMwB8AG6cG/yzRIvo/y3b7TLks+IN7+PuZXeb5gV0q/uKuIPi1wWz/9HK2+yZvdvkgiyz64NAI/M7PFvlFhSr/BPu8+MR1gP/s19r40yd2+/K8ZPwvFCD8jbge/gUWcvdbgLD/JOEs+3dYHvyIs376EbjE/kooTP4lBFL8RUqG9vnFGP/KdeT6RwBS/KM7gvnP2TD81IyA/KUYhvyaNTL+/FmQ/6PSAPyXPOb9ECuK+BOeFPz+PLj/1Zka/XC+tvQCkkj931rk++xVHv2dEiz6ja5g/lNNFPZXjO7+jhrm991yXP+UI5D5uyDy/jPrnvjXonj8vgFc/7r9Jv4oFxL0N/64/S8IHP2DPSr86u4U+I0m4P//HZT5Q/z+/2b/SvXvvuj8nYSE/WU5Bv7Irgj6nTcY/vQuoPui9Nr8he+K9SR3LP2r2PD8+UDi/7V7zvlis2D+P+pI/VgFGvwwz8b065e4/QPhaP5fPR7+A1HQ+car+P8EiEj9KxT2/Qn0CvpkcBECekH4/d+c/v4TX/L7QTw1AipC1P7sNDwIAE1AAAAAAAAAAAAA=", "actions": [0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 
1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0], "advantages": [10.465134620666504, 9.558951377868652, 8.64449405670166, 7.721219062805176, 6.7887420654296875, 5.848034858703613, 4.896498680114746, 3.935507297515869, 2.966177463531494, 1.9867808818817139, 0.9976469278335571, 14.851643562316895, 13.990156173706055, 13.122979164123535, 12.24398422241211, 11.356945037841797, 10.461286544799805, 9.556633949279785, 8.642799377441406, 7.7205963134765625, 6.789434909820557, 5.848993301391602, 4.899431228637695, 3.940659523010254, 2.9721338748931885, 1.98960542678833, 1.0014081001281738, 17.384363174438477, 16.54799461364746, 15.703723907470703, 14.853952407836914, 13.995704650878906, 13.125182151794434, 12.249431610107422, 11.36128044128418, 10.46767520904541, 9.561532974243164, 8.646658897399902, 7.722966194152832, 6.791994571685791, 5.851794242858887, 4.899449825286865, 3.9379773139953613, 2.967099905014038, 1.9866347312927246, 0.9972004890441895, 12.250417709350586, 11.362405776977539, 10.465622901916504, 9.563157081604004, 8.648222923278809, 7.724398612976074, 6.791578769683838, 5.849516868591309, 4.898015975952148, 3.938345432281494, 2.9676320552825928, 1.9873064756393433, 0.9980080127716064, 22.99773597717285, 22.218196868896484, 21.43427848815918, 20.638975143432617, 19.83591651916504, 19.02516746520996, 18.206523895263672, 17.381380081176758, 16.547882080078125, 15.706232070922852, 14.856327056884766, 13.99761962890625, 13.127151489257812, 12.247806549072266, 11.362950325012207, 10.469094276428223, 9.5628023147583, 8.650845527648926, 7.726090431213379, 6.792216777801514, 5.852202415466309, 4.9028849601745605, 3.94358491897583, 2.9713473320007324, 1.9890291690826416, 0.9973821043968201, 31.743000030517578, 31.051677703857422, 30.357181549072266, 29.651836395263672, 28.94319725036621, 28.223552703857422, 27.500473022460938, 26.770591735839844, 
26.03273582458496, 25.286470413208008, 24.529529571533203, 23.764684677124023, 22.996440887451172, 22.216341018676758, 21.42888069152832, 20.63691520690918, 19.836992263793945, 19.025272369384766, 18.20585823059082, 17.381135940551758, 16.548227310180664, 15.706583976745605, 14.853574752807617, 13.992034912109375, 13.122016906738281, 12.24587631225586, 11.36098575592041, 10.464285850524902, 9.561402320861816, 8.646574974060059, 7.725345134735107, 6.792006492614746, 5.849153995513916, 4.899849891662598, 3.9407782554626465, 2.9715206623077393, 1.992065191268921, 1.0024644136428833, 17.38301658630371, 16.55086326599121, 15.709551811218262, 14.858796119689941, 13.997166633605957, 13.126286506652832, 12.246413230895996, 11.358195304870605, 10.461910247802734, 9.557180404663086, 8.644814491271973, 7.721522331237793, 6.789085388183594, 5.84722900390625, 4.896944046020508, 3.9373061656951904, 2.968034267425537, 1.9870100021362305, 0.9964830279350281, 22.21661376953125, 21.428977966308594, 20.634206771850586, 19.832122802734375, 19.022281646728516, 18.20526123046875, 17.380239486694336, 16.54490852355957, 15.703113555908203, 14.853271484375, 13.995285034179688, 13.128463745117188, 12.252229690551758, 11.366600036621094, 10.469954490661621, 9.566506385803223, 8.653365135192871, 7.730539798736572, 6.797005653381348, 5.855619430541992, 4.904725551605225, 3.9444046020507812, 2.9733617305755615, 1.9936878681182861, 1.0039275884628296, 22.220312118530273, 21.43267250061035, 20.63744354248047, 19.838150024414062, 19.03026580810547, 18.210750579833984, 17.382997512817383, 16.550498962402344, 15.70899486541748, 14.858528137207031, 13.997553825378418, 13.129850387573242, 12.251380920410156, 11.363476753234863, 10.469820976257324, 9.56399917602539, 8.652084350585938, 7.730330944061279, 6.798867702484131, 5.857571125030518, 4.906495571136475, 3.945979118347168, 2.9757144451141357, 1.9954754114151, 1.0056191682815552, 22.217708587646484, 21.43006134033203, 20.63532257080078, 
19.835224151611328, 19.0242862701416, 18.207836151123047, 17.383573532104492, 16.547073364257812, 15.705986976623535, 14.852555274963379, 13.991250038146973, 13.123802185058594, 12.248064994812012, 11.363618850708008, 10.465839385986328, 9.559557914733887, 8.647699356079102, 7.726883888244629, 6.792356491088867, 5.852783203125, 4.899358749389648, 3.9371378421783447, 2.968263864517212, 1.9899460077285767, 0.9976585507392883], "logits": [[0.0018730228766798973, 0.0009729244629852474], [0.004290523938834667, 0.0005304508376866579], [0.005822303704917431, -8.563511073589325e-06], [0.0065772454254329205, -0.0005721268826164305], [0.00688926549628377, -0.001068859943188727], [0.0065701608546078205, -0.0003405327443033457], [0.007003806531429291, -0.0006968469824641943], [0.007172829937189817, -0.0008902919944375753], [0.006430396810173988, -0.0002710056724026799], [0.004749960266053677, -0.00024838722310960293], [0.0028631966561079025, -0.0005082008428871632], [0.0024546871427446604, 0.0016291897045448422], [0.004467564634978771, 0.001338011003099382], [0.002448911312967539, 0.0016789193032309413], [0.00455792061984539, 0.0013523153029382229], [0.005880667828023434, 0.0007826853543519974], [0.006609274074435234, 0.00010682063293643296], [0.006969980429857969, -0.0005599716678261757], [0.007116046268492937, -0.001089233672246337], [0.007192671298980713, -3.7299469113349915e-06], [0.005904919467866421, 0.0004678125260397792], [0.003908143378794193, 0.00032881961669772863], [0.0019682906568050385, 0.00015612272545695305], [0.000395042123273015, 0.0003587061073631048], [-0.0006332822376862168, 0.0010015442967414856], [0.000501684786286205, 0.00011358968913555145], [-0.0007283922750502825, 0.0007489296840503812], [0.00023449453874491155, -0.0004513093736022711], [0.002778196707367897, -0.0006150067783892155], [0.0046654315665364265, -0.0008510006591677666], [0.002615532139316201, -0.0005799501668661833], [0.0001858807518146932, -0.00041730329394340515], [0.002569330157712102, 
-0.0006569377146661282], [0.00019470968982204795, -0.0004983374383300543], [0.002531093778088689, -0.0007487924303859472], [0.0002172387612517923, -0.0005897250957787037], [0.002502661431208253, -0.0008545331656932831], [0.004545916803181171, -0.0010847768280655146], [0.0057451361790299416, -0.0012395051307976246], [0.004011859651654959, -0.0011830523144453764], [0.0017552671488374472, -0.001091185724362731], [0.0037293280474841595, -0.001328598940744996], [0.005202310159802437, -0.0014073278289288282], [0.0060671609826385975, -0.0013768323697149754], [0.0065592387691140175, -0.001289214240387082], [0.005443255882710218, -0.0012108120135962963], [-0.0014384391251951456, -0.0018338479567319155], [0.0008638137951493263, -0.0014135788660496473], [0.0028028879314661026, -0.0012260943185538054], [0.0007852106355130672, -0.0011659066658467054], [0.002830040641129017, -0.001064385985955596], [0.004244465846568346, -0.0010346046183258295], [0.00512362876906991, -0.0010448119137436152], [0.005640823859721422, -0.0010751979425549507], [0.0059416512958705425, -0.0010996763594448566], [0.005388484336435795, -0.0011436666827648878], [0.005870678462088108, -0.0011450338643044233], [0.006240514572709799, -0.0011289867106825113], [0.0054639894515275955, -0.0011683120392262936], [-0.00020260902238078415, -0.0001231918577104807], [0.002081765327602625, -0.00026197335682809353], [-0.00022459871252067387, -0.00020687002688646317], [0.002002319321036339, -0.00034354557283222675], [0.004093364346772432, -0.0006071897223591805], [0.005406734999269247, -0.0008500721305608749], [0.006087023764848709, -0.0010279153939336538], [0.004844765178859234, -0.000915795098990202], [0.0027020806446671486, -0.00089048664085567], [0.0008062279084697366, -0.0008833038154989481], [-0.00013326655607670546, -0.0008577411063015461], [-0.000287756382022053, -0.0007591643370687962], [-3.2169686164706945e-05, -0.0010591207537800074], [0.0012095090933144093, -0.0013527092523872852], [3.875477705150843e-05, 
-0.0011937639210373163], [-0.0003108378150500357, -0.000931259011849761], [0.00019219674868509173, -0.0013504556845873594], [-0.0003281153040006757, -0.0009759513195604086], [0.0003581605269573629, -0.0014726938679814339], [0.0019252991769462824, -0.0018325257115066051], [0.00037760892882943153, -0.0014783914666622877], [-0.00046160025522112846, -0.0009052669629454613], [-0.0007327127968892455, -0.0002469599712640047], [-0.0004867948591709137, -0.000941780861467123], [0.0007499993080273271, -0.001613187836483121], [0.0024832882918417454, -0.0018251861911267042], [0.0009466708870604634, 0.0008189014624804258], [0.0033114019315689802, 0.0007644185679964721], [0.0009961524046957493, 0.0009050462977029383], [0.003406105563044548, 0.0008215404814109206], [0.001049425802193582, 0.0009797858074307442], [0.0034836737904697657, 0.0008612973033450544], [0.0011033574119210243, 0.0010415890719741583], [-0.001027027377858758, 0.0009833571966737509], [-0.0025289624463766813, 0.0008729939581826329], [-0.003691976424306631, 0.0008855729247443378], [-0.0031818044371902943, 0.00020940182730555534], [-0.0012097869766876101, -0.00023717607837170362], [-0.0032945931889116764, -4.689372144639492e-05], [-0.0010387805523350835, -0.00042825518175959587], [0.0011822017841041088, -0.00024837697856128216], [-0.0009306938736699522, -0.00037051434628665447], [-0.003134520258754492, -0.00015935534611344337], [-0.0007076968322508037, -0.0004049665294587612], [0.001198719022795558, -0.00013522757217288017], [-0.000618883641436696, -0.00030523957684636116], [-0.0028359955176711082, -0.0001266193576157093], [-0.004488013684749603, 0.00020274310372769833], [-0.002339197089895606, -7.26750586181879e-05], [-0.0001368847442790866, -5.076639354228973e-06], [0.0009364394354633987, 0.00032356660813093185], [-0.00014665862545371056, 0.0001887008547782898], [-0.0020811730064451694, 0.0002950585912913084], [-7.673504296690226e-05, 0.00042102369479835033], [-0.0018745474517345428, 0.0005222680047154427], 
[-5.43894711881876e-05, 0.0006670746952295303], [-0.001723689492791891, 0.0007759619038552046], [-7.258518598973751e-05, 0.0009103806223720312], [0.0005098591791465878, 0.0010859600733965635], [-0.00021551619283854961, 0.0011452566832304], [-0.0018115341663360596, 0.0012953078839927912], [-0.003546935273334384, 0.0014529444742947817], [-0.004883089102804661, 0.0014682263135910034], [-0.005774348974227905, 0.0013568045105785131], [0.0014907346339896321, 0.0015578665770590305], [-0.0007653517532162368, 0.0014713607961311936], [-0.002366543747484684, 0.0013007603120058775], [-0.003713145386427641, 0.001240862300619483], [-0.0034172828309237957, 0.0006979813915677369], [-0.0015204919036477804, 0.000314503675326705], [0.0011439686641097069, 0.00017047167057171464], [0.0030923322774469852, 1.5190104022622108e-05], [0.004387352149933577, -0.00026632024673745036], [0.0053530000150203705, -0.0006270584417507052], [0.005420805886387825, 0.00013346524792723358], [0.006288986653089523, -0.0004335724515840411], [0.006717691197991371, -0.0009434530511498451], [0.0068822926841676235, -0.001327803824096918], [0.006959269288927317, -0.0005740826018154621], [0.00570941437035799, -0.00022743921726942062], [0.0035806368105113506, -0.00027035572566092014], [0.005126916337758303, -0.0003658935893326998], [0.006133435759693384, -0.00036129914224147797], [0.0001791306131053716, -0.00017632870003581047], [0.0022401921451091766, -0.00021118007134646177], [0.0036309794522821903, -0.00035505957202985883], [0.004744833335280418, -0.0006011928198859096], [0.0056321704760193825, -0.0009355255751870573], [0.006099154241383076, -0.00019576289923861623], [0.005151916295289993, 0.0004341143066994846], [0.006385073531419039, -0.00012535275891423225], [0.004765480291098356, 0.0002673313720151782], [0.0023514223285019398, 0.0004509204300120473], [0.0005410940502770245, 0.0008177442941814661], [-0.00034477480221539736, 0.0011276802979409695], [-0.0008267340017482638, 0.0010387285146862268], 
[-0.0014067612355574965, 0.0007909251726232469], [-0.001590193947777152, 0.0007848043460398912], [-0.002413867274299264, 0.0006903823232278228], [-0.0033839396201074123, 0.0007639545947313309], [-0.004420401994138956, 0.0009599422919563949], [-0.005210109520703554, 0.0004440263146534562], [-0.005864160135388374, 0.0007837438024580479], [-0.0062626139260828495, 0.0010228084865957499], [-0.006457595620304346, 0.001137642888352275], [-0.006636663340032101, 0.0005094942171126604], [-0.006737500429153442, 0.0006073480471968651], [-0.006867130286991596, 0.0006161138880997896], [-0.0013549183495342731, -0.001734818797558546], [0.0008527591126039624, -0.001293704379349947], [0.002636233577504754, -0.0010763669852167368], [0.0008098300895653665, -0.0008658908773213625], [-0.001448366791009903, -0.0010912157595157623], [0.0007299408316612244, -0.0007344468031078577], [0.00270130205899477, -0.0006571803241968155], [0.0005603816825896502, -0.0004899310879409313], [-0.0014999804552644491, -0.0007970498409122229], [-0.0027082550805062056, -0.001206044340506196], [-0.0015748456353321671, -0.0008810283616185188], [-0.0029522667173296213, -0.0013197860680520535], [-0.0016379189910367131, -0.000997259747236967], [0.00041452073492109776, -0.0005696914158761501], [-0.001697314903140068, -0.0009196987375617027], [0.00029204064048826694, -0.0004697160329669714], [-0.0017602278385311365, -0.0008637350983917713], [-0.0031306184828281403, -0.0013879360631108284], [-0.004182435106486082, -0.001428078394383192], [-0.005187492351979017, -0.0012193301226943731], [-0.005061762407422066, -0.0022804082836955786], [-0.00618367176502943, -0.0019740560092031956], [-0.006934159900993109, -0.0014630305813625455], [-0.006400479003787041, -0.002497370122000575], [-0.007128198631107807, -0.0019889066461473703], [0.0007321659941226244, -5.8733741752803326e-05], [0.003320499323308468, -0.000250167737249285], [0.005083104595541954, -0.0005428490112535655], [0.003390202997252345, -0.00012517988216131926], 
[0.0052947732619941235, -0.0004770170198753476], [0.003243877552449703, -0.00016551732551306486], [0.0008662375621497631, 0.00014888367149978876], [0.003220522776246071, -0.00030728604178875685], [0.0009116397122852504, 3.2458920031785965e-05], [0.0031840482261031866, -0.00044436962343752384], [0.005138873588293791, -0.0007126545533537865], [0.002855108119547367, -0.0005519271362572908], [0.0008333235164172947, -0.00015260325744748116], [-0.00026370229898020625, 0.0004362426698207855], [0.0009671932784840465, -0.0002972200745716691], [0.0029775025323033333, -0.0007742184679955244], [0.0009143694769591093, -0.0003410588251426816], [-0.00035636924440041184, 0.0004113074392080307], [0.0009912053355947137, -0.000400767894461751], [-0.0004375521675683558, 0.00040283927228301764], [0.0010157795622944832, -0.00039675284642726183], [0.0029118037782609463, -0.0006565258372575045], [0.0008138251141645014, -0.0003238313365727663], [-0.0007833621930330992, 0.00043069408275187016], [0.0007280568825080991, -0.00024035153910517693]], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0]} diff --git a/python/ray/rllib/test/data/cartpole_small/output-2019-02-03_20-27-20_worker-0_0.json b/python/ray/rllib/test/data/cartpole_small/output-2019-02-03_20-27-20_worker-0_0.json new file mode 100644 index 000000000..0030cb2d7 --- /dev/null +++ b/python/ray/rllib/test/data/cartpole_small/output-2019-02-03_20-27-20_worker-0_0.json @@ -0,0 +1,4 @@ +{"advantages": [19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0], "eps_id": [767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556, 767029556], "action_prob": [0.49987873435020447, 0.4997735917568207, 0.5001150369644165, 0.5004363656044006, 0.5005561709403992, 0.5005311965942383, 0.4995429217815399, 0.499515563249588, 0.49954167008399963, 0.49970272183418274, 0.5000479817390442, 0.4995552599430084, 0.5001161694526672, 0.5005202293395996, 0.4992202818393707, 0.49941039085388184, 0.4997551143169403, 0.49998918175697327, 0.5000606179237366, 0.5000531673431396, 0.49997803568840027, 0.49995389580726624], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "type": "SampleBatch", "value_targets": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "obs": 
"BCJNGGhAgAQAAAAAAAAg5QIAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEZYJEAMQ4AKIgBEQy8ARIOoQBjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAICXAIxEAAIhgECEAAIKAAAAgARCCwCAnwCAEwAEBYVAAkCAABmAjEGAAYUAAm1AA8CAAb//1I/TQc9XIHsPAL2WrxBCaI8s6oJPfoKKr7CelS8ic6dPnIg+DwJl+88ZPfeu0r0Ozwl6/w8VtplPr5y17tHE5G+9tgQPWfg1j6Ckki8Vf4Tv046Mz3Rbh0/ggDDvK/mX780m2U9/3ZPPzsmKb0PXJa/Xf+TPTSoHT+TsIS9ri1lv/84rT0B5Nc+uVupveejH79vfr49l0BpPpbmwr2EU7e+8NLHPVzSCz0ckdG9CVLGveI4yT3zNCO+gojVvZbGJT6lscI989IXPffmzr2VqSW+UDbEPf46IL5Zh9W9I3rHPY7NvT36LbO+B4rRvTP+tT71d689iTodvs/6wr0negQ97y2pPeRULz2rp8G97dyTvsnuqj0Q4XQ+53vNveAXHL9ZurQ9PfvePnx15r3GqG6/AZHGPePHIT98Uga+zwihv4xz4D3rF1Q/dBYgvu1dy79yMQE+Q7IiP1ygQL5Uoay/egEHYAAAAAAAAAAAAAA=", "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "new_obs": 
"BCJNGGhAgAQAAAAAAAAg5QIAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEZYJEAMQ4AKIgBEQy8ARIOoQBjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAICXAIxEAAIhgECEAAIKAAAAgARCCwCAnwCAEwAEBYVAAkCAABmAjEGAAYUAAm1AA8CAAb//1Kzqgk9+goqvsJ6VLyJzp0+ciD4PAmX7zxk9967SvQ7PCXr/DxW2mU+vnLXu0cTkb722BA9Z+DWPoKSSLxV/hO/TjozPdFuHT+CAMO8r+ZfvzSbZT3/dk8/OyYpvQ9clr9d/5M9NKgdP5OwhL2uLWW//zitPQHk1z65W6m956Mfv29+vj2XQGk+lubCvYRTt77w0sc9XNILPRyR0b0JUsa94jjJPfM0I76CiNW9lsYlPqWxwj3z0hc99+bOvZWpJb5QNsQ9/jogvlmH1b0jesc9js29Pfots74HitG9M/61PvV3rz2JOh2+z/rCvSd6BD3vLak95FQvPaunwb3t3JO+ye6qPRDhdD7ne8294Bccv1m6tD09+94+fHXmvcaobr8BkcY948chP3xSBr7PCKG/jHPgPesXVD90FiC+7V3Lv3IxAT5DsiI/XKBAvlShrL94NQ4+qhtVP0s/XL4xz9i/egEHYAAAAAAAAAAAAAA=", "prev_actions": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "actions": [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]} +{"advantages": [13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0], "eps_id": [312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639, 312569639], "action_prob": [0.4998626112937927, 0.5002043843269348, 0.49963468313217163, 0.49981942772865295, 0.4998198449611664, 0.5001484155654907, 0.5002940893173218, 0.4997011423110962, 0.5002346634864807, 0.5002323985099792, 0.4998157322406769, 0.5001403093338013, 0.49990716576576233, 
0.49997448921203613], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, true], "type": "SampleBatch", "value_targets": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "obs": "BCJNGGhAAAQAAAAAAABwYgIAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEZ4IUAMQ4AKIgBEQy8ARIOHABjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAICXAIxEAAIhgECEAAIKAAAAgARCCwCAnwCAEwAA3wABwIAAGYCMQYABhIAD0YBE/HR9EURvWhK+jyOCpA8wk1+OzbFDr2uxSi+T62QPHGnmj6qRRy9RGq4vpIqwjyPqhk/Tsc5vXlfKb6gQRI9WVmhPhBUR73MJO88exIsPfbyCj3b70S9kYAqvubZLj2t2q0+vpNSvWhzub74qko9aDglP84/cL0I2A6/0Yl/PU4HdD/K+o69AT66vlHQpj0tSC4/B+GdvRhWD7/ossI9iXp/PxXQtL3lk0G/UJPrPYzSqD8HydO9AQsQv5/MED5uPIg/B9XqvbZkQr/amCY+9eyyP7H3BL5yCxG/pTlDPpF/lD8LAQ8CAANQAAAAAAAAAAAA", "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "new_obs": 
"BCJNGGhAAAQAAAAAAABwYgIAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEZ4IUAMQ4AKIgBEQy8ARIOHABjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAICXAIxEAAIhgECEAAIKAAAAgARCCwCAnwCAEwAA3wABwIAAGYCMQYABhIAD0YBE/HRNsUOva7FKL5PrZA8caeaPqpFHL1Eari+kirCPI+qGT9Oxzm9eV8pvqBBEj1ZWaE+EFRHvcwk7zx7Eiw99vIKPdvvRL2RgCq+5tkuPa3arT6+k1K9aHO5vviqSj1oOCU/zj9wvQjYDr/RiX89Tgd0P8r6jr0BPrq+UdCmPS1ILj8H4Z29GFYPv+iywj2Jen8/FdC0veWTQb9Qk+s9jNKoPwfJ070BCxC/n8wQPm48iD8H1eq9tmRCv9qYJj717LI/sfcEvnILEb+lOUM+kX+UPzWSEL4yr7++I/xaPtbKbj8LAQ8CAANQAAAAAAAAAAAA", "prev_actions": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "actions": [0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]} +{"advantages": [41.296321868896484, 40.70335388183594, 40.104400634765625, 39.499393463134766, 38.888275146484375, 38.2709846496582, 37.6474609375, 37.017635345458984, 36.38145065307617, 35.73883819580078, 35.08973693847656, 34.434078216552734, 33.77179718017578, 33.10282516479492, 32.427093505859375, 31.74454116821289, 31.055091857910156, 30.358678817749023, 29.655229568481445, 28.944677352905273, 28.226945877075195, 27.50196647644043, 26.769662857055664, 26.02996253967285, 25.282791137695312, 24.5280704498291, 23.765727996826172, 22.995685577392578, 22.217864990234375, 21.432186126708984, 20.63857078552246, 19.83694076538086, 19.02721405029297, 18.209306716918945, 17.383136749267578, 16.54862403869629, 15.705680847167969, 14.854223251342773, 13.99416446685791, 13.125418663024902, 12.24789810180664, 11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 
3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0], "eps_id": [1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029, 1121492029], "action_prob": [0.4999432861804962, 0.49970677495002747, 0.49994567036628723, 0.49970558285713196, 0.4999447166919708, 0.4997081458568573, 0.49994054436683655, 0.4997144043445587, 0.5000669360160828, 0.49960795044898987, 0.500072181224823, 0.4996034502983093, 0.4999268054962158, 0.49972784519195557, 0.4999243915081024, 0.4997323453426361, 0.5000817775726318, 0.500409722328186, 0.49946051836013794, 0.4995920956134796, 0.5000770688056946, 0.4996024966239929, 0.5000657439231873, 0.49961769580841064, 0.5000503659248352, 0.49963781237602234, 0.500030517578125, 0.5003365874290466, 0.5004424452781677, 0.49958422780036926, 0.49961057305336, 0.4997643828392029, 0.5001019239425659, 0.5004988312721252, 0.500738799571991, 0.5007926821708679, 0.4992518424987793, 0.5008513927459717, 0.5007851719856262, 0.4993096590042114, 0.4992087185382843, 0.5008699297904968, 0.5007670521736145, 0.4993574619293213, 0.4992888867855072, 0.5007497668266296, 0.500627875328064, 0.49950355291366577, 0.49948444962501526, 0.499498575925827, 0.49960172176361084, 0.5001403093338013, 0.49976104497909546], "dones": [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true], "type": "SampleBatch", "value_targets": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "obs": "BCJNGGhAgAYAAAAAAACa2wQAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEoUAOGADEOACiIAREMvAESDhsAYwOMAAAANPwBBAIACDgAAOgABAABAKwBBAIAABgAACQCAlwCMRAACIYBAhAACCgAAAIAEQgsAgJ8AgBMABA1FQAJAgAAZgIxBgAGFAAPRgET8f///0h0hq67dp2wPBLe3Ltce6i8YGWgu9ugMb6UWOq7FjuKPgAKCbwGPrI8caTluqlfzLzN6AG8s38xvl6FE7sJzIg+gbU6vAe2sjztrUo7K7fWvIGPM7zjgjG+LlMoO8buiD46XWy8PAyyPNq3ATz8Fsi8BD5lvAyqMb7cbfM7I5+KPiMMj7wMQLA8s25SPDNxoLy8hYu8KKVdPsUDTDzbyp2+TR5QvHdJrTwgDs47pCA+vNcvSbxyX10+OXPGO6PGmr77WAK8P9erPCd9LDcCpPy7qfL2uy5TMr7S6Ba5ZuiRPrWJNLw+26s8zAu2O4gF/rvmqS28amcyvjT3sDuFx5I+wMBmvLCSqjzwazY8zseMuxXuX7wR9lw+igM1PH8+lr7xOBm8L03SPhS3qTvTJhW/JgmVuma5XD7zHNS72p2TvlYCUDujHag8+4dIvBUWGDtb6Go7NP9cPk/FR7w2oZa+J3IBPDP6qjw+FpS8w32wu/ZICDwMcF0+J/iUvNSAm74jJU88PDmvPPq6xrxX04m8bSdWPCoOXj6lfMm85VWivhmbjjxe8bQ8K2/9vHIJBL2HOZI829xePp1bAb3DRau+/OG1PHWO0z7wwhy9txEjv7CU+TxQ3Bs/kPFQvXsEcb9yqi49YzDUPtwIj72cTSq/tZ1QPa2iYT56SKq90mrKvriqYj3WENk8+3m6vdQcBb5p1mQ9MTkrvg7Nv73M0Ac+wSNXPZLEuL5OXrq92enJPqqTOT35+Q2/HjeqvbkyKD/0JAw9nJs/v7hNj73aI2w/36idPPJ
uDb/ZClO95swbP9tIBjzdMD+/ri8hvR52Yj8j4dy79P9wv+1vsbz2E5U/sXXRvGj6Pr8TH9Y6zoJdP8fXJb1aCA2/TCabPDq3Ej8r+VK97gw/vzQM+TyJJ18/Aw6Ivacacb/q7kM9dT6WP6Ohrr30UT+/cQuSPQx/ZT8cPs29TaINv5zDtj3ihCA/cOfjvYffP795ctA9H0lyP0hNAb6BInK/fjb3PbJ3oj80rBS+ZKBAv+uZFT5YI4I/MRUkvjo+D79fbCo+WvJFP9CKL76H7ru+UkI6PkWFCT87Dze+Hhczvr5CRT4QIJ0+LaQ6viiMvr60i0s+hFInPwAAAAB/Aw8CAA9QAAAAAAAAAAAA", "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "new_obs": 
"BCJNGGhAgAYAAAAAAACa2wQAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEoUAOGADEOACiIAREMvAESDhsAYwOMAAAANPwBBAIACDgAAOgABAABAKwBBAIAABgAACQCAlwCMRAACIYBAhAACCgAAAIAEQgsAgJ8AgBMABA1FQAJAgAAZgIxBgAGFAAPRgET8f///0hgZaC726AxvpRY6rsWO4o+AAoJvAY+sjxxpOW6qV/MvM3oAbyzfzG+XoUTuwnMiD6BtTq8B7ayPO2tSjsrt9a8gY8zvOOCMb4uUyg7xu6IPjpdbLw8DLI82rcBPPwWyLwEPmW8DKoxvtxt8zsjn4o+IwyPvAxAsDyzblI8M3GgvLyFi7wopV0+xQNMPNvKnb5NHlC8d0mtPCAOzjukID681y9JvHJfXT45c8Y7o8aavvtYArw/16s8J30sNwKk/Lup8va7LlMyvtLoFrlm6JE+tYk0vD7bqzzMC7Y7iAX+u+apLbxqZzK+NPewO4XHkj7AwGa8sJKqPPBrNjzOx4y7Fe5fvBH2XD6KAzU8fz6WvvE4GbwvTdI+FLepO9MmFb8mCZW6ZrlcPvMc1LvanZO+VgJQO6MdqDz7h0i8FRYYO1voajs0/1w+T8VHvDahlr4ncgE8M/qqPD4WlLzDfbC79kgIPAxwXT4n+JS81ICbviMlTzw8Oa88+rrGvFfTibxtJ1Y8Kg5ePqV8ybzlVaK+GZuOPF7xtDwrb/28cgkEvYc5kjzb3F4+nVsBvcNFq7784bU8dY7TPvDCHL23ESO/sJT5PFDcGz+Q8VC9ewRxv3KqLj1jMNQ+3AiPvZxNKr+1nVA9raJhPnpIqr3Sasq+uKpiPdYQ2Tz7ebq91BwFvmnWZD0xOSu+Ds2/vczQBz7BI1c9ksS4vk5eur3Z6ck+qpM5Pfn5Db8eN6q9uTIoP/QkDD2cmz+/uE2PvdojbD/fqJ088m4Nv9kKU73mzBs/20gGPN0wP7+uLyG9HnZiPyPh3Lv0/3C/7W+xvPYTlT+xddG8aPo+vxMf1jrOgl0/x9clvVoIDb9MJps8OrcSPyv5Ur3uDD+/NAz5PIknXz8DDoi9pxpxv+ruQz11PpY/o6GuvfRRP79xC5I9DH9lPxw+zb1Nog2/nMO2PeKEID9w5+O9h98/v3ly0D0fSXI/SE0BvoEicr9+Nvc9sneiPzSsFL5koEC/65kVPlgjgj8xFSS+Oj4Pv19sKj5a8kU/0Iovvofuu75SQjo+RYUJPzsPN74eFzO+vkJFPhAgnT4tpDq+KIy+vrSLSz6EUic/YUNCvvmbOL537lg+HOjbPgAAAAB/Aw8CAA9QAAAAAAAAAAAA", "prev_actions": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "actions": [0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52]} +{"advantages": [11.361513137817383, 10.466174125671387, 9.561792373657227, 8.648275375366211, 7.725530624389648, 6.7934651374816895, 5.851984977722168, 4.900994777679443, 3.940398931503296, 2.970099925994873, 1.9900000095367432, 1.0], "eps_id": [701622330, 701622330, 701622330, 701622330, 701622330, 701622330, 701622330, 701622330, 701622330, 701622330, 701622330, 701622330], "action_prob": [0.4999730587005615, 0.5003005862236023, 0.5004333257675171, 0.49958109855651855, 0.49961304664611816, 0.5002139210700989, 0.5003206729888916, 0.4997044503688812, 0.5002343058586121, 0.5002065300941467, 0.5001469850540161, 0.49990975856781006], "dones": [false, false, false, false, false, false, false, false, false, false, false, true], "type": "SampleBatch", "value_targets": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "agent_index": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "obs": "BCJNGGhAwAMAAAAAAAArNgIAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEZwIUAMQ4AKIgBEQy8ARIOHABjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAICXAIxEAAIhgECEAAIKAAAAgARCCwCAnwCAEwAADQACgIAAGYCMQYABhUAD0YBE/CxMoQDvRjiELxH6ta8JzhvO6U9BL3sHz8+LlHWvLgpmL7T5um8kKbDPi+BA71AFxm/IEurvEDjEz9jfjS9yY9mv2lKGbxAMcQ+/0V+vS5DH78D0t26boVBPmOemL3GALO+B8wIO7cpxT5e8Ka9pTIqvy9iIDwbzBQ/rCvCvVBme78KbK88UmfGPgBl6r0ydzi/Q+nuPLmBFT9b9AO+JQOGvzlMJz1x1Ec/gGUZvhFHsL9JPmc9jit6P9WZNb7rOdu/AAAAAA==", "prev_rewards": [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0], "infos": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}], "rewards": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "new_obs": "BCJNGGhAwAMAAAAAAAArNgIAAFAAAAAAAQUAAwIAUcQAAAAQDACwCgAMAAYABQAIAAoQAOMBAwAMAAAACAAIAAAABAYAAjwAAAgA0cj///8AAAEOeAAAABgiAAIcABEwNAAASgACOgAAFgAEHAAAAgCREAAUAAgABgAHXgATEHwAcwECJAAAABRIAAACAABEADEIAAd4AACzAPEAIAAAAAcAAABuZGFycmF5VABBbGlzdB4AQwAAAMxAAAACADMMABbKABEMZgAxAAMDpAAAqAACAgBRCgAYAAzWAADsABNsBAEIGAESBSkADwIAAgASAQACAAwIAAJIAQYCAAQQAAQoAAACABACzQAACQAHAgAIeAAPAgBNGfwoARoaKAEwBAMAaAEZwIUAMQ4AKIgBEQy8ARIOHABjA4wAAAA0/AEEAgAIOAAA6AAEAAEArAEEAgAAGAAAJAICXAIxEAAIhgECEAAIKAAAAgARCCwCAnwCAEwAADQACgIAAGYCMQYABhUAD0YBE/CxpT0EvewfPz4uUda8uCmYvtPm6byQpsM+L4EDvUAXGb8gS6u8QOMTP2N+NL3Jj2a/aUoZvEAxxD7/RX69LkMfvwPS3bpuhUE+Y56YvcYAs74HzAg7tynFPl7wpr2lMiq/L2IgPBvMFD+sK8K9UGZ7vwpsrzxSZ8Y+AGXqvTJ3OL9D6e48uYEVP1v0A74lA4a/OUwnPXHURz+AZRm+EUewv0k+Zz2OK3o/1Zk1vus5278cpps97NZIP1atWL6Pcr2/AAAAAA==", "prev_actions": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "actions": [1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0], "t": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]} diff --git a/python/ray/rllib/test/test_io.py b/python/ray/rllib/test/test_io.py index 022960517..6fbb3cd14 100644 --- a/python/ray/rllib/test/test_io.py +++ b/python/ray/rllib/test/test_io.py @@ -69,7 +69,7 @@ class AgentIOTest(unittest.TestCase): env="CartPole-v0", config={ "input": self.test_dir, - "input_evaluation": None, + "input_evaluation": [], }) result = agent.train() self.assertEqual(result["timesteps_total"], 250) # read from input @@ -101,7 +101,7 @@ class AgentIOTest(unittest.TestCase): env="CartPole-v0", config={ "input": self.test_dir, - "input_evaluation": None, + "input_evaluation": [], "postprocess_inputs": True, # adds back 'advantages' }) @@ -115,7 +115,7 @@ class AgentIOTest(unittest.TestCase): env="CartPole-v0", config={ "input": self.test_dir, - "input_evaluation": "simulation", + "input_evaluation": ["simulation"], }) for _ in range(50): result = 
agent.train() @@ -130,7 +130,7 @@ class AgentIOTest(unittest.TestCase): env="CartPole-v0", config={ "input": glob.glob(self.test_dir + "/*.json"), - "input_evaluation": None, + "input_evaluation": [], "sample_batch_size": 99, }) result = agent.train() @@ -147,7 +147,7 @@ class AgentIOTest(unittest.TestCase): "sampler": 0.9, }, "train_batch_size": 2000, - "input_evaluation": None, + "input_evaluation": [], }) result = agent.train() self.assertTrue(not np.isnan(result["episode_reward_mean"])) @@ -185,7 +185,7 @@ class AgentIOTest(unittest.TestCase): config={ "num_workers": 0, "input": self.test_dir, - "input_evaluation": "simulation", + "input_evaluation": ["simulation"], "train_batch_size": 2000, "multiagent": { "policy_graphs": { diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index e748d9d70..b513a3148 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -251,12 +251,20 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --ray-num-cpus 8 \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1}' + docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run MARWIL \ --stop '{"training_iteration": 2}' \ - --config '{"input": "/ray/python/ray/rllib/test/data/cartpole_small", "learning_starts": 0}' + --config '{"input": "/ray/python/ray/rllib/test/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "shuffle_buffer_size": 10}' + +docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ + python /ray/python/ray/rllib/train.py \ + --env CartPole-v0 \ + --run DQN \ + --stop '{"training_iteration": 2}' \ + --config '{"input": "/ray/python/ray/rllib/test/data/cartpole_small", "learning_starts": 0, 
"input_evaluation": ["wis", "is"], "soft_q": true}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_local.py