[rllib] Basic infrastructure for off-policy estimation (IS, WIS) (#3941)

This commit is contained in:
Eric Liang
2019-02-13 16:25:05 -08:00
committed by GitHub
parent 729d0b2825
commit 2dccf383dd
34 changed files with 549 additions and 131 deletions
@@ -33,6 +33,7 @@ if __name__ == "__main__":
agent_index=0,
obs=obs,
actions=action,
action_prob=1.0, # put the true action probability here
rewards=rew,
prev_actions=prev_action,
prev_rewards=prev_reward,