mirror of
https://github.com/wassname/ray.git
synced 2026-07-01 03:08:48 +08:00
[rllib] Basic infrastructure for off-policy estimation (IS, WIS) (#3941)
This commit is contained in:
@@ -33,6 +33,7 @@ if __name__ == "__main__":
|
||||
agent_index=0,
|
||||
obs=obs,
|
||||
actions=action,
|
||||
action_prob=1.0, # put the true action probability here
|
||||
rewards=rew,
|
||||
prev_actions=prev_action,
|
||||
prev_rewards=prev_reward,
|
||||
|
||||
Reference in New Issue
Block a user