[RLlib] Fix offline logp vs prob bug in OffPolicyEstimator class. (#12158)

This commit is contained in:
Sven Mika
2020-11-20 08:59:43 +01:00
committed by GitHub
parent 6f2398d122
commit f6b84cb2f7
+2 -1
View File
@@ -70,7 +70,8 @@ class OffPolicyEstimator:
state_batches=[batch[k] for k in state_keys],
prev_action_batch=batch.data.get(SampleBatch.PREV_ACTIONS),
prev_reward_batch=batch.data.get(SampleBatch.PREV_REWARDS))
- return convert_to_numpy(log_likelihoods)
+ log_likelihoods = convert_to_numpy(log_likelihoods)
+ return np.exp(log_likelihoods)
@DeveloperAPI
def process(self, batch: SampleBatchType):