[RLlib] Fix offline logp vs prob bug in OffPolicyEstimator class. (#12158)

2026-07-03 19:14:37 +08:00 · 2020-11-20 08:59:43 +01:00
parent 6f2398d122
commit f6b84cb2f7
1 changed file with 2 additions and 1 deletion
@@ -70,7 +70,8 @@ class OffPolicyEstimator:
            state_batches=[batch[k] for k in state_keys],
            prev_action_batch=batch.data.get(SampleBatch.PREV_ACTIONS),
            prev_reward_batch=batch.data.get(SampleBatch.PREV_REWARDS))
-        return convert_to_numpy(log_likelihoods)
+        log_likelihoods = convert_to_numpy(log_likelihoods)
+        return np.exp(log_likelihoods)

    @DeveloperAPI
    def process(self, batch: SampleBatchType):