mirror of
https://github.com/wassname/ray.git
synced 2026-07-05 17:01:57 +08:00
[rllib] Use suppress_output instead of run_silent.sh script for tests (#4386)
* fix
* enable custom loss
* Update run_rllib_tests.sh
* enable tests
* fix action prob
* Update suppress_output
* fix example
* fix
This commit is contained in:
@@ -51,9 +51,9 @@ class CustomLossModel(Model):
|
||||
input_ops = reader.tf_input_ops()
|
||||
|
||||
# define a secondary loss by building a graph copy with weight sharing
|
||||
obs = tf.cast(input_ops["obs"], tf.float32)
|
||||
logits, _ = self._build_layers_v2({
|
||||
"obs": restore_original_dimensions(input_ops["obs"],
|
||||
self.obs_space)
|
||||
"obs": restore_original_dimensions(obs, self.obs_space)
|
||||
}, self.num_outputs, self.options)
|
||||
|
||||
# You can also add self-supervised losses easily by referencing tensors
|
||||
|
||||
@@ -92,8 +92,9 @@ class OffPolicyEstimator(object):
|
||||
raise ValueError(
|
||||
"Off-policy estimation is not possible unless the inputs "
|
||||
"include action probabilities (i.e., the policy is stochastic "
|
||||
"and emits the 'action_prob' key). You can set "
|
||||
"`input_evaluation: []` to resolve this.")
|
||||
"and emits the 'action_prob' key). For DQN this means using "
|
||||
"`soft_q: True`. You can also set `input_evaluation: []` to "
|
||||
"disable estimation.")
|
||||
|
||||
@DeveloperAPI
|
||||
def get_metrics(self):
|
||||
|
||||
+3
-4
File diff suppressed because one or more lines are too long
@@ -1,21 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
TMPFILE=`mktemp`
|
||||
DIRECTORY=`dirname $0`
|
||||
SCRIPT=$1
|
||||
shift
|
||||
|
||||
if [ -x $DIRECTORY/../$SCRIPT ]; then
|
||||
time $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1
|
||||
else
|
||||
time python $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1
|
||||
fi
|
||||
|
||||
CODE=$?
|
||||
if [ $CODE != 0 ]; then
|
||||
cat $TMPFILE
|
||||
echo "FAILED $CODE"
|
||||
exit $CODE
|
||||
fi
|
||||
|
||||
exit 0
|
||||
Reference in New Issue
Block a user