mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 16:00:00 +08:00
Revert [rllib] Reserve CPUs for replay actors in apex (#4404)
* Revert "[rllib] Reserve CPUs for replay actors in apex (#4217)"
This reverts commit 2781d74680.
* comment
This commit is contained in:
@@ -12,7 +12,6 @@ from ray.rllib.agents.dqn.dqn_policy_graph import DQNPolicyGraph
|
||||
from ray.rllib.evaluation.metrics import collect_metrics
|
||||
from ray.rllib.utils.annotations import override
|
||||
from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule
|
||||
from ray.tune.trial import Resources
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -142,21 +141,6 @@ class DQNAgent(Agent):
|
||||
_policy_graph = DQNPolicyGraph
|
||||
_optimizer_shared_configs = OPTIMIZER_SHARED_CONFIGS
|
||||
|
||||
@classmethod
|
||||
@override(Agent)
|
||||
def default_resource_request(cls, config):
|
||||
cf = dict(cls._default_config, **config)
|
||||
Agent._validate_config(cf)
|
||||
if cf["optimizer_class"] == "AsyncReplayOptimizer":
|
||||
extra = cf["optimizer"]["num_replay_buffer_shards"]
|
||||
else:
|
||||
extra = 0
|
||||
return Resources(
|
||||
cpu=cf["num_cpus_for_driver"],
|
||||
gpu=cf["num_gpus"],
|
||||
extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"] + extra,
|
||||
extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
|
||||
|
||||
@override(Agent)
|
||||
def _init(self):
|
||||
self._validate_config()
|
||||
|
||||
@@ -230,8 +230,7 @@ class AsyncReplayOptimizer(PolicyOptimizer):
|
||||
return sample_timesteps, train_timesteps
|
||||
|
||||
|
||||
# reserve 1 CPU so that our method calls don't get stalled
|
||||
@ray.remote(num_cpus=1)
|
||||
@ray.remote(num_cpus=0)
|
||||
class ReplayActor(object):
|
||||
"""A replay buffer shard.
|
||||
|
||||
@@ -317,6 +316,8 @@ class ReplayActor(object):
|
||||
return stat
|
||||
|
||||
|
||||
# note: we set num_cpus=0 to avoid failing to create replay actors when
|
||||
# resources are fragmented. Not ideal: with no reserved CPU, calls may stall.
|
||||
@ray.remote(num_cpus=0)
|
||||
class BatchReplayActor(object):
|
||||
"""The batch replay version of the replay actor.
|
||||
|
||||
@@ -105,7 +105,7 @@ def check_support_multiagent(alg, config):
|
||||
|
||||
class ModelSupportedSpaces(unittest.TestCase):
|
||||
def setUp(self):
|
||||
ray.init(num_cpus=10)
|
||||
ray.init(num_cpus=4)
|
||||
|
||||
def tearDown(self):
|
||||
ray.shutdown()
|
||||
|
||||
Reference in New Issue
Block a user