Files
ray/python/ray/tests/test_memory_scheduling.py
T
Eric Liang e8c77e2847 Remove memory quota enforcement from actors (#11480)
* wip

* fix

* deprecate
2020-10-21 14:29:03 -07:00

134 lines
3.8 KiB
Python

import numpy as np
import unittest
import ray
from ray import tune
from ray.rllib import _register_all
MB = 1024 * 1024
@ray.remote(memory=100 * MB)
class Actor:
def __init__(self):
pass
def ping(self):
return "ok"
@ray.remote(object_store_memory=100 * MB)
class Actor2:
def __init__(self):
pass
def ping(self):
return "ok"
def train_oom(config, reporter):
ray.put(np.zeros(200 * 1024 * 1024))
reporter(result=123)
class TestMemoryScheduling(unittest.TestCase):
def testMemoryRequest(self):
try:
ray.init(num_cpus=1, _memory=200 * MB)
# fits first 2
a = Actor.remote()
b = Actor.remote()
ok, _ = ray.wait(
[a.ping.remote(), b.ping.remote()],
timeout=60.0,
num_returns=2)
self.assertEqual(len(ok), 2)
# does not fit
c = Actor.remote()
ok, _ = ray.wait([c.ping.remote()], timeout=5.0)
self.assertEqual(len(ok), 0)
finally:
ray.shutdown()
def testObjectStoreMemoryRequest(self):
try:
ray.init(num_cpus=1, object_store_memory=300 * MB)
# fits first 2 (70% allowed)
a = Actor2.remote()
b = Actor2.remote()
ok, _ = ray.wait(
[a.ping.remote(), b.ping.remote()],
timeout=60.0,
num_returns=2)
self.assertEqual(len(ok), 2)
# does not fit
c = Actor2.remote()
ok, _ = ray.wait([c.ping.remote()], timeout=5.0)
self.assertEqual(len(ok), 0)
finally:
ray.shutdown()
def testTuneDriverStoreLimit(self):
try:
ray.init(
num_cpus=4,
_memory=100 * MB,
object_store_memory=100 * MB,
)
_register_all()
self.assertRaisesRegexp(
ray.tune.error.TuneError,
".*Insufficient cluster resources.*",
lambda: tune.run(
"PG",
stop={"timesteps_total": 10000},
config={
"env": "CartPole-v0",
# too large
"object_store_memory": 10000 * 1024 * 1024,
"framework": "tf",
}))
finally:
ray.shutdown()
def testTuneWorkerStoreLimit(self):
try:
ray.init(
num_cpus=4,
_memory=100 * MB,
object_store_memory=100 * MB,
)
_register_all()
self.assertRaisesRegexp(
ray.tune.error.TuneError,
".*Insufficient cluster resources.*",
lambda:
tune.run("PG", stop={"timesteps_total": 0}, config={
"env": "CartPole-v0",
"num_workers": 1,
# too large
"object_store_memory_per_worker": 10000 * 1024 * 1024,
"framework": "tf",
}))
finally:
ray.shutdown()
def testTuneObjectLimitApplied(self):
try:
ray.init(num_cpus=2, object_store_memory=500 * MB)
result = tune.run(
train_oom,
resources_per_trial={"object_store_memory": 150 * 1024 * 1024},
raise_on_failed_trial=False)
self.assertTrue(result.trials[0].status, "ERROR")
self.assertTrue("ObjectStoreFullError: Failed to put" in
result.trials[0].error_msg)
finally:
ray.shutdown()
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-v", __file__]))