Files
ray/python/ray/tests/test_memory_scheduling.py
T
Eric Liang 53641f1f74 Move more unit tests to bazel (#6250)
* move more unit tests to bazel

* move to avoid conflict

* fix lint

* fix deps

* separate

* fix failing tests

* show tests

* ignore mismatch

* try combining bazel runs

* build lint

* remove tests from install

* fix test utils

* better config

* split up

* exclusive

* fix verbosity

* fix tests class

* cleanup

* remove flaky

* fix metrics test

* Update .travis.yml

* no retry flaky

* split up actor

* split basic test

* split up trial runner test

* split stress

* fix basic test

* fix tests

* switch to pytest runner for main

* make microbench not fail

* move load code to py3

* test is no longer package

* bazel to end
2019-11-24 11:43:34 -08:00

158 lines
4.6 KiB
Python

import numpy as np
import unittest
import ray
from ray import tune
from ray.rllib import _register_all
# Number of bytes in one mebibyte; the memory limits in this file are
# written as multiples of it.
MB = 1024 * 1024
@ray.remote(memory=100 * MB)
class Actor(object):
    """Trivial Ray actor declared with ``memory=100 * MB``.

    It carries no state; ``ping`` exists only so a test can tell whether
    the actor was actually started.
    """

    def __init__(self):
        """Nothing to initialize."""

    def ping(self):
        """Return the fixed string ``"ok"``."""
        return "ok"
@ray.remote(object_store_memory=100 * MB)
class Actor2(object):
    """Trivial Ray actor declared with ``object_store_memory=100 * MB``.

    It carries no state; ``ping`` exists only so a test can tell whether
    the actor was actually started.
    """

    def __init__(self):
        """Nothing to initialize."""

    def ping(self):
        """Return the fixed string ``"ok"``."""
        return "ok"
def train_oom(config, reporter):
    """Trainable that puts a large zero array in the object store.

    Passed to ``tune.run`` by ``testTuneObjectLimitApplied``, which expects
    the ``ray.put`` to fail with an ``ObjectStoreFullError``.

    Args:
        config: Trial configuration; not read here.
        reporter: Callback invoked with ``result=123`` once the put returns.
    """
    # 200 * 2**20 elements; with NumPy's default float64 dtype that is
    # 8 bytes per element.
    num_elements = 200 * 1024 * 1024
    ray.put(np.zeros(num_elements))
    reporter(result=123)
class TestMemoryScheduling(unittest.TestCase):
    """Checks that memory and object-store-memory requests are enforced.

    The first two tests start actors directly against a small local Ray
    instance; the remaining tests go through ``tune.run``.
    """

    def tearDown(self):
        # Runs after every test, whether it passed or failed, so no test
        # leaves a Ray instance behind for the next one.
        ray.shutdown()

    def testMemoryRequest(self):
        """Two 100 MB actors start with memory=200 MB; a third does not."""
        ray.init(num_cpus=1, memory=200 * MB)
        # The first two actors fit.
        a = Actor.remote()
        b = Actor.remote()
        ok, _ = ray.wait(
            [a.ping.remote(), b.ping.remote()],
            timeout=60.0,
            num_returns=2)
        self.assertEqual(len(ok), 2)
        # The third does not fit: its ping is not ready within the timeout.
        c = Actor.remote()
        ok, _ = ray.wait([c.ping.remote()], timeout=5.0)
        self.assertEqual(len(ok), 0)

    def testObjectStoreMemoryRequest(self):
        """Two 100 MB store actors start with a 300 MB store; a third not."""
        ray.init(num_cpus=1, object_store_memory=300 * MB)
        # The first two actors fit (70% allowed).
        a = Actor2.remote()
        b = Actor2.remote()
        ok, _ = ray.wait(
            [a.ping.remote(), b.ping.remote()],
            timeout=60.0,
            num_returns=2)
        self.assertEqual(len(ok), 2)
        # The third does not fit: its ping is not ready within the timeout.
        c = Actor2.remote()
        ok, _ = ray.wait([c.ping.remote()], timeout=5.0)
        self.assertEqual(len(ok), 0)

    def testTuneDriverHeapLimit(self):
        """A too-small ``memory`` setting makes the PG trial error out."""
        _register_all()
        result = tune.run(
            "PG",
            stop={"timesteps_total": 10000},
            config={
                "env": "CartPole-v0",
                "memory": 100 * MB,  # too little
            },
            raise_on_failed_trial=False)
        self.assertEqual(result.trials[0].status, "ERROR")
        self.assertIn(
            "RayOutOfMemoryError: Heap memory usage for ray_PG_",
            result.trials[0].error_msg)

    def testTuneDriverStoreLimit(self):
        """A too-large ``object_store_memory`` request raises TuneError."""
        _register_all()
        # NOTE(review): assertRaisesRegexp is kept as in the original;
        # Python 3 names this method assertRaisesRegex.
        with self.assertRaisesRegexp(
                ray.tune.error.TuneError,
                ".*Insufficient cluster resources.*"):
            tune.run(
                "PG",
                stop={"timesteps_total": 10000},
                config={
                    "env": "CartPole-v0",
                    # too large
                    "object_store_memory": 10000 * MB,
                })

    def testTuneWorkerHeapLimit(self):
        """A too-small ``memory_per_worker`` makes the PG trial error out."""
        _register_all()
        result = tune.run(
            "PG",
            stop={"timesteps_total": 10000},
            config={
                "env": "CartPole-v0",
                "num_workers": 1,
                "memory_per_worker": 100 * MB,  # too little
            },
            raise_on_failed_trial=False)
        self.assertEqual(result.trials[0].status, "ERROR")
        self.assertIn(
            "RayOutOfMemoryError: Heap memory usage for ray_Rollout",
            result.trials[0].error_msg)

    def testTuneWorkerStoreLimit(self):
        """A too-large per-worker object store request raises TuneError."""
        _register_all()
        # NOTE(review): assertRaisesRegexp is kept as in the original;
        # Python 3 names this method assertRaisesRegex.
        with self.assertRaisesRegexp(
                ray.tune.error.TuneError,
                ".*Insufficient cluster resources.*"):
            tune.run(
                "PG",
                stop={"timesteps_total": 0},
                config={
                    "env": "CartPole-v0",
                    "num_workers": 1,
                    # too large
                    "object_store_memory_per_worker": 10000 * MB,
                })

    def testTuneObjectLimitApplied(self):
        """A 150 MB per-trial object store limit makes ``train_oom`` fail."""
        result = tune.run(
            train_oom,
            resources_per_trial={"object_store_memory": 150 * MB},
            raise_on_failed_trial=False)
        # Compare the status explicitly: assertTrue(status, "ERROR") would
        # treat "ERROR" as the failure message and pass for any non-empty
        # status.
        self.assertEqual(result.trials[0].status, "ERROR")
        self.assertIn(
            "ObjectStoreFullError: Failed to put",
            result.trials[0].error_msg)
if __name__ == "__main__":
    # When executed as a script, run this file's tests verbosely under
    # pytest and exit with pytest's return value.
    import pytest
    import sys

    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)