Release GPU resources as soon as an actor exits. (#1088)

* Release GPU resources as soon as an actor exits.
* Add a test.
* Store local_scheduler_id and driver_id in the worker object instead of the actor object.
2026-07-06 04:44:08 +08:00 · 2017-10-06 17:58:19 -07:00
parent aebe9f9374
commit 4669c59fa8
3 changed files with 95 additions and 4 deletions
@@ -315,6 +315,33 @@ class ActorMethods(unittest.TestCase):

        ray.worker.cleanup()

+    def testActorDeletionWithGPUs(self):
+        # Checks that the GPU held by an actor becomes available again once
+        # the actor process has exited, by repeatedly creating and dropping
+        # an actor that requires a GPU.
+        # Only one GPU is registered here and each actor below requests one,
+        # so a single live actor accounts for all of the GPU resources.
+        ray.init(num_workers=0, num_gpus=1)
+
+        # When an actor that uses a GPU exits, make sure that the GPU resources
+        # are released.
+
+        @ray.remote(num_gpus=1)
+        class Actor(object):
+            def getpid(self):
+                # Reports the PID of whichever process executes this method;
+                # the test later waits for that PID to exit.
+                return os.getpid()
+
+        # Every iteration after the first can only create its actor if the
+        # GPU used by the previous iteration's actor has been released.
+        for _ in range(5):
+            # If we can successfully create an actor, that means that enough
+            # GPU resources are available.
+            a = Actor.remote()
+            pid = ray.get(a.getpid.remote())
+
+            # Make sure that we can't create another actor.
+            # NOTE(review): any Exception satisfies this check, so it does
+            # not distinguish a GPU shortage from an unrelated failure.
+            with self.assertRaises(Exception):
+                Actor.remote()
+
+            # Let the actor go out of scope, and wait for it to exit.
+            # Rebinding "a" drops this test's only reference to the handle.
+            a = None
+            ray.test.test_utils.wait_for_pid_to_exit(pid)
+
+        # NOTE(review): this cleanup is not reached if anything above raises
+        # or an assertion fails, since it is not in a finally/tearDown.
+        ray.worker.cleanup()
+
    def testActorState(self):
        ray.init()