Files
ray/python/ray/internal/internal_api.py
T
Stephanie Wang 4e81804cba [core] Replace task resubmission in raylet with ownership protocol (#9394)
* Add intended worker ID to GetObjectStatus, tests

* Remove TaskID owner_id

* lint

* Add owner address to task args

* Make TaskArg a virtual class, remove multi args

* Set owner address for task args

* merge

* Fix tests

* Add ObjectRefs to task dependency manager, pass from task spec args

* tmp

* tmp

* Fix

* Add ownership info for task arguments

* Convert WaitForDirectActorCallArgs

* lint

* build

* update

* build

* java

* Move code

* build

* Revert "Fix Google log directory again (#9063)"

This reverts commit 275da2e400.

* Fix free

* Regression tests - shorten timeouts in reconstruction unit tests

* Remove timeout for non-actor tasks

* Modify tests using ray.internal.free

* Clean up future resolution code

* Raylet polls the owner

* todo

* comment

* Update src/ray/core_worker/core_worker.cc

Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>

* Drop stale actor table notifications

* Fix bug where actor restart hangs

* Revert buggy code for duplicate tasks

* build

* Fix errors for lru_evict and internal.free

* Revert "Drop stale actor table notifications"

This reverts commit 193c5d20e5577befd43f166e16c972e2f9247c91.

* Revert "build"

This reverts commit 5644edbac906ff6ef98feb40b6f62c9e63698c29.

* Fix free test

* Fixes for freed objects

Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
2020-07-15 14:55:51 -07:00

80 lines
2.8 KiB
Python

import ray.worker
from ray import profiling
# Names exported by a star-import of this module.
# NOTE(review): `memory_summary` is defined in this module but is not listed
# here -- confirm whether that omission is intentional.
__all__ = ["free", "global_gc"]
def global_gc():
    """Ask every worker in the cluster to run ``gc.collect()``.

    The request is issued through the core worker attached to this
    process's global worker.
    """
    ray.worker.global_worker.core_worker.global_gc()
def memory_summary():
    """Return a formatted string describing memory usage in the cluster.

    A ``FormatGlobalMemoryInfo`` request is sent to the node manager of the
    first entry returned by ``ray.nodes()``, and the ``memory_summary``
    field of the reply is returned. The RPC uses a 30 second timeout.

    Returns:
        The ``memory_summary`` field of the raylet's reply.
    """
    # grpc and the generated stubs are only needed by this function, so they
    # are imported locally rather than at module import time.
    import grpc
    from ray.core.generated import node_manager_pb2
    from ray.core.generated import node_manager_pb2_grpc

    # We can ask any Raylet for the global memory info. Query the node table
    # once and take both the address and the port from the same entry, so
    # they are guaranteed to describe the same node.
    raylet = ray.nodes()[0]
    raylet_address = "{}:{}".format(raylet["NodeManagerAddress"],
                                    raylet["NodeManagerPort"])

    # The channel is a context manager; closing it on exit releases the
    # underlying connection instead of leaking it on every call.
    with grpc.insecure_channel(raylet_address) as channel:
        stub = node_manager_pb2_grpc.NodeManagerServiceStub(channel)
        reply = stub.FormatGlobalMemoryInfo(
            node_manager_pb2.FormatGlobalMemoryInfoRequest(), timeout=30.0)
    return reply.memory_summary
def free(object_refs, local_only=False, delete_creating_tasks=False):
    """Free a list of object refs from the in-process and plasma stores.

    This is a low-level API intended for restricted scenarios only.

    When ``local_only`` is false, the request is sent to all object stores.

    Nothing is returned to indicate whether the deletion succeeded: the call
    is merely an instruction to the object store. Objects that are still in
    use are deleted later, once their ref count drops to 0.

    Examples:
        >>> x_id = f.remote()
        >>> ray.get(x_id)  # wait for x to be created first
        >>> free([x_id])  # unpin & delete x globally

    Args:
        object_refs (List[ObjectRef]): Object refs to delete. A single
            ObjectRef is also accepted and is treated as a one-item list.
        local_only (bool): Whether to delete the objects only from the local
            object store rather than from all object stores.
        delete_creating_tasks (bool): Whether to also delete the tasks that
            created the objects.

    Raises:
        TypeError: If ``object_refs`` is neither an ObjectRef nor a list, or
            if any element of the list is not an ObjectRef.
    """
    # Normalise a lone ref into a list so the rest of the function only has
    # to deal with one shape of input.
    refs = object_refs
    if isinstance(refs, ray.ObjectRef):
        refs = [refs]

    if not isinstance(refs, list):
        raise TypeError("free() expects a list of ObjectRef, got {}".format(
            type(refs)))

    # Reject the first element that is not an object ref.
    for candidate in refs:
        if isinstance(candidate, ray.ObjectRef):
            continue
        raise TypeError(
            "Attempting to call `free` on the value {}, "
            "which is not an ray.ObjectRef.".format(candidate))

    # Argument validation happens before the connection check, so malformed
    # input is reported as a TypeError even when the worker is disconnected.
    global_worker = ray.worker.global_worker
    global_worker.check_connected()

    with profiling.profile("ray.free"):
        # An empty list is a no-op; `refs` is known to be a list here.
        if refs:
            global_worker.core_worker.free_objects(refs, local_only,
                                                   delete_creating_tasks)