diff --git a/python/ray/experimental/sgd/modified_allreduce.py b/python/ray/experimental/sgd/modified_allreduce.py index a9d6879f9..7c446aa97 100644 --- a/python/ray/experimental/sgd/modified_allreduce.py +++ b/python/ray/experimental/sgd/modified_allreduce.py @@ -584,7 +584,15 @@ def pack_small_tensors(tower_grads, max_bytes=0): if len(small_ranges): new_tower_grads = [] for dev_idx, gv_list in enumerate(tower_grads): - assert len(gv_list) == num_gv + assert len(gv_list) == num_gv, ( + "Possible cause: " + "Networks constructed on different workers " + "don't have the same number of variables. " + "If you use tf.GraphKeys or tf.global_variables() " + "with multiple graphs per worker during network " + "construction, you need to use " + "appropriate scopes, see " + "https://github.com/ray-project/ray/issues/3136") new_gv_list = [] for r in small_ranges: key = '%d:%d' % (dev_idx, len(new_gv_list))