Files
ray/examples/policy_gradient/reinforce/utils.py
T
Eric Liang 4374ad1453 Policy gradient example: Support multi-GPU training (#584)
* add tf metrics

* comments

* fix network scopes

* add doc

* initial work

* try with 3 virtual cpus

* clean up metrics

* use format string

* fix trace level

* back to pong

* always run summary on cpu

* plot intermediate and final sgd stats

* add back a global step

* update

* add timeline

* use staging area and reuse weights properly

* stage at cpu

* whoops, stage only the batch

* clean up a bit

* fix py flake

* wip

* create an optimizer graph per device

* print timeline on 5th batch instead

* print examples per second

* log placement for training ops

* force placement on cpu:0

* try separating weights onto different gpus

* try using nccl

* add cpu fallback

* remove space from date

* check has gpu device

* fix flag config

* checkpoint

* wip

* update

* add some timing

* trace loading

* try cpu

* revert that

* remove expensive test

* lint

* cleanups

* clean up timers

* clean it up a bit

* fix code for non-scalar action spaces

* address some nits

* fix quotes

* efficient shuffling between sgd epochs
2017-06-13 06:03:25 +00:00

89 lines
2.5 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
def flatten(weights, start=0, stop=2):
    """Merge a range of axes of every array in a dictionary into one axis.

    For each array, the axes in ``[start, stop)`` are collapsed into a single
    axis. Axes before ``start`` and axes from ``stop`` onwards keep their
    original sizes.

    Args:
        weights: A dictionary mapping keys to numpy arrays. It is modified
            in place.
        start: Index of the first axis to merge.
        stop: Index one past the last axis to merge.

    Returns:
        The same ``weights`` dictionary, now holding the reshaped arrays.
    """
    for name in list(weights):
        array = weights[name]
        leading = array.shape[:start]
        trailing = array.shape[stop:]
        # -1 lets numpy infer the size of the merged axis.
        weights[name] = array.reshape(leading + (-1,) + trailing)
    return weights
def concatenate(weights_list):
    """Join a list of array dictionaries into a single dictionary.

    The keys are taken from the first dictionary in ``weights_list``. For each
    of those keys, the matching arrays from every dictionary are joined with
    ``np.concatenate`` (along its default first axis), in list order.

    Args:
        weights_list: A non-empty list of dictionaries mapping keys to numpy
            arrays. Every dictionary must contain the keys of the first one.

    Returns:
        A new dictionary mapping each key to the concatenated array.
    """
    first = weights_list[0]
    return {
        key: np.concatenate([weights[key] for weights in weights_list])
        for key in first.keys()
    }
def shuffle(trajectory):
    """Reorder every array in a trajectory with one shared random permutation.

    The permutation length is the first-axis length of ``trajectory["dones"]``.
    The same permutation indexes every value, so entries at the same position
    under different keys stay aligned with each other.

    Args:
        trajectory: A dictionary mapping keys to numpy arrays; it must contain
            a "dones" entry. It is modified in place.

    Returns:
        The same ``trajectory`` dictionary, now holding the shuffled arrays.
    """
    num_rows = trajectory["dones"].shape[0]
    order = np.random.permutation(num_rows)
    for name in list(trajectory):
        trajectory[name] = trajectory[name][order]
    return trajectory
def make_divisible_by(array, n):
    """Trim trailing entries so the first-axis length is a multiple of ``n``.

    Args:
        array: An array-like object exposing ``shape`` and slicing.
        n: The divisor the resulting first-axis length must be a multiple of.

    Returns:
        The leading slice of ``array`` whose first-axis length is the largest
        multiple of ``n`` that does not exceed the original length.
    """
    length = array.shape[0]
    # (length // n) * n is the same value as length - length % n.
    usable = (length // n) * n
    return array[:usable]
def average_gradients(tower_grads):
    """Average gradients across towers.

    Calculate the average gradient for each shared variable across all towers.
    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer
            list is over towers; each inner list holds the (gradient,
            variable) pairs computed by that tower. Pairs are matched across
            towers by position.

    Returns:
        List of pairs of (gradient, variable) where the gradient has been
        averaged across the towers that produced one. Towers whose gradient
        for a variable is None are left out of that variable's average. If no
        tower produced a gradient for a variable, its pair is
        (None, variable).

    TODO(ekl): We could use NCCL if this becomes a bottleneck.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))

        # The Variables are redundant because they are shared across towers,
        # so we just return the first tower's pointer to the Variable.
        v = grad_and_vars[0][1]

        # Add a leading 'tower' dimension to every available gradient so the
        # gradients can be concatenated and averaged over that dimension.
        grads = [
            tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None
        ]

        if not grads:
            # No tower produced a gradient for this variable. Concatenating
            # an empty list would fail, so pass the missing gradient through.
            average_grads.append((None, v))
            continue

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)
        average_grads.append((grad, v))
    return average_grads