Files
ray/examples/evolution_strategies/utils.py
T
Robert Nishihara 3c5375345f Initial version of evolution strategies example. (#544)
* Initial commit of evolution strategies example.

* Some small simplifications.

* Update example to use new API.

* Add example to documentation.
2017-05-14 17:53:51 -07:00

87 lines
2.3 KiB
Python

# Code in this file is copied and adapted from
# https://github.com/openai/evolution-strategies-starter.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
def compute_ranks(x):
    """Return the rank of every entry of a 1-D array.

    The smallest entry gets rank 0 and the largest gets rank len(x) - 1.

    Note: scipy.stats.rankdata is different, because its ranks lie in
    [1, len(x)] rather than [0, len(x)).

    Args:
        x: A one-dimensional numpy array.

    Returns:
        An integer array of the same length as x holding the ranks.
    """
    assert x.ndim == 1
    n = len(x)
    # order[k] is the index of the k-th smallest entry of x.
    order = x.argsort()
    ranks = np.empty(n, dtype=int)
    # Scatter 0..n-1 back to the original positions, inverting the sort.
    ranks[order] = np.arange(n)
    return ranks
def compute_centered_ranks(x):
    """Replace the entries of x by their ranks, rescaled to [-0.5, 0.5].

    The flattened entries of x are ranked with compute_ranks and the ranks
    are mapped linearly so that the smallest entry becomes -0.5 and the
    largest becomes 0.5.

    Args:
        x: A numpy array of any shape.

    Returns:
        A float32 array with the same shape as x. When x holds a single
        element the result is 0.0 (the center of the range); an empty x
        yields an empty array.
    """
    y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32)
    # With exactly one element the divisor x.size - 1 is zero and the
    # rescaling would turn the result into NaN. A lone rank of 0 is already
    # the centered value, so the rescaling is skipped in that case. (For an
    # empty array there is nothing to rescale either.)
    if x.size > 1:
        y /= (x.size - 1)
        y -= 0.5
    return y
def make_session(single_threaded):
    """Create a TensorFlow InteractiveSession.

    Args:
        single_threaded: If true, the session is configured with one
            inter-op thread and one intra-op thread. Otherwise the session
            is created with TensorFlow's default configuration.

    Returns:
        The newly created tf.InteractiveSession.
    """
    if single_threaded:
        one_thread_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                           intra_op_parallelism_threads=1)
        return tf.InteractiveSession(config=one_thread_config)
    return tf.InteractiveSession()
def itergroups(items, group_size):
    """Yield consecutive tuples of up to group_size elements from items.

    Every yielded tuple has exactly group_size elements except possibly the
    last one, which holds whatever is left over. Nothing is yielded for an
    empty iterable.

    Args:
        items: Any iterable.
        group_size: Maximum number of elements per tuple; must be >= 1.

    Yields:
        Tuples of consecutive elements, in the original order.
    """
    assert group_size >= 1
    pending = []
    for item in items:
        pending.append(item)
        if len(pending) < group_size:
            continue
        yield tuple(pending)
        pending = []
    # Emit the trailing partial group, if any.
    if pending:
        yield tuple(pending)
def batched_weighted_sum(weights, vecs, batch_size):
    """Accumulate sum_i weights[i] * vecs[i], processing batch_size items at
    a time.

    Both inputs are consumed in lockstep through itergroups, so vecs may be
    a lazy iterable; only one batch is converted to a float32 array at a
    time.

    Args:
        weights: Iterable of scalar weights.
        vecs: Iterable of vectors, paired element-wise with weights.
        batch_size: Maximum number of items combined per dot product.

    Returns:
        A tuple (total, num_items_summed): the accumulated weighted sum
        (the integer 0 if no items were processed) and the number of items
        that went into it.
    """
    accumulated = 0
    items_seen = 0
    weight_batches = itergroups(weights, batch_size)
    vec_batches = itergroups(vecs, batch_size)
    for weight_chunk, vec_chunk in zip(weight_batches, vec_batches):
        chunk_len = len(weight_chunk)
        assert chunk_len == len(vec_chunk) <= batch_size
        weight_arr = np.asarray(weight_chunk, dtype=np.float32)
        vec_arr = np.asarray(vec_chunk, dtype=np.float32)
        accumulated += np.dot(weight_arr, vec_arr)
        items_seen += chunk_len
    return accumulated, items_seen
class RunningStat(object):
    """Accumulates a sum, a sum of squares and a count, from which a mean
    and a standard deviation are derived."""

    def __init__(self, shape, eps):
        """Start with a zero sum; the sum of squares and the count both
        start at eps.

        Args:
            shape: Shape of the float32 accumulator arrays.
            eps: Initial value for the sum of squares and for the count.
        """
        self.sum = np.zeros(shape, dtype=np.float32)
        self.sumsq = np.full(shape, eps, dtype=np.float32)
        self.count = eps

    def increment(self, s, ssq, c):
        """Add a partial sum s, a partial sum of squares ssq and a partial
        count c to the accumulators (the arrays are updated in place)."""
        self.sum += s
        self.sumsq += ssq
        self.count += c

    @property
    def mean(self):
        """Accumulated sum divided by the accumulated count."""
        return self.sum / self.count

    @property
    def std(self):
        """Square root of the variance estimate E[x^2] - E[x]^2, where the
        variance is clamped from below at 1e-2."""
        mean_of_squares = self.sumsq / self.count
        variance = mean_of_squares - np.square(self.mean)
        return np.sqrt(np.maximum(variance, 1e-2))

    def set_from_init(self, init_mean, init_std, init_count):
        """Overwrite the accumulators so that mean equals init_mean and the
        unclamped variance equals init_std ** 2, with count init_count."""
        self.sum[:] = init_mean * init_count
        second_moment = np.square(init_mean) + np.square(init_std)
        self.sumsq[:] = second_moment * init_count
        self.count = init_count