mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 17:07:32 +08:00
3c5375345f
* Initial commit of evolution strategies example. * Some small simplifications. * Update example to use new API. * Add example to documentation.
87 lines
2.3 KiB
Python
87 lines
2.3 KiB
Python
# Code in this file is copied and adapted from
|
|
# https://github.com/openai/evolution-strategies-starter.
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import numpy as np
|
|
import tensorflow as tf
|
|
|
|
|
|
def compute_ranks(x):
    """Return the rank of every entry of a 1-D array, in [0, len(x)).

    The smallest entry gets rank 0 and the largest gets rank len(x) - 1.
    The result is always a permutation of 0..len(x)-1, so equal entries
    still receive distinct ranks (their relative order follows
    ``x.argsort()``).

    Note: This is different from scipy.stats.rankdata, which returns ranks
    in [1, len(x)].

    Args:
        x: One-dimensional NumPy array of comparable values.

    Returns:
        Integer NumPy array with the same length as ``x``.
    """
    assert x.ndim == 1
    n = len(x)
    # order[k] is the index in x of the k-th smallest entry.
    order = x.argsort()
    ranks = np.empty(n, dtype=int)
    # Invert the sorting permutation: the k-th smallest entry gets rank k.
    ranks[order] = np.arange(n)
    return ranks
|
|
|
|
|
|
def compute_centered_ranks(x):
    """Rank all entries of ``x`` and rescale the ranks to [-0.5, 0.5].

    The array is flattened, ranked with ``compute_ranks`` (smallest entry
    -> rank 0), reshaped back to ``x.shape`` and then mapped linearly so
    that the smallest entry becomes -0.5 and the largest becomes 0.5.

    Args:
        x: NumPy array of any shape.

    Returns:
        ``np.float32`` array with the same shape as ``x``. For an input
        with fewer than two entries there is no spread to normalise, so
        the result is all zeros (or empty for an empty input).
    """
    centered = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32)
    if x.size < 2:
        # With a single entry the divisor (size - 1) would be zero and the
        # result would be NaN; its only rank is 0, which is already the
        # centre of the target interval. Empty input is returned as is.
        return centered
    centered /= (x.size - 1)
    centered -= 0.5
    return centered
|
|
|
|
|
|
def make_session(single_threaded):
    """Create a ``tf.InteractiveSession``, optionally limited to one thread.

    Args:
        single_threaded: When truthy, the session is built with a
            ``tf.ConfigProto`` whose ``inter_op_parallelism_threads`` and
            ``intra_op_parallelism_threads`` are both set to 1. Otherwise
            the session is created with TensorFlow's default configuration.

    Returns:
        The newly created ``tf.InteractiveSession``.
    """
    if single_threaded:
        one_thread_config = tf.ConfigProto(
            inter_op_parallelism_threads=1,
            intra_op_parallelism_threads=1)
        return tf.InteractiveSession(config=one_thread_config)
    return tf.InteractiveSession()
|
|
|
|
|
|
def itergroups(items, group_size):
    """Yield consecutive tuples of up to ``group_size`` elements of ``items``.

    Every yielded tuple holds exactly ``group_size`` elements, except
    possibly the last one, which holds whatever is left over. Nothing is
    yielded for an empty iterable.

    Because this is a generator, the ``group_size`` check only runs once
    iteration starts.

    Args:
        items: Any iterable.
        group_size: Number of elements per group; must be at least 1.

    Yields:
        Tuples of elements, in their original order.
    """
    assert group_size >= 1
    pending = []
    for item in items:
        pending.append(item)
        if len(pending) != group_size:
            continue
        # The group is full: hand it out and start collecting a new one.
        yield tuple(pending)
        pending = []
    # Emit the trailing, partially filled group, if there is one.
    if pending:
        yield tuple(pending)
|
|
|
|
|
|
def batched_weighted_sum(weights, vecs, batch_size):
    """Compute ``sum_i weights[i] * vecs[i]`` in batches.

    ``weights`` and ``vecs`` are consumed in lockstep, ``batch_size`` items
    at a time (via ``itergroups``). Each batch is converted to
    ``np.float32`` arrays and reduced with ``np.dot``, so only one batch is
    held as a dense array at any moment.

    Args:
        weights: Iterable of scalar weights.
        vecs: Iterable of vectors, one per weight.
        batch_size: Maximum number of items combined per ``np.dot`` call.

    Returns:
        A tuple ``(total, num_items_summed)``: the accumulated weighted sum
        (the integer 0 if no batch was processed) and the number of
        weight/vector pairs that went into it.
    """
    accumulated = 0
    items_seen = 0
    weight_batches = itergroups(weights, batch_size)
    vec_batches = itergroups(vecs, batch_size)
    for w_batch, v_batch in zip(weight_batches, vec_batches):
        assert len(w_batch) == len(v_batch) <= batch_size
        w_arr = np.asarray(w_batch, dtype=np.float32)
        v_arr = np.asarray(v_batch, dtype=np.float32)
        accumulated += np.dot(w_arr, v_arr)
        items_seen += len(w_batch)
    return accumulated, items_seen
|
|
|
|
|
|
class RunningStat(object):
    """Accumulates a sum, a sum of squares and a count for running statistics.

    The mean and standard deviation are derived on demand from the three
    accumulators ``sum``, ``sumsq`` and ``count``.
    """

    def __init__(self, shape, eps):
        """Create empty accumulators.

        Args:
            shape: Shape of the ``sum`` and ``sumsq`` arrays.
            eps: Initial value of ``count`` and of every ``sumsq`` entry;
                ``sum`` starts at zero.
        """
        self.count = eps
        self.sum = np.zeros(shape, dtype=np.float32)
        self.sumsq = np.full(shape, eps, dtype=np.float32)

    def increment(self, s, ssq, c):
        """Add a batch's sum ``s``, sum of squares ``ssq`` and count ``c``."""
        self.count += c
        self.sum += s
        self.sumsq += ssq

    @property
    def mean(self):
        """Current mean: accumulated sum divided by accumulated count."""
        return self.sum / self.count

    @property
    def std(self):
        """Current standard deviation.

        The variance estimate E[x^2] - E[x]^2 is clamped from below at
        1e-2 before the square root, so the result is never below 0.1.
        """
        mean_of_squares = self.sumsq / self.count
        variance = mean_of_squares - np.square(self.mean)
        return np.sqrt(np.maximum(variance, 1e-2))

    def set_from_init(self, init_mean, init_std, init_count):
        """Overwrite the accumulators to match given statistics.

        Afterwards ``mean`` equals ``init_mean``, ``count`` equals
        ``init_count`` and the unclamped variance equals ``init_std ** 2``.
        ``sum`` and ``sumsq`` are updated in place.
        """
        # E[x^2] = mean^2 + std^2
        second_moment = np.square(init_mean) + np.square(init_std)
        self.sum[:] = init_mean * init_count
        self.sumsq[:] = second_moment * init_count
        self.count = init_count
|