From fbac2569827558227e1e2cca71f837f5a3baa3b9 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Wed, 11 Mar 2020 01:09:08 -0700 Subject: [PATCH] [sgd] Add benchmarks (#7454) * Init fp16 * fp16 and schedulers * scheduler linking and fp16 * to fp16 * loss scaling and documentation * more documentation * add tests, refactor config * moredocs * more docs * fix logo, add test mode, add fp16 flag * fix tests * fix scheduler * fix apex * improve safety * fix tests * fix tests * remove pin memory default * rm * fix * Update doc/examples/doc_code/raysgd_torch_signatures.py * fix * migrate changes from other PR * ok thanks * pass * signatures * lint' * Update python/ray/experimental/sgd/pytorch/utils.py * Apply suggestions from code review Co-Authored-By: Edward Oakes * should address most comments * comments * fix this ci * first_pass * add overrides * override * fixing up operators * format * sgd * constants * rm * revert * save * failures * fixes * trainer * run test * operator * code * op * ok done * operator * sgd test fixes * ok * trainer * format * Apply suggestions from code review Co-Authored-By: Edward Oakes * Update doc/source/raysgd/raysgd_pytorch.rst * docstring * dcgan * doc * commits * nit * testing * revert * Start renaming pytorch to torch * Rename PyTorchTrainer to TorchTrainer * Rename PyTorch runners to Torch runners * Finish renaming API * Rename to torch in tests * Finish renaming docs + tests * Run format + fix DeprecationWarning * fix * move tests up * benchmarks * rename * remove some args * better metrics output * fix up the benchmark * benchmark-yaml * horovod-benchmark * benchmarks * Remove benchmark code for cleanups * benchmark-code * nits * benchmark yamls * benchmark yaml * ok * ok * ok * benchmark * nit * finish_bench * makedatacreator * relax * metrics * autosetsampler * profile * movements * OK * smoothen * fix * nitdocs * loss * envflag * comments * nit * format * visible * images * move_images * fix * rernder * rrender * rest * multgpu * 
fix * nit * finish * extrra * setup * revert Co-authored-by: Edward Oakes Co-authored-by: Maksim Smolin --- doc/source/raysgd/raysgd_pytorch.rst | 47 +++++ .../sgd/torch/distributed_torch_runner.py | 4 +- .../sgd/torch/examples/benchmarks/README.rst | 162 ++++++++++++++++++ .../torch/examples/benchmarks/benchmark.py | 126 ++++++++++++++ .../torch/examples/benchmarks/dp_benchmark.py | 106 ++++++++++++ .../benchmarks/horovod-benchmark.yaml | 85 +++++++++ .../benchmarks/horovod_benchmark_apex.py | 144 ++++++++++++++++ .../benchmarks/raysgd_multigpu_benchmark.png | Bin 0 -> 15349 bytes .../benchmarks/raysgd_multinode_benchmark.png | Bin 0 -> 15172 bytes .../sgd/torch/examples/sgd-development.yaml | 94 ++++++++++ python/ray/util/sgd/torch/torch_runner.py | 2 +- python/ray/util/sgd/torch/torch_trainer.py | 2 - 12 files changed, 768 insertions(+), 4 deletions(-) create mode 100644 python/ray/util/sgd/torch/examples/benchmarks/README.rst create mode 100644 python/ray/util/sgd/torch/examples/benchmarks/benchmark.py create mode 100644 python/ray/util/sgd/torch/examples/benchmarks/dp_benchmark.py create mode 100644 python/ray/util/sgd/torch/examples/benchmarks/horovod-benchmark.yaml create mode 100644 python/ray/util/sgd/torch/examples/benchmarks/horovod_benchmark_apex.py create mode 100644 python/ray/util/sgd/torch/examples/benchmarks/raysgd_multigpu_benchmark.png create mode 100644 python/ray/util/sgd/torch/examples/benchmarks/raysgd_multinode_benchmark.png create mode 100644 python/ray/util/sgd/torch/examples/sgd-development.yaml diff --git a/doc/source/raysgd/raysgd_pytorch.rst b/doc/source/raysgd/raysgd_pytorch.rst index d6865a8fb..a7cc457aa 100644 --- a/doc/source/raysgd/raysgd_pytorch.rst +++ b/doc/source/raysgd/raysgd_pytorch.rst @@ -525,6 +525,53 @@ You can see the `DCGAN script `_. + +DISCLAIMER: RaySGD does not provide any custom communication primitives. If you see any performance issues, you may need to file them on the PyTorch github repository. 
+ Feature Requests ---------------- diff --git a/python/ray/util/sgd/torch/distributed_torch_runner.py b/python/ray/util/sgd/torch/distributed_torch_runner.py index 58b01c48f..5c588ed80 100644 --- a/python/ray/util/sgd/torch/distributed_torch_runner.py +++ b/python/ray/util/sgd/torch/distributed_torch_runner.py @@ -18,13 +18,15 @@ class DistributedTorchRunner(TorchRunner): Args: args: Arguments for TorchRunner. - backend (string): backend used by distributed PyTorch. + backend (string): Backend used by distributed PyTorch. kwargs: Keyword arguments for TorchRunner. """ def __init__(self, *args, backend="gloo", **kwargs): super(DistributedTorchRunner, self).__init__(*args, **kwargs) + if backend not in ("gloo", "nccl"): + raise ValueError("Backend must be one of 'gloo' or 'nccl'.") self.backend = backend def setup(self, url, world_rank, world_size): diff --git a/python/ray/util/sgd/torch/examples/benchmarks/README.rst b/python/ray/util/sgd/torch/examples/benchmarks/README.rst new file mode 100644 index 000000000..42aa31f96 --- /dev/null +++ b/python/ray/util/sgd/torch/examples/benchmarks/README.rst @@ -0,0 +1,162 @@ +Running benchmarks +================== + +RaySGD provides comparable or better performance than other existing solutions for parallel or distributed training. + +You can run ``ray/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py`` for benchmarking the RaySGD TorchTrainer implementation. To benchmark training on a multi-node multi-gpu cluster, you can use the `Ray Autoscaler `_. + +DISCLAIMER: RaySGD does not provide any custom communication primitives. If you see any performance issues, you may need to file them on the PyTorch github repository. 
+ +Single Node Results +------------------- + +Here are benchmarking results comparing the following: + +* torch.nn.DataParallel +* torch.nn.DataParallel with ``apex.amp`` enabled (``O1``) +* Ray (wrapping PyTorch DistributedDataParallel) +* Ray (wrapping PyTorch DistributedDataParallel) with ``apex.amp`` enabled (``O1``) + +on synthetic ImageNet data (via ``benchmark.py`` and ``dp_benchmark.py``) as of 03/04/2020. + +Framework versions used: + +* PyTorch Version: torch-1.4.0-cp36-cp36m +* Torchvision Version: torchvision-0.5.0-cp36-cp36m +* Apex Version: commit hash 5633f6d + +.. code-block:: + + # Images per second for ResNet50 + # Batch size per worker = 128 + # GPU Type = V100 + # Run on AWS us-east-1c, p3dn.24xlarge instance. + + + Number DataParallel Ray (PyTorch) DataParallel Ray (PyTorch) + of GPUs + Apex + Apex + ======= ============ ============= ============ ============== + 1 2769.7 5143 2962.7 6172 + 2 5492.2 9463 5886.1 10052.8 + 4 10733.4 18807 11705.9 20319.5 + 8 21872.5 36911.8 23317.9 38642 + + +.. image:: raysgd_multigpu_benchmark.png + :scale: 30% + :align: center + + +Multi Node Results +------------------ + +Here are benchmarking results comparing the following: + +* Horovod +* Horovod with ``apex.amp`` enabled (``O1``) +* PyTorch DistributedDataParallel +* PyTorch DistributedDataParallel with ``apex.amp`` enabled (``O1``) + +on synthetic ImageNet data (via ``benchmark.py`` and ``horovod_benchmark_apex.py``) as of 03/04/2020. + + +Framework versions used: + +* PyTorch Version: torch-1.4.0-cp36-cp36m +* Torchvision Version: torchvision-0.5.0-cp36-cp36m +* Apex Version: commit hash 5633f6d +* Horovod Version: horovod-0.19.0 + +.. code-block:: bash + + # Images per second for ResNet50 + # Batch size per worker = 128 + # GPU Type = V100 + # Run on AWS us-east-1c, p3dn.24xlarge instances. 
+ + Number Horovod Ray (PyTorch) Horovod Ray (PyTorch) + of GPUs + Apex + Apex + ======= ======= ============= ======= ============== + 1 * 8 2769.7 5143 2962.7 6172 + 2 * 8 5492.2 9463 5886.1 10052.8 + 4 * 8 10733.4 18807 11705.9 20319.5 + 8 * 8 21872.5 36911.8 23317.9 38642 + + +.. image:: raysgd_multinode_benchmark.png + :scale: 30% + :align: center + + +Simple Instructions +------------------- + +Note that these instructions are not maintained and may require a bit of wrangling to get working. + +First, ``git clone https://github.com/ray-project/ray && cd ray/python/ray/util/sgd/torch/examples/``. + +You can use ``sgd-development.yaml`` to set up your cluster configuration and ``ray up sgd-development.yaml`` to launch the cluster. + +You can specify the number of nodes you want to use with the following configuration: + +.. code-block:: + + # The maximum number of worker nodes to launch in addition to the head + # node. This takes precedence over min_workers. min_workers defaults to 0. + min_workers: # Change this to a custom quantity + initial_workers: # same as above + max_workers: # same as above + +You may want to install FP16 support for PyTorch with the following configuration in the YAML file: + +.. code-block:: yaml + + setup_commands: + - ray || pip install -U ray[rllib] + - pip install -U ipdb torch torchvision + # Install apex, but continue if this command fails. + # For faster installation purposes, we do not install the apex cpp bindings + # The cpp bindings can improve your benchmarked performance. + - git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir ./ || true + +You should then run ``ray monitor sgd-development.yaml`` to monitor the progress of the cluster setup. When the cluster is done setting up, you should see something like the following: + +.. 
code-block:: bash + + 2020-03-05 01:24:53,613 INFO log_timer.py:17 -- AWSNodeProvider: Set tag ray-node-status=up-to-date on ['i-07ba946522fcb1d3d'] [LogTimer=134ms] + 2020-03-05 01:24:53,734 INFO log_timer.py:17 -- AWSNodeProvider: Set tag ray-runtime-config=c12bae3df69d4d6a207e90948dc4bf763319d7ed on ['i-07ba946522fcb1d3d'] [LogTimer=121ms] + 2020-03-05 01:24:58,475 INFO autoscaler.py:733 -- StandardAutoscaler: 7/7 target nodes (0 pending) + 2020-03-05 01:24:58,476 INFO autoscaler.py:734 -- LoadMetrics: MostDelayedHeartbeats={'172.31.38.189': 0.21588897705078125, '172.31.38.95': 0.21587467193603516, '172.31.42.196': 0.21586227416992188, '172.31.34.227': 0.2158496379852295, '172.31.42.101': 0.2158372402191162}, NodeIdleSeconds=Min=6 Mean=27 Max=40, NumNodesConnected=8, NumNodesUsed=0.0, ResourceUsage=0.0/512.0 CPU, 0.0/64.0 GPU, 0.0 GiB/4098.67 GiB memory, 0.0/1.0 node:172.31.34.227, 0.0/1.0 node:172.31.36.8, 0.0/1.0 node:172.31.36.82, 0.0/1.0 node:172.31.38.189, 0.0/1.0 node:172.31.38.95, 0.0/1.0 node:172.31.42.101, 0.0/1.0 node:172.31.42.196, 0.0/1.0 node:172.31.45.185, 0.0 GiB/5.45 GiB object_store_memory, TimeSinceLastHeartbeat=Min=0 Mean=0 Max=0 + +You can then launch a synthetic benchmark run with the following command: + +.. code-block:: bash + + $ ray submit sgd-development.yaml benchmarks/benchmark.py --args="--batch-size 128" + + # Or with apex fp16 + $ ray submit sgd-development.yaml benchmarks/benchmark.py --args="--batch-size 128 --fp16" + +You should see something like: + +.. 
code-block:: bash + + Model: resnet50 + Batch size: 128 + Number of GPUs: 16 + Iter #0: 354.2 img/sec per GPU + Iter #1: 354.0 img/sec per GPU + Iter #2: 353.0 img/sec per GPU + Iter #3: 353.3 img/sec per GPU + Iter #4: 352.8 img/sec per GPU + Iter #5: 348.5 img/sec per GPU + Iter #6: 352.5 img/sec per GPU + Iter #7: 352.5 img/sec per GPU + Iter #8: 352.1 img/sec per GPU + Iter #9: 352.2 img/sec per GPU + Img/sec per GPU: 352.5 +-3.0 + Total img/sec on 16 GPU(s): 5640.2 +-47.2 + + +You can run ``ray up benchmarks/horovod-benchmark.yaml`` to launch an AWS cluster that sets up Horovod on each machine. +See ``https://github.com/horovod/horovod`` for launching Horovod training. ``horovod_benchmark_apex.py`` can be used with ``horovodrun`` to obtain benchmarking results. diff --git a/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py b/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py new file mode 100644 index 000000000..6155d9e21 --- /dev/null +++ b/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py @@ -0,0 +1,126 @@ +from __future__ import print_function + +import argparse +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data.distributed +from torchvision import models +import timeit +import numpy as np + +import ray +from ray.util.sgd import TorchTrainer +from ray.util.sgd.torch import TrainingOperator + +# Benchmark settings +parser = argparse.ArgumentParser( + description="PyTorch Synthetic Benchmark", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument( + "--fp16", action="store_true", default=False, help="use fp16 training") + +parser.add_argument( + "--model", type=str, default="resnet50", help="model to benchmark") +parser.add_argument( + "--batch-size", type=int, default=32, help="input batch size") + +parser.add_argument( + "--num-warmup-batches", + type=int, + default=10, + help="number of warm-up batches that don't count towards benchmark") +parser.add_argument( + 
"--num-batches-per-iter", + type=int, + default=10, + help="number of batches per benchmark iteration") +parser.add_argument( + "--num-iters", type=int, default=10, help="number of benchmark iterations") + +parser.add_argument( + "--no-cuda", + action="store_true", + default=False, + help="Disables CUDA training") +parser.add_argument( + "--local", + action="store_true", + default=False, + help="Disables cluster training") + +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() +device = "GPU" if args.cuda else "CPU" + + +def init_hook(): + import torch.backends.cudnn as cudnn + cudnn.benchmark = True + + +class Training(TrainingOperator): + def setup(self, config): + data = torch.randn(args.batch_size, 3, 224, 224) + target = torch.LongTensor(args.batch_size).random_() % 1000 + if args.cuda: + data, target = data.cuda(), target.cuda() + + self.data, self.target = data, target + + def train_epoch(self, *pargs, **kwargs): + # print(self.model) + def benchmark(): + self.optimizer.zero_grad() + output = self.model(self.data) + loss = F.cross_entropy(output, self.target) + loss.backward() + self.optimizer.step() + + # print("Running warmup...") + if self.global_step == 0: + timeit.timeit(benchmark, number=args.num_warmup_batches) + self.global_step += 1 + # print("Running benchmark...") + time = timeit.timeit(benchmark, number=args.num_batches_per_iter) + img_sec = args.batch_size * args.num_batches_per_iter / time + return {"img_sec": img_sec} + + +if __name__ == "__main__": + ray.init(address=None if args.local else "auto") + num_workers = 2 if args.local else int(ray.cluster_resources().get(device)) + from ray.util.sgd.torch.examples.train_example import LinearDataset + + print("Model: %s" % args.model) + print("Batch size: %d" % args.batch_size) + print("Number of %ss: %d" % (device, num_workers)) + + trainer = TorchTrainer( + model_creator=lambda cfg: getattr(models, args.model)(), + optimizer_creator=lambda model, cfg: optim.SGD( 
+ model.parameters(), lr=0.01 * cfg.get("lr_scaler")), + data_creator=lambda cfg: LinearDataset(4, 2), + initialization_hook=init_hook, + config=dict( + lr_scaler=num_workers), + training_operator_cls=Training, + num_workers=num_workers, + use_gpu=args.cuda, + use_fp16=args.fp16, + ) + + img_secs = [] + for x in range(args.num_iters): + result = trainer.train() + # print(result) + img_sec = result["img_sec"] + print("Iter #%d: %.1f img/sec per %s" % (x, img_sec, device)) + img_secs.append(img_sec) + + # Results + img_sec_mean = np.mean(img_secs) + img_sec_conf = 1.96 * np.std(img_secs) + print("Img/sec per %s: %.1f +-%.1f" % (device, img_sec_mean, img_sec_conf)) + print("Total img/sec on %d %s(s): %.1f +-%.1f" % + (num_workers, device, num_workers * img_sec_mean, + num_workers * img_sec_conf)) diff --git a/python/ray/util/sgd/torch/examples/benchmarks/dp_benchmark.py b/python/ray/util/sgd/torch/examples/benchmarks/dp_benchmark.py new file mode 100644 index 000000000..80fd80a5b --- /dev/null +++ b/python/ray/util/sgd/torch/examples/benchmarks/dp_benchmark.py @@ -0,0 +1,106 @@ +from __future__ import print_function + +import argparse +import timeit +import torch.backends.cudnn as cudnn +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data.distributed +from torch.nn import DataParallel +from torchvision import models +import numpy as np +import os +# Apex +from apex import amp + +# Benchmark settings +parser = argparse.ArgumentParser( + description="PyTorch DP Synthetic Benchmark", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument( + "--fp16-allreduce", + action="store_true", + default=False, + help="use fp16 compression during allreduce") + +parser.add_argument( + "--model", type=str, default="resnet50", help="model to benchmark") +parser.add_argument( + "--batch-size", type=int, default=32, help="input batch size") +parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus") + 
+parser.add_argument( + "--num-warmup-batches", + type=int, + default=10, + help="number of warm-up batches that don\"t count towards benchmark") +parser.add_argument( + "--num-batches-per-iter", + type=int, + default=10, + help="number of batches per benchmark iteration") +parser.add_argument( + "--num-iters", type=int, default=10, help="number of benchmark iterations") +parser.add_argument( + "--amp-fp16", + action="store_true", + default=False, + help="Enables FP16 training with Apex.") + +args = parser.parse_args() +os.environ["CUDA_VISIBLE_DEVICES"] = ",".join( + str(i) for i in range(args.num_gpus)) + +cudnn.benchmark = True + +# Set up standard model. +model = getattr(models, args.model)().cuda() +model = DataParallel(model) + +optimizer = optim.SGD(model.parameters(), lr=0.01) + +# Apex +if args.amp_fp16: + model, optimizer = amp.initialize(model, optimizer, opt_level="O1") + +# Set up fixed fake data +data = torch.randn(args.batch_size, 3, 224, 224) +target = torch.LongTensor(args.batch_size).random_() % 1000 +data, target = data.cuda(), target.cuda() + + +def benchmark_step(): + optimizer.zero_grad() + output = model(data) + loss = F.cross_entropy(output, target) + loss.backward() + optimizer.step() + + +print("Model: %s" % args.model) +print("Batch size: %d" % args.batch_size) +device = "GPU" +print("Number of %ss: %d" % (device, args.num_gpus)) + +# Warm-up +print("Running warmup...") +timeit.timeit(benchmark_step, number=args.num_warmup_batches) + +# Benchmark +print("Running benchmark...") +img_secs = [] +for x in range(args.num_iters): + time = timeit.timeit(benchmark_step, number=args.num_batches_per_iter) + img_sec = args.batch_size * args.num_batches_per_iter / time + print("Iter #%d: %.1f img/sec per %s" % (x, img_sec, device)) + img_secs.append(img_sec) + +# Results +img_sec_mean = np.mean(img_secs) +img_sec_conf = 1.96 * np.std(img_secs) +print("Img/sec per %s: %.1f +-%.1f" % (device, img_sec_mean, img_sec_conf)) +print("Total img/sec on %d 
%s(s): %.1f +-%.1f" % ( + args.num_gpus, + device, + img_sec_mean, # we do NOT scale this by number workers + args.num_gpus * img_sec_conf)) diff --git a/python/ray/util/sgd/torch/examples/benchmarks/horovod-benchmark.yaml b/python/ray/util/sgd/torch/examples/benchmarks/horovod-benchmark.yaml new file mode 100644 index 000000000..b72651500 --- /dev/null +++ b/python/ray/util/sgd/torch/examples/benchmarks/horovod-benchmark.yaml @@ -0,0 +1,85 @@ +# A unique identifier for the head node and workers of this cluster. +cluster_name: horovod-pytorch + +# The maximum number of worker nodes to launch in addition to the head +# node. This takes precedence over min_workers. min_workers defaults to 0. +min_workers: 1 +initial_workers: 1 +max_workers: 1 + +target_utilization_fraction: 0.9 + +# If a node is idle for this many minutes, it will be removed. +idle_timeout_minutes: 50 +# docker: +# image: tensorflow/tensorflow:1.5.0-py3 +# container_name: ray_docker + +# Cloud-provider specific configuration. +provider: + type: aws + region: us-east-1 + availability_zone: us-east-1c + +# How Ray will authenticate with newly launched nodes. +auth: + ssh_user: ubuntu + + +head_node: + InstanceType: p3dn.24xlarge + ImageId: ami-0698bcaf8bd9ef56d + InstanceMarketOptions: + MarketType: spot + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: 250 + # SpotOptions: + # MaxPrice: "9.0" + + +worker_nodes: + InstanceType: p3dn.24xlarge + ImageId: ami-0698bcaf8bd9ef56d + InstanceMarketOptions: + MarketType: spot + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: 250 + # SpotOptions: + # MaxPrice: "9.0" + # # Run workers on spot by default. Comment this out to use on-demand. 
+ # InstanceMarketOptions: + # MarketType: spot + +setup_commands: + - pip install torch torchvision ipdb + - pip install ray[rllib] # enable autoscaling + - git clone https://github.com/horovod/horovod || true + - git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir ./ || true + - tmux new -d -s my-session "HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL pip install horovod" + + +file_mounts: {} + +# Custom commands that will be run on the head node after common setup. +head_setup_commands: + - cat ~/ray_bootstrap_key.pem > ~/.ssh/id_rsa + +# Custom commands that will be run on worker nodes after common setup. +worker_setup_commands: + - pip install horovod + +# # Command to start ray on the head node. You don't need to change this. +head_start_ray_commands: + - ray stop + - ray start --head --redis-port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --object-store-memory=1000000000 + +# Command to start ray on worker nodes. You don't need to change this. 
+worker_start_ray_commands: + - ray stop + - ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 + # - nvidia-docker run -it --network=host -d --rm -p 4321:22 horovod:latest bash -c "pip install Pillow==6.1; sleep infinity" + diff --git a/python/ray/util/sgd/torch/examples/benchmarks/horovod_benchmark_apex.py b/python/ray/util/sgd/torch/examples/benchmarks/horovod_benchmark_apex.py new file mode 100644 index 000000000..251511da2 --- /dev/null +++ b/python/ray/util/sgd/torch/examples/benchmarks/horovod_benchmark_apex.py @@ -0,0 +1,144 @@ +from __future__ import print_function + +import argparse +import torch.backends.cudnn as cudnn +import torch.nn.functional as F +import torch.optim as optim +import torch.utils.data.distributed +from torchvision import models +import horovod.torch as hvd +import timeit +import numpy as np +# Apex +from apex import amp + +# Benchmark settings +parser = argparse.ArgumentParser( + description="PyTorch Synthetic Benchmark", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument( + "--fp16-allreduce", + action="store_true", + default=False, + help="use fp16 compression during allreduce") + +parser.add_argument( + "--model", type=str, default="resnet50", help="model to benchmark") +parser.add_argument( + "--batch-size", type=int, default=32, help="input batch size") + +parser.add_argument( + "--num-warmup-batches", + type=int, + default=10, + help="number of warm-up batches that don\"t count towards benchmark") +parser.add_argument( + "--num-batches-per-iter", + type=int, + default=10, + help="number of batches per benchmark iteration") +parser.add_argument( + "--num-iters", type=int, default=10, help="number of benchmark iterations") + +parser.add_argument( + "--no-cuda", + action="store_true", + default=False, + help="disables CUDA training") +parser.add_argument( + "--amp-fp16", + action="store_true", + default=False, + help="Enables FP16 training with Apex.") + +args = parser.parse_args() 
+args.cuda = not args.no_cuda and torch.cuda.is_available() + +hvd.init() + +if args.cuda: + # Horovod: pin GPU to local rank. + torch.cuda.set_device(hvd.local_rank()) + +cudnn.benchmark = True + +# Set up standard model. +model = getattr(models, args.model)() + +if args.cuda: + # Move model to GPU. + model.cuda() + +optimizer = optim.SGD(model.parameters(), lr=0.01) + +# Horovod: (optional) compression algorithm. +compression = (hvd.Compression.fp16 + if args.fp16_allreduce else hvd.Compression.none) + +# Horovod: wrap optimizer with DistributedOptimizer. +optimizer = hvd.DistributedOptimizer( + optimizer, + named_parameters=model.named_parameters(), + compression=compression) + +# Horovod: broadcast parameters & optimizer state. +hvd.broadcast_parameters(model.state_dict(), root_rank=0) +hvd.broadcast_optimizer_state(optimizer, root_rank=0) + +# Apex +if args.amp_fp16: + model, optimizer = amp.initialize(model, optimizer, opt_level="O1") + +# Set up fixed fake data +data = torch.randn(args.batch_size, 3, 224, 224) +target = torch.LongTensor(args.batch_size).random_() % 1000 +if args.cuda: + data, target = data.cuda(), target.cuda() + + +def benchmark_step(): + optimizer.zero_grad() + output = model(data) + loss = F.cross_entropy(output, target) + # Apex + if args.amp_fp16: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.synchronize() + with optimizer.skip_synchronize(): + optimizer.step() + else: + loss.backward() + optimizer.step() + + +def log(s, nl=True): + if hvd.rank() != 0: + return + print(s, end="\n" if nl else "") + + +log("Model: %s" % args.model) +log("Batch size: %d" % args.batch_size) +device = "GPU" if args.cuda else "CPU" +log("Number of %ss: %d" % (device, hvd.size())) + +# Warm-up +log("Running warmup...") +timeit.timeit(benchmark_step, number=args.num_warmup_batches) + +# Benchmark +log("Running benchmark...") +img_secs = [] +for x in range(args.num_iters): + time = timeit.timeit(benchmark_step, 
number=args.num_batches_per_iter) + img_sec = args.batch_size * args.num_batches_per_iter / time + log("Iter #%d: %.1f img/sec per %s" % (x, img_sec, device)) + img_secs.append(img_sec) + +# Results +img_sec_mean = np.mean(img_secs) +img_sec_conf = 1.96 * np.std(img_secs) +log("Img/sec per %s: %.1f +-%.1f" % (device, img_sec_mean, img_sec_conf)) +log("Total img/sec on %d %s(s): %.1f +-%.1f" % + (hvd.size(), device, hvd.size() * img_sec_mean, hvd.size() * img_sec_conf)) diff --git a/python/ray/util/sgd/torch/examples/benchmarks/raysgd_multigpu_benchmark.png b/python/ray/util/sgd/torch/examples/benchmarks/raysgd_multigpu_benchmark.png new file mode 100644 index 0000000000000000000000000000000000000000..325891d95e146b65dee8719afcd8704aa4749446 GIT binary patch literal 15349 zcmb_@c|4Tu+qWSUMH?+brPW%AvL{KAeH$~DNo1$&J1Hr$v=|{|9m`-a_GM5~WM}Lq zF?L3FGnV(f)P3K-=lMO)`+T1F^Ugn9GuL&l$9Wvz@Ao*aml|qH`&f>%(9zNDyL;!B z79AZuoQ@9Ww3`Wh^ONj`r=ydqxO+=p$GvZMkoDc+uDJTgmo6VTAkXmlH+G-< zD4hTG{=Vbl0pG0W%|b6=+d@lbl1>HG=D)sh@Z^n~aGe{kSdQ~79DSrQDsiAzVd&;) z=*g4c=Y5kmt`WE#5@KR}-C9~jz2(L`pG;=v%;g|;SM`hSb#|I5phDA2$^&;C!B!<2{L-`^)}kVOXPo24k9<5seZhl1}R;qdpw@zA4} zYb&>xCOcktugvyaNV(2j5FgCSt(r=ca-PsoUhJ+I@>wx;kN`cd9MXxll;DUBk7Y`-|I(@mH!Gb9>&kU3Uk!%j%N=^`(U zzE$t*P?xjYq%IfiIdrZEli(Zk%dN@TBhuC5p^7`A_H-(DR%I7iDg^TD*+0->{@k`fbk6v+HCRL zp}Rprt3zJl{<`uEdKLLG=}MHM{m}WX@@2 zruTuUMf0m?xw=_;ii3As666TC(sjwx|qV}=WwUO}IaKDWQDS@Vxp-L;C+u~?_EA)AoK%qZ}or(L;&wKid z>|!hDYgz4MUh^sCA3BYS^Ma`eN&Qkg6vlO}Iq(w6`n3v7g)7g&NZ`T+ut3lEaSb%< zVc6gH-enHuk?B)Srl?eBy2AvF4G!UwI-a~r^H^-U<|K?VPnO;Tk7maHsM4Sf#mjj8 zY1?Lbs%y^B1K;s1!Dl6Ud2X;j5N8P!ATm)Hp1ZEKrEyES+P-{fM#|-j>`+&U5_ajR zT54C>gNeIh9&FRpe!!kz%8K{K=2Z(RROQA(BeB@Qq`%UuSU_~PB!DB18f$fIi`q?a zRiadSF6YZ_46_%R)&-j`?h&zwjI7N>Y<4OqI_h^+4VBvumSHRPgM)(yBtNchv}|vL z(+KEjba!i_LJOjr1zkvtgr5RN$dJ>n(fXoTxDnW@U3<#i9J!AT3jKYbUmlwMQSE=#>&(|C6}%{QbfdF4O`*6Sr& zKY!XmvfrM6v4!Z0zE`a^{R3~+Vc1J`xwUEhiRN!ck0y$#9?GuhN=kVgQ|u7^LDV9w zf80{0&6V~uMCA71!;XD$>b^d`P7&b?(d4KZ@!j-#%}j!Eri)zWYgZGg1`{rEaxHcK 
zWP!thpW8E?Ifkx=lIERGp{$s(9yD`Im0`Z?ETa3{H9eEvg`;(>cl*9sD;>>#>x4_T zK1b0lVOg16e0tL#q1^HiEX4BGCT-BI`(oIoCWQ_wk9uU37<;5HK3}6Q$^7-bQfng( z=0(dzP0Zr_$`AFCz(#q@T7KS#(d{kD_JFf*{I?vhUub>)f0^+G)H*{5c;bf^TV$^J8*#MzU$4u;p>xTF>yD^jAmTEEV5u&o^%zpt+=Wr)wq$b1y%n z8vFI#9?{9OHkg!=^jwk85`>OBv8}>&et20$sgIgnwS10PHq)pa`6}QAKX;%iNknl` zJ9yOx3;SUoyzjB4`M&sb(k@f)`X#IsTDttn z$eT+>pPZvz@u5b9YZY(Rt^!Z*cQ?bJLXe)=5Gn9B9upSelYaN`n_6-rce1hmx;b?y zSt-0Mu2`+|S@$s`EfY?dKxF>PbovmNtXN`)_b>XVzFBG09hmT}k^p>_^|1*VmK(ns zbo=KOQVJn`q?ZbboOxvuojIWBkmnh?&DPi!E9!)<8{A5UBXIgpA znLZ}@>V{r_4(Zd&z%CxNk;+me^%>BF{L2Vdy>l=XA9}o#hD}IL(q04(^(6Ko5o>Un z5XG2e%iD&S=G%gUvYdt^qUF z-8}nb3QE*{{>MSgxeeC?HbyJiwV9}-UqXj{eO~G2P^AoA^FH%4>kef`biP4n`;(>0 zn2RxH-Icds0?{5R+VL6R<&gab75=H{WV4$|2m$Rqa9IszWPtZcwiUNUp^U9e4Mxep zG&7MmFI;gMKBUoaZ~3wv+Sq)x4gNO&h0W}ZHXOfZ;r?~LEFF3@`_X77K{-|T`NesR z>>CO8T(;UgdcCBEGhh8MmaAP0o&3Y<`y%zK7cerlQi7!MQ+*Nls#*MU0>X{;QB6yg zQ>=$5L$@-I9%97an!c4*bLyUtK>)|wssy!IX5@*a?8dE?Q`V|4R(&>_EF>JARE*1huZ0dh3(PHnsa5J=#RwB@^CZ zE!Ei+^+{jThSnvSJkMQqH(mIo@_AGsgArbBB14d1RxH0C=O~G?Qs}%WU&xZN4(Bq! zG1%e#Q?=PVo37mt3je4qh00i$+mJ}*#yPu$4K4QBs%P6?Pij@*7e6dt1$~|TF#G8eu|>rW`$RM z%9pfYQe>4)eH^SIMaLa&d*OEbeZ_on-nmv2N?zZJUp?o@T$|=fv?kV zcGrXvi{}?~>(W1t2u$R&z$N37t-N{P>vUIIHVSlkq#)umn*SK&@(s+@u5GdC4oB$R zDYjO0QIjKbT`qguL|87xZj7HPqj@H6rOWnQ#IAet{WY^@Ww-H1njmU`n+iC#;coGvg$>^giC>(Uz)X>YZ^B3OgtPKgP!LR+iKFNOXMzl`qpOw@)I zC+R>ro*qqjTVBNNw?B<&Pw>T*%_$;{Gn)Cj7Vf!h_4YL%G*X;!K>jKxZ8pbU(dnHM zb&)h;4)2Sy?!_F(Z8)?_X4wgN=icyfBd|?fTd&5T_5B?91M&Ch(Q6DypD1UvNq+yy z-EawBoO8;o{sM~m8yuz*sfedVPjn{DojN3fVx;j}TEsQ`-~zIFx2|l%p9(9nJX!ng?9)D+D-R_L2qJ~v* z2;LFWvzBP@YK~De$IchH|N5CC8FBs)?p8wLB>uxvoYo)r3wu#R3cG>%x9`*fEeU1_}>8(-hY z4u(=^hu9xZyIdF#tq#I^nD)Tw9N$pQGbqogp6Qers!uxm3+m(1KBqe8x(~n$tE}~3 zq}%E0f8uicaeyE0u*4mv2QJ@bj?{-omDlUOtEcRx+a-8XA-9T}>Gk#TBX@G7{!E

F60b~uk+2S#1w7Zojpv>pBW`fiI7yY<#Ya`V zj(v|_S!fg~Czaav=08hFByA9p4y6{H{3op9nb~>n(TalzP8BE9S{iO2mGW3HBaXQg zrO#|FL=k#o9jYdY2eRL;udf%`bmy3MZ2k)O6V%({yZs6$2cS`r=kk>4^jeIVjk5O_ zhzzAh88=Yfx)*q*T(LgXNlcxw+@l}N0} zmuH3*9X+Rp8oeHFp{?U*NN}+2#@a7U1;3*Pud4IFHN} zeLgV?^;N5yI{%5C&rjfmZGTY{WA^fNca%r97G2Es$4LT>sOt7rE%?S_!uCvUGV!A7 z5ko)Bz3888w%%Eon|*-&@{JT4TW{{#4Zk6=7d}i()W)cl5_D@SX>;Y`)}0r;HkZ5Y zY62KtsY{bzmoi6nPNf&#QRb{P{um! zMD6gM?Bi6nyqVsDdLR4iOX+VQE2%M+XxVb(T&jMxUQRN$~uc$ z*DqYS@Mw|Mmss+6WZ=qsal07gxxCMau^LWnq<}u!lDwGk_Fn02)+d0{J9T6ljyK04 zZK>{y9oH*OYT%SOQ46)s7R`xPie?XchhyhL2nVjBnB*&U#mInBFq@TXWF5`Ij0F-v zdFwbb#SFA!FX3@~`&T`$(w1?_qx&zPo)FT`#sJad0tVKoFI(lP^GG8wU0E+tG4UqH z%L^LmZ&tJSSxad2w7xE5hsTfrB{dA>5qX|uk&|$VE&H_2 zVn~OvqX7?F;*Dl#>o*cxbm47o?HA>?;|`;jHkS=z#uF8S3@>Rk8sI6|>W^soxFQ{a z(Ctu`6I%sKzLhzbk}jpw9a60@=b;=PX6E(&fsT@ui#F#GLS>x2oj{3FnwR<#o4^id zk!9&?Q*)bi*<{NWX|wdkZJ06&25LuIlmTRW0dZ*=CK?0yZOMEx

+RZQ>)}(TOfo z%%jXJos+i$sVG)-#r78M^3#jp06ShsWjJb(GxQGg9(J7@m<+=|<%s1!sH>Ja;BqFX zL6$fv#EbLJdh0N}hY5Qt;Q*Te%eBnUXknu%{y`twN@;`fu%gbM%m9}5)ebN3k{3mS zOUT+MH-!$xBaU{q@3*aELFAbRvfgc@NBbrZO+U53&mXj{(U>;Gau&NcO6vjhZ5{Vp z_;&uT$f6}llRRcesv2z4mX~hZ@UCXr`hu&it$n8!`5d76a&)n5M-Lc~U1CqmR=(`;n7@M4znp2F1LNs@6K2 z6wG%5chKiHo)deCky$nO>!)`R*Nl2{{qZ+XstuIW9CN6tMWozvai#P0k*&EY zCyw|8_Q%6&bBF}zNBj30u?XeSqigRQFbf817S2r4ec5IcQi=>$W6j*?3`iy3@T=Z$ zTiRKX9Ow*Gv{6e|G$O#6{DwdM$0S#1ILF=qXBIU~xiiaMjs*UfkE#!EMk5+2U`F^LHI{{-7VNTz3O0M zCg&Cwg?n)X2d46UX6(3h9#3rgRt@{7^0^1@*VU0lon1UugG;)Uyvig6XEs^n z(7beMlJ;4WJf`qcO*4=oP%iac)q{NBoL_aON9R<85FPGr&UPy6=n26Q!u;yopPx-8 z9ke@NLz-dGyVOeU4k&h`gUn|SJoDt?gPoYW92i$B6^TjdR89%L8E2=PNMj>X35c*I zW8G>NNo%8*oVZ&;hdsmD;V)$st{O*8Wu>Z~QuYDh^PaA>b;fQz<0(pN);FEJERl#> zjS3>OP{@z2a9NG?I(d@P6Ge^``jGUFs#^3|HYM4EM!V!uLz&sun+_byp2w&)Q7TuB z8vtRv!pqJzDMVRC!k6y|@(@gh2HASuWg?1iwmTf^RrF+gMveH+ovs zXFot!7Y}C{cABO-YE+b-mO@Ec*4B@-MhKY%exJ8HcKGweQ${F& zF`&~IbQn`uv~=!IO^GnFFrH(KLLG(Yo4w4sqx*XDs^+|}Ht0T`U3s3r79f@CL`sUr z>EjsX9@3qw?hkIaWyr|M=^76U#Q!l|D^YpMV_ZFZn*w zB+~1DmgT18VCJP%536IDq&L*^vrZiM@tmg_&6|`KweP%gNj0_FC7A7%rzSz2&+fn? 
z5*`e`(82psk-oqV>Bs?uV^(-=?H>=*En)*=iRz>)dm+riW!<}n^q6NC*6<@|v8{{| z44|0{+FP3E^XzXcR|Ao9UzLg3eA2!hVH)q57$_<#u7o;x^Ihr8(r>XZpB0al_3_jqO)j>` z6$4b*3y_o+^@F6#R8P}2lC4PPl9pan8-OG;3ES%lDjRaWt7gY@Rw~v^+ZC>yng(iP z9Xj^8O>QNT$NdMa_+E=%Nv3BD?;EHqY)~(fy-+abs+B2-FyAL1Et{q)-(~;y3~M-` z*PvkF?;l9eY}Db0sdSu2=2?xOsSo3JitOvsH?gA-wtcm-nLPdSIbyd@%)0C48ruN` zQw*S${)=E$A#K10RTFGe4`5FgIgsq-sBPgh-_9pU3rr^>(zN!#W8#-_GsI6Gi(>|{ z;&uZL)TIt}Cr6VsbOul+(nP}hl|3%7f`z#aYn%oG_+~$06FEu&lAb{br;uUe%|^74 z-EeK-+_Ds!Ulbq!+r44l*V zovCv#ohRGtsLRt9>@rIckWLp~QDs`kLK|lHvy78+D-y`9XrC*a7Va-|v8knq0RUD$ z5+-%J!-@wW)C)&oSyziAgS9wMq7q**5Z|0tnTfHb!^qs*_D^H|=MoK1b{jZODrPp~w+6t}IhXzNTR}HoQR+B?749 zjXYj6d5%YLo%MI5OlQrOxei9E%gaCM*Yr}C{`D;Mm}uI-Er_4-Ufi=!+ma-~R1 zvw?n>;Je4weYpEi`(c((+4uzo@kfl^Pv!FkyI7x-HY%Ysa_Af0-(CyfK3YgoS*y(K zIWBH{beR@}3DwPm#D5;|`I@gZyi+!^I429$s8G$aDlpksnAN|24#C3@(pwd6kr>xG zYmaYuIb{PTL83x|RYzJRocZd#cA!$1({YKqOyx}}(z=?N0kwtb z7QR|())XU_)1`FkepjaMF$aOvV>o3eWX20~7(Hjhb(EDwF88b43eS}ptI*?Ojy`*w zJ4XcqP<5ORW$=WNIAh28j)RhO0p8T=B+j*-nvcWc!XMjkaX!lfG%ghvf@R^ zqQAgws{LWaL3dpDQ50Id|4I-~DR#>x+lTz8&%jg#HD-A4KMnV7; zeoK3ZirQQ$Cb%d8;=-#|BJC{@@kNd{Q?LTmtjH|?I?wmssZ~znYTXv&0Gz+>cAe?D zPoffw4S9KVr>wI>u(?2zMIQ(aS*<)HBGSS*$Yozr!x&}A4mZ!s)_Io?u(O`4{sFb< zD|Ot1>&*!S+91VHj!~=t@VIZ>%VG{g(pT7xrgGxyBTgxpszs%nhO8bN4i*~1=kbd# zm$tI0`y5%4UCy=yv@h#@BNAw6#I`Jo{+o2~TXEGc&#>D2$)`|xJZbh5t%)nV+rN0v z8tXGVxPuF7R15~34#Fw>d+o4O8_ZE z(DM8xi{=le9hMm9E?xanwq(G6T=c8fl5dK6O$UFd3prB%VP_phYdMe=RwG80AXz3@ zT+pSN64RrfV`w0Z#BI3gZ6}f&qs|NR`b{Zd1oIQGLc2+wPE<3wJ;nB6>GdR74N6x8ZyshhkPexjkq+5UW^FmW3(D|90TJY%{|aXR z_sjoFGvGgpD88%Z_5*FnO4%S%iQli?B>=pG>ujGvu5sm+x1lgVV@{F+YtCI2I8>;9H z<+^Ga;WE>6`ARmToL@K7OeyTdU9S^Bw+Lj1L#bgAzxBpB=b@UX%(E}g$K!y)TK*XF zsm*axM`EiS$qhxox8ds*3&0UUmN{Q!*!d_(aOnhr9OT4cdGVlYzujv>IJeZp@i@15 zRwM&68y+~CNXwV)S0}?5=;+etVbdV^6`oU#xwS>KlIF#wsl{0fA@DC=b#y&od4c6NQo=gT>^KMSc5O z7c%IB6Rw!Q=~niLwle$rF(V@*F*C=Vln4|4g5*2Fe5Afsjc4 zRI$-wI~&C;{X>wDQ}HYGcq(^Jz^M=@`9B|c?^7i2Zcv{9Oan5Llc)pGU9g>Bcf*Z{#bqzxXc7v5u2sGjEuL#u={96-{dUX0fxBFjxiFG>E 
za(btS9|q5)Ln5FNTGk{n0z=-9g9iEkp@-w_UJs!u{J7sDJrfOjxWR}=4XW96c)iiB z9A~%MCpRb6Bs-vYiWKX z{1*#g#{6d_Ujt7eVIe^>JKG}!$3xW>|Fi+E@4xQD?+$gRVAIdJ?C*dh0o^MAjDgYK z9Z>7pu~>(qi`r>D!5jjDa&jd5%4NN2aO5(!alHi={b!>LwPp(cn^OyIT*DcZN1=6B zzPjvsN{>ys8>L7^q!O>=*9qE~oyy8Bn(E3r;-yNmsC~9a4-U7S?JF!WuJkm&%C<9* ztnUnZ;427x)fvZ%*`T{X9M|#F0R;-Pua$T%a^e;i7O>!4zb5EsBRJN1ppygbD*vMY z4lY{z88TbL%!`K#J$$Jh2?#J4NN7u4iDq%WHsqdc*Mm!hS0Dd#+!u<~7Imq}`@9m4 zXJ&!RD;G6wlyFcP4BJpkxTSlox) z5NQY1GK7cRZn=W^>-2vz;Lhj3UCp&+#2T7B%>2}-TFtK_nm3|XB6Gd#gb5pt zs}3D$s&+tZ!><2c`N>up9FNaZ+gDWym*u#vDgbifi?E#r+Y`sxc&+~>s zuFc;O_Lp6xF#ATsRu2lFWinDvVS_7+^o^Lt=MU@r@RXxq{(x9y#^R2Pt~ zjR_M%7QOm=iB<=k!l%7bgQRKx%eAZ`a1b*8S@ZgzYe6zJ|5SoUkLZqv*NZtWW^MNN zoGL#j&bj}&gCDJ*!}ZC6;LV+tj92_xuv7Z)X4#Cx-jeag5w-aXkS#0&?pO6c!BV7YOkczT7vW-=DB5v)@b_ntF z=but|-t+Y}-zQ*qwpxe!SI4&Y21_8aXpA zw=LntO?RDH40)h)Nsrn`A?n42ylHvPSJ0qw`dMw5b;+XRAC_Hbn(&5qR80d`Jr0e~^{ z2lZiv6%`dM+fnR6Z}YPn5M1ilRg zIOfRy13TCygi{cKYftRIeB%0}9~542gu6%(BL-8494+K&oUdAUeg&veM-W9GAMYJ^@X7a=A3Qg5Zd05i?qz|mlKcp|xyFM-zO?^vQ@hbX5F59(jOWs$meT{^ zM0$z*)jj|au-|Lj@Y+AMe&ksYq(|773^T1(k0&LaUNQT-uZ)TA+=N2Qw%UhHG3H#? 
z?C}y#WAH4!?3oNqydx|NBmri=?VM%^&S@oD?1wyTk4=IvHN)s}9mm|M)+xFKyB1j* zwmdq+p7C+NU2!RDyeUqqd#XFvK{r$9af^cVZ;k*3eW;a&t0*86$i=ay!Rv!F_<#6=v%dJsRF?q2$v3K#OwU0LN~0hh z!MZd3J}8~U$t->sT`5S|{Jtq;`g!-YtuI^-f?d2AqNq&TI4EeGd(u%O_4Nl!f|->uo?A`?)1&*?MvsmRJwIfXEsPE`Juk=vUy#adL=4&|?fXotc#r zEbXOn3IGQo*B{j|z*np@yoW$h?W`IpFbKrJ@?QYF&N-*bnp~6U5;|JpTn~eA3uGRV zR@GDe#kpQr!=$Mb$#cO~AdOtcU51b=|CH%<|JAj>(D`n-RqLk@E=WM-{&a8t&5b}a zT1^M2>922I5-wtR7B)gmHj;XNL!}+8u+bs6`4(s(1D;D0_W?!}6#omg190Iv_5;*A zwq!77#>PzDf4vuc$Sz+&GS$O8%e0wHhU$zwfJ@N~o7}hcWfW9-Y=J`NK_+&d@53i| zA}*j;c$B#cIEh$5RaXjgeVSza*TLBc+569QKLO>?^fwIOnSRX&38#bqaxD+aSO4Xj zo^BMYUGCbuRZ)Q#82uA$c3dz-SSox*>MaZ}3Yh&q3xI`i^yCE4H`;XZ&vJYC_7~~l zJ(nd4Dj+L=#|LA*!C3g4MQQa(E%2_!J><2g(X287*)*>Z)&F{W=S*EtqUZE?oK35* ze+YdksqZ>b&K{q?Aw~lsgM=(6t#Hez*4+a(52_dVueeTRL|0T?`a3pA9K`WULvC1f z#(m{j3oiM@MWuMfJJs|M|K!DcYlPuZZ2^qHv((+IyCqs+A1I+;n44vGIO2AbDGW#EXfQ*_$bmw`<2ma{+Jn^qTKu>4< ze%?iGP_EJ*Az#OK|Gl&~elM+9;7w>w z(6}KUO72$;4{vfm|1sk12$xs`^bg48{29z{_(#)+N)hP4Hq&jCd0}zZS@Qt-&dwNT z^!LC|qVo9WS3-Fcel;7O3UN%?ne0l|E|6VlSPnz2)@Y*%4^6<&5Hw89`00DUN6z#4*?f&vz3;b&U^ zTphsOs5T91W{Y!7*}1`=aX+pnQ5!tAvz!quU{`lG^-p6hr$O_af?@9^{)_?&q7g4` zhd#bh!r~Mxg4BNZ2~>*4xqbW|pIM}wT_^suwi}L8s{XqxXrSCMDS9xfzxswI^v@;a zXY%+^f12DGPjkS(Gvr;c5N_C?;Q?bhjY^%KXiL`1(i4ikbU%QDlM?|59#%is7)*TE z&PaAv1)z@WifzlouP%}|f>9u+g*KuZ_zrT+L^&7a`p~npXkjn-rV2%hF)qf)FvI6r8;z403N!Tz&btFT#bPdp@kXd~>CLQ1n#qfp^@qDJ_7|NjZ(}CBxZV zX8XE$JVsx?hlMzPw*${O3h56$+jD3J6hNf{S9tx1zypt&yn0h6EL5AsEnia0#zNT* zRkAycH@is1FtT#u_%0|MIcXnoX z!xw;H3{3>%WIvv0JR&1d2mnM8$Z_@Oo2z{HSCkm50s;{d2q}YCs(uCw3{1-IxDLHN zQ@9KzHpLELuBK7}kF<+0%(gZa4wn}p`sGIQxyBg57<iDJ(kVh=}m7 zQUJvh>8{}I2PMlv@*{a=9E_ORD1EX*E%_B z7O=On@+-GDSpe;~?ZMr*XTIz_(VF7A3rg&j2l|IKyQIF$@zv&i22zunbjLIdCfe<| zCC=ZJK(qfs;4ah;j3LwTC%Qn9$C)@{2(deO$4`<+$H&X}t%DaW!Ul55gKi|3kvF{u z(!-zRgtfDgvrINKAzBG?il1s*S^+ZRE+i{7#yVnu8w4a1K!`n4FWVYWe?0Ia(Conh z+|l*_h`0Ywm;c)o!4+bW9TEw6rVlvaE7=Y{0_0V>t+kPru3XC<{_fX^!cZ$VY{*Z_Mi1c_2)B)={e5Odd`pI@eY2~>@_`pqXQM7Zqh 
zK8U?5hH#cX2I#mDWEB*jFE&fH0MXF`%0P~+Qb-izZfE%&Kau|vazQU<0mRG)oa4Lf z#=HWphTS(}ftID4^^n(l{t2t)5MZa4;BfO_wgXONHuT!iqis3gjS^N1mdiE%E=v=d z;9j)q`%5(}tgP8U@*rmE=RO;$1>TOF<^$EqwZzMSszPBdpk6`DOLl$q?VW0^h=V@c z|Kc}x2}0SE3~d`#9AiB@ga9wkzPqT61sGHKUPhe_kR9fh8$}4x3$lfr9?WbfaIC^5 z)r_3#r|u2ku2G%?33D4#RVD4((SH=ut}?x1`@v@;0T8o|GWN_+jTO)Se4Q}`s+rpj zADS&D%^~{U?1581fC{o!;i@&$@`VP0+0ya&2X*woE}C6fP6CC~b(_+j8p5h|AE;tD zNKOOq;|<@ePY#!b?CI4jzSLvoye}UlUwd zG~~wt5qn(E*pwhgV#I=k~)k z+#2THssZ8eQ!ohnbRQbyJ;shfhiz~hiMPDpU+f@qIaDnxBloQvNZ-&J^s9cV71Xi9 zLD^_=qj356IA0z2^}g<^u>ckLb=2}?$N7WQ3h-J<`Nu;JhOHn|GDDrt4d3utn;$mK zcBX!R#ETX2nN(S6a`5d4ybcOvK>c_ZBqe}j6po*Qsy?myk&@WDZ^d|nfd_D`)sPQB0lnC9j4$3GrAmW$4Z J=O!xP{{Zm$I<^1+ literal 0 HcmV?d00001 diff --git a/python/ray/util/sgd/torch/examples/benchmarks/raysgd_multinode_benchmark.png b/python/ray/util/sgd/torch/examples/benchmarks/raysgd_multinode_benchmark.png new file mode 100644 index 0000000000000000000000000000000000000000..f2578522530269015d86855026d27dee1fd64a40 GIT binary patch literal 15172 zcmb7rc|4Tu_rE1eB$cfQZIZpGEM-WPgtCUQQz^Ub8CfE2LK_NW&6aKKyO|U!+4nt_ zZN|=+?7wrXp6B^~zTemH`}_Sf+%xxbUFSO2Iq&m6@B5LKh6)Q4Hxm^V6^rV{^Ey;i zG-xU+YF9>j_~zS)X8{!zidpr%;+5OIQv=&?v+GrDIKL`w+>sc-^!@&EGv%{dGbM*; zcN`Kw8Gzv!eVyTdeEQhJN4;%yyvhW+^ZT!B^<26BOgZ8J_3nq~Ep}a3xpJ2C8BV}b z-T%PaM%9=TIcwTY5Ub&NQE;efA%>x~!_?F?HKWluRYT9(f9-Tfl{H125qVo5r zqoO)Xp!xqiJYWGg_~+XH_3)wn=$F@>g?0u8HyWb!zPc)`j3*IXDU&^HZL2K2vh9@{ z<1I;d>+N&x*MbS9k)1ZqM+&A z>28xPNVQEg{>rvY-}M#cJA=8`>L!D@%pQ6ZR;J5mCSTu-&8-RMV#6__sTk2`dzjEU zRV!~-Wey)cJh@P{zSMV?G5987WxifwU?_n)LsI=bfk{PCZN_!>t726ea>)t$%zY*u1%%8IK?f3cnn5Z z56x7qI(CnR?mKyOXd(xT@+=$U7-@eon$s3p?lF}|8T4)vBeiKNI5<@;r(_uyiw`ZZ z?muPmLQu2ljL!<1GCeRb7AtAzxJW6TT)udyIZ)$AxcqiE3eUxsu)K9*k zZFCvUVy z<#d!Sg^?B>2ZUqPx1+&BkFinPy}!=_HWp?~FRmMkuRUy``bPJjG7B1$%}utdk0^~j zV*2{jou9WTC75Sx3FpID_~g2a90zPwLiam|Y#2LLj_~=dP8HKOkpo_i=HQZ>l1B72 z)vXCpr<@bLFOk&b)mH8>?b1!va1RUPlb1b6t=?wtJtajkE8C^0Ouuvgct9xTXyKW5 
z=Jc`K^`|{1yZ?-p+?kUigtl8JN)gHP0f<*TU8GhI$>+YXX;WQMA?Ha{RZJ*g$ZYl+ zUySAJ@>bK(2qh^`bYw*Gp1E^symTc-b|7U;WY@91X$%+Nomh1Bs-L724dM#gN0<1$ zE^N%W%5%`Gu0h>Wd^fe-$mekFrZ!Y%x|S~bqOiRW=UCxe<}9|Xg=TX3on;pqPFSeV z8iePp3Twy5B@?)v*0cJAUM~4_$j+M0B-C}F1k6|m?rmqQZmc6Yw3aphOupEpw^vtd zfu?h5sxPUmvN}S{c+Vc(TB_8!^qUvmPkBz8blHsreY956vD{i2A+`0aw$=ppMUE=i zPp~v6sq1;1MzBY((7ts(Vb*j>n*gfE{n(?6G@Z2}JV{JwbH8F7kAaC!8etLv*no8m3H z_kyH|QD)71MMH{P7u=H*Qb?Wj!?C20-SadNW*K9)YesT2h8%W;`RasP-=NU#3lRef zuQtzb6O}i2-|e^~f`0TQJu?JPD{%|25S&tA_IT@o=>VF}0-H7(mNNQ>8G`By&2}AZ z+FFG9LG^{%Y=8ADg!u>0{nkn5iUzxdZ`})(Xbtx>+pFCaMqia;-r_60DP?WWWrE=;6IE9PPgp$iv`R2b z(v31wU@{oY;`z0{y433O(q`u-@i7WI1f8?nJ(~ouM(sXPOibvFIdxg7eIy@TqC-0e zl~k2rzmN)9e_0{YQI1HqT@Kcuvo&0&RyP>)N*$>l*tD5wO%~`3Om51GH)y_n+^3U` z7w2Y`;HiH9Rt-N5^)8(M8&>qjARYM_J16e^_h`v9qiaR%7LpguLToLm)l+e9T<$E~ z26NL(*fjqUI>E}+h|863vX_%Hda5fUV>9Bm&reHcqmsRgvl%O2ln2-q?@+0!;0g5QEK?onBlx+#c8=GBQvz<(+rR^<;u$OArnVcnai8oror5XB zxy>b+bj+M3)XUtH*>m12rpg2EgMK5z$l3qOw*PO3$J&ta-qX_~dJ+cW-#WEU8S+%P zPLhDclHCv`Rlk!@Zf_)I?`fAeK9Coe`GyK?Ua;^;ixQW|>u%$hN23WtJTi`7iYMDr zE;CSv>+PD53%6uiezaIt?_G@E0* z2GR3TUxfyH#{hnF-3jg#vl=>%dL0#IyKmn5=W|joPuFbKfIgzqVmJdWn(e@l_Udl1iAz-LzBG>MW@WnWAPD z-GC?9=9XXvg0 zO$wgF+CxI}O`GdjS@*Xn+#E)pqcuXpobOfkOmbBmc+ACWe~w<%%gD&F4Sib5C&9Sx z+2Qw7fvjh6LCjpeElDc0O$LWk=@l!LWm}h&9&_QUR)`{9@wXS4xo}JvR`MmUO7=LP zrPtn)tc>T~0PO$81`6 zdX@Mm$L_7f7oSnrk_$Pl&iszkni{$GXYo-Y7zf9>ef8(Eg>+Q-DDGQYgI$aLqBwMtH@T4AgSpSX?TtyvwHBbWahE{wB5#KZs~Sz`FVLJ z*K%eL*>3ANyKJ+($Sq-V&zNgPG~uq2JR2YRcv|Yaq2Q6?t2Inq&#UP~eJ)gXGRF>A z2jdyeRk}XI^fMWa!K%^t4{ko-_kD=LJ}|z}5k0m#gZEsxvP;6=*NialSF7c{G%3`Y z-rDS++>}kuqAU`#-qcwrv*9Y7jaD{R@NrCdv!}XWnX(wNynXlpoW|JKegkgq|O zwfM;fd00NB*`#}|!kx=;kdOQ^NW8q(B&e~8**O6Mx=-u7y9#bX1?QR2Ob-NjaqJg5 z@>6N|&bV63zx8!9(juqwiIV9t3_CVr)f-SI7a0J}3r)S#(R`Y1qC-2P;FC1*EZ}aO z#=8l(NR5N(!@7rebK}l8Y2Th@$h00z!ugMclFT^q@&9W==8ECw5F?uFz8Y(hS@zX zGCLR9!mqWSkd9mSY7!eyq!uez3d2W!(X2F2pcd&DQc}p`A&cH`rDIJP+N~*?%9ykr 
zZREL9WgYW+(>rbE;TiQ3+yraEiZ3Unni-blX>Mb(LABgSKfE{u~Y7Ki@yDequ)Ax0v$#V)toZj~mBMxYCqBI~r`1Gu-)rCQL)`tx6Vd864$& zr8LvMVfxa#Jvg@!(-LQOZl_eU#OK6!tr%?|;(?QOJ zkeyVwiFT-tQ`$7^-^`?D3wO8{`f{8&=Tskmpmwh)lFwMsiM6^cPq{B@2c~D>*^jZm zVmDc{wD|N~Ap4-+xs^Rtn9+%U9oxuYWcFsI+bQ-!>5+95x8=-*#0{R4k2R*e z7IeRJV3hTbnX-)enEAywXYG{q6MKQ7R8X`I=g@1XzHV)=a(a-ik zRW2AHBi`!QLJueZ8zKEqhyNSJ5H8{E8rD0X#gHUi>Q#rfr2F|7s?Q}tJml^E`cFcx zahpIect8>V#(3JP$jUH|760(<q@C+Q683#qHce>y|^}3%w_8Uru3Gr_$6$ zAMkU^;-LaBb)S&nGQX2s=rlBV;^fKiWqfA7%U0^@>aGi9ym<(>)MS)hwlmq_>9x90 z!~7c!ChK+YA9ILgs=he7yGtF?jp@1Fteaj#j2U;jqP4?^Hhm=f&QcHE70E8CmUSD~ zdQ0(2YCO|7n5}j2?Y!kcnSE-q1?qCd3@0_SVj_!yS%UK;(cW7l;ntr08`wq4ke`!h zzw-lma=Y#v>D}@uPxrRQc`k63a4wKQ%2)Q}k)ql=Y^~ zHO(qY%L0Vr{T_8w%OvwnBJp^uoKq#alh9H?tU5+sovY*T+X+Fua6!7+B+~(R%t;2VK zMF0{FI~l+%j_Y$QZ;*Bx-2W>3Y;%Q|Yld!WsQfH}w#akFjPet~e<{(9OmdnFdu83M zb+(BXwT&0I7xhVLj1_HVXlYj-e8>Ov{U=hVTlLp~k3MK&WNBxIvLt zxpkFV<@6yj{?ZR)2oaq8MJj2bZK;AX}QFHzE1%oY?#dhhre%Yt_&Y|t#;`YY@Ec{u5o595jT@`@&A!WRt0edI^z>;| z+6*}<>1#!szB7Im=S&KnLkJN?T~tey9vd~$;!5B zW2G4#x&bh{m`V=N;Lca`I#LCMs5n#E#TQj-fabD6{+x5k>ytlYNB6iMIE?_l0y*et1sT0JVSe2rITd>(O;J=1F}*k`#x~t%*O7M&{BEhJqnD$8XhgCv zar4$xEVioOtpg%1nq3|(Th(s;{zZb|E}W#jHgJl4rw3@MbUzAmhA zGloSzj~$!REELep>p7mzfWNh{I8zd=gI`}EGw&aHFkP~avYpydczDr!u7=B0wU#gQ zdLKO#$?9jl`Ns2{+(iuYl}Oo+AE`4Lwd@n+?AZx=PC+X2EE|KbTKfZG;TC@{+^Jz| zQiIIU4q2DhXF;Jcz`_xw?KeYH(ANR*HnRd!DwSAJHOFAf#h-xKKk(<+xC=MaxI$V z7#o{@=)LITTUC5TF{h`*IVpN+c`U9k+qArYdF472x>u3-upSk4xgrZ zL^-k?(3d}YjUcSvM~GJpoJ@GY_nA^cR0(`k=`1U2^*to06!urYtK?Bta?aCK8*V|j zk#K{unbM8UvDUrZAtG(*$DE$J*s!AQQ|Ed#UQCrM6RNkfqEB3_-O&}2eR#L&z&GBI z2aerz@?8rb4~LZ`QLN3AlI}QIbGOAi{A=Y25Qw5(jroU4NYSeW#43dmo7rJ+J^FMDxt@2YG`y#iow%OF>g5g)!XM_Jny) z#-wF1)_<0c*Hs;^ZA@brzR%-$%RQZXaK+a;z9^yw?`5EV;?0Zd9d_~WqG=&OgL`)?jz_~)b_kbV>^0V=9~9UyK6PQHpQy1*EsEtkQiAn zW`ttz-=O9Hjn?^h*90M({xnh4q+Q1v3!xz(Kb~7qAemuYoUQlv$|oQwZa!e;*BF;X zx-`NkU0hry6NAO`g!SHTlh?1rxJLttJ=K$6XH;lw1-$;H?q4VB^7*Yfd(XaO2&B;m z>I30LXDM&>GBTn4vOOBwP>g7++C=5x$+sEVc@S0^=rXBk>Cq2%^3wQ`u9yWeaZG-F 
z7r6cPuQLN^8m%HtZ;0q;;vjQDePdRMF@!`I5Nn=$Rk;uHmr_blzR#)$RIx%qN(oxW zUh1IRHP~>~0J>Bxc3iP05Qp%1_sQ=ywuz0DZTqcF%@4mO*SBD!gqA$JzeyXJWgU&H_rl9AS1o)^$_MJ)XwL#8_X9(sK zMHb``(gxdww3lEVl@SoTJ@3|lNBKzUC;jDYnP3wm3f9rC_&S1hsi+7uMDc4^0dzlA z{&i*Cw|gQv|6DHYRMK(`F`ruAhr<@mN!tJYSu~u~!AQprKDQpX)s3P$V6l7a0m!WY zj{g}LoXq^F4%;sTGpNS)m%c##9y)SgZ6&CI)8B5eSN0QVxC)Lz$jR8Ts)8O(u3YeEaao(I`Vd zEAwVk?3N3|(DjbO!hbX>cAVOXd;a_?(4RXl85i1ycVrr#v~2q|$j#+2AE~GEL zOmvIK%pmru&G7iq8?qs)h#}NeU*6s(+8~IyMrS0rhCVyee1n*ry$%h*PUxY@jLR1k z3(3gH9E8yzZj*UJHL#iEwj`C$AAc_h7cnTyU=i5;XS~}?8&Zvr!XE2|{-+Zds&EJ} zuVYlL@F_@54XZ{O7_#$olWAjeMC=aSGCLxw`1qW){$frjBIX?QNOY(dM?P>h6THwfTBx zF>rc5Q4H?!S;~DKFJgKsQqSaqxq6|B5tWkah?2CqI#BL8X3VL?UVJKwnsXjK;%X^$^G)?Up}CwN@xuaL`>6>VbDZu zPiMRG%lA2G2WE)5b$s~68I}B^(QM`ScKAzgDOyzWhD)GKS8>VRBbXOG~F`Vq*4FD3@ghKK%Xb z%D_A5;?T=SV6hI+SvxU=`SoygdzNt*gn5odgk7I{t}Z_O$I8I~S-YXaaGdHb%#W4# zF7tC=Xn_TbsF#rSbV&u5X7Y3y)0lS7^~!*- zc~hfcJhXs(X=<&n$a{go;R0#la>^DJi`zUoO$;8}3Ah z3+v^5i4*Bb!b6(>(n(8ocAFvF(GTp|YX@OJ->21wv5Aq#)Rw+Lw*nldZtIkq?FMEC zEy9QWrEFS{phjNmPKKhUagA71AK11Nh2mo)gy}`hc9k z{`9XEt8xGF@fl%i-~9Z1x6AL*hlht-c@Fs$W8GR+^@qp8jgv007#Gw+Ca){EKY(=OL?77sU>5iq2z;!G&I_Qz6DeU9N>ofmb#91&uIOZqs{mXjwY zGIk7!k{lWe!Qa=r^SN+~G73vOsE!4vhz_y+PC3*WT1fW#^g@tfd)zg4(@yJtOX;Hi28( z{uQr0M)KfHGG9KDp0(UT4MA#64u+ik@p9YQRb&@`c*N$=6f2)?ko7wn|4KpITQUqJ zP^8TK{!ORS5zg|_D7(orQg%MD(Iw~4m|VDUzg+pl-1j;$0Re$=VvSygz9F=-1~%FL zV)>nA_9$6i`(7I|Eolk*1P6%wMRr}{p6-^a&6;Er=NgXbOZ2Gy3#D~#!dbu_Y~Qqi#dN-wyZF5|aj-xGD@!M@lHdgP zLIBQBg?$qf5v`eqmsnhF>mwqT+o)x?((8)h>e5sea1L9s732y;zP3{WEcBX3);BD{ zOC7$v`s;4p3T4I#q!wSg`d&syMlg zeFIb+JJ%{*M${mZr`b^XZ|a(1&W^uJWXL#Aqp#W){#cgdwBN?sowolGDgAPk*Rx+v zHkIhQCDg3W)k3$(({uSB&?NWh75#4{;hYbO0#CbbTLA?eEM|?I<5##sCY8UNOu7(~ z2lK^wQX}yhfg!@#Kw$L%(`6}9^)pe*p|^eDdVz7VqckW8P%b|hy?LlE(lB>96dT7z z@7I0bUpC`rTrThet~_KR&f@03S=iC|el4?7mI|IhqLg*I#*^qw=;VF9P@w(?GhN?+z@4 zvYV3QNF#ge>sjbcBHsP7l5-2=AiGC&@!H3mSoet4O@!RaK2kAVMw*0T>_!gVFSiQz z=Ln9Hvg-wr0t9UbeSo~im6$^CfO-wol;o1pZ`TFh@Dz}14$za52KYnaxtlmNnPF5Y 
z>@D`&uG=!v1waNG6CO*ZP6H>DirspRxDH$VkttzpjbG7MVrkeb*inP&kUM^Y$wab?a)g>zbi#2aF#@M9BxI8YGg3<%bm8BLgEDZ(czs=NY z+RwnkeI2lYCtkS(8i`Bjzio&_a281f+o{n+`0@!BnB00t=Ydq{{C66^e7QdJ_p0cz zBi($2AB0};HKgkdkq;qKCn3doP-j58ovi%FZc2E?SI?g9UhIafF}1eP=?$K@tN#%!n0gjC1&JwdsF|y z@rNvG0&C5G$6~-d^n-eVE1zPJ{pQl5%oSdqYw)*uoOR1R9K7d031<0iZg@bsZv^Gt zswWWJ|6U^JR#ibnHV2D9UtBLUbm#q7kH0+(ZF+k+=%lgt9_=z=k+1G?!6lMlAN2sXO zwp?dLUhQD)7p?)m0fXH`Z5;p~=5MmY%3fWBW^N|A2r8$amP8k{=2hTso!DPXyK6xZPxOKq`N5_Y54P8g>A{Q77fudIn6q))ZjqBs{5fq zj?hO@^ap&Q=+?^Q^@fbQXwgEew65*Mf!Gb2d$Z z7zTvFHc^l1zlX9-l-rv4^#A@O*!78bQ+B2+K&EBhiWRh%!B9}*BY5Y}pp#85V9Jsu z?Ks;s4i;Le*PB#EgZ8T^R(p)uXRJqE^E@&ZN8Jo|k- z!+QS22~m`QKniwcu>A@u>h-6Vlkx`^Ewuf%L~tHEeF3Di+tp=1t!c#mu^g~3-KQtJ zBX6eL44RxT#p@0hLin0rEIvgbHp%YP0md2@7_8rQbg8p7p}WB-!*k-5B)E*AV!_+ zPd^8?Oxj>Xt3KfgaY<5hbh)^${lGr3onMBVZv5T1@z+T=iBDn19?h? zN?8!7NPtDYI$`-TXWJsjfgt?PnXYZgBpF=BlRJ$A`MgJ8-?WF^zdQ2W!b)4*#&wl2zRSHB(m;7}xx+^g ziP=MJEC%SM<)>;p-Cybf+(tgj(bv{pCz^ zhvD3O5KJ4wm>rfTd$ecvcV?T~0XlF^h%;9Rdh`4V0^Zz-?am2m#cbO0K=rH(@SDEA z10bZ7{89RqT%zd=I5qQ#767|}EWt&mH~#Kl3Dgu<#hbBSDLw#8kQRIV1jq&5R7_(Z zu?ZTzIC{By`bp!d?>nN;_+YARdQ05dx|*?pW3@a^ZamHfs0U!_J^;KlGl-_J*CrKv zgUjUBUGhYm;H+GOio&1|)a!3@)TgX9WhdU;A)i|h_l07=ME>X3z}>W*?r2%%@_1YV|@p`#-LC0-K2QQ)N{Z}Q*Q2j)mTHE!qC)y zB~NkR1QbI93X0>YJDlPsdpjBxQMB&SGXv#a>sid|)d=Y|E=&rIDr^ojtLKZG4Giv43Iv?8?#H}nAA6aunXC?MQHw zSyLA;Ju;rwsGxtgewVE8S8%uF^=VH$vzwwyzH75~jE556dVkC5v6yS;?(fKjqGQB3 z+heNNaHz`XMcGD?eb2Yrs}t;0zlJNRqG+WA6HM zwAr~|Ne*#>%*)@*~Tkd2Wj-alA~aP@yTJ9yCuog!x`KZmW!y(L{<3&^EYj zZi3nREwu3lW?nTci|sov1)cz98ckc7#}rX54|%mB6NYnl`VLXTYZ01`n5D^y4f7a( zW!~7mE!mz(n5NOcl+*Dnxb~KpWZJyK#&?Bz2LiWN4*ijSJ)xHA{pSOv@ISu=->Dx0 zaVW*l-87?IJ_fkCKIqIkKziLFmy#>d>vbdvT)n4;%Y47bC3>>K%ETc$Kc_$gr7rn@ zmYos(S7j&aZL4-zXqDTMH{uhwRGdL!fE(IaDxS98hiz*5@}&SUYB{zRnrMl=r~85F znO57+J4Yglm|jkuwy(rnf4NJ4jaXMY0*`k$7#V-4zR(r!0q-3!f0;q}i_@;7M@+Fc z0}~H*RPZ&5>)|_VCf+6P)O?bd`DeCnj%Egt;|#fg_-rA>KK|L?-hQRn*^YK;diWd5 z((dO+kaV&V|) zE}yC5S^W9*;JL}AJ|~I!wbr(_DWDU>`sS^!Uq?fQs(!uaFtUF-fQ39yN(SO&2?QLy 
zy~R#(PQ5=qy~qMA?(tbDwWKvsnc2AFwsW)W+-C{jIpz$#jEKsO6_O*y!=uRCq-mjT z5lY!`78-c|!&d?fjU$jUJ>P+b zkQKYWKI0d*S0_aSYYT*xD+?a;60kpK`*@sd9@}1E2;AF?Ory(VX2ZT677=@UK zNWFY)WaLeF7bwSj4bhZ<`sIgl5x74ZQJDZ&Q-JhRL-`fVepnfN%`nINc2kvH|Fj3h z1+NjYiGI3{fL(t{Zk9=DD^jL)oop|(>pDv)Lv$_+>{qg(gy;jBHX%+s2jSunrqTm~ zA(Vz7s8uma#bkHUCJzdSb@uGL8Dk0W5;YZPvp+4EOrWlJ1x-aS$ZQgs&@JsO;57cn zXd3(CP8Zy zV$;LSjOGy0FOV6${T^B&GaBU`Y1;37js}Gft&PfvFCIHK;e$x3D5u?4G~{iAd@jtm zGo43yPVX>U3g}F~0=*})#xfG<=`x$+_c>y3Q>&AJ2W=4hULC{?FP!`{u}lb{mV z*|7n(P?mm`B0gH~$nhPiR))$puy%X8K`rxfkV&u>?tv2gE#5dB%55NOl7CHjJ^4^<9}Dd}oq1^VhbRlUXv;o(ZX#|19E2RfKuAv6!kr zr;%0TikK8mceXHsSDhvw3C2P(*(G!&J$Z>W}QI62#8V1(L;wC3?TS!TPD-H z@+Z3wUntQ%Z+3ARqRqfwKB-Arv6ML_ZbtE;I}0Jv(l;CT_vK!DfV2>HSwN>{lIE^b zWbMKZc>fS<+jhV{}Yk)K%aYdEfcp2W z3ihC-Q%Q)+jrZ4UmJMOYRWzjrZ?6R~HYjoOWE(vz-(-YkbG)K9T_&jPnnDihzS70D zojM*ewSwf<#oJq3>MVh2yAZeCn*-nQk7uFA&x1HomNDg6Hj#2xGx!OQOk)s}RI9Aj zDE{#+P~vw2@kWs)-xD=0lWgWmdiRz4Ku&yu$RfMip4>_9nKxRAR#z@Q7O=4Cac8>x zZmh{~!?ih{d~s`#-N+!%TWV0Nk{Zk5rKr5coEHH!5w(s8jB><|n;9Ejz#5T1Bsu;< z$_VTFFW~$ihyOFIhAaM!vi~oKTiIjl