From 4846a6c2d04c4a71cf9619acab46ec82dec3f2b6 Mon Sep 17 00:00:00 2001 From: Alex Wu Date: Mon, 15 Feb 2021 11:40:49 -0800 Subject: [PATCH] Release process update (#13798) --- release/RELEASE_CHECKLIST.md | 3 +- release/RELEASE_PROCESS.rst | 7 +++-- release/release_logs/1.2.0/microbenchmark.txt | 28 +++++++++++++++++++ release/release_logs/1.2.0/notes.txt | 3 ++ .../1.2.0/rllib_regression_tf.txt | 27 ++++++++++++++++++ .../1.2.0/rllib_regression_torch.txt | 27 ++++++++++++++++++ .../1.2.0/scalability/distributed.txt | 4 +++ .../1.2.0/scalability/object_store.txt | 1 + .../1.2.0/scalability/single_node.txt | 5 ++++ .../1.2.0/stress_tests/test_dead_actors.txt | 4 +++ .../1.2.0/stress_tests/test_many_tasks.txt | 17 +++++++++++ .../stress_tests/test_placement_group.txt | 3 ++ 12 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 release/release_logs/1.2.0/microbenchmark.txt create mode 100644 release/release_logs/1.2.0/notes.txt create mode 100644 release/release_logs/1.2.0/rllib_regression_tf.txt create mode 100644 release/release_logs/1.2.0/rllib_regression_torch.txt create mode 100644 release/release_logs/1.2.0/scalability/distributed.txt create mode 100644 release/release_logs/1.2.0/scalability/object_store.txt create mode 100644 release/release_logs/1.2.0/scalability/single_node.txt create mode 100644 release/release_logs/1.2.0/stress_tests/test_dead_actors.txt create mode 100644 release/release_logs/1.2.0/stress_tests/test_many_tasks.txt create mode 100644 release/release_logs/1.2.0/stress_tests/test_placement_group.txt diff --git a/release/RELEASE_CHECKLIST.md b/release/RELEASE_CHECKLIST.md index 0c742a94d..f529b38ec 100644 --- a/release/RELEASE_CHECKLIST.md +++ b/release/RELEASE_CHECKLIST.md @@ -56,6 +56,7 @@ This checklist is meant to be used in conjunction with the RELEASE_PROCESS.rst d - [ ] Results added to `release/release_logs` - [ ] stress_tests - [ ] unit_gpu_tests +- [ ] Scalability Envelope Tests - [ ] ASAN Test - [ ] K8s Test - [ ] 
K8s cluster launcher test @@ -107,4 +108,4 @@ This checklist is meant to be used in conjunction with the RELEASE_PROCESS.rst d - [ ] PR to bump master version is merged - [ ] Release is announced internally - [ ] Release is announced externally -- [ ] Any code/doc changes made during the release process contributed back to master branch \ No newline at end of file +- [ ] Any code/doc changes made during the release process contributed back to master branch diff --git a/release/RELEASE_PROCESS.rst b/release/RELEASE_PROCESS.rst index 2502a0865..f7eb6292f 100644 --- a/release/RELEASE_PROCESS.rst +++ b/release/RELEASE_PROCESS.rst @@ -316,10 +316,11 @@ to proceed with the final stages of the release! of the docs, trigger a new build of the "latest" branch in readthedocs to see if that fixes it. -7. **Update latest Docker Image:** Message Ian Rodney to bump the "latest" tag +7. **Update latest Docker Image:** SET THE VERSION NUMBER IN ``docker/fix-docker-latest.sh``, then run the script to update the "latest" tag in Dockerhub for the - ``rayproject/ray`` and ``rayproject/ray-ml`` Docker images to point to the Docker images built from the release. (If you have privileges in these - docker projects, you can do this step yourself.) + ``rayproject/ray`` and ``rayproject/ray-ml`` Docker images to point to the Docker images built from the release. (Make sure there is no permission denied error; you will likely have to ask Thomas for permissions.) + + Check the dockerhub to verify the update worked. https://hub.docker.com/repository/docker/rayproject/ray/tags?page=1&name=latest&ordering=last_updated 8. 
**Send out an email announcing the release** to the engineering@anyscale.com Google group, and post a slack message in the Announcements channel of the diff --git a/release/release_logs/1.2.0/microbenchmark.txt b/release/release_logs/1.2.0/microbenchmark.txt new file mode 100644 index 000000000..064e8b441 --- /dev/null +++ b/release/release_logs/1.2.0/microbenchmark.txt @@ -0,0 +1,28 @@ +single client get calls per second 48106.48 +- 847.52 +single client put calls per second 42709.1 +- 84.85 +multi client put calls per second 172608.71 +- 3071.81 +single client get calls (Plasma Store) per second 10669.26 +- 286.63 +single client put calls (Plasma Store) per second 6622.51 +- 47.03 +multi client put calls (Plasma Store) per second 9804.51 +- 462.32 +single client put gigabytes per second 11.45 +- 10.79 +multi client put gigabytes per second 35.06 +- 0.26 +single client tasks sync per second 1899.11 +- 87.63 +single client tasks async per second 18599.58 +- 124.02 +multi client tasks async per second 50388.88 +- 2585.47 +1:1 actor calls sync per second 3053.21 +- 60.37 +1:1 actor calls async per second 7768.59 +- 268.78 +1:1 actor calls concurrent per second 7106.24 +- 219.87 +1:n actor calls async per second 17132.11 +- 881.8 +n:n actor calls async per second 51037.11 +- 1732.95 +n:n actor calls with arg async per second 13746.19 +- 171.94 +1:1 async-actor calls sync per second 2103.39 +- 52.51 +1:1 async-actor calls async per second 4100.13 +- 53.6 +1:1 async-actor calls with args async per second 3085.78 +- 165.8 +1:n async-actor calls async per second 13906.28 +- 363.9 +n:n async-actor calls async per second 40269.65 +- 1113.55 +client: get calls per second 2414.77 +- 43.07 +client: put calls per second 1346.13 +- 8.2 +client: remote put calls per second 58855.54 +- 849.21 +client: 1:1 actor calls sync per second 730.58 +- 11.66 +client: 1:1 actor calls async per second 774.79 +- 14.1 +client: 1:1 actor calls concurrent per second 805.73 +- 11.46 \ No newline 
at end of file diff --git a/release/release_logs/1.2.0/notes.txt b/release/release_logs/1.2.0/notes.txt new file mode 100644 index 000000000..91c693f44 --- /dev/null +++ b/release/release_logs/1.2.0/notes.txt @@ -0,0 +1,3 @@ +The test.pypi.org wheel does not match the release wheel because #14062 was discovered during the sanity check. + +Wheels were re-sanity checked by pip installing from s3. diff --git a/release/release_logs/1.2.0/rllib_regression_tf.txt b/release/release_logs/1.2.0/rllib_regression_tf.txt new file mode 100644 index 000000000..8760b66ff --- /dev/null +++ b/release/release_logs/1.2.0/rllib_regression_tf.txt @@ -0,0 +1,27 @@ +== Status == +Memory usage on this node: 8.8/480.3 GiB +Using FIFO scheduling algorithm. +Resources requested: 0/64 CPUs, 0.0/8 GPUs, 0.0/325.83 GiB heap, 0.0/99.07 GiB objects (0/1.0 accelerator_type:V100) +Result logdir: /home/ray/ray_results/a2c-tf-atari +Result logdir: /home/ray/ray_results/apex-dqn-tf-atari +Result logdir: /home/ray/ray_results/dqn-tf-atari +Result logdir: /home/ray/ray_results/impala-tf-atari +Result logdir: /home/ray/ray_results/ppo-tf-atari +Result logdir: /home/ray/ray_results/sac-tf-halfcheetah-pybullet +Number of trials: 12/12 (12 TERMINATED) ++-------------------------------------------+------------+-------+--------+------------------+---------+-----------+----------------------+----------------------+--------------------+ +| Trial name | status | loc | iter | total time (s) | ts | reward | episode_reward_max | episode_reward_min | episode_len_mean | +|-------------------------------------------+------------+-------+--------+------------------+---------+-----------+----------------------+----------------------+--------------------| +| A2C_BreakoutNoFrameskip-v4_e6509_00000 | TERMINATED | | 355 | 3604.01 | 4137500 | 1.86 | 10 | 0 | 815.78 | +| A2C_BreakoutNoFrameskip-v4_e6509_00001 | TERMINATED | | 354 | 3601.32 | 4067500 | 1.79 | 10 | 0 | 803.07 | +| 
APEX_BreakoutNoFrameskip-v4_e6509_00002 | TERMINATED | | 98 | 3626.91 | 7297440 | 1.4 | 9 | 0 | 739.886 | +| APEX_BreakoutNoFrameskip-v4_e6509_00003 | TERMINATED | | 97 | 3607.18 | 7222240 | 1.17816 | 5 | 0 | 702.362 | +| DQN_BreakoutNoFrameskip-v4_e6509_00004 | TERMINATED | | 35 | 3636.53 | 360000 | 1.25 | 6 | 0 | 710.49 | +| DQN_BreakoutNoFrameskip-v4_e6509_00005 | TERMINATED | | 35 | 3631.05 | 360000 | 1.36 | 9 | 0 | 723.54 | +| IMPALA_BreakoutNoFrameskip-v4_e6509_00006 | TERMINATED | | 350 | 3607.49 | 3024500 | 1.87 | 9 | 0 | 816.3 | +| IMPALA_BreakoutNoFrameskip-v4_e6509_00007 | TERMINATED | | 349 | 3601.95 | 3025500 | 1.21 | 6 | 0 | 716.7 | +| PPO_BreakoutNoFrameskip-v4_e6509_00008 | TERMINATED | | 1858 | 3600.41 | 9290000 | 1.69 | 10 | 0 | 792.13 | +| PPO_BreakoutNoFrameskip-v4_e6509_00009 | TERMINATED | | 1851 | 3601.2 | 9255000 | 1.6 | 11 | 0 | 770.95 | +| SAC_HalfCheetahBulletEnv-v0_e6509_00010 | TERMINATED | | 45 | 3670.33 | 54000 | 269.06 | 622.238 | -454.818 | 1000 | +| SAC_HalfCheetahBulletEnv-v0_e6509_00011 | TERMINATED | | 45 | 3654.38 | 54000 | 473.166 | 628.875 | 156.264 | 1000 | ++-------------------------------------------+------------+-------+--------+------------------+---------+-----------+----------------------+----------------------+--------------------+ diff --git a/release/release_logs/1.2.0/rllib_regression_torch.txt b/release/release_logs/1.2.0/rllib_regression_torch.txt new file mode 100644 index 000000000..11309f5e3 --- /dev/null +++ b/release/release_logs/1.2.0/rllib_regression_torch.txt @@ -0,0 +1,27 @@ +== Status == +Memory usage on this node: 8.6/480.3 GiB +Using FIFO scheduling algorithm. 
+Resources requested: 0/64 CPUs, 0.0/8 GPUs, 0.0/325.73 GiB heap, 0.0/99.07 GiB objects (0/1.0 accelerator_type:V100) +Result logdir: /home/ray/ray_results/a2c-torch-atari +Result logdir: /home/ray/ray_results/apex-dqn-torch-atari +Result logdir: /home/ray/ray_results/dqn-torch-atari +Result logdir: /home/ray/ray_results/impala-torch-atari +Result logdir: /home/ray/ray_results/ppo-torch-atari +Result logdir: /home/ray/ray_results/sac-torch-halfcheetah-pybullet +Number of trials: 12/12 (12 TERMINATED) ++-------------------------------------------+------------+-------+--------+------------------+---------+-----------+----------------------+----------------------+--------------------+ +| Trial name | status | loc | iter | total time (s) | ts | reward | episode_reward_max | episode_reward_min | episode_len_mean | +|-------------------------------------------+------------+-------+--------+------------------+---------+-----------+----------------------+----------------------+--------------------| +| A2C_BreakoutNoFrameskip-v4_a6f57_00000 | TERMINATED | | 353 | 3603.76 | 3378500 | 1.93 | 15 | 0 | 821.58 | +| A2C_BreakoutNoFrameskip-v4_a6f57_00001 | TERMINATED | | 353 | 3608.48 | 3404500 | 1.15 | 6 | 0 | 701.51 | +| APEX_BreakoutNoFrameskip-v4_a6f57_00002 | TERMINATED | | 113 | 3615.57 | 5680160 | 1.6381 | 9 | 0 | 773.381 | +| APEX_BreakoutNoFrameskip-v4_a6f57_00003 | TERMINATED | | 114 | 3636.38 | 5764800 | 1.39655 | 6 | 0 | 735.914 | +| DQN_BreakoutNoFrameskip-v4_a6f57_00004 | TERMINATED | | 27 | 3684.72 | 280000 | 1.79 | 12 | 0 | 743.6 | +| DQN_BreakoutNoFrameskip-v4_a6f57_00005 | TERMINATED | | 27 | 3685.26 | 280000 | 1.14 | 5 | 0 | 699.19 | +| IMPALA_BreakoutNoFrameskip-v4_a6f57_00006 | TERMINATED | | 356 | 3606.67 | 7850250 | 1.7803 | 12 | 0 | 795.455 | +| IMPALA_BreakoutNoFrameskip-v4_a6f57_00007 | TERMINATED | | 355 | 3609.98 | 7903500 | 1.68217 | 8 | 0 | 796.659 | +| PPO_BreakoutNoFrameskip-v4_a6f57_00008 | TERMINATED | | 1401 | 3601.51 | 7005000 | 2.61 | 10 | 0 | 
897.83 | +| PPO_BreakoutNoFrameskip-v4_a6f57_00009 | TERMINATED | | 1406 | 3600.35 | 7030000 | 1.47 | 11 | 0 | 647.8 | +| SAC_HalfCheetahBulletEnv-v0_a6f57_00010 | TERMINATED | | 37 | 3686.44 | 46000 | 641.43 | 723.144 | 504.62 | 1000 | +| SAC_HalfCheetahBulletEnv-v0_a6f57_00011 | TERMINATED | | 37 | 3645.16 | 46000 | 631.65 | 664.021 | 599.864 | 1000 | ++-------------------------------------------+------------+-------+--------+------------------+---------+-----------+----------------------+----------------------+--------------------+ diff --git a/release/release_logs/1.2.0/scalability/distributed.txt b/release/release_logs/1.2.0/scalability/distributed.txt new file mode 100644 index 000000000..860875201 --- /dev/null +++ b/release/release_logs/1.2.0/scalability/distributed.txt @@ -0,0 +1,4 @@ +Actor time: 34.21903751100001 (10000 actors) │ +Task time: 386.82114117900005 (10000 tasks) │ +PG time: 31.368525181999985 (1000 placement groups) │ +Node launch time: 756.3447095859999 (250 nodes) \ No newline at end of file diff --git a/release/release_logs/1.2.0/scalability/object_store.txt b/release/release_logs/1.2.0/scalability/object_store.txt new file mode 100644 index 000000000..0471a93ba --- /dev/null +++ b/release/release_logs/1.2.0/scalability/object_store.txt @@ -0,0 +1 @@ +Broadcast time: 135.75278311699998 (1073741824 B x 50 nodes) diff --git a/release/release_logs/1.2.0/scalability/single_node.txt b/release/release_logs/1.2.0/scalability/single_node.txt new file mode 100644 index 000000000..7a100e3ea --- /dev/null +++ b/release/release_logs/1.2.0/scalability/single_node.txt @@ -0,0 +1,5 @@ +Many args time: 11.433474627000002 (10000 args) +Many returns time: 4.487700554 (3000 returns) +Ray.get time: 21.957432587999996 (10000 args) +Queued task time: 124.148238013 (1000000 tasks) +Ray.get large object time: 35.118229127000006 (107374182400 bytes) \ No newline at end of file diff --git a/release/release_logs/1.2.0/stress_tests/test_dead_actors.txt 
b/release/release_logs/1.2.0/stress_tests/test_dead_actors.txt new file mode 100644 index 000000000..2e73606f2 --- /dev/null +++ b/release/release_logs/1.2.0/stress_tests/test_dead_actors.txt @@ -0,0 +1,4 @@ +Finished in: 133.60612034797668s +Average iteration time: 1.3360581374168397s +Max iteration time: 5.137001276016235s +Min iteration time: 0.15551400184631348s diff --git a/release/release_logs/1.2.0/stress_tests/test_many_tasks.txt b/release/release_logs/1.2.0/stress_tests/test_many_tasks.txt new file mode 100644 index 000000000..ffc9bc3cd --- /dev/null +++ b/release/release_logs/1.2.0/stress_tests/test_many_tasks.txt @@ -0,0 +1,17 @@ +Stage 0 results: + Total time: 50.40076494216919 +Stage 1 results: + Total time: 191.78780102729797 + Average iteration time: 19.178766775131226 + Max iteration time: 21.238199949264526 + Min iteration time: 18.299438953399658 +Stage 2 results: + Total time: 280.4905333518982 + Average iteration time: 56.0978446483612 + Max iteration time: 56.96464133262634 + Min iteration time: 53.859785318374634 +Stage 3 results: + Actor creation time: 0.3304018974304199 + Total time: 2303.117142677307 +Stage 4 results: + Scheduling spread: 66.90121385927009. \ No newline at end of file diff --git a/release/release_logs/1.2.0/stress_tests/test_placement_group.txt b/release/release_logs/1.2.0/stress_tests/test_placement_group.txt new file mode 100644 index 000000000..62f8a7b74 --- /dev/null +++ b/release/release_logs/1.2.0/stress_tests/test_placement_group.txt @@ -0,0 +1,3 @@ +Avg placement group creating time: 0.2691924729741867 ms +Avg placement group removing time: 0.8786630945927776 ms +Stress Test succeed. \ No newline at end of file