From b506f871176a20db9a5e4152128ed3c0ee7d937d Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Sat, 25 Apr 2020 18:25:56 -0700 Subject: [PATCH] [tune] New Doc edits, add Concepts page (#8083) Co-Authored-By: Sven Mika --- doc/Makefile | 4 +- doc/source/_static/css/custom.css | 12 + doc/source/conf.py | 4 +- doc/source/images/tune-workflow.png | Bin 0 -> 39103 bytes doc/source/index.rst | 4 +- doc/source/rllib-concepts.rst | 2 +- doc/source/rllib-training.rst | 6 +- doc/source/tune-contrib.rst | 2 + doc/source/tune-schedulers.rst | 4 +- doc/source/tune-searchalg.rst | 5 +- doc/source/tune-usage.rst | 576 ------------------ doc/source/tune.rst | 16 +- doc/source/tune/.gitignore | 2 +- .../tune/{guides => _tutorials}/README.rst | 0 .../_tutorials/overview.rst} | 141 ++++- .../tune/_tutorials/tune-60-seconds.rst | 193 ++++++ .../tune-advanced-tutorial.rst} | 0 .../tune-distributed.rst} | 73 ++- .../tune-tutorial.rst} | 16 +- doc/source/tune/_tutorials/tune-usage.rst | 424 +++++++++++++ doc/source/tune/api_docs/analysis.rst | 59 +- doc/source/tune/api_docs/execution.rst | 3 + doc/source/tune/api_docs/logging.rst | 120 ++++ doc/source/tune/api_docs/overview.rst | 3 + doc/source/tune/api_docs/reporters.rst | 14 +- doc/source/tune/api_docs/schedulers.rst | 6 +- doc/source/tune/api_docs/suggestion.rst | 2 + doc/source/tune/api_docs/trainable.rst | 46 +- doc/source/tune/guides/overview.rst | 38 -- 29 files changed, 1041 insertions(+), 734 deletions(-) create mode 100644 doc/source/images/tune-workflow.png delete mode 100644 doc/source/tune-usage.rst rename doc/source/tune/{guides => _tutorials}/README.rst (100%) rename doc/source/{tune-examples.rst => tune/_tutorials/overview.rst} (54%) create mode 100644 doc/source/tune/_tutorials/tune-60-seconds.rst rename doc/source/tune/{guides/plot_tune-advanced-tutorial.rst => _tutorials/tune-advanced-tutorial.rst} (100%) rename doc/source/tune/{guides/plot_tune-distributed.rst => _tutorials/tune-distributed.rst} (72%) rename 
doc/source/tune/{guides/plot_tune-tutorial.rst => _tutorials/tune-tutorial.rst} (92%) create mode 100644 doc/source/tune/_tutorials/tune-usage.rst create mode 100644 doc/source/tune/api_docs/logging.rst delete mode 100644 doc/source/tune/guides/overview.rst diff --git a/doc/Makefile b/doc/Makefile index b4ba8a0a5..0a5f0d81e 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,11 +2,11 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build -AUTOGALLERYDIR= source/auto_examples source/tune/generated_guides +AUTOGALLERYDIR= source/auto_examples source/tune/tutorials source/tune/generated_guides # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) diff --git a/doc/source/_static/css/custom.css b/doc/source/_static/css/custom.css index d71e387a0..68935539b 100644 --- a/doc/source/_static/css/custom.css +++ b/doc/source/_static/css/custom.css @@ -18,3 +18,15 @@ .rst-content .section ol p, .rst-content .section ul p { margin-bottom: 0px; } + +div.sphx-glr-bigcontainer { + display: inline-block; + width: 100% +} + + +td.tune-colab, th.tune-colab { + border: 1px solid #dddddd; + text-align: left; + padding: 8px; +} diff --git a/doc/source/conf.py b/doc/source/conf.py index 7f4ec4188..367b5be67 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -80,9 +80,9 @@ versionwarning_messages = { versionwarning_body_selector = "div.document" sphinx_gallery_conf = { - "examples_dirs": ["../examples", "tune/guides"], # path to example scripts + "examples_dirs": ["../examples", "tune/_tutorials"], # path to example scripts # path where to save generated examples - "gallery_dirs": ["auto_examples", "tune/generated_guides"], + "gallery_dirs": ["auto_examples", "tune/tutorials"], "ignore_pattern": "../examples/doc_code/", "plot_gallery": "False", # "filename_pattern": "tutorial.py", diff --git a/doc/source/images/tune-workflow.png 
b/doc/source/images/tune-workflow.png new file mode 100644 index 0000000000000000000000000000000000000000..3464b467db73e74f365c519ab7e1b389392ec263 GIT binary patch literal 39103 zcmeFZWmH^2(=G~u0KrLuI|O$fWRO5`cL?qf+#N#D;O;PJfZ*;B++l#=GB|^~+nu~e zzH`2R_vc;f&g`{z@9x^w)!o(I)lb#TPh~|Z3{)aiI5;>A8R;*oaBv7daB%RvC`hoJ zg``MN*bUxARZ0x5a)M+Z_MmL8En}ge0LKVRqrkm^e-DT7j|BFG3$unp{7)JVjuv)@ zvC4)2uhQH%|Em&&{|_eri_aHx7gJ|zM^|eHdy0R2jZGZfT!pBp{#Eq9KmS8cS8I#^s>$Bv ze>Mx&K(>D+Y#glYZ2y}#>{Z~uTmdC#Yjar5|M&}Y0RKbzf93tpdVp;ID*r!(`5!I) zPcE#f!l*#D|J`lEsPa8@I&g5Ja57&$t9!y9Wxs3JPP(^8=_8|)4g)b3d@S)tS^XPMF0Qm|78zQARr#7)5Pf7w-iaKsHg;x5B}EpDK#G< z8iR*pI#6G^74R!IE*8=Mk`H_3CcZe
$?04lV#fsTBs5R>$suueLVx`u}T)l{e#9Zk0`BO3;8cAtAk)qK! zCH4O02{n6>^CbfjCBa`Px=_mF4f|EB-@`~hh+e57j>w3%)@0@WW=M0@;tABKr;3Ar zlRO=J%@yYuZW4wNGp?>4A%%F6@~15EWOTW<)Ts2GG~m~6egPFW9mOg^aXjhcwEm(B z)<_77x`v3Ro0$?1#T%;scEyDtnjM)N7tzc8_Y!4|H=H-WWtHb6zoUfzpO+}!`?#9a zkmP*kpZ{kzEG??>ZfK#5@v}bLf3HVJT6DkwPcx$S(|<3~prJ6W`ide~;?;{r>Oue;4`xZ_NLPMkYkYXroJ_3Lh7jhiz=S>@RFE(`hM8 z*=D}rg8pm7)e}Tl1?-pesQ?;ChBWX8oH*ElsxXyJ?#Uznui;m2L+pIoui7y?o3$sU zcf%RNhw1J`5v>L$4mSLMuE2JRvr!Nl(eV0cg6in<=%3#ogp5T>YuvPAex=+Yr+I6L4~QbMqaUvgeYR|k3aLT*^p}c z4sRu&AO}$SP#h=aKWf5H6hUl3?6}D&9|>UOZ#6dNUOG!W9>eY+SNz!TS<>ZVbE4x=**hslc`sLoBL zEc#DgW%IHUs<=}`e^3`Y=N%riwxerjs@Bd2cpQ4*8_qk_stFB~|6AUeKU6_gCmn0+ zhiPtv>Y@L-EjI$zG7X$uR{TJi`R$ObiO47iTMW#H_XS{MSx$R42zfO*<4NjHot-TlgNdph2%}{DGgCyROo4cS zFU?mm|MDx`F=m_R3F5pg!*}jba9H*_lt$l+GcsCU2XcI(A`MVVhylZpha;8 zL$6H#*7cFp*VFZ(*8}3Ng?XZ8cOUb9T5;mT-66;%s7fbspovAV+40)@`XKpqWAZ-1 z(hz?4`KC;Kk9*Z|>u5B~`O_PIdAOj-=c`m*hh#~$$cTuq^QCHgK_sk-(PpH6SEB&| z&h;G5GA}u=sfS(%H8Bo_t; zdwJ1N!g>NEBJx4ySM*k-k}$e;MaxaAb??Hv3lrwXWha1OR?R7gM@-ZvUG|pJ;fd(ntdN>D@)lSE>-MY^gSsFg-97F!oF16zj#_x!z6AR0pGc0=ZIrv{R z9v_(4BtsDjbZsg-3|#HU-SCd3keO&)G$unGa-FXy#DMYG`~mZoj;59q zKcs#vmPW|=w!MAJjuy(G$m6_(@5CiZHoK1agg(t2 z_DwVSIIN+=D(FK8Hs$;NR|P-ny6(m{5}G$h%D$BPU2OPtlYU1a+4I$FTy@fFI=9db z7PlC}zKsxRIwxTnbS%TQ!6=(PtO!`06Xivr?KA?1%2%J9hLqe0Z^sJLr zA3`RmqMQW=<>xn(0|7rsxmM?@jj5X!Q*?{}GP!r8uwqezjEtk*9kKlhqO6+QAi2M#0_ zyGn`I!uK;uSxidAT$NY~gF5Z!KxG5o;4Sh_!O_D@6Rp2ynHlWo?p$44V(AQms_Bh( zz)G=2Ha^oTD%?@>r&1|$0@i)N_E&(d(WztNMI4ZSE6C{2)q%G!`>)OO3krRY!G$6H z{mx~3R{&NQGoqL8LtA&+&q2SP@!M9LFoTb)?Z6~D&(y&?dIBtl%fln2rYleB&{bch zq!iM<$GO9>7L|E@oO}1gD!HnbS-YC?n{%Vc;{t5ttYx04--|M2(`s!Xay^b?Z5_zg zX%|=4n)hS&N?@293KDU2!(IoPSSA7%!{M~*-q+(=sjJ6W2-KXUiD6Z8p}S*}xrmG7hPCy{+kwRX!?eP!@&1-I`?v?^ zlupRX3wY*aHZ>>q>ORwOJTgE0&~-Nr>=d47X-*=rsTgJ>NS#evv3zt|N#+(HD?Zx; z)kS2K6-6@qW-?n8o58!>x9WRsuA&?1Mrf5f^}><9vz+)mAzYLs%;=CgwizCKkaw!c zSFa%LvLe*PYH`$mJr(XV5O!0x&#@`qlnpQ{ksx)|n^cSW>6)D_-Dr8->^JHAj646_ 
zN8K|CuNCDO=FZox*g`iTLJ*ywFSrqic08M^hwYZ8v;Mq9y5V+-p;BzvzG~~8(r69g zQgT%T`AxH;LC#K2m!UpK%Z-1Xp0&T^p5*HvCrkW67wS1`fqc#zYm|@ z!a)0JRAL;5A^bX4o6qES@`J26_%6KHJ4ojX($04&POd#AC0cYEjmuUUq`H9*6^O0!m1dMTPts~0-We0$JdSJy(j@j&BRjXKt zJUYBl@S?f88FfdD?1mK?7f0`-{BIXyVmWchz0x1_&bMe0in`kBuHrALd|k5PHh^qP zsOVvHez6BsVo&0&1Y7^_9Vt8oo?9b{!E@HC6Am7;#2gollhVQ=fl=uuu-AJAa1q$tehl^r!nB$TpG!6(J{D{*4T`N&MpYKvQ@Rnpn;yDgJa0n089f?=))eEhFV ziG8u%*5;lghJqIvltq3^EEZ^Wht7BuKCy1?^lbHx`DY-PXJ=FrumO|p-9hK&ty(sMtqP^q4`Z2x!@ za8s!ycv|BDji$xunR1Dm`ssT>%v)OM1L>w*32!5JzsjNpnr-<_$_jX$w-i$W0^zk* z)|+omq;euh`lTr&@|e9A*h+pFt#d>m3yv~XrpXx9+vI|dT@A0out=TCiK ze>Q5h-aezGBtcqBqbq)!Jq+lY3cOwQS8GWqDu@P7-rXU4TteSz5Cu{npR28*c(wH2 zI%mc?{6tYl{%63J=EsheedkICOEv$%yonr~;w`8!%Ra1k;R{+qUK!Q-%1$sMMMLM-t8n)#|rYd$^foi zl;mGt%J?upzUf_-2C{fp;i$oFNDU(c{01kArNWe*qZFsXKihC5;AP#}2jsunH>v&I z`Vthu?{SsOj!c_P{1NLeshA8+1#BAREbd9L-}0HA*XO3~(-EDB{C!`nDYprt&F~Fh z^3~(pX6IFroB`GSpM}D8XAr8};oAUKi0$eMeYTH>96;i9 z1IMbH0P7nKN{ZBp-(cN~2H=dv5q8E@fQ{7i`QWMX8gb?N5Z>~UvMhPSl3OgevewI! zQaJ-FlgKn1ifF1Ht-`vbZ_e_!w#y@)TfAh)U63{9jX(qAvCfhQL3zCr$kQxJuL@Lc|-X^{? 
zJ-tS2xH4o8$N~U@c2W#ExT8FqJ#6X>m)_biEBd8UDO)kkYb=1dzP@@>PofKVb!n3B z#ax}1F#153Sm!({u#LKtY7t2-9|(FU1VEefV=tRCca!yx!tb zPVU;&4zsqzlPd{_Gu|mmov2M^UFGpFTMwsNST74~IvE$$`Cn|sBAQO>@$E-(DPivZ z>y&(@F*uIOzzgo{xgcx6y29h4K?|^S=IE}a49RU|Bc0+s7|)kCyg9Q zi0{ z3O?)&U-Krm-=4ee;42Mg-~B2}&BRYM0wo(@_@RPkq4ZU{kKY?pKxr~*-759rPvTG%;oeSwyys)vCq-#M#rBZL^2 z%45tKPA%>$4Mk9u@7-%u#nU*K;$tae&v!Vx0;Dx2%F0;wuSAdWt+h*Lg@PO1KH?Wp zR^3;M9P$;>DUU|w$cE)jp2Gt9;fn3!~sBdUWVd#e3c#)VC zDBz)ny(*|*A%~#L>cS?3ih~fGIA8P zlz54if=V`n;G!zSgB7wN_`Mu5GIb(erTckBaIUPZ??$SBDu&YipNlnNZ+~UQD=4$f zFwG3u0jGrZjfC*#r2n1EY+AQR6l}eG-CjNz(T*9k8NlvOyLsAH&JglFAb*tx68IsI;R$LYKW)Nl9aXYzhGEc=}L{iC#oWND%fya4?l z=3%&gY{$#gef?rAPC?T>Nw;ma$2v%qH`YE@MwVpy1iyH(#U8e!HALedI`w^w9t(Op zF|z)E<_`+rudF`03UV|%ngFrRse^sF4!#-SepC8IrR7_0|DYzIh6j&MOKiL)q{>a; zIWL{a6Cu^p%&Ru~XIil=s571xJ$+5?QoM(eUHP{Q;I+Krj9j>Hc}^p_SO|WHorj@}@xs zWe5e z0dkd`v^qf^&7b*BrH?LjrIuRvsL6W(*0I>>77uJ0=N5Bc^8?8amZC}I(!Mh~^V7+# zGB*-J@bTxk0i(@zcK;D$Z&c)St;&t%)6$^+@m9{3l_*;oO1)(9D|Yi#U2zRLHmXHX zz-&OpHP6iv zRq+xEANsgU3c&QJ>tgnI_-H+K$^A6#Psy?d=^O97z7Y(AC_KvvFFzv38rb<3^AGYg z){+Dc$#Js8kgx7@H`RE!cn{s;`JE7(U)gW8dbOW+HzFrHIvIB(;Awrf-QT$LT9zvP zq?J<({4j8*eWwm_avz}A;3NGIID*X{Vi+Rxb3s@CJb zFaSKv;pE|j2-i&sYZCsVOTEWOX%9Q1PxF}Vji_W zuBo*Dn%NTIy(d@IBpT+v)pH3iDIkq!MOgHH<_-^a? 
z(~OimR{BKU%?Iym*4L$QK?fo-bOsx?lqMmY^B%NS`;+5xJRo2{8X)eu-D22cyQk_KM2@otBmWW zTh(S*X)XEwE~$vPXy*6nfOd`3bJv`1(Nw#Do1P7%^g7U0>$+y*HZ7eiEDY%fz7AvR zjIX7#{tlCzND7@oi&IKiLJtg^{rD-c>g91hMWT|l~J0Cu^t9F3**q6 zrB3+%9$aEZBQu-FAjQU$t?^xW!+4_$X^_iZU*)sG?^4Bw#~E^m;ad|t&i=s;8Gt+Y z+YX3Jt%YtN0cb+q4)PgY{{>(As=DAhYN$sor``YDgA?=|6$d~RRNQi zgc;01Y&O3o2ytLIJsdotH}3bMT&lQUMCXYG-Dab}Og{U1!D}dj{vn=C8*Dq8Xjt`f znSVV}>m8*iQ^gN>dTHAdqD;6#Tt_IDAK@=1`;c;6VU@)ue5>A!*&!m#2{AJ4hW zs(I~bp_em+lS#;{mg)YEu2`ypVf_j$Q6j&2m+Q8o#Goa0JNe+lak4TI_rNHZkJWU_Jls3lnFviL7WUJ8KAW z?ndy4an*I`Qj?5qTe)Cr9xU!U)P%vJsDnNuIvp$L#cm#blsJJIpQ`f#SO3-kklR6X zV*|%z(bn*)UTTh?P3_QZ!W=%xZyV+vuxaSa9*;LEnGX(Ju7ec42)F#H=nM_eDExv5 z4AkSevYLqwSZDRvcA^tpA~W=TP>KFaj_}TP00u;L^CTE_()`$)hn?dtgE+D|zt;Z) z@Aex_x<)IV(4K~jDCHCheYnx%^luj0lJW*?EHZ6($tQwVVDmf{*=@XGe#a4e#a!6e zBPH{+Yu>u`b7?$9XlNJc(ysb}gNVR5|GZnc1G+*nyat!lO&;(>R@6QIJUS^7BBJiC zOh{vPWmCt}RKr~OteA>~?-+BpX-?^IbwHiu)t@X!ETfrIlxkp6DJrrNA_s(iKlpXJ+w7Zc~E5x4E%RoWAkK6@g$%Usa{h4=+J3 z(7FLRHsTz8!JH?>qJVK09u2aD`O8ep_D+Wnd4h=vze$H{CPf(Rxn}U957{dB?qAQt zTH?uS_!J<0qw;!x-|BI+?ma)0IY8|74@5!^H$*P4H@o^~=e7T8*4KmWP`r_!jqP-& zx%HKV0gJYy{hKeBv77OlOvuNwypaew&?YmTpTw&~ReS2U@;VPQuI2lGJiQX^mX!Z6 zKMgSe6dzsEkH~!)*%|zh4EJ-g&OfW)XLX(45U;K+^SaQHv}&C#u6Aj-0Ge;pr>-43 zZ=BOE2hZ36n3u0>o<*LYf2+j^fUlvx>YRl0dt-@D^d^T;KHrJ+Wu=F6AAQ;xL5VZKhb5IJ+skdk;i7y$ulJ)w}^%w5yMe26zBHZ~rte zzdPA;>##Y zP<>*LQ;wI}UD5Lj%msk<=+~M}FpVMdgghxAFq>nzX$hIGb{sw(ZI7{SlZzY*j{?zw z5GIa^NQnRsH>$4~g;FinC}}{!{di@(&aH&sQP4?2`*wj5uJfrgc~mDAgW!wa;5%j( z^#0~Tn3jW=p)8|-ouGKPO&sJX7Jq+TqB9}KJ-o<+rPoyUE;iP_pfyR6o)A}WHzDCC zEEP=|dz>K}@!0!2E`yBslGo7hIxg96YQl0*wv$s!kmp~>;j?(=(96t}z7r+$9?B$u z_!x0X@s_VxNvXWegwu$$WFwa_`9f_tr0>Upop(Y( zFY;?#&_B&FMXWXCB`%wHjA6gXx07m`dRcv%HQP@1e?7?d@@-fY#yf~r1rY|55br;C zoD;9L9p~9!5QCcMMIINgiJdF{d32|GB~GuuGf4*1$DEzKDHs55NQWVZhK2~8r6568 zq@%v4FGRat&Zf5Dc*J_ltbw)^kNy46*cy?+J+xi>vw0l24z4(%?OfgqHh;jm6wMos z)=-{xXTa7LxfoG1Agc8QRjQ!B?`a9Jn%}I&=;a$}7jdbO0W7zT3N`RPt_U2r_c^vq z{Q_y0ztO)YYLoP$4JYJP6H+{BF{kQo6*V$F82#LR1 
ze&t-#;9t?trcv3h@Fay|3IUQ*FE%#49>tX&$tt|IOfeB&V#0Dz z=Qm})Q^rz;<=ReOA?+s>)3(A~TO@q%MTCsT-by4K^?dgG2n^In(PxZ;CKGs*|E0yg z`XDLuo262&^_GypyUKyU-N}qTy4@nY*GRDVwP-4wB$O-^xl(ScxP9Z-*i+AS%N^sO z9`wZ$|CK?_fC!Hu;_4!%J-`G7u$gn-{bxNr7LbwBw+F+xoouD60#n=a04szy3IXNK z7r!?a>DUTCw?K3AkM;{!;|D{4;B!3u-2o@RHfq7p-X{;erKYfDlT6YT=%4{g>l;7O zCH+_-PYwhCnU>C@Yv|O>=L+|!Gj7mCBcPUI7!{p%jKyzK6d)=RR-U{XYwC|O@934<$4B`Ej;zVuPPynA_aY}1 zQIEeaMbvkFeSG5RXUIX?`NAt#k85W(vGlp{=R`wGqSl(P1K7dIAqG4C6}sl5Y&DsO zu;KUY6Z8g1v6;&YO_1%Rx*=|WT3m1ls&PVFS5!>==q)`B#maQHix zo$84ETFHEABFMED=^E4Ro13k*`K1TU()v!~blbaub@6!Gi4pDM%?-ts;e~7dETtbH zkp}UY+5rUTIiM%PU{A~lqn-sH@ecM+6Bt4DYmabEtBP$Qvc3UxnHvkc@3iD!j-;7R z^6zgl*F?`xf&mZ!23iLbpP)<%Y_J=&-is`sg&hf@jW898TN2lv z|I+;uuuWar{)r$_E4Gyl*p-50ZBpGY*py&>JZI4rcT4vKa@V{2AZ3}q7iSZOj?!x0 ztc0K+Qz=Srw4R?zM(CpTbm4qD6Eqp{@s&&HQ9t2A;KhZWP3-=Dvb3BDjA1aC-(@7@ z&8-aab@Lu^-9i@3TA&&8*cEW)ji%{)xwZPJy0`j>j;XgU-n`MYW=I(%ob&6(CQOu- z&>MU6ewVPYcjYBAlXv;kpMqyx{3QdFNw{Mm5oD!ojxe^3Lkxjj>kPhE5|LE2Jy`H& zo2%y@w9ERnB86}3xD95+A6#BqqgaxsND0_kg`JU{lPpWp5qg>1^VpZ5On&r6(f69( z7YY&evJhWbZiZ5XtekuT@*a_L*zoKZ~oklXwvaPyIz;9wVrACk3+0-YzuR zfp2F(Ad;o6Ui(hELS(GX1xVFs_v?H6Wk7sE)TK(Oh# z^Vem3fcnenQO=@wShm>LfF$NI@nZT=@>jt=r8RJ%&;ur_Ik)geKFM7P2*HavS%NVk zv#20I+S)VfIux^m%JfMA*ryf2bHX2H{8`C3xU7)$ai*T>jy4ZsVi;yfeqon*}htK7{&;&xx86{n8ZZeu+f= zgF|YCn_|;sK(W0znK&~um6(f_zyh|_zWnvMB5l`RxN|$|jpvWO!t8%*zm3+9i|@VF z+T6JjSM&NXI!SWUG?Z~zj^7zA*l`$67+{BWh)tM8ve!+UquF*mJV|kN&^Kz0#eOTP z7ycn{kBHFkFg!P{VhSJgAr_A%QVIxQE7cOLbinxo)-}CHg3MQJxnEaig3J)&!6ge$ zO-VXB_94^RxURpl77u&aouy8BFQt(5-CSH*CV5GjVaL)-lh)}MFaaSonLxG`NuQuq zI7%-#O3G)q0Fkk|R~k6Uds)2Ylm`IYWGNkx$o&{lSiT|s!7)~+WH3eU+FDbMzh@o% z7GI*OE>RNC+c*voVcE_{^Q};4W#^t2LQu>@`By=8vLLpCtD+p}_}XV4lB^l?ihey7 zSeR4TEz{cdbUUcJaZN4wZaX6{jI2j3d~fdRomqw2(@g9dmSphMuZ~_W`8{tvi^m(J z;{rE7Ac`H6jp6xG*nQ0l*+4Q*jpf{ui>`N2Z?I*CJIVZ%ia42*irE!-vmS;eB!}D* zWUudeO)`I2BHMOTa)9<-6Ya@}*!L~+oS$L<*0`UZV8amPzl~fOm51Rj=|mj$`Arl} zRAO@+>&{(I)r1Y_p-9z3b-x{(-~a&Wld(b*_;dJGYso@2hRC!p81I+udB2_5;}mh+ 
zlO;Hi9DnNr61?ono;A~jNYFx$-<-i}z8Sm(*GmlFNfJ1xiLX>-vsUp-Oya92KZo;p zO!eViV+^9C^Zn!>_JKt9QYKxP^SOEKg?P)!?9zXN&<80oEC!hbUp!^cyz-0TY4Out zy@!~@()jf8-Hp3U>FFH^7>Yge)^rGQD=<@NvE(OuQc8cBd&YUwEqoC<>!$HVnGidk z!$qcu(uIlSI{+<4;>j9{IQ-dJF4&P{2T1`n0=XA^18FBzp#k;%4u^PJwZ&7B^YXT@ zUU;HGO7ojfhF9zBp-E%-FPBkdY4DPI^o5{j9`=C+N%n;}7Flf~-6S);qwbj7gHN#4 z9dzx#cztd_TKU;{#wIDL#ijFIn60Y&MY}&fEj-uN!ED9MG_ki!ZAg#%8kh^e_+tB~ zhRyxuX6#*|{a(tnTjSa6Ds`2cZ7JgF5AdONHcx4f#x=ntT`SURcepuY`r);r&JSH& zYS7r*jn@sYQ+MeX!WMC`K8k<{1JoqSSP#lXEjN9e{IgD=vQ)RpAbsjZn{(PYdfn4# zgx-`*@sr@K%Q!fQYR15725eA$jmoE}i^_rDcGep(=6yK}CWviX<+IFQo0}-`>u(nn zhyb&p+@3p+RuSBcZw)WGoQL|(B&bNBD=cN#^9)m~*(UU4AQJqjZy>SF0+n9>@k-lKaBhGeErkuWyg=@U7GW+oWC zw^z(We%}@P?0p^adgOW6OlYT=5mK;uJWy-u>+->oYYCLwg2OJt!rC{gkjxn4$#=6K z!VZA<{ubV4x;G=vj?QQ{+ZR2|F}?oTjFrR$EwT?6cko>=201{FUouu=TAA8c^$4Dq z+Z!y-Ik80-|32+aK(T|^jgj-$z1Z$K(I;Rdsy zK7K@UQdJI3`NeaQ5P+`7b=jcRQvhl&9 zoWrbU;DrHK^?urk-!Uy$$`mFYVU0`h;#-41Br;<|;7EH!M31fDx9*n$vl8&!VR+Dr7bNcAMF|r;{0ktO_wd zHc?00t%0nLVIjn%c1b~vCM(UKu-1wEAW!amqx9;pHu#irOq~6>mKtA0a-a zyQ<4?VlrvbcP`FK$JyO%xXvRA6%fe9nX_BAe|o4ppj-vpNUK^k--cKQSzsYW}47#v70W_4}$Q^UbhO8p}X8E1C!o*yi2Oy zfv4$l^nUW5rfeq+z}w5D=@6K!GampY&s?Nt8CrJ;SZsf7A08m_GX4Z7VPdVl^uQ|r z#^2vyjlkQL;JkRm#ql}Yv^U(heco4GgzoKUPvB?3)`+zoNsq3$@QG8O)8SR)4II;Fle23gPYHt6y?d$2+}Ja?$gcMh?iisI zneV*Ac`+t|)UgESy!`_`VEe1&T9WP+$&aJ*Kq34HswRw%$c5i~0=kSD`F5+ZNZ{}T;Ip^B8!wWLK%}VcqegL}v z9A=$OS`H%GTKc%2;Ei?A;^tRfd;KF5Ss|6(=0M4|Y!w-<7iLA60yPxWk=I4!aM+VmS0A&yW48pMx;(pTvtmD}sQVY6<*DQi6iGm^BOn?;HeVc1I{TUy=dC1HR*;(R=4$1F zJy!5pX(INa{v4o}ycZh*5ebQt;*>+?LsawStJf5Rt95la_evsf*OBTGf$XhwoNUjj zDYbf<@|hvY;!QHCU;wGbG5pNu8wG-!{j+=pTn88Mob)WK2^s!F)LTu*pr#np&3u-* znON+LN+EP1E^a={p1Lo{s7(B-NFSo&yZ1cM9$3kWvz7d3B=e5@Mo#5DeiDoJ#ZHJf z4UWnZHK*injUP{~R4ZV`W7^rw)l$Sr*p+Rn@SCQODO~{U_}bztwn9DOQqNmWi; zMZGk<^$yI51E78zEcZiR4#B%-I|79A|mYc{HIPg*Gdj5nQtJ zf^T6#$Q?-62evyH9k1p#8~2wMa&*lF)hK9^@3M9OdF|ZlvvyJka|FCp362W}KV*kc zE;N`5zP=Rl`YRr8uqM00@kQ&LE^os1O$w|LCAxD>uSEo*NWK@Hes32 
zQDi#5P9;l+hc@?VbQEajqEx(M%blCX6keIzK77Z#cHv(&_vl=?Nm-7&u=V> z(EBnYT_&(6{cDsBBfDEG*0rN88!Bb18|pofCdH>UquOMjtzjcDz9e5YBquR5sUaTA ziz;_9qbVDc)=zk5dj<^Hz_072*bV^gad?U3M_OTN4_>VK*CZUak8V*osCOJoop^Ha z+kMnM{8QO?YqxcLSqO zSGKMh>k-cV|3t!9qWMX6gvbFH@h8=MK{wn;_;&rh&p&=ViR6xIhj1}GNrB^sKC-xS zdztT8@|h^7YrVbxkyevxU;V??Dppr8<}sq3UD@mhMt49`L&&kGGaP(pyKpfm0H4Bz zd85C9_@PzI7OuN;GUU`-V|cq8nH&hNG_)x&0a-p~?>mr5)8A6T<9z zesEdfU89&!@-aKR}d9OMMF8M4JR|7zVO`YDR7s zeh$ITAxlk%k#H6RZad}Xn36}dWVni#_2gF=Th|Tz2;A+ZF<tm)KbNyv4uf$KAK?HqRy!P0v2jbsqlAMyvf$y@1Te z`6goy*CQ-177&uaAsYz)3SOWz{uJAU^HsL2oahSW!H2L)iXa{s0LI-p5T&j{d$}JJ zy4^iXh5Z#hodcO(q~fY918@YEZX7%cL4y9t zk4`5ZYxK6HEzINDm-^C!BX~+|Ef6q(_Um0p>B}Cm6gRW`)hhrhG@YOQx?>4yHhg}4 zZ>_)XcBcnf#8X1^lL?|upHmWi1p7RO$6!3iCwb6y$i(l#f3Rb~)ILpmUU#bpz zJrxt7J`>eilFf#$!1UYYgCBJ~{{iN;>%J z`!CMH9r6oxluMl0*tF`lSxUrnkELl)*zVay-Mjn4lI?Qi63;>PMu7jk&@F~_`{NvO z=kt}+9pu!hyFX?x{07xYn|S-XcntGw$|p8YtY^ysZjX2eOIo*Kc?Z4xu;CBcqV;OF z%ioqs3w#}ItDcub+z)jf#!y<5c#lI~(m8u+HCna?{co5M?#3tn*M9+oD|&B#oiR)nHO{wPvejEuFBG}FF&K7J)IaESf;CMAw9{Pb zu2-8fA3{Fu+;P7S!W|C899eX3)9i-U-&u{GblwJ5zNpH*CH0Ot9s3l^rr@=d?fJ0S zP2W(>bFQ!PEKOWI&FQ>7MAhaQhJe>Hyinu&+PTukH-$Djif89q!D&vm@;C;5`HbGR$^0OzLLUZ&lyFkED`40y6QQY) zQ;P$e?_8}oiPB|C->s$%KWSR(;ZAL&b6?RXx@zrb?BrOwX5vr+vA(Gh{4@==hJ}XU z5l@wm`|jv2$p`e5u`OyIIZel(z(U}$UKup3FSSCoG_Fl2nw>O}6x|Gi+Ti8_%o(#c z>>B}3XZ)c^_L>@Q7{5<0cDN=eB7q9~42`aE(^{O@^_8{mTp>nf+qp{Fq zs6HT)u2=)es2~3pk)Wl=Ez5(41AssIF9@kB&HMWBt=6hihs|G!R6}xtX(*|`^((so z!EH*3=TR$_8X~71)SS`q9viDNnEOr#9ILolCdG14QZan7(Kwv%%&Zu~BzO|C>-{zH zp1!a?P50+0yUQa%g9JNV@qH;qbD`$1al_>&vm=dKq`R$NDZW|3<&h6GXsvp#iZwnT zEBTmlSW6Aw47v+b-vxV!oQE5oZJ2zg>R0N3n9r*_Z1NC4l61v&6Y*r49Tzp8LGoMb zI`2DVBwVloSO0>~1s$Lo-&BGlewCfgN`1dIU-Bs@h)*!6AhB8TO)nev!B2{xe6mkBoEKsq`iDtjGI`+D$rZ(Y`jwb;~o04)mq?jYy|E9 z!`@qX#kDMNpg|G_3l=Q6L-62Ea0!+K3j}v}*Wfxh!AT&&-C=;>4#6_G26r1|khgQr zJ?GqWe}BPy>#eodti2d!@9ygA>Z-4+zV6nbXZ!G06mb~ZrS8^SOZf4#-g3Kjyk5pH zxc(ra4lMLkX6Va6$!x|*^ae(1L8OcZRORQKL!FG@l2lfM$+miu9xSwPI`Z-{+i?kG 
z^&?SEYD#O<=xAnDE;9NlWZ%1n^E5@rFeI1x#4LV_4H&}dv7C@Qj$@$ibNse_SH<<@ znHx~_z~2u~vgH}+k=&7^M^!xnt9bKmi!Q13@A3XFW+5W#Hxc9^PK|C=nCRsxgd}Rb zrRxYDXA^O*38Pf^blM=g2t_Ywa@JKa+Th0mhN57uHE2dAtpY8CD+2!3yO6U67ZIASh zE3{^48Wi&u0J4Z@xInK%-p#)#0hy!($iQ9>y{-CPfld|pb$8m4gvXLak{x|dMjIEX zHO~mZXsy29E(IPq3#9QSkI0$>wee1wf2R7uqTHT|6yUUyR=hUVM@9om?pqN{9Pl-f zivs}$2&;f%#@~hXK>@^G{_7KT zapFNYu3e-e7_I6l+PjscEFY^`fcp7dWRq=ti=L7hYE?{R?rKd;Rs63&qYiK_YTqq# z$f|_u9sCUXoqSnHits@GWBG2k0}jUz(l6G2ZDvF-bhSA zs8qUw$Y-TonDcS>&5U9@luG)9ID>?toGVmOZ(~5Z;o{iB8DSCtdH~q`+ zdi?!A>hXbg`XaKJeMg`4&jbWGSAn_lH$x1i4#;9uuYM}{6yO_BwM10XL=9cCby_Ut zmiWN<7d=N@81SeK!>`41)T(mGk(uyoxc)=iD2Z3Wvo}rZ8h_C$AmC`>0$pK?h#Lh) z-h>CC-D+`AJL{FH^6xJEd09mON??TwNx=V;isA3iz+GCP?nrRwh21}o`;)GrkPgVv z{QrOQ|8OU&{_ z&#H?2-?Q`Yo0m!k_23`TkR88=%Y^0VKEe%7me5&v+r z07==uFL55c?alQ*E$dxAlCSWkqoeJHNI4w}kL+a**iEwTOsz@IxM=Kr=uqL`hUzcV zAeD=T%rOTfILUm_o*2mKCQ{@Pt0>)D+p-~F&NvC8gQsB0Tsb@>{pU@8zVDCEcX8k6 zM1AbrTaWlilrq$1NP{0{?M+9yp!W|2TL23U(T0DeY8^f$0Gfw z&yQ$!9om)7=f(WEXpV&1#k%dIo4+QW+)H_GZGDZR@gzm6rgjR)vhloMIKn#C5e%xL zK6wEwRsYot;Qkjkf=n+_R+queq`mQ7uW+*qjphBjxVu|(>8-6~-SqK_Z=&^4ccB>t z#J)c5+k6<~_;>U`lKFp#Y5qe`^N}`S&kU=*?0w*bF|Aof?Fn&nm;KqTStx~R;{Uub zTzm<~AUHN&P-Z`Nv}NKe8}`|Q;LQPXyZRI>w`;WEi@$6*uo(SuX-fn&xeYfNA7;fk zDGek*OeJ|g<@FclM0eN}Vs5}C=9*mT|gUjaR*2|$1j*uTB){lc6ujJtB@ zgLM0oTB%mii%dFw$VKbly%0x!gZI76A!&K7;#s4Zt14NU4B~LeFX_`TxDrjlH0ZtB zzgVrrMez*o$)buOM|`^Iwq1+Z4U(+%YgALYl~!9~el=cK9Y@y6CYTxl34qh`&%g!x zQ2m%9?8bEyR5sNEtsaITXuIZw*Hpm?;BM% zMD81H=|rIZ?yNmDbEjhPwCRD z{ceS?1#5Vw!r&bm+rKq1&KCK}6^5mqUj4Mf$Yxl)sEkt|{6GbmdKv#C9m*(F?laGp z8T-=V{x&J(6sds6UK5MVq$=)q@+Z5H|LFie>hYU1WxtFyKIf73mWE|oYE^v1%d4oS zRK0T@!gfOHtuC!+=QS0ij>@YpH{Fa8s6B^fmQ)~GF^ zscQ9G(PL&|ppo66^D{UsdIc5%H-Gy-y`?MxaV)bni9g2FdR`lCAO{Sf0-nKYib)@+ zmtt&uz)$fq9p!gV{vIUYhDSCxOeHe(h2j|aG(>_dAjjAJHsdUd3$n$-uY8WKmSoza4(*I|IB7$JN3dBpa@SrUqy;}|&NFVVoFm(Acx zF{;1yq)z-1R9WNF1Yf5~7AoT$mI>W_tbD&R=EnuC7Q6FP{G*dr*^ivB z_qA{Pj#N%{WZNz4bpEYxK&->fxO|zx^WxfX*8aooH))#RJ(Yt+vb3hGGE|-2P_71!0+`dujlf 
z>wx9%XGU~AE#`g^4Kn#8uBL{A8L&wlt1&)}7+5nZIJD@{g<0--wt3i%r{Y2;w~5@GT;I3$biWY4qGqv_{>L1 z`CPVDp)GvA5(x_os^^Ybg06g{cTR=K)T*vPTvI%%%K3{E!G0g#_qHh^%pBn3WStLgt*MPj_zlC(!O{3Bb7Q5=cyN6TQ zeNEPM(y3ios1s|>(y1FL7En9XENI=-%ewdK-%D#%u5ii*kZQfl`HC|MVKPCov;?Fr z)Ua9s1C<8Ls7=H6dQq0K47>4#x>^6RH?x!#Llu>}E-aIfFX44Gf2{{`m%jlIU;L7Q zYl%lls}uLB>>8N>i^+i<#4De~Z-@ifI4C0&%CSY^I4`GgGw#<=j}eJp;!g~XdEWeX zY!{*xzjepUuHane3N3XtJ{pa`Y_vD?-+Q{}AZN1As$2OZhU~7gWjA5cbp}uZ^vxEER*5#`|-~{_=2IvK$mp?7L{axt%=Y;4NMu^!r z%`P%}78s4^ai3PCmWieOoDAc!ki%?p=6+bV{6PEuUC{{ zFnpe7!@L&bNy}lAqDJanTZB7niWr95f4K}M3BOT3`L~V{;+WVTZWGR5-|YB!G4P$= zD0cqzIa5-|BJqXa*;UwHI%jatQrq;AFAQ*^urLMBH>l4uE|^)@y9E21Z?>rYNVA8K ztx>n7>SyAFh_(4{F=B-N97l19UYqsH%lh!({Nx*1+0E%$#e%$S=JOEqhZ?XLc#YY5 zYgYd4!q~%|0CLkx!cw_mSE%7bhqodamW+>5p2&yU-g7VmaqUN1Zdmrv{Io19V2DkK zp%A_I;r%}G#`UqUP_SpxiG!mC(XIwcfeT}Ho*aa<#b`x+wc5Ek^C&y>>h-h7Tl^`{ zW|DgLiu$r6c0pYYItAph&H49DhkZ?Ap*IX;r1MT$2NHMPm`boj(^@a?r2(IvhQAzE zakt;dsoT#eCpyHhAP&N#bSh|=aijB++j^GWn6I0=(0+C6e32`2yZ8{**Uq!`V#RXD z23>a@{?UFYqWuhnb6xBZm=dmhORgWd+m8ZUdYe0Z+0a*0^vo>#Le<1)5`AU}&6J<^ z-QQdL5ed*3jJ@@i4*lu1S3y!vCbp>t-738|To&bs?OlS$bzuTYNir?K#=>mTam;v- zqals+WI@|nGr9XWZ?E3~men1;r?R<4>S|`u%Upl|i_zX??CdsG-|t#gvy$#Xf9#%~ zsCSBMwJ2vJRev-~?0CW9L(U=Llh~_m-;S~OKEGc1$Xy8i=U=dA$lB$ z7FS30(DFmqb`H=gkPh}L9L<ZD)JojU~9- zH=&jz?N!`0gDEB^)`rE2*3*uuI57g|SBxmV(%K(iovk^uhfp5G3NsSLyNPU&q<$Jo zeZe-tDGBz0{2EfyTgl`;Z@izQ;*$y`Kcic|^$E@3w*5F?Kg~zGLR=>#42LGWY1tdf3Hl0nW7%%Ix=@ z2{*k4GHhg`{UWCeGnJf9!y=94eEU0~xWCQLuP1<6*pLxnN~K;==0D(XxEq~s8gCF= zp%**R<*p8OP`jflyiHxkk$+)2%DYTz~+jTl3l9+}hzB?6a7i8OLgx^viw3y;vV+XHvH%esY&rsCFXz@ zv%$XYrdPQshF9BU%8;LQ{G@VQ*9o97-IEQ|cyHNb(OyahsImZZP>6tIeZWZZlEFtL z8fW&K>mbwCNNT$u)q-Za9`1$`1*Ugvd>RD4e*-1_ z48|(%fuLWH5D111&dqGaz$LOdT#~Vzw=*8`SBI>ZPid^V^!=@?CTO?(RwKi-B-`yT z@5jTRV|pzhS)R|6cJxr6qu6kdj&f4`=$47tch_Df3c^oL)%T%$DM##B;|`-+_xw)s z!j#y1;P_;@{h{s3!tB;Z8U}se;*qT6!0?Q~?;s?>n2)09OkW+bMZvj6QWL#(GaD9?fAMdw0g{Q*|E9woW5 z#wktP96kCJZOG$yj0|T#7;{Dx6+_oIcIhjW{aKyfb~nnW_{Tixh!8arrIywb6HZi1 
ztJzegUtR0Yo13`ci&+6FF;t(2YnHsARfwe{2)?^j0{|;U*_&kjKNjr3kH2Zr4NCML zkI^B@lDT_$6bXicdMAlRS{kPxDk9+5Tlx|-3%a-mbbD~}+J=^sUEm-A78a+~xV6Wc zL{vULxA_LQO6m6&+(+O5UX}70C8zN{^vx0YGOpS|txk!B^vGUC<9ON8A{QTwEez88 zoq*Nw;zu!BD1M18HH)BAU!z)-^uX>HNM@!JN7(2lln&|TZXby53F+A+wl1lx7oiMB zb|VMkZ2jGR2Zx#ACz8;`XPc^i4~v(gWl?##L~Ci-x^jEdC0n2Hv=Tc@xQJRMNBIjB ziKNZch#EgtHB1a{I5(6H_%N59;^gA3Pw!NuGIKa<+MbI|KNd#n|E9xT;lS{eZ5tx^ zSo@7&@T+!_HBGqh;+W#*?60Pc*_Es>3pvR+f)+N1_8Ca9Xx%wxJWbE?&lv8@PWGVp zv*({k?(R#mBwD{K{SotTPyqrJPI@MTY?UV7H`*6`S!u}6>EyaZ>`O3&6O*G%hxuqh z2z>s8dT4`dYxhK!-gEu4pGh~V^O{=6im|qzOrF+wJX8StfzC7)fbK@Ma|2EqWMzNLAt-GbHJ*w{K?V{=Oa+W>C6H z?YXO+)uk&W$*~=Y-nBiK)%NO{JJ0t(vlp9rmSCLL(QGAWJZoG(!^Q>u-sjtM&WeU zHI^K@{}4SH%JW@2T8YtqF4I5Ew{!|E#s3yl>uVyu-4?k%LId*X;Rr+4qD-QE!)L6(8l%AKuuo zjs5xr=l2`qwwcKt{y5`H&>l7f$;D+L&JufwZtM7A%ncrQ%{G;})QU(Me<@f5NyJqE+wh9h4WT0p-OSh`-T->615Q5lJ+sHa~F`2mO2;>6c+34-$;=jR|ym)0V)eskbv) z>lZ5&cC>xfR2&lX833Gt%J-=&FneOf7t<9Ln11i*CjcAr#gmY!7Vm6FiOU}rCZZUa z;+xYm!jffz+OEBkRD7ty7c*QP{2k(k`PuIBb`vJ((6YB+Rm#|!51f@(B3Yy+%A98( z-EEcxgXwO6?lm}Vxd@0oN3CFqA-hH zthyY1Jq~=#bYXi+R>E?=>@}}Wjj3E>bFK^=5CCbh#+9?`GOE3n=Up6H$lkSG-naPJ8B!jj@dy`K+PXy!tVzK5JgpnQI77pi{%)t&F^NWfb1wjx~*WhqEw9%Sflo;UqjqGBvKL#ys@4*07_|_LPees@PP|=GhIVtcwbonLAFA}87Yx8fRcUOI2XMZ%BFH<{3iz={< zOakIQ4g()aXPDBEsH#@h*3RVwUcdS-bzZ~6aR1Cv9MJFZG zc=?f!RjSX3E&85nxRp%QN=vTgdXpr~_ylaMEG+o4S8D{SAyl6n&iRSs9ved&L-tpx zUO@{0T9C=~-kfoLVEF{-u_S)eWBmcE?bE0( zL2(U+Pwa+@#+01xuhktA%OjF`NI7Ud_326S+}Lm83jr=BLJVSy)zFBlzHofD3{Z(% z@7GeOWa*qqGGk%c^I-&%&+x$(*YwOaeI!HXxwTNs(+zX|$HrhaaTD7ka^lp@413&% zwD>!ZHZ3(GcWpc^%Eo+p(@BD_XVrVzLMZO)x)PkZwsq#w?=@BrM2;7p>p&RNH17aE zuuu@dCe_UgiT=Wu?NM1;UDoMRFlnM5D-wBj$<26Pl66tPplE$?^ib-=rIEu4kbdJJ zecM68Msvq(#L-PeXZ$X*EY+VCslwoU$kb-;*YIzHVFcaLg3W4DVARl>MnU=YP)Rh7Tn6mN>~9Kd zHz?SN;Xm+`w#v#*L}JiqS(zgucSVl#nRdgOHRoUadgO+|JF6( zRP%oRyB=Xd=COX-@MLa81-GnZbSPE1R2uf?EOyRF-GEePB#_@k$rE^0BYf2G5F$?M zJpsl@qg@m{@Mu1g1ltkDe7V>Y_`c}3{$cfkk-eS2>%+ojb-!)=II~P0hmG5OiAVO* 
zJ2C415?qHKDZ&Ns`MZvTvpaXu*QvU5>$bOFSa{R>A~QE%2>$9E>e|EbFb!%_e1AhE z`-||dOF(CZE*o^1M{ty6t_Sj@yXBFrU{77@Y@Noagdh2EysY=ELqwlBe*XBhlJ-@_ zS^8s*3zKK|p7z{aV`GDz7eo`lHbUUSZ#bN9PpOF@(27o+rfkpAyg3hRdz<_?Cfu|A zp5QqJ*ekV&jjScanK$R_OXAN|F8zA4!I!URFW9f1Xcs?;&#KtwLw^=ccBeY^sd)PW z4LC7L)eo`7d;L2GA%W6oXA1x=bI~n>PI#>v}Rpw6+glc zKp0F|feEOs!!!5^H1!n0Yx%mX&vy$5${pDu7tthKmM)(rJDk=b-vXQ=j7`;@LM5M! z@$AKIb~G2)9}kf5Ef-dgr!>bxY7}ySeL_ke1es6{lNppFh|ky!^y%E5R>w)c`4YgbNl*vjbtn`B!V#JNMLxIwD(WjeY>Q z3FT!jRH<~vQbM%%+I{EbJ@Uk>NZDIAOu^x$D%rGB41&?M%5@Fi*mY_Fiy7%vGT&`E zf=DXT66dGQM)6exJj4F;wiO?OPNEQo_nseSc(@Oqhw8aI@~iM;%weXN9UYPiy?`jj zU9Ec>avUKHw~f`@S!@ z*5rzA+?!(*`Hc<#A`cX7N;Tck`t5ebr-J=F3N~?vKPEN1?&2(DjkZ)42wjwpGC7+V zT1)|X=ov|dgMy7M{sJ%2$sAgxkY7j|tWUph6x4g+Y)fCkBt;f4?}s0QE@@3{KRm-| zBwe9-)k*n#2Z_U?BxzPzoW^@rPZHpfoT5b7++O__Jg+P$3Ep=R&3oIAU$T*=D|8-O z`JBR8)MNI9Ua8(eh}Zsr;Tl{=Y4{Y=SyC%IbIBN%HYR}-dv|5@v@y<2?0VtB{zx|< zzqp*ohmpyiuE?VJ#K@u9eyqTq7Rr+|2D z+4`Uk_oq~f7zJ1gGuUF0^IjQ!e2t$<*72?^c_^pyL<>EXFyUkf4vzM-^Pts2GD>os zJ?ngah3~H-1Hv~>Z4e8+lknLrwGcAu|MV;A32TjWW!pLFaXy8Okq-CKE1|=!J<`?g zC6dnG6)L^rxDSo<gGd70w^5_|S1DQK;5x@^fl-VB666fQRIsB-6vk$qZ!g#W5AUJ6GcjS{ELdfXFq9 zYccq8AmglkeFd_<(axcw$~>>m9}Cc{4>OUHo2s-~rTJ-R11h=U0CB>Ab0pxUCcFCs zZa$KQCp{jR3X&2e_*o{aYA{0&O5Mm{%@Z(N82@gyXVb5IFhW5Gg^13cjeLvsBNUUkKi! z?2uY)c4aMnLGFC8n~IpzPQn+P$OCqkws_-SqPaZ8imtyPTk&@y1zKW2x1OP%dsZDN zNpU6FwIV%N;H&AGlh|cYn|tq`Fje}}LhWp4$)SGzcUawz`a;(-!JiH9J&1(#;Xl4@7HrVc)R-Xc4e zh(h@qTg_ud7yffu80AEfmeqkrRTsThf zG(w7{N&?xstZpCNdUnTA7E2WFCf~6&q`z4*@5n^4d7vV1oJp_0`rLZK)05Lv0qo`b zWvvJ6Q%j&mNYhrkp*`#B56Qk16RGW3*&fT3J$uXLJl}B~U>9oWBGAS&Jb5e4ba#K4sMStEOO z#J%Cc5dA1`*vTvdFbVsmU8s9OvaS9<5z%E`4R02T32kOv47X1=Uufr`qI~Ulw4vMM z3Q5aIjuhXNl)}Z`(;IGV)%!>6ya*Akoo)Am-Z+esD>|P!IG30pF5YM;Cha7fof~y; z#&q|m*PW5)>dTHyqRs$f#q}O?LB4LBhb(tB6tN^5kmbC50eO_5uyyHBNGu-=Ta}`Q z`m@xRPicl&R1(hbca}scxGm%4!eRo_>ptA(+;Re6R}UY!8)qt5`_?Oz+$6Uel%38`~EWVBC@z64^+ zEemOoqgB6}2u*)I<p zu@De#?Id(ff+>4Ftsq;emq3Ms%6&OOy9ISNNW%9;sp;M%9&Sx{FS#V{*KWgtldb!? 
z$1^b7`NY1|G7Nd`r%QW?qhR(S+FnN$dY>XbBIv_M8a;Vok2Zqkd95_;3!Yu=zZ#rd z39=Z|GyGtmRR-FR7YD0`lLm6SU6m1#f3kxJ(~_oftnuP0Wm7&@AtM*+22q9B1rv>a z5}uUO>SIeg9)&81XqkL=akfwP-dq@(-mk5`fXg+1EC7oHRE@XETY?z+J}@?E#^p0h z!m;=QrA}70#IO0=vbVB5m#}8Jv?Wk3$P*x^6E`pWhP;C`pHlVuiA!l8#feObxYjb9 zF~V+Z(j1h%-`-@4K6?eT$@h@Ft82*MH>y6EjgmAl>?GZ(Uy>cG%XuXBN%d(k1q;l? z=g3m_%jdf)Z}Z`y6qMPVd++%$3MZ?H87Nwqi`hbYX3dwXYj63Tdw?hg=kRcW4pAYr*Sj|cCLrI4_}5ina5TtMwVStOhBb(7t+qkq#oIR@L0X*B*$_+ zRt9}JkhOh3t0cFd#*9i8JtNQHPX+QE8$pv8>s$L27UK1iV7bf964ck-SoKRLY&~4w z>a0g&s+4iZ&s1}s-9v^El~at?^7CuN^|B9VZy3iaXF5eINPdbPYOGpIa1Rin_3KKZIgxtbX+`_j#*s?T{nG=|{N ziej6J9TnSa7|iG+CgN=4$)FG;{ct(!@34HD{>~$s1UBENRC7vCp8mmb{q8p4NO&#i z=%%gQUjLhquv4ZjXR6w&!~lN z%^I0xESfH@C->~1IC@l)ogwZyK4|eP<#>nP4T}21km#SDd{Udo`b>yE+~|Mf8g#^0 zNpjfH6XdoR#IFZAO&utsVPI%@a>U4|KkR7Qp6zx<;!WD8LYAny@4Fqa+WYw^S5!VK zrGR#7Er_{ygv-rp$J;wE3_|^#M`oMqYI*KfBV;SIQha-y?WulwrQ|M>%I&&*>>CQr zq50{KsGeoq!MLe>vgKoMSdhBfWG|0^Qi$ANa*BNtFLH15BSI!CDCARFLp6WErm$%=W=ccYkQU z#Jm%9LRs#$0flw->$%SL$#|s8?VnG4%&?N!Xh+;w7fRxNh%9I4@FNAuq2x)Rb(5yX8&2QBl8|WtyT>R51zV zEQ(NZvGM+PPj9(|X7{cFMcmIFk9gp^p5WBP){p`o;mJ+=#r&)gA#Z&NbQ}SYVuQ7p zp7|N&H-nD3>k5w_;fo0hRbhu;>Oa(!9@ker*@W@H8+W#(ol|M^BmrtdOmG8*cleWI z-+sq=0mC~GjJ&m_FDyt)Y-$P4W9BVbvl-s>9*^Ip$Tp zar9kI*4U2SR^R1k^*mm;ES%!1j*LACl6zw^<`mHsTf{sehSADP$(u!sakP7?Wt5-W zzLrB^f-1+@=mBA$u`53()*;1H{L4veAjrO>p{#1~Uzkz^qCf28YE(yllh1)2H3Knd z-p^3sCP!ahBNqn+Ft7_2tk3c;{A9IHB~Br^_gwXsjlk;6!67nn?7{W|)!44~-fSKt zb>D^^DLz@^r}VU&K12OTS?`iSBlUFIe3$R$UeFHa?bF)tTVLnubqt z1LKAjUr4s1;|l#qWcJ_4nwX zX{Vl zeq&_6>1_kLtzP%%`540-3i)!oW>brh&)I}7BFix836wE%2x^3E>C;1@VJQ(P*x81n zO;guAL9B@?68_|=u1*`5bLtWvOp{M86pJ)IE{(`gZd-sj+Omr@tNDY$qQ8K-;meiC zR|C*49#fwi+?^iHhh+n9ela)zwy#d17(A`kFmnH@ova=jnPCfSppHEzicbPL25atrmc~j&WyojscGSuC z^m^&N`e9b!eddy|0DQ_hQJ}riz9E6GKK%Imf?f);V_%^YEO4xO*;~PvvUM&bV6D&@DG$SNvF-s9k_Q`72*GC6GQ9kN!UasI zJMeP&{Y=ZAOp2bcp_ZUw5&(?zCyftv=U=KM2#20IRPFhCnmurCLL={hQ^RYOQilB* zo9rz7@$zW>n@tA`(X=Ni+Rj&jlYf{nRtH2a753sdhGO2Z?_bbjEMtz5T0XSoa42&UltG>%_Q3MvY9^J 
zzhs)1X9qmV;$$Gis#}{fpjOpFZq#0wf1P}-U^bi-f9+)@Sb-$>y$7340Ndd<_c(RM z)p33ECY}yF2WAcD!L#`q5ozJ9f^BLkd9~8i(dK!@w|@<;fS8SW*gpx2gB0ZW(5JQU zFWIbNi|Rgu&5d7vqkN?M+9?xC3wlr3DRgPV*13lLRs}5)7Th8p1x|&$FLqkK<~~}Uy7&tID`yXI zW^=tz<@VLtRbuG>gTkuQ#}P<+Sb+vyj4LK_WPjG+QnYKQ$s?}8VHRB@Jch0&g?CqZ z;2^oSr=Uh365WfqKMwALm|XD7pB+9n)WB>xGvd=6jMaen?( z%`bAQ?+hgi%sZoS`OGyrACEdbPfr^vE4vLCQD6i6G z%yJj9A$gRksChfNVbO6g88pT`nx)gug z%khexeyz242SMO!BOw|YzBKpA)=xVyuiuztD-f9!rxwTn2Sfh>`@g`Gi=yfd3S&u- z3;rPl)1$)XMIIyE?2PCp+;8b5@qjg%&pNl#V5#hewCd1uSdt2B*wMYBG+ek9J1>zY zs1IjghiTDZgWm4X)!ee=u$B7^pF6(~J`w1{mJFYnnaK%j+CGWv=YO3rhEoGV!Ah2D z#Eu;hmE_Aw{l!%^4&5@NAoE=uB5YWPTnlgF`|B-P2NQgZ@(r{!W%U>G5*iEmR!C}4}}J<7_k?~X!GAx<=ZPaPX3ff}bJtO~|rwn`~d+@bPc z*iY07u3xAWBiLQVMD<=b8hfTkv~0g+9^5DqT?_JVeo=iYMPz%@{Pi*E=fZu(s8(n4 z8=@$}n-&m@Rl0omFT;=I@3qDu=ZnXu6g3!61!xl~Hz|scnQC>o?NL<*2HE)+-fgDGrNbnfYd?Bk-pO|+EP?xl6xggRYk&u?;o zts2{EXqk&XU7rcygYzhg&+=QRRLBqpA}IurNKSl$$NWI}5KX_#fC$)J)Hr;41iQJD zOpiv>AUv+m{)(NLf)Clg{=|rJmuybCA*r!(-CEmJ|^urFZi}eXg-w&Klfe5D?zQ-t|DN%4;9$ZX@G;1`T0T-PI_n z+M)&`ee*V15BxNW_2!d@~1*b|%c5eJGa-)uKN)p(FFZLunvHl-r*s z)rnz2M9KVq)cNh6_^nyWbBEs=32HwBRXc)q%|6EHHd2#f z)5WB7pRQJq(OXHZVG-i6-3!-NLY(us^ypbOdH9pCu4J*(_W6Q{>(&cp!+So?KYe$4 z>;#fDRoX!9j?Eftpk+vMrhJy_Tp;}9pdd6(m+$9y@r#|`<;{QQo2x;^gOYYxq7RRBZ@|{-T@ie17Fa)}?PIlJpP=Yv(rNr) zZcT5^m_54@MLytHaD#PdzKjBi&m=+Bxr#mR>N;8czVr+|z`p452z$Y^lx z3Fk^E6kI9Nd5Nti{1yvSO;Tr&-QXz9XdeQ$Z#$vIRqj_3`-nzki2_G*M`p)yodR$d z(A}<%K#&XLH`j1-Y;f(hD>jBl0>?eQQaWdc-dnis&nba=ia1YgtczogZrUZWgv9RN zIk&@wv|51{SJ*MbYUcZT(UIQG6_|T{M!)097UnfVh(LP1#ah^F)%d`q==^XNVUjqJ>tHscVl{6I>(rt5VLH|-Umg*b?`d31i>>O!BnWoAu-C3( zwSq5)r+-a^=iw#D)xQ};>PE!0XEee%S@)(XIkUkQsrpeoTE(Mr+`rksjPd~;r~fek zPHuFn|9CEy`vl~`^@^Wz>5+t!U~My{z{zp%02Ww;heWxjHfmFZD01n(8)lfF+Clz& z!XEFwhp2X4)3DQrXzy2Eh7QiAN6r)DANN1w6U(qG#&+P2YXXe7!&kUCdXI~LyXQEegYD0+?Ehk&YUpk%}*q&D$#we##s>9*@{>7 zx5?+(mUn3d!rw~4O*=JEJs)jb4{1M&|GsIMq0#s`-f|DsF_NJzxCOcu)%7n zxoiJ^-jJV`5s$swo*Xodn) z%0@IIOpQ$)4@!&)LwZl8NU4N?n80}Tbu5Wm7!l=d$-_z;{Vr(dvr0jwdAN-;n8kD0 
zim$4mPjU!bdU5n1J}%aKr4oTEX$xgQhl(X8j-s>aP$vP0d^m_`73v~UeicnZDUmw! zR-$9uK1`LfGi~OfbAcB7_~J*701?t2S8nP^Pj=?)DybMc>SGOi4?*~z03!H&+1_^6 ziWtQ`grjn=^G5Ckhu4gjkc<=BPsv5r&Ajlcf?W6P@GX^xJIfW%v=K!)5~f4L*9jqR zRMU5J)9mjb0HC5y(1Kg)9hl6oWTdN(GHCI(^1;eh4x`FrgT0KsGnS+5aScUfQE&lw{AA==e1HP`uXXilfe3NR<6djxF#^P}I|lME=O?oMd{uoi{=p;? zkB!?=1|BUOQ&&CYT?RyZd>;y36u$qqkNIQAJTnDy-Mbn5{xutnJ{B_4Yh)jJv~}_k z68-|B0f5>pA6>DEGhV0hjKdyZFXU&*t}Hmg3xAz+Dc5xkhqyb>H9wItx(QEkSDhH?qfR0b9!;g5v>U}caTyzYteuVi z5hcL6*>^L)JBsgZDREW*6%nw#Qcv-Ef5 z#d0zNgkYaohLhKh~iM_P9v6MmPt?y%L!kYd2cK*c|LASfsK*^aI9Ve2=0%_G1gMO&fXdxoAUF5jOLTIi&< z^$c6oS;HaoXImT&ci9bAl3mK$}E473T2?sQQ`ehN~RjlO7{nbSID5DINzu?6W!VH;C% z%i|xCH$9;e$|0dn##W|CTrb6E3%2XsT&p$&iS_(H?Oj_m+u0h|lo7Y)qD7~KKY}wf zv_?@O>Ylo!m}wDpk4P({w5o9}RSZgtN+_w)DXMJ-(~Cpe8I8m(rL+=;q@pv9%d8|t zDo1hBHFLVom|>fkg0QvUz>fh$=dJv4bppG;`2alxmu_Z3m#K;=E7%h!I9nr~NXY`Z{!v!;YTB zB1BMJvsSJfKPHi5=YX30s&+K)Z1&tItmr9&BrlKmXP>7VwuDT*Bt@F&BL3MiHZ5Zd;yFd;rO6ugBu7%Yx#EfK9Cw?50KLiE5;gFvDo)Wo#dp@ zX}0s%ss>dsoIp284|K`HOpk!DImqv_=-fG*tEu1}Q#~dsSz&|P<)wx45SbZzlrHJF z+&KA7p^qq!Q}WiVJ+1hRFYD{cwIr(*`i=zm&GuE7!!G#U=*W=3an0tnXTm7)IJ(|9 z%nFFxoDM%Tfw{Bw&4$Rq96tCZ`C6;x@t3KwDZRw;h_JEeIh_LgPP@N`Ku${?7dRBr zX5#s>*#!FbCIH9vK8b}jxO=>cGns)Z> z!*ch~0)Edfy@5oVUhHJ2Zb^8VchDuhvaEr4>zn^Mkdc7I%+pvKVme$$Gd`y z=nTr)zoZ%)OH_7v?2OJQiEB%AbfJPdzBr2)5j1rXIvk?ZWzz(yv78+Dx?ucm5WnQm zsWI*(1q#hPzwyNBQo{z7$&U5GyB+P0ye3tuzL&%O)t6zS6W`=^h^ufe)9fWb$sw2F zD%MEtJ2MsaC2>gorV0HTQ_R-7c%TPLUw;N@!kw1C_tR_57#iz+R|I{ab`uHI!Pu@sH$PwTt8k6awqPg?ZBT`$R}K`I zS4SHHdF6?Z=23%= z=d9NT*;%V$|0EWTFp5X~7f$M{&>{uZY-%Z&PTn8VpW`YJ^rQ?<3~o0E=5&Dy{pOxl8Z4xnD$eW*UCoeg5bLRcys8KL_+@L+kHT@5tbuJp&(6~rRk4_DH?cQ)q-Aw!Zq(ZSUus_&K1&5m~y}lBU zvMCodwN=-gXie|0#tfbej;~Q2(s)W09`*J#!lrzekaFw8mC>{S=9@o)S`<@9xzIU*!4Ubh6tA*Gp@T5K@6vTBTLBC-*#V3eNZ!{f8p)Wn~vx z0zq<+C9K~U^okd|9U;tRc7__xX(MO7d5HQ-vHb%I12N#8F6;V|RB_bizYd@)Am-nk zEgxFBf~LxYFY2Cb4z~oq9i;1TUTN7Ask^`9N5x852zvftoExZMYp6}SUPJ$BSkJu+ zy~=M>cV7FqMhzbo`1(kr_CL4$d1JpLouB6aEn!-MufJHd1jz&5tc+lC?Ca)W@@)EH 
z46Nm{vY;)v`NcKW^qFn>NRG>{C5Kg4hWm;_y;228ve(mEx6`Oc8toM@+W#%Cij}tv YgZ48%-^iryQBn-7XMo28x2U{-08K9yQvd(} literal 0 HcmV?d00001 diff --git a/doc/source/index.rst b/doc/source/index.rst index 866c8f8fe..a24fcfef5 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -249,11 +249,9 @@ Getting Involved :caption: Tune tune.rst - Tune Guides and Tutorials - tune-usage.rst + Tutorials, Guides, Examples tune-schedulers.rst tune-searchalg.rst - tune-examples.rst tune/api_docs/overview.rst tune-contrib.rst diff --git a/doc/source/rllib-concepts.rst b/doc/source/rllib-concepts.rst index 57a272145..b98d2b1db 100644 --- a/doc/source/rllib-concepts.rst +++ b/doc/source/rllib-concepts.rst @@ -605,7 +605,7 @@ This is how the example in the previous section looks when written using a polic Trainers -------- -Trainers are the boilerplate classes that put the above components together, making algorithms accessible via Python API and the command line. They manage algorithm configuration, setup of the rollout workers and optimizer, and collection of training metrics. Trainers also implement the `Trainable API `__ for easy experiment management. +Trainers are the boilerplate classes that put the above components together, making algorithms accessible via Python API and the command line. They manage algorithm configuration, setup of the rollout workers and optimizer, and collection of training metrics. Trainers also implement the :ref:`Tune Trainable API ` for easy experiment management. Example of three equivalent ways of interacting with the PPO trainer, all of which log results in ``~/ray_results``: diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst index 1b7f13c41..46fe06355 100644 --- a/doc/source/rllib-training.rst +++ b/doc/source/rllib-training.rst @@ -172,9 +172,9 @@ Here is an example of the basic usage (for a more complete example, see `custom_ .. 
note:: - It's recommended that you run RLlib trainers with `Tune `__, for easy experiment management and visualization of results. Just set ``"run": ALG_NAME, "env": ENV_NAME`` in the experiment config. + It's recommended that you run RLlib trainers with :ref:`Tune `, for easy experiment management and visualization of results. Just set ``"run": ALG_NAME, "env": ENV_NAME`` in the experiment config. -All RLlib trainers are compatible with the `Tune API `__. This enables them to be easily used in experiments with `Tune `__. For example, the following code performs a simple hyperparam sweep of PPO: +All RLlib trainers are compatible with the :ref:`Tune API `. This enables them to be easily used in experiments with :ref:`Tune `. For example, the following code performs a simple hyperparam sweep of PPO: .. code-block:: python @@ -461,7 +461,7 @@ Advanced Python APIs Custom Training Workflows ~~~~~~~~~~~~~~~~~~~~~~~~~ -In the `basic training example `__, Tune will call ``train()`` on your trainer once per training iteration and report the new training results. Sometimes, it is desirable to have full control over training, but still run inside Tune. Tune supports `custom trainable functions `__ that can be used to implement `custom training workflows (example) `__. +In the `basic training example `__, Tune will call ``train()`` on your trainer once per training iteration and report the new training results. Sometimes, it is desirable to have full control over training, but still run inside Tune. Tune supports :ref:`custom trainable functions ` that can be used to implement `custom training workflows (example) `__. For even finer-grained control over training, you can use RLlib's lower-level `building blocks `__ directly to implement `fully customized training workflows `__. diff --git a/doc/source/tune-contrib.rst b/doc/source/tune-contrib.rst index 33fbd3b0c..64573eea3 100644 --- a/doc/source/tune-contrib.rst +++ b/doc/source/tune-contrib.rst @@ -1,3 +1,5 @@ +.. 
_tune-contrib: + Contributing to Tune ==================== diff --git a/doc/source/tune-schedulers.rst b/doc/source/tune-schedulers.rst index 5f260c38b..84916f0fb 100644 --- a/doc/source/tune-schedulers.rst +++ b/doc/source/tune-schedulers.rst @@ -38,7 +38,7 @@ Tune includes a distributed implementation of `Population Based Training (PBT) < }) tune.run( ... , scheduler=pbt_scheduler) -When the PBT scheduler is enabled, each trial variant is treated as a member of the population. Periodically, top-performing trials are checkpointed (this requires your Trainable to support `save and restore `__). Low-performing trials clone the checkpoints of top performers and perturb the configurations in the hope of discovering an even better variation. +When the PBT scheduler is enabled, each trial variant is treated as a member of the population. Periodically, top-performing trials are checkpointed (this requires your Trainable to support :ref:`save and restore `). Low-performing trials clone the checkpoints of top performers and perturb the configurations in the hope of discovering an even better variation. You can run this `toy PBT example `__ to get an idea of how how PBT operates. When training in PBT mode, a single trial may see many different hyperparameters over its lifetime, which is recorded in its ``result.json`` file. The following figure generated by the example shows PBT with optimizing a LR schedule over the course of a single experiment: @@ -72,7 +72,7 @@ Compared to the original version of HyperBand, this implementation provides bett HyperBand --------- -.. note:: Note that the HyperBand scheduler requires your trainable to support saving and restoring, which is described in `Tune User Guide `__. Checkpointing enables the scheduler to multiplex many concurrent trials onto a limited size cluster. +.. note:: Note that the HyperBand scheduler requires your trainable to support :ref:`saving and restoring `. 
Checkpointing enables the scheduler to multiplex many concurrent trials onto a limited size cluster. Tune also implements the `standard version of HyperBand `__. You can use it as such: diff --git a/doc/source/tune-searchalg.rst b/doc/source/tune-searchalg.rst index 2bbfdd5a7..6c27e7bd6 100644 --- a/doc/source/tune-searchalg.rst +++ b/doc/source/tune-searchalg.rst @@ -26,14 +26,15 @@ Currently, Tune offers the following search algorithms (and library integrations Variant Generation (Grid Search/Random Search) ---------------------------------------------- -By default, Tune uses the `default search space and variant generation process `__ to create and queue trials. This supports random search and grid search as specified by the ``config`` parameter of ``tune.run``. +By default, Tune uses a BasicVariantGenerator to sample trials. This supports random search and grid search as specified by the ``config`` parameter of ``tune.run``. .. autoclass:: ray.tune.suggest.BasicVariantGenerator :show-inheritance: :noindex: +Read about this in the :ref:`Grid/Random Search API `. -Note that other search algorithms will not necessarily extend this class and may require a different search space declaration than the default Tune format. +Note that other search algorithms will require a different search space declaration than the default Tune format. Repeated Evaluations diff --git a/doc/source/tune-usage.rst b/doc/source/tune-usage.rst deleted file mode 100644 index 62113a480..000000000 --- a/doc/source/tune-usage.rst +++ /dev/null @@ -1,576 +0,0 @@ -.. _tune-user-guide: - -Tune User Guide -=============== - -The basic Tune API [``tune.run(Trainable)``] has two main parts: a :ref:`Training API ` and :ref:`tune.run `. - -.. _guide-training-api: - -Training API ------------- - -Training can be done with either a **Class API** (``tune.Trainable``) or **function-based API** (``track.log``). Here is an example ``tune.Trainable`` that you can use to dry-run Tune: - -.. 
code-block:: python - - from ray import tune - - class trainable(tune.Trainable): - def _setup(self, config): - if config["print_me"]: - print(config["print_me"]) - - def _train(self): - # run one step of training code. - # important: this method is called repeatedly! - result_dict = {"accuracy": 0.5, "f1": 0.1, ...} - return result_dict - - tune.run(trainable, config={"print_me": "hello-world"}, stop={"training_iteration": 200}) - -The **function-based API** is for fast prototyping but has limited functionality. Here is a **function-based API** example: - -.. code-block:: python - - from ray import tune - import time - - def trainable(config): - if config["print_me"]: - print(config["print_me"]) - - for i in range(200): - time.sleep(1) - result_dict = {"accuracy": 0.5, "f1": 0.1, ...} - tune.track.log(**result_dict) - - tune.run(trainable, config={"print_me": "hello-world"}) - -To read more, check out the :ref:`Trainable API docs`. - -.. _guide-running-tune: - -Running Tune ------------- - -Use ``tune.run`` to generate and execute your hyperparameter sweep: - -.. code-block:: python - - tune.run(trainable) - - # Run a total of 10 evaluations of the Trainable. Tune runs in - # parallel and automatically determines concurrency. - tune.run(trainable, num_samples=10) - -This function will report status on the command line until all Trials stop: - -.. code-block:: bash - - == Status == - Memory usage on this node: 11.4/16.0 GiB - Using FIFO scheduling algorithm. 
- Resources requested: 4/12 CPUs, 0/0 GPUs, 0.0/3.17 GiB heap, 0.0/1.07 GiB objects - Result logdir: /Users/foo/ray_results/myexp - Number of trials: 4 (4 RUNNING) - +----------------------+----------+---------------------+-----------+--------+--------+--------+--------+------------------+-------+ - | Trial name | status | loc | param1 | param2 | param3 | acc | loss | total time (s) | iter | - |----------------------+----------+---------------------+-----------+--------+--------+--------+--------+------------------+-------| - | MyTrainable_a826033a | RUNNING | 10.234.98.164:31115 | 0.303706 | 0.0761 | 0.4328 | 0.1289 | 1.8572 | 7.54952 | 15 | - | MyTrainable_a8263fc6 | RUNNING | 10.234.98.164:31117 | 0.929276 | 0.158 | 0.3417 | 0.4865 | 1.6307 | 7.0501 | 14 | - | MyTrainable_a8267914 | RUNNING | 10.234.98.164:31111 | 0.068426 | 0.0319 | 0.1147 | 0.9585 | 1.9603 | 7.0477 | 14 | - | MyTrainable_a826b7bc | RUNNING | 10.234.98.164:31112 | 0.729127 | 0.0748 | 0.1784 | 0.1797 | 1.7161 | 7.05715 | 14 | - +----------------------+----------+---------------------+-----------+--------+--------+--------+--------+------------------+-------+ - -All results reported by the trainable will be logged locally to a unique directory per experiment, e.g. ``~/ray_results/example-experiment`` in the above example. On a cluster, incremental results will be synced to local disk on the head node. All results will have `autofilled metrics `__ in addition to your own user-defined metrics. - -Trial Parallelism -~~~~~~~~~~~~~~~~~ - -Tune automatically runs N concurrent trials, where N is the number of CPUs (cores) on your machine. By default, Tune assumes that each trial will only require 1 CPU. You can override this with ``resources_per_trial``: - -.. code-block:: python - - # If you have 4 CPUs on your machine, this will run 4 concurrent trials at a time. - tune.run(trainable, num_samples=10) - - # If you have 4 CPUs on your machine, this will run 2 concurrent trials at a time. 
- tune.run(trainable, num_samples=10, resources_per_trial={"cpu": 2}) - - # If you have 4 CPUs on your machine, this will run 1 trial at a time. - tune.run(trainable, num_samples=10, resources_per_trial={"cpu": 4}) - -To leverage GPUs, you can set ``gpu`` in ``resources_per_trial``. A trial will only be executed if there are resources available. See the section on `resource allocation `_, which provides more details about GPU usage and trials that are distributed: - -.. code-block:: python - - # If you have 4 CPUs on your machine and 1 GPU, this will run 1 trial at a time. - tune.run(trainable, num_samples=10, resources_per_trial={"cpu": 2, "gpu": 1}) - - -To attach to a Ray cluster or use ``ray.init`` manual resource overrides, simply run ``ray.init`` before ``tune.run``: - -.. code-block:: python - - # Setup a local ray cluster and override resources. This will run 50 trials in parallel: - ray.init(num_cpus=100) - tune.run(trainable, num_samples=100, resources_per_trial={"cpu": 2}) - - # Connect to an existing distributed Ray cluster - ray.init(address=) - tune.run(trainable, num_samples=100, resources_per_trial={"cpu": 2, "gpu": 1}) - -.. tip:: To run everything sequentially, use `Ray Local Mode `_. - - -Analyzing Results ------------------ - -Tune provides an ``ExperimentAnalysis`` object for analyzing results from ``tune.run``. - -.. code-block:: python - - analysis = tune.run( - trainable, - name="example-experiment", - num_samples=10, - ) - -You can use the ``ExperimentAnalysis`` object to obtain the best configuration of the experiment: - -.. code-block:: python - - >>> print("Best config is", analysis.get_best_config(metric="mean_accuracy")) - Best config is: {'lr': 0.011537575723482687, 'momentum': 0.8921971713692662} - - -See the full documentation for the ``Analysis`` object: :ref:`exp-analysis-docstring`. - - -Grid Search/Random Search -------------------------- - -.. 
warning:: If you use a Search Algorithm, you may not be able to specify lambdas or grid search with this - interface, as the search algorithm may require a different search space declaration. - -You can specify a grid search or random search via the dict passed into ``tune.run(config=)``. - -.. code-block:: python - - tune.run( - trainable, - config={ - "qux": tune.sample_from(lambda spec: 2 + 2), - "bar": tune.grid_search([True, False]), - "foo": tune.grid_search([1, 2, 3]), - "baz": "asd", - } - ) - -Read about this in the :ref:`Grid/Random Search API ` page. - -Custom Trial Names ------------------- - -To specify custom trial names, you can pass use the ``trial_name_creator`` argument -to `tune.run`. This takes a function with the following signature: - -.. code-block:: python - - def trial_name_string(trial): - """ - Args: - trial (Trial): A generated trial object. - - Returns: - trial_name (str): String representation of Trial. - """ - return str(trial) - - tune.run( - MyTrainableClass, - name="example-experiment", - num_samples=1, - trial_name_creator=trial_name_string - ) - -An example can be found in `logging_example.py `__. - -Sampling Multiple Times ------------------------ - -By default, each random variable and grid search point is sampled once. To take multiple random samples, add ``num_samples: N`` to the experiment config. If `grid_search` is provided as an argument, the grid will be repeated `num_samples` of times. - -.. code-block:: python - :emphasize-lines: 12 - - tune.run( - my_trainable, - name="my_trainable", - config={ - "alpha": tune.sample_from(lambda spec: np.random.uniform(100)), - "beta": tune.sample_from(lambda spec: spec.config.alpha * np.random.normal()), - "nn_layers": [ - tune.grid_search([16, 64, 256]), - tune.grid_search([16, 64, 256]), - ], - }, - num_samples=10 - ) - -E.g. in the above, ``num_samples=10`` repeats the 3x3 grid search 10 times, for a total of 90 trials, each with randomly sampled values of ``alpha`` and ``beta``. 
- - -Resource Allocation (Using GPUs) --------------------------------- - -Tune will allocate the specified GPU and CPU ``resources_per_trial`` to each individual trial (defaulting to 1 CPU per trial). Under the hood, Tune runs each trial as a Ray actor, using Ray's resource handling to allocate resources and place actors. A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded. - -Fractional values are also supported, (i.e., ``"gpu": 0.2``). You can find an example of this in the `Keras MNIST example `__. - -If GPU resources are not requested, the ``CUDA_VISIBLE_DEVICES`` environment variable will be set as empty, disallowing GPU access. -Otherwise, it will be set to the GPUs in the list (this is managed by Ray). - -Advanced Resource Allocation ----------------------------- - -Trainables can themselves be distributed. If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``extra_cpu`` or ``extra_gpu`` to reserve extra resource slots for the actors you will create. For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 1, "extra_gpu": 4``. - -.. code-block:: python - :emphasize-lines: 4-8 - - tune.run( - my_trainable, - name="my_trainable", - resources_per_trial={ - "cpu": 1, - "gpu": 1, - "extra_gpu": 4 - } - ) - -The ``Trainable`` also provides the ``default_resource_requests`` interface to automatically declare the ``resources_per_trial`` based on the given configuration. - -.. automethod:: ray.tune.Trainable.default_resource_request - :noindex: - - -Trainable (Trial) Checkpointing -------------------------------- - -When running a hyperparameter search, Tune can automatically and periodically save/checkpoint your model. 
Checkpointing is used for - - * saving a model at the end of training - * modifying a model in the middle of training - * fault-tolerance in experiments with pre-emptible machines. - * enables certain Trial Schedulers such as HyperBand and PBT. - -To enable checkpointing, you must implement a `Trainable class `__ (Trainable functions are not checkpointable, since they never return control back to their caller). - -Checkpointing assumes that the model state will be saved to disk on whichever node the Trainable is running on. You can checkpoint with three different mechanisms: manually, periodically, and at termination. - -**Manual Checkpointing**: A custom Trainable can manually trigger checkpointing by returning ``should_checkpoint: True`` (or ``tune.result.SHOULD_CHECKPOINT: True``) in the result dictionary of `_train`. This can be especially helpful in spot instances: - -.. code-block:: python - - def _train(self): - # training code - result = {"mean_accuracy": accuracy} - if detect_instance_preemption(): - result.update(should_checkpoint=True) - return result - - -**Periodic Checkpointing**: periodic checkpointing can be used to provide fault-tolerance for experiments. This can be enabled by setting ``checkpoint_freq=`` and ``max_failures=`` to checkpoint trials every *N* iterations and recover from up to *M* crashes per trial, e.g.: - -.. code-block:: python - - tune.run( - my_trainable, - checkpoint_freq=10, - max_failures=5, - ) - -**Checkpointing at Termination**: The checkpoint_freq may not coincide with the exact end of an experiment. If you want a checkpoint to be created at the end -of a trial, you can additionally set the ``checkpoint_at_end=True``: - -.. 
code-block:: python - :emphasize-lines: 5 - - tune.run( - my_trainable, - checkpoint_freq=10, - checkpoint_at_end=True, - max_failures=5, - ) - -The checkpoint will be saved at a path that looks like ``local_dir/exp_name/trial_name/checkpoint_x/``, where the x is the number of iterations so far when the checkpoint is saved. To restore the checkpoint, you can use the ``restore`` argument and specify a checkpoint file. By doing this, you can change whatever experiments' configuration such as the experiment's name, the training iteration or so: - -.. code-block:: python - - # Restored previous trial from the given checkpoint - tune.run( - "PG", - name="RestoredExp", # The name can be different. - stop={"training_iteration": 10}, # train 5 more iterations than previous - restore="~/ray_results/Original/PG_/checkpoint_5/checkpoint-5", - config={"env": "CartPole-v0"}, - ) - -.. _tune-fault-tol: - -Fault Tolerance ---------------- - -Tune will automatically restart trials in case of trial failures/error (if ``max_failures != 0``), both in the single node and distributed setting. - -Tune will restore trials from the latest checkpoint, where available. In the distributed setting, if using the autoscaler with ``rsync`` enabled, Tune will automatically sync the trial folder with the driver. For example, if a node is lost while a trial (specifically, the corresponding Trainable actor of the trial) is still executing on that node and a checkpoint of the trial exists, Tune will wait until available resources are available to begin executing the trial again. - -If the trial/actor is placed on a different node, Tune will automatically push the previous checkpoint file to that node and restore the remote trial actor state, allowing the trial to resume from the latest checkpoint even after failure. - -Take a look at an example: :ref:`tune-distributed-spot`. 
- -Recovering From Failures -~~~~~~~~~~~~~~~~~~~~~~~~ - -Tune automatically persists the progress of your entire experiment (a ``tune.run`` session), so if an experiment crashes or is otherwise cancelled, it can be resumed by passing one of True, False, "LOCAL", "REMOTE", or "PROMPT" to ``tune.run(resume=...)``. Note that this only works if trial checkpoints are detected, whether it be by manual or periodic checkpointing. - -**Settings:** - - - The default setting of ``resume=False`` creates a new experiment. - - ``resume="LOCAL"`` and ``resume=True`` restore the experiment from ``local_dir/[experiment_name]``. - - ``resume="REMOTE"`` syncs the upload dir down to the local dir and then restores the experiment from ``local_dir/experiment_name``. - - ``resume="PROMPT"`` will cause Tune to prompt you for whether you want to resume. You can always force a new experiment to be created by changing the experiment name. - -Note that trials will be restored to their last checkpoint. If trial checkpointing is not enabled, unfinished trials will be restarted from scratch. - -E.g.: - -.. code-block:: python - - tune.run( - my_trainable, - checkpoint_freq=10, - local_dir="~/path/to/results", - resume=True - ) - -Upon a second run, this will restore the entire experiment state from ``~/path/to/results/my_experiment_name``. Importantly, any changes to the experiment specification upon resume will be ignored. For example, if the previous experiment has reached its termination, then resuming it with a new stop criterion makes no effect: the new experiment will terminate immediately after initialization. If you want to change the configuration, such as training more iterations, you can do so restore the checkpoint by setting ``restore=`` - note that this only works for a single trial. - -.. warning:: - - This feature is still experimental, so any provided Trial Scheduler or Search Algorithm will not be preserved. 
Only ``FIFOScheduler`` and ``BasicVariantGenerator`` will be supported. - - -Handling Large Datasets ------------------------ - -You often will want to compute a large object (e.g., training data, model weights) on the driver and use that object within each trial. Tune provides a ``pin_in_object_store`` utility function that can be used to broadcast such large objects. Objects pinned in this way will never be evicted from the Ray object store while the driver process is running, and can be efficiently retrieved from any task via ``get_pinned_object``. - -.. code-block:: python - - import ray - from ray import tune - from ray.tune.utils import pin_in_object_store, get_pinned_object - - import numpy as np - - ray.init() - - # X_id can be referenced in closures - X_id = pin_in_object_store(np.random.random(size=100000000)) - - def f(config, reporter): - X = get_pinned_object(X_id) - # use X - - tune.run(f) - -Custom Stopping Criteria ------------------------- - -You can control when trials are stopped early by passing the ``stop`` argument to ``tune.run``. This argument takes either a dictionary or a function. - -If a dictionary is passed in, the keys may be any field in the return result of ``tune.track.log`` in the Function API or ``train()`` (including the results from ``_train`` and auto-filled metrics). - -In the example below, each trial will be stopped either when it completes 10 iterations OR when it reaches a mean accuracy of 0.98. Note that `training_iteration` is an auto-filled metric by Tune. - -.. code-block:: python - - tune.run( - my_trainable, - stop={"training_iteration": 10, "mean_accuracy": 0.98} - ) - -For more flexibility, you can pass in a function instead. If a function is passed in, it must take ``(trial_id, result)`` as arguments and return a boolean (``True`` if trial should be stopped and ``False`` otherwise). - -.. 
code-block:: python - - - def stopper(trial_id, result): - return result["mean_accuracy"] / result["training_iteration"] > 5 - - tune.run(my_trainable, stop=stopper) - -Finally, you can implement the ``Stopper`` abstract class for stopping entire experiments. For example, the following example stops all trials after the criteria is fulfilled by any individual trial, and prevents new ones from starting: - -.. code-block:: python - - from ray.tune import Stopper - - class CustomStopper(Stopper): - def __init__(self): - self.should_stop = False - - def __call__(self, trial_id, result): - if not self.should_stop and result['foo'] > 10: - self.should_stop = True - return self.should_stop - - def stop_all(self): - """Returns whether to stop trials and prevent new ones from starting.""" - return self.should_stop - - stopper = CustomStopper() - tune.run(my_trainable, stop=stopper) - - -Note that in the above example the currently running trials will not stop immediately but will do so once their current iterations are complete. - -Auto-Filled Results -------------------- - -During training, Tune will automatically fill certain fields if not already provided. All of these can be used as stopping conditions or in the Scheduler/Search Algorithm specification. - -.. literalinclude:: ../../python/ray/tune/result.py - :language: python - :start-after: __sphinx_doc_begin__ - :end-before: __sphinx_doc_end__ - -The following fields will automatically show up on the console output, if provided: - -1. ``episode_reward_mean`` -2. ``mean_loss`` -3. ``mean_accuracy`` -4. ``timesteps_this_iter`` (aggregated into ``timesteps_total``). - - -TensorBoard ------------ - -To visualize learning in tensorboard, install tensorboardX: - -.. code-block:: bash - - $ pip install tensorboardX - -Then, after you run a experiment, you can visualize your experiment with TensorBoard by specifying the output directory of your results. 
Note that if you are running Ray on a remote cluster, you can forward the tensorboard port to your local machine through SSH using ``ssh -L 6006:localhost:6006
``: - -.. code-block:: bash - - $ tensorboard --logdir=~/ray_results/my_experiment - -If you are running Ray on a remote multi-user cluster where you do not have sudo access, you can run the following commands to make sure tensorboard is able to write to the tmp directory: - -.. code-block:: bash - - $ export TMPDIR=/tmp/$USER; mkdir -p $TMPDIR; tensorboard --logdir=~/ray_results - -.. image:: ray-tune-tensorboard.png - -If using TF2, Tune also automatically generates TensorBoard HParams output, as shown below: - -.. code-block:: python - - tune.run( - ..., - config={ - "lr": tune.grid_search([1e-5, 1e-4]), - "momentum": tune.grid_search([0, 0.9]) - } - ) - -.. image:: images/tune-hparams.png - - -Logging -------- - -You can pass in your own logging mechanisms to output logs in custom formats as follows: - -.. code-block:: python - - from ray.tune.logger import DEFAULT_LOGGERS - - tune.run( - MyTrainableClass, - name="experiment_name", - loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2) - ) - -These loggers will be called along with the default Tune loggers. All loggers must inherit the Logger interface (:ref:`logger-interface`). Tune enables default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py `__ for implementation details. An example can be found in `logging_example.py `__. See the :ref:`Logging API `. - -Uploading/Syncing ------------------ - -Tune automatically syncs the trial folder on remote nodes back to the head node. This requires the ray cluster to be started with the `autoscaler `__. -By default, local syncing requires rsync to be installed. You can customize the sync command with the ``sync_to_driver`` argument in ``tune.run`` by providing either a function or a string. - -If a string is provided, then it must include replacement fields ``{source}`` and ``{target}``, like ``rsync -savz -e "ssh -i ssh_key.pem" {source} {target}``. Alternatively, a function can be provided with the following signature: - -.. 
code-block:: python - - def custom_sync_func(source, target): - sync_cmd = "rsync {source} {target}".format( - source=source, - target=target) - sync_process = subprocess.Popen(sync_cmd, shell=True) - sync_process.wait() - - tune.run( - MyTrainableClass, - name="experiment_name", - sync_to_driver=custom_sync_func, - ) - -When syncing results back to the driver, the source would be a path similar to ``ubuntu@192.0.0.1:/home/ubuntu/ray_results/trial1``, and the target would be a local path. -This custom sync command would be also be used in node failures, where the source argument would be the path to the trial directory and the target would be a remote path. The `sync_to_driver` would be invoked to push a checkpoint to new node for a queued trial to resume. - -If an upload directory is provided, Tune will automatically sync results to the given directory, natively supporting standard S3/gsutil commands. -You can customize this to specify arbitrary storages with the ``sync_to_cloud`` argument. This argument is similar to ``sync_to_cloud`` in that it supports strings with the same replacement fields and arbitrary functions. See `syncer.py `__ for implementation details. - -.. code-block:: python - - tune.run( - MyTrainableClass, - name="experiment_name", - sync_to_cloud=custom_sync_func, - ) - - -Debugging ---------- - -By default, Tune will run hyperparameter evaluations on multiple processes. However, if you need to debug your training process, it may be easier to do everything on a single process. You can force all Ray functions to occur on a single process with ``local_mode`` by calling the following before ``tune.run``. - -.. code-block:: python - - ray.init(local_mode=True) - -Note that some behavior such as writing to files by depending on the current working directory in a Trainable and setting global process variables may not work as expected. 
Local mode with multiple configuration evaluations will interleave computation, so it is most naturally used when running a single configuration evaluation. - - -Further Questions or Issues? ----------------------------- - -You can post questions or issues or feedback through the following channels: - -1. `ray-dev@googlegroups.com`_: For discussions about development or any general - questions and feedback. -2. `StackOverflow`_: For questions about how to use Ray. -3. `GitHub Issues`_: For bug reports and feature requests. - -.. _`ray-dev@googlegroups.com`: https://groups.google.com/forum/#!forum/ray-dev -.. _`StackOverflow`: https://stackoverflow.com/questions/tagged/ray -.. _`GitHub Issues`: https://github.com/ray-project/ray/issues diff --git a/doc/source/tune.rst b/doc/source/tune.rst index 62718cb8c..d06826881 100644 --- a/doc/source/tune.rst +++ b/doc/source/tune.rst @@ -1,3 +1,5 @@ +.. _tune-index: + Tune: Scalable Hyperparameter Tuning ==================================== @@ -8,7 +10,7 @@ Tune: Scalable Hyperparameter Tuning Tune is a Python library for experiment execution and hyperparameter tuning at any scale. Core features: * Launch a multi-node :ref:`distributed hyperparameter sweep ` in less than 10 lines of code. - * Supports any machine learning framework, including PyTorch, XGBoost, MXNet, and Keras. See `examples here `_. + * Supports any machine learning framework, including PyTorch, XGBoost, MXNet, and Keras. See :ref:`examples here `. * Natively `integrates with optimization libraries `_ such as `HyperOpt `_, `Bayesian Optimization `_, and `Facebook Ax `_. * Choose among `scalable algorithms `_ such as `Population Based Training (PBT)`_, `Vizier's Median Stopping Rule`_, `HyperBand/ASHA`_. * Visualize results with `TensorBoard `__. @@ -21,24 +23,16 @@ Tune is a Python library for experiment execution and hyperparameter tuning at a For more information, check out: + * :ref:`Tune in 60 Seconds `: A quick overview of Tune and its key concepts. 
+ * :ref:`Tune Guides and Examples `: Examples, Tutorials, and Guides for how to use Tune. * `Code `__: GitHub repository for Tune. - * `User Guide `__: A comprehensive overview on how to use Tune's features. - * `Tutorial Notebooks `__: Our tutorial notebooks of using Tune with Keras or PyTorch. -**Try out a tutorial notebook on Colab**: - -.. raw:: html - - - Tune Tutorial - Quick Start ----------- To run this example, install the following: ``pip install 'ray[tune]' torch torchvision``. - This example runs a small grid search to train a convolutional neural network using PyTorch and Tune. .. literalinclude:: ../../python/ray/tune/tests/example.py diff --git a/doc/source/tune/.gitignore b/doc/source/tune/.gitignore index 8f18b41db..ca3c7a515 100644 --- a/doc/source/tune/.gitignore +++ b/doc/source/tune/.gitignore @@ -1 +1 @@ -generated_guides/ +tutorials/ diff --git a/doc/source/tune/guides/README.rst b/doc/source/tune/_tutorials/README.rst similarity index 100% rename from doc/source/tune/guides/README.rst rename to doc/source/tune/_tutorials/README.rst diff --git a/doc/source/tune-examples.rst b/doc/source/tune/_tutorials/overview.rst similarity index 54% rename from doc/source/tune-examples.rst rename to doc/source/tune/_tutorials/overview.rst index ef2b8dcb0..a05eeaf54 100644 --- a/doc/source/tune-examples.rst +++ b/doc/source/tune/_tutorials/overview.rst @@ -1,15 +1,132 @@ +.. _tune-guides-overview: + +Tutorials, User Guides, Examples +================================ + +In this section, you can find material on how to use Tune and its various features. If any of the materials is out of date or broken, or if you'd like to add an example to this page, feel free to raise an issue on our Github repository. + + +Tutorials +--------- + +Take a look at any of the below tutorials to get started with Tune. + +.. raw:: html + +
+ +.. customgalleryitem:: + :tooltip: A gentle 60 second tour of core Tune concepts. + :figure: /images/tune-workflow.png + :description: :doc:`A gentle 60 second tour of Tune ` + +.. customgalleryitem:: + :tooltip: A simple Tune walkthrough. + :figure: /images/tune.png + :description: :doc:`A walkthrough to setup your first Tune experiment ` + +.. raw:: html + +
+ +.. toctree:: + :hidden: + + tune-60-seconds.rst + tune-tutorial.rst + + +User Guides +----------- + +These pages will demonstrate the various features and configurations of Tune. + +.. raw:: html + +
+ +.. customgalleryitem:: + :tooltip: A guide to Tune features. + :figure: /images/tune.png + :description: :doc:`A guide to Tune features ` + +.. customgalleryitem:: + :tooltip: A simple guide to Population-based Training + :figure: /images/tune-pbt-small.png + :description: :doc:`A simple guide to Population-based Training ` + +.. customgalleryitem:: + :tooltip: A guide to distributed hyperparameter tuning + :figure: /images/tune.png + :description: :doc:`A guide to distributed hyperparameter tuning ` + +.. raw:: html + +
+ +.. toctree:: + :hidden: + + tune-usage.rst + tune-advanced-tutorial.rst + tune-distributed.rst + +Colab Exercises +--------------- + +Learn how to use Tune in your browser with the following Colab-based exercises. + +.. raw:: html + + + + + + + + + + + + + + + + + + + + + + + + +
Exercise DescriptionLibraryColab Link
Basics of using Tune.TF/Keras + + Tune Tutorial + +
Using Search algorithms and Trial Schedulers to optimize your model.Pytorch + + Tune Tutorial + +
Using Population-Based Training (PBT).Pytorch + + Tune Tutorial + +
+ +Tutorial source files `can be found here `_. + Tune Examples -============= +------------- .. Keep this in sync with ray/python/ray/tune/examples/README.rst -In our repository, we provide a variety of examples for the various use cases and features of Tune. - If any example is broken, or if you'd like to add an example to this page, feel free to raise an issue on our Github repository. General Examples ----------------- +~~~~~~~~~~~~~~~~ - `async_hyperband_example `__: Example of using a Trainable class with AsyncHyperBandScheduler. - `hyperband_example `__: Example of using a Trainable class with HyperBandScheduler. Also uses the Experiment class API for specifying the experiment configuration. Also uses the AsyncHyperBandScheduler. @@ -18,7 +135,7 @@ General Examples - `logging_example `__: Example of custom loggers and custom trial directory naming. Search Algorithm Examples -------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~ - `Ax example `__: Optimize a Hartmann function with `Ax `_ with 4 parallel workers. - `HyperOpt Example `__: Optimizes a basic function using the function-based API and the HyperOptSearch (SearchAlgorithm wrapper for HyperOpt TPE). @@ -26,7 +143,7 @@ Search Algorithm Examples - `Bayesian Optimization example `__: Optimize a simple toy function using `Bayesian Optimization `_ with 4 parallel workers. Tensorflow/Keras Examples -------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~ - `tune_mnist_keras `__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune. - `pbt_memnn_example `__: Example of training a Memory NN on bAbI with Keras using PBT. @@ -34,27 +151,27 @@ Tensorflow/Keras Examples PyTorch Examples ----------------- +~~~~~~~~~~~~~~~~ - `mnist_pytorch `__: Converts the PyTorch MNIST example to use Tune with the function-based API. Also shows how to easily convert something relying on argparse to use Tune. 
- `mnist_pytorch_trainable `__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end. XGBoost Example ---------------- +~~~~~~~~~~~~~~~ -- `xgboost_example `__: Trains a basic XGBoost model with Tune with the function-based API and a XGBoost callback. +- `xgboost_example `__: Trains a basic XGBoost model with Tune with the function-based API and an XGBoost callback. LightGBM Example ----------------- +~~~~~~~~~~~~~~~~ - `lightgbm_example `__: Trains a basic LightGBM model with Tune with the function-based API and a LightGBM callback. Contributed Examples --------------------- +~~~~~~~~~~~~~~~~~~~~ - `pbt_tune_cifar10_with_keras `__: A contributed example of tuning a Keras model on CIFAR10 with the PopulationBasedTraining scheduler. -- `genetic_example `__: Optimizing the michalewicz function using the contributed GeneticSearch search algorithm with AsyncHyperBandScheduler. +- `genetic_example `__: Optimizing the michalewicz function using the contributed GeneticSearch algorithm with AsyncHyperBandScheduler. - `tune_cifar10_gluon `__: MXNet Gluon example to use Tune with the function-based API on CIFAR-10 dataset. diff --git a/doc/source/tune/_tutorials/tune-60-seconds.rst b/doc/source/tune/_tutorials/tune-60-seconds.rst new file mode 100644 index 000000000..28493bab1 --- /dev/null +++ b/doc/source/tune/_tutorials/tune-60-seconds.rst @@ -0,0 +1,193 @@ +.. _tune-60-seconds: + +Tune in 60 Seconds +================== + +Let's quickly walk through the key concepts you need to know to use Tune. In this guide, we'll be covering the following: + +.. contents:: + :local: + :depth: 1 + +Tune takes a user-defined Python function or class and evaluates it on a set of hyperparameter configurations. Each hyperparameter configuration evaluation is called a *trial*, and Tune runs multiple trials in parallel, leveraging Search Algorithms and Trial Schedulers to optimize your hyperparameters. + +.. 
image:: /images/tune-workflow.png + +Trainables +---------- + +To allow Tune to optimize your model, Tune will need to control your training process. This is done via the Trainable API. Each *trial* corresponds to one instance of a Trainable; Tune will create multiple instances of the Trainable. + +The Trainable API is where you specify how to set up your model and track intermediate training progress. There are two types of Trainables - a **function-based API** for fast prototyping, and a **class-based API** that unlocks many Tune features such as checkpointing and pausing. + +.. code-block:: python + + from ray import tune + + class Trainable(tune.Trainable): + """Tries to iteratively find the password.""" + + def _setup(self, config): + self.iter = 0 + self.password = 1024 + + def _train(self): + """Execute one step of 'training'. This function will be called iteratively""" + self.iter += 1 + return { + "accuracy": abs(self.iter - self.password), + "training_iteration": self.iter # Tune will automatically provide this. + } + + def _stop(self): + # perform any cleanup necessary. + pass + +Function API example: + +.. code-block:: python + + def trainable(config): + """ + Args: + config (dict): Parameters provided from the search algorithm + or variant generation. + """ + + while True: + # ... + tune.track.log(**kwargs) + +.. tip:: Do not use ``tune.track.log`` within a ``Trainable`` class. + +See the documentation: :ref:`trainable-docs`. + +tune.run +-------- + +Use ``tune.run`` to execute hyperparameter tuning using the core Ray APIs. This function manages your distributed experiment and provides many features such as logging, checkpointing, and early stopping. + +.. code-block:: python + + # Pass in a Trainable class or function to tune.run. + tune.run(trainable) + + # Run 10 trials (each trial is one instance of a Trainable). Tune runs in + # parallel and automatically determines concurrency.
+ tune.run(trainable, num_samples=10) + + # Run 1 trial, stop when trial has reached 10 iterations OR a mean accuracy of 0.98. + tune.run(my_trainable, stop={"training_iteration": 10, "mean_accuracy": 0.98}) + + # Run 1 trial, search over hyperparameters, stop after 10 iterations. + hyperparameters = {"lr": tune.uniform(0, 1), "momentum": tune.uniform(0, 1)} + tune.run(my_trainable, config=hyperparameters, stop={"training_iteration": 10}) + +This function will report status on the command line until all Trials stop: + +.. code-block:: bash + + == Status == + Memory usage on this node: 11.4/16.0 GiB + Using FIFO scheduling algorithm. + Resources requested: 4/12 CPUs, 0/0 GPUs, 0.0/3.17 GiB heap, 0.0/1.07 GiB objects + Result logdir: /Users/foo/ray_results/myexp + Number of trials: 4 (4 RUNNING) + +----------------------+----------+---------------------+-----------+--------+--------+----------------+-------+ + | Trial name | status | loc | param1 | param2 | acc | total time (s) | iter | + |----------------------+----------+---------------------+-----------+--------+--------+----------------+-------| + | MyTrainable_a826033a | RUNNING | 10.234.98.164:31115 | 0.303706 | 0.0761 | 0.1289 | 7.54952 | 15 | + | MyTrainable_a8263fc6 | RUNNING | 10.234.98.164:31117 | 0.929276 | 0.158 | 0.4865 | 7.0501 | 14 | + | MyTrainable_a8267914 | RUNNING | 10.234.98.164:31111 | 0.068426 | 0.0319 | 0.9585 | 7.0477 | 14 | + | MyTrainable_a826b7bc | RUNNING | 10.234.98.164:31112 | 0.729127 | 0.0748 | 0.1797 | 7.05715 | 14 | + +----------------------+----------+---------------------+-----------+--------+--------+----------------+-------+ + +See the documentation: :ref:`tune-run-ref`. + + +Search Algorithms +----------------- + +To optimize the hyperparameters of your training process, you will want to explore a “search space”. + +Search Algorithms are Tune modules that help explore a provided search space. 
They use previous results from evaluating different hyperparameters to suggest better hyperparameters. Tune has Search Algorithms that integrate with many popular **optimization** libraries, such as `Nevergrad `_ and `Hyperopt `_. + +.. code-block:: python + + # https://github.com/hyperopt/hyperopt/ + # pip install hyperopt + from hyperopt import hp + from ray.tune.suggest.hyperopt import HyperOptSearch + + # Create a HyperOpt search space + space = {"momentum": hp.uniform("momentum", 0, 20), "lr": hp.uniform("lr", 0, 1)} + # Pass the search space into Tune's HyperOpt wrapper and maximize accuracy + hyperopt = HyperOptSearch(space, metric="accuracy", mode="max") + + # Execute 20 trials using HyperOpt, stop after 20 iterations + max_iters = {"training_iteration": 20} + tune.run(trainable, search_alg=hyperopt, num_samples=20, stop=max_iters) + +See the documentation: :ref:`searchalg-ref`. + +Trial Schedulers +---------------- + +In addition, you can make your training process more efficient by stopping, pausing, or changing the hyperparameters of running trials. + +Trial Schedulers are Tune modules that adjust and change distributed training runs during execution. These modules can stop/pause/tweak the hyperparameters of running trials, making your hyperparameter tuning process much faster. Population-based training and HyperBand are examples of popular optimization algorithms implemented as Trial Schedulers. + +.. code-block:: python + + from ray.tune.schedulers import HyperBandScheduler + + # Create HyperBand scheduler and maximize accuracy + hyperband = HyperBandScheduler(metric="accuracy", mode="max") + + # Execute 20 trials using HyperBand using a search space + configs = {"lr": tune.uniform(0, 1), "momentum": tune.uniform(0, 1)} + tune.run(MyTrainableClass, num_samples=20, config=configs, scheduler=hyperband) + +Unlike **Search Algorithms**, Trial Schedulers do not select which hyperparameter configurations to evaluate. However, you can use them together.
+ +See the documentation: :ref:`schedulers-ref`. + + +Analysis +-------- + +After running a hyperparameter tuning job, you will want to analyze your results to determine what specific parameters are important and which hyperparameter values are the best. + +``tune.run`` returns an :ref:`Analysis ` object which has methods you can use for analyzing your results. This object can also retrieve all training runs as dataframes, allowing you to do ad-hoc data analysis over your results. + +.. code-block:: python + + analysis = tune.run(trainable, search_alg=algo, stop={"training_iteration": 20}) + + # Get the best hyperparameters + best_hyperparameters = analysis.get_best_config() + + # Get a dataframe for the max accuracy seen for each trial + df = analysis.dataframe(metric="mean_accuracy", mode="max") + +What's Next? +~~~~~~~~~~~~ + + +Now that you have a working understanding of Tune, check out: + + * :ref:`Tune Guides and Examples `: Examples and templates for using Tune with your preferred machine learning library. + * :ref:`tune-tutorial`: A simple tutorial that walks you through the process of setting up a Tune experiment. + * :ref:`tune-user-guide`: A comprehensive overview of Tune's features. + + +Further Questions or Issues? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Reach out to us if you have any questions or issues or feedback through the following channels: + +1. `StackOverflow`_: For questions about how to use Ray. +2. `GitHub Issues`_: For bug reports and feature requests. + +.. _`StackOverflow`: https://stackoverflow.com/questions/tagged/ray +.. 
_`GitHub Issues`: https://github.com/ray-project/ray/issues diff --git a/doc/source/tune/guides/plot_tune-advanced-tutorial.rst b/doc/source/tune/_tutorials/tune-advanced-tutorial.rst similarity index 100% rename from doc/source/tune/guides/plot_tune-advanced-tutorial.rst rename to doc/source/tune/_tutorials/tune-advanced-tutorial.rst diff --git a/doc/source/tune/guides/plot_tune-distributed.rst b/doc/source/tune/_tutorials/tune-distributed.rst similarity index 72% rename from doc/source/tune/guides/plot_tune-distributed.rst rename to doc/source/tune/_tutorials/tune-distributed.rst index a1f3ad193..a89b65388 100644 --- a/doc/source/tune/guides/plot_tune-distributed.rst +++ b/doc/source/tune/_tutorials/tune-distributed.rst @@ -115,7 +115,7 @@ Launching a cloud cluster If you have already have a list of nodes, go to :ref:`tune-distributed-local`. -Ray currently supports AWS and GCP. Follow the instructions below to launch nodes on AWS (using the Deep Learning AMI). See the `cluster setup documentation `_. Save the below cluster configuration (``tune-default.yaml``): +Ray currently supports AWS and GCP. Follow the instructions below to launch nodes on AWS (using the Deep Learning AMI). See the :ref:`cluster setup documentation `. Save the below cluster configuration (``tune-default.yaml``): .. literalinclude:: /../../python/ray/tune/examples/tune-default.yaml :language: yaml @@ -149,6 +149,33 @@ Analyze your results on TensorBoard by starting TensorBoard on the remote head m Note that you can customize the directory of results by running: ``tune.run(local_dir=..)``. You can then point TensorBoard to that directory to visualize results. You can also use `awless `_ for easy cluster management on AWS. +Syncing +------- + +Tune automatically syncs the trial folder on remote nodes back to the head node. This requires the ray cluster to be started with the :ref:`autoscaler `. +By default, local syncing requires rsync to be installed. 
You can customize the sync command with the ``sync_to_driver`` argument in ``tune.run`` by providing either a function or a string. + +If a string is provided, then it must include replacement fields ``{source}`` and ``{target}``, like ``rsync -savz -e "ssh -i ssh_key.pem" {source} {target}``. Alternatively, a function can be provided with the following signature: + +.. code-block:: python + + def custom_sync_func(source, target): + sync_cmd = "rsync {source} {target}".format( + source=source, + target=target) + sync_process = subprocess.Popen(sync_cmd, shell=True) + sync_process.wait() + + tune.run( + MyTrainableClass, + name="experiment_name", + sync_to_driver=custom_sync_func, + ) + +When syncing results back to the driver, the source would be a path similar to ``ubuntu@192.0.0.1:/home/ubuntu/ray_results/trial1``, and the target would be a local path. +This custom sync command is also used to restart trials after a failure: ``sync_to_driver`` is invoked to push a checkpoint to a new node so that a paused/pre-empted trial can resume. + + .. _tune-distributed-spot: Pre-emptible Instances (Cloud) @@ -245,12 +272,54 @@ You should see Tune eventually continue the trials on a different worker node. S You can also specify ``tune.run(upload_dir=...)`` to sync results with a cloud storage like S3, allowing you to persist results in case you want to start and stop your cluster automatically. +.. _tune-fault-tol: + +Fault Tolerance +--------------- + +Tune will automatically restart trials in case of trial failures/errors (if ``max_failures != 0``), both in the single node and distributed setting. + +Tune will restore trials from the latest checkpoint, where available. In the distributed setting, if using the autoscaler with ``rsync`` enabled, Tune will automatically sync the trial folder with the driver.
For example, if a node is lost while a trial (specifically, the corresponding Trainable actor of the trial) is still executing on that node and a checkpoint of the trial exists, Tune will wait until resources are available to begin executing the trial again. + +If the trial/actor is placed on a different node, Tune will automatically push the previous checkpoint file to that node and restore the remote trial actor state, allowing the trial to resume from the latest checkpoint even after failure. + +Recovering From Failures +~~~~~~~~~~~~~~~~~~~~~~~~ + +Tune automatically persists the progress of your entire experiment (a ``tune.run`` session), so if an experiment crashes or is otherwise cancelled, it can be resumed by passing one of True, False, "LOCAL", "REMOTE", or "PROMPT" to ``tune.run(resume=...)``. Note that this only works if trial checkpoints are detected, whether it be by manual or periodic checkpointing. + +**Settings:** + + - The default setting of ``resume=False`` creates a new experiment. + - ``resume="LOCAL"`` and ``resume=True`` restore the experiment from ``local_dir/[experiment_name]``. + - ``resume="REMOTE"`` syncs the upload dir down to the local dir and then restores the experiment from ``local_dir/[experiment_name]``. + - ``resume="PROMPT"`` will cause Tune to prompt you for whether you want to resume. You can always force a new experiment to be created by changing the experiment name. + +Note that trials will be restored to their last checkpoint. If trial checkpointing is not enabled, unfinished trials will be restarted from scratch. + +E.g.: + +.. code-block:: python + + tune.run( + my_trainable, + checkpoint_freq=10, + local_dir="~/path/to/results", + resume=True + ) + +Upon a second run, this will restore the entire experiment state from ``~/path/to/results/my_experiment_name``. Importantly, any changes to the experiment specification upon resume will be ignored.
For example, if the previous experiment has reached its termination, then resuming it with a new stop criterion will not run. The new experiment will terminate immediately after initialization. If you want to change the configuration, such as training more iterations, you can do so by restoring the checkpoint by setting ``restore=`` - note that this only works for a single trial. + +.. warning:: + + This feature is still experimental, so any provided Trial Scheduler or Search Algorithm will not be checkpointed and will not be able to resume. Only ``FIFOScheduler`` and ``BasicVariantGenerator`` will be supported. + .. _tune-distributed-common: Common Commands --------------- -Below are some commonly used commands for submitting experiments. Please see the `Autoscaler page `__ to see find more comprehensive documentation of commands. +Below are some commonly used commands for submitting experiments. Please see the :ref:`Autoscaler page ` to find more comprehensive documentation of commands. .. code-block:: bash diff --git a/doc/source/tune/guides/plot_tune-tutorial.rst b/doc/source/tune/_tutorials/tune-tutorial.rst similarity index 92% rename from doc/source/tune/guides/plot_tune-tutorial.rst rename to doc/source/tune/_tutorials/tune-tutorial.rst index fe935e52c..01e3b695d 100644 --- a/doc/source/tune/guides/plot_tune-tutorial.rst +++ b/doc/source/tune/_tutorials/tune-tutorial.rst @@ -1,5 +1,9 @@ -Tune Walkthrough -================ +.. _tune-tutorial: + +A Basic Tune Tutorial +===================== + +.. image:: /images/tune-api.svg This tutorial will walk you through the following process to setup a Tune experiment. Specifically, we'll leverage ASHA and Bayesian Optimization (via HyperOpt) via the following steps: @@ -14,7 +18,7 @@ This tutorial will walk you through the following process to setup a Tune experi .. 
code-block:: bash - $ pip install ray torch torchvision filelock + $ pip install ray torch torchvision We first run some imports: @@ -35,6 +39,8 @@ Notice that there's a couple helper functions in the above training script. You .. code:: python + EPOCH_SIZE = 20 + def train(model, optimizer, train_loader): model.train() for batch_idx, (data, target) in enumerate(train_loader): @@ -66,7 +72,7 @@ We can then plot the performance of this trial. Early Stopping with ASHA ~~~~~~~~~~~~~~~~~~~~~~~~ -Let's integrate an early stopping algorithm to our search - ASHA, a scalable algorithm for principled early stopping. +Let's integrate a Trial Scheduler to our search - ASHA, a scalable algorithm for principled early stopping. How does it work? On a high level, it terminates trials that are less promising and allocates more time and resources to more promising trials. See `this blog post `__ for more details. @@ -120,4 +126,4 @@ You can evaluate best trained model using the Analysis object to retrieve the be Next Steps ---------- -Take a look at the :ref`tune-user-guide` for a more comprehensive overview of Tune's features. +Take a look at the :ref:`tune-user-guide` for a more comprehensive overview of Tune's features. diff --git a/doc/source/tune/_tutorials/tune-usage.rst b/doc/source/tune/_tutorials/tune-usage.rst new file mode 100644 index 000000000..bbb43d0c0 --- /dev/null +++ b/doc/source/tune/_tutorials/tune-usage.rst @@ -0,0 +1,424 @@ +.. _tune-user-guide: + +Tune User Guide +=============== + +.. warning:: Before you continue, be sure to have read :ref:`tune-60-seconds`. + +This document provides an overview of the core concepts as well as some of the configurations for running Tune. + +.. contents:: :local: + +Parallelism / GPUs +------------------ + +.. tip:: To run everything sequentially, use :ref:`Ray Local Mode `. 
+ +Parallelism is determined by ``resources_per_trial`` (defaulting to 1 CPU, 0 GPU per trial) and the resources available to Tune (``ray.cluster_resources()``). + +Tune will allocate the specified GPU and CPU from ``resources_per_trial`` to each individual trial. A trial will not be scheduled unless at least that amount of resources is available, preventing the cluster from being overloaded. + +By default, Tune automatically runs N concurrent trials, where N is the number of CPUs (cores) on your machine. + +.. code-block:: python + + # If you have 4 CPUs on your machine, this will run 4 concurrent trials at a time. + tune.run(trainable, num_samples=10) + +You can override this parallelism with ``resources_per_trial``: + +.. code-block:: python + + # If you have 4 CPUs on your machine, this will run 2 concurrent trials at a time. + tune.run(trainable, num_samples=10, resources_per_trial={"cpu": 2}) + + # If you have 4 CPUs on your machine, this will run 1 trial at a time. + tune.run(trainable, num_samples=10, resources_per_trial={"cpu": 4}) + + # Fractional values are also supported, (i.e., {"cpu": 0.5}). + tune.run(trainable, num_samples=10, resources_per_trial={"cpu": 0.5}) + +To leverage GPUs, you must set ``gpu`` in ``resources_per_trial``. This will automatically set ``CUDA_VISIBLE_DEVICES`` for each trial. + +.. code-block:: python + + # If you have 8 GPUs, this will run 8 trials at once. + tune.run(trainable, num_samples=10, resources_per_trial={"gpu": 1}) + + # If you have 4 CPUs on your machine and 1 GPU, this will run 1 trial at a time. + tune.run(trainable, num_samples=10, resources_per_trial={"cpu": 2, "gpu": 1}) + +You can find an example of this in the `Keras MNIST example `__. + +.. warning:: If 'gpu' is not set, ``CUDA_VISIBLE_DEVICES`` environment variable will be set as empty, disallowing GPU access. + +To attach to a Ray cluster, simply run ``ray.init`` before ``tune.run``: + +.. 
code-block:: python + + # Connect to an existing distributed Ray cluster + ray.init(address=) + tune.run(trainable, num_samples=100, resources_per_trial={"cpu": 2, "gpu": 1}) + +Search Space (Grid/Random) +-------------------------- + +.. warning:: If you use a Search Algorithm, you will need to use a different search space API. + +You can specify a grid search or random search via the dict passed into ``tune.run(config=)``. + +.. code-block:: python + + parameters = { + "qux": tune.sample_from(lambda spec: 2 + 2), + "bar": tune.grid_search([True, False]), + "foo": tune.grid_search([1, 2, 3]), + "baz": "asd", # a constant value + } + + tune.run(trainable, config=parameters) + +By default, each random variable and grid search point is sampled once. To take multiple random samples, add ``num_samples: N`` to the experiment config. If ``grid_search`` is provided as an argument, the grid will be repeated ``num_samples`` times. + +.. code-block:: python + :emphasize-lines: 13 + + # num_samples=10 repeats the 3x3 grid search 10 times, for a total of 90 trials + tune.run( + my_trainable, + name="my_trainable", + config={ + "alpha": tune.uniform(100), + "beta": tune.sample_from(lambda spec: spec.config.alpha * np.random.normal()), + "nn_layers": [ + tune.grid_search([16, 64, 256]), + tune.grid_search([16, 64, 256]), + ], + }, + num_samples=10 + ) + +Read about this in the :ref:`Grid/Random Search API ` page. + +Reporting Metrics +----------------- + +You can log arbitrary values and metrics in both training APIs: + +.. code-block:: python + + def trainable(config): + num_epochs = 100 + for i in range(num_epochs): + accuracy = model.train() + metric_1 = f(model) + metric_2 = model.get_loss() + tune.track.log(acc=accuracy, metric_foo=metric_1, bar=metric_2) + + class Trainable(tune.Trainable): + ... 
+ + def _train(self): # this is called iteratively + accuracy = self.model.train() + metric_1 = f(self.model) + metric_2 = self.model.get_loss() + # don't call track.log here! + return dict(acc=accuracy, metric_foo=metric_1, bar=metric_2) + +During training, Tune will automatically log the below metrics in addition to the user-provided values. All of these can be used as stopping conditions or passed as a parameter to Trial Schedulers/Search Algorithms. + +.. literalinclude:: ../../../../python/ray/tune/result.py + :language: python + :start-after: __sphinx_doc_begin__ + :end-before: __sphinx_doc_end__ + +.. _tune-checkpoint: + +Checkpointing +------------- + +When running a hyperparameter search, Tune can automatically and periodically save/checkpoint your model. Checkpointing is used for: + + * Saving a model throughout training. + * Fault-tolerance when using pre-emptible machines. + * Pausing trials when using Trial Schedulers such as HyperBand and PBT. + +To enable checkpointing, you must implement a :ref:`Trainable class ` (functions using the function-based API are not checkpointable, since they never return control back to their caller). + +Checkpointing assumes that the model state will be saved to disk on whichever node the Trainable is running on. You can checkpoint with three different mechanisms: manually, periodically, and at termination. + +**Manual Checkpointing**: A custom Trainable can manually trigger checkpointing by returning ``should_checkpoint: True`` (or ``tune.result.SHOULD_CHECKPOINT: True``) in the result dictionary of ``_train``. This can be especially helpful in spot instances: + +.. code-block:: python + + def _train(self): + # training code + result = {"mean_accuracy": accuracy} + if detect_instance_preemption(): + result.update(should_checkpoint=True) + return result + + +**Periodic Checkpointing**: periodic checkpointing can be used to provide fault-tolerance for experiments. 
This can be enabled by setting ``checkpoint_freq=N`` and ``max_failures=M`` to checkpoint trials every *N* iterations and recover from up to *M* crashes per trial, e.g.: + +.. code-block:: python + + tune.run( + my_trainable, + checkpoint_freq=10, + max_failures=5, + ) + +**Checkpointing at Termination**: The ``checkpoint_freq`` may not coincide with the exact end of an experiment. If you want a checkpoint to be created at the end +of a trial, you can additionally set ``checkpoint_at_end=True``: + +.. code-block:: python + :emphasize-lines: 4 + + tune.run( + my_trainable, + checkpoint_freq=10, + checkpoint_at_end=True, + max_failures=5, + ) + +The checkpoint will be saved at a path that looks like ``local_dir/exp_name/trial_name/checkpoint_x/``, where ``x`` is the number of iterations so far when the checkpoint is saved. To restore the checkpoint, you can use the ``restore`` argument and specify a checkpoint file. By doing this, you can change parts of the experiment's configuration, such as the experiment's name or the number of training iterations: + +.. code-block:: python + + # Restored previous trial from the given checkpoint + tune.run( + "PG", + name="RestoredExp", # The name can be different. + stop={"training_iteration": 10}, # train 5 more iterations than previous + restore="~/ray_results/Original/PG_/checkpoint_5/checkpoint-5", + config={"env": "CartPole-v0"}, + ) + +Handling Large Datasets +----------------------- + +You often will want to compute a large object (e.g., training data, model weights) on the driver and use that object within each trial. Tune provides a ``pin_in_object_store`` utility function that can be used to broadcast such large objects. Objects pinned in this way will never be evicted from the Ray object store while the driver process is running, and can be efficiently retrieved from any task via ``get_pinned_object``. + +.. 
code-block:: python + + import ray + from ray import tune + from ray.tune.utils import pin_in_object_store, get_pinned_object + + import numpy as np + + ray.init() + + # X_id can be referenced in closures + X_id = pin_in_object_store(np.random.random(size=100000000)) + + def f(config, reporter): + X = get_pinned_object(X_id) + # use X + + tune.run(f) + +Stopping Trials +--------------- + +You can control when trials are stopped early by passing the ``stop`` argument to ``tune.run``. This argument takes either a dictionary or a function. + +If a dictionary is passed in, the keys may be any field in the return result of ``tune.track.log`` in the Function API or ``_train()`` (including the results from ``_train`` and auto-filled metrics). + +In the example below, each trial will be stopped either when it completes 10 iterations OR when it reaches a mean accuracy of 0.98. These metrics are assumed to be **increasing**. + +.. code-block:: python + + # training_iteration is an auto-filled metric by Tune. + tune.run( + my_trainable, + stop={"training_iteration": 10, "mean_accuracy": 0.98} + ) + +For more flexibility, you can pass in a function instead. If a function is passed in, it must take ``(trial_id, result)`` as arguments and return a boolean (``True`` if trial should be stopped and ``False`` otherwise). + +.. code-block:: python + + + def stopper(trial_id, result): + return result["mean_accuracy"] / result["training_iteration"] > 5 + + tune.run(my_trainable, stop=stopper) + +Finally, you can implement the ``Stopper`` abstract class for stopping entire experiments. For example, the following example stops all trials after the criteria is fulfilled by any individual trial, and prevents new ones from starting: + +.. 
code-block:: python + + from ray.tune import Stopper + + class CustomStopper(Stopper): + def __init__(self): + self.should_stop = False + + def __call__(self, trial_id, result): + if not self.should_stop and result['foo'] > 10: + self.should_stop = True + return self.should_stop + + def stop_all(self): + """Returns whether to stop trials and prevent new ones from starting.""" + return self.should_stop + + stopper = CustomStopper() + tune.run(my_trainable, stop=stopper) + + +Note that in the above example the currently running trials will not stop immediately but will do so once their current iterations are complete. See the :ref:`tune-stop-ref` documentation. + +Logging/Tensorboard +------------------- + +Tune will log the results of each trial to a subfolder under a specified local dir, which defaults to ``~/ray_results``. +Tune by default will log results for Tensorboard, CSV, and JSON formats. + +.. code-block:: bash + + # This logs to 2 different trial folders: + # ~/ray_results/trainable_name/trial_name_1 and ~/ray_results/trainable_name/trial_name_2 + # trainable_name and trial_name are autogenerated. + tune.run(trainable, num_samples=2) + +Learn about how to customize logging paths and outputs: :ref:`loggers-docstring`. + +Tune automatically outputs Tensorboard files during ``tune.run``. To visualize learning in tensorboard, install tensorboardX: + +.. code-block:: bash + + $ pip install tensorboardX + +Then, after you run an experiment, you can visualize your experiment with TensorBoard by specifying the output directory of your results. + +.. code-block:: bash + + $ tensorboard --logdir=~/ray_results/my_experiment + +If you are running Ray on a remote multi-user cluster where you do not have sudo access, you can run the following commands to make sure tensorboard is able to write to the tmp directory: + +.. code-block:: bash + + $ export TMPDIR=/tmp/$USER; mkdir -p $TMPDIR; tensorboard --logdir=~/ray_results + +.. 
image:: ../../ray-tune-tensorboard.png + +If using TF2, Tune also automatically generates TensorBoard HParams output, as shown below: + +.. code-block:: python + + tune.run( + ..., + config={ + "lr": tune.grid_search([1e-5, 1e-4]), + "momentum": tune.grid_search([0, 0.9]) + } + ) + +.. image:: ../../images/tune-hparams.png + +Console Output +-------------- + +The following fields will automatically show up on the console output, if provided: + +1. ``episode_reward_mean`` +2. ``mean_loss`` +3. ``mean_accuracy`` +4. ``timesteps_this_iter`` (aggregated into ``timesteps_total``). + +Below is an example of the console output: + +.. code-block:: bash + + == Status == + Memory usage on this node: 11.4/16.0 GiB + Using FIFO scheduling algorithm. + Resources requested: 4/12 CPUs, 0/0 GPUs, 0.0/3.17 GiB heap, 0.0/1.07 GiB objects + Result logdir: /Users/foo/ray_results/myexp + Number of trials: 4 (4 RUNNING) + +----------------------+----------+---------------------+-----------+--------+--------+----------------+-------+ + | Trial name | status | loc | param1 | param2 | acc | total time (s) | iter | + |----------------------+----------+---------------------+-----------+--------+--------+----------------+-------| + | MyTrainable_a826033a | RUNNING | 10.234.98.164:31115 | 0.303706 | 0.0761 | 0.1289 | 7.54952 | 15 | + | MyTrainable_a8263fc6 | RUNNING | 10.234.98.164:31117 | 0.929276 | 0.158 | 0.4865 | 7.0501 | 14 | + | MyTrainable_a8267914 | RUNNING | 10.234.98.164:31111 | 0.068426 | 0.0319 | 0.9585 | 7.0477 | 14 | + | MyTrainable_a826b7bc | RUNNING | 10.234.98.164:31112 | 0.729127 | 0.0748 | 0.1797 | 7.05715 | 14 | + +----------------------+----------+---------------------+-----------+--------+--------+----------------+-------+ + +You can use a :ref:`Reporter <tune-reporter-doc>` object to customize the console output. 
+ + +Uploading Results +----------------- + +If an upload directory is provided, Tune will automatically sync results from the ``local_dir`` to the given directory, natively supporting standard S3/gsutil URIs. + +.. code-block:: python + + tune.run( + MyTrainableClass, + local_dir="~/ray_results", + upload_dir="s3://my-log-dir" + ) + +You can customize this to specify arbitrary storages with the ``sync_to_cloud`` argument in ``tune.run``. This argument supports either strings with the same replacement fields OR arbitrary functions. + +.. code-block:: python + + tune.run( + MyTrainableClass, + upload_dir="s3://my-log-dir", + sync_to_cloud=custom_sync_str_or_func, + ) + +If a string is provided, then it must include replacement fields ``{source}`` and ``{target}``, like ``s3 sync {source} {target}``. Alternatively, a function can be provided with the following signature: + +.. code-block:: python + + def custom_sync_func(source, target): + # do arbitrary things inside + sync_cmd = "s3 {source} {target}".format( + source=source, + target=target) + sync_process = subprocess.Popen(sync_cmd, shell=True) + sync_process.wait() + +.. _tune-debugging: + +Debugging +--------- + +By default, Tune will run hyperparameter evaluations on multiple processes. However, if you need to debug your training process, it may be easier to do everything on a single process. You can force all Ray functions to occur on a single process with ``local_mode`` by calling the following before ``tune.run``. + +.. code-block:: python + + ray.init(local_mode=True) + +Local mode with multiple configuration evaluations will interleave computation, so it is most naturally used when running a single configuration evaluation. + +Stopping after the first failure +-------------------------------- + +By default, ``tune.run`` will continue executing until all trials have terminated or errored. To stop the entire Tune run as soon as **any** trial errors: + +.. 
code-block:: python + + tune.run(trainable, fail_fast=True) + +This is useful when you are trying to set up a large hyperparameter experiment. + + +Further Questions or Issues? +---------------------------- + +You can post questions or issues or feedback through the following channels: + +1. `StackOverflow`_: For questions about how to use Ray. +2. `GitHub Issues`_: For bug reports and feature requests. + +.. _`StackOverflow`: https://stackoverflow.com/questions/tagged/ray +.. _`GitHub Issues`: https://github.com/ray-project/ray/issues diff --git a/doc/source/tune/api_docs/analysis.rst b/doc/source/tune/api_docs/analysis.rst index 902fe4abf..d0c42f1bb 100644 --- a/doc/source/tune/api_docs/analysis.rst +++ b/doc/source/tune/api_docs/analysis.rst @@ -1,5 +1,7 @@ -Analysis/Logging (tune.analysis / tune.logger) -============================================== +.. _tune-analysis-docs: + +Analysis (tune.analysis) +======================== Analyzing Results ----------------- @@ -52,56 +54,3 @@ Analysis .. autoclass:: ray.tune.Analysis :members: -.. _loggers-docstring: - -Loggers (tune.logger) ---------------------- - -Viskit -~~~~~~ - -Tune automatically integrates with Viskit via the ``CSVLogger`` outputs. To use VisKit (you may have to install some dependencies), run: - -.. code-block:: bash - - $ git clone https://github.com/rll/rllab.git - $ python rllab/rllab/viskit/frontend.py ~/ray_results/my_experiment - -The nonrelevant metrics (like timing stats) can be disabled on the left to show only the relevant ones (like accuracy, loss, etc.). - -.. image:: /ray-tune-viskit.png - - -.. _logger-interface: - -Logger -~~~~~~ - -.. autoclass:: ray.tune.logger.Logger - -UnifiedLogger -~~~~~~~~~~~~~ - -.. autoclass:: ray.tune.logger.UnifiedLogger - -TBXLogger -~~~~~~~~~ - -.. autoclass:: ray.tune.logger.TBXLogger - -JsonLogger -~~~~~~~~~~ - -.. autoclass:: ray.tune.logger.JsonLogger - -CSVLogger -~~~~~~~~~ - -.. 
autoclass:: ray.tune.logger.CSVLogger - -MLFLowLogger -~~~~~~~~~~~~ - -Tune also provides a default logger for `MLFlow `_. You can install MLFlow via ``pip install mlflow``. An example can be found `mlflow_example.py `__. Note that this currently does not include artifact logging support. For this, you can use the native MLFlow APIs inside your Trainable definition. - -.. autoclass:: ray.tune.logger.MLFLowLogger diff --git a/doc/source/tune/api_docs/execution.rst b/doc/source/tune/api_docs/execution.rst index 3b82d3256..48e32d716 100644 --- a/doc/source/tune/api_docs/execution.rst +++ b/doc/source/tune/api_docs/execution.rst @@ -1,6 +1,8 @@ Training (tune.run, tune.Experiment) ==================================== +.. _tune-run-ref: + tune.run -------- @@ -16,6 +18,7 @@ tune.Experiment .. autofunction:: ray.tune.Experiment +.. _tune-stop-ref: Stopper (tune.Stopper) ---------------------- diff --git a/doc/source/tune/api_docs/logging.rst b/doc/source/tune/api_docs/logging.rst new file mode 100644 index 000000000..9b29a4fcc --- /dev/null +++ b/doc/source/tune/api_docs/logging.rst @@ -0,0 +1,120 @@ +.. _loggers-docstring: + +Loggers (tune.logger) +===================== + +Tune has default loggers for Tensorboard, CSV, and JSON formats. + +Logging Path +------------ + +Tune will log the results of each trial to a subfolder under a specified local dir, which defaults to ``~/ray_results``. + +.. code-block:: python + + # This logs to 2 different trial folders: + # ~/ray_results/trainable_name/trial_name_1 and ~/ray_results/trainable_name/trial_name_2 + # trainable_name and trial_name are autogenerated. + tune.run(trainable, num_samples=2) + +You can specify the ``local_dir`` and ``trainable_name``: + +.. code-block:: python + + # This logs to 2 different trial folders: + # ./results/test_experiment/trial_name_1 and ./results/test_experiment/trial_name_2 + # Only trial_name is autogenerated. 
+ tune.run(trainable, num_samples=2, local_dir="./results", name="test_experiment") + +To specify custom trial folder names, you can use the ``trial_name_creator`` argument +to ``tune.run``. This takes a function with the following signature: + +.. code-block:: python + + def trial_name_string(trial): + """ + Args: + trial (Trial): A generated trial object. + + Returns: + trial_name (str): String representation of Trial. + """ + return str(trial) + + tune.run( + MyTrainableClass, + name="example-experiment", + num_samples=1, + trial_name_creator=trial_name_string + ) + +See the documentation on Trials: :ref:`trial-docstring`. + + +Custom Loggers +-------------- + +You can pass in your own logging mechanisms to output logs in custom formats as follows: + +.. code-block:: python + + from ray.tune.logger import DEFAULT_LOGGERS + + tune.run( + MyTrainableClass, + name="experiment_name", + loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2) + ) + +These loggers will be called along with the default Tune loggers. All loggers must inherit the Logger interface (:ref:`logger-interface`). You can also check out `logger.py `__ for implementation details. + +An example can be found in `logging_example.py `__. + +Viskit +------ + +Tune automatically integrates with `Viskit `_ via the ``CSVLogger`` outputs. To use VisKit (you may have to install some dependencies), run: + +.. code-block:: bash + + $ git clone https://github.com/rll/rllab.git + $ python rllab/rllab/viskit/frontend.py ~/ray_results/my_experiment + +The nonrelevant metrics (like timing stats) can be disabled on the left to show only the relevant ones (like accuracy, loss, etc.). + +.. image:: /ray-tune-viskit.png + + +.. _logger-interface: + +Logger +------ + +.. autoclass:: ray.tune.logger.Logger + +UnifiedLogger +------------- + +.. autoclass:: ray.tune.logger.UnifiedLogger + +TBXLogger +--------- + +.. autoclass:: ray.tune.logger.TBXLogger + +JsonLogger +---------- + +.. 
autoclass:: ray.tune.logger.JsonLogger + +CSVLogger +--------- + +.. autoclass:: ray.tune.logger.CSVLogger + +MLFLowLogger +------------ + +Tune also provides a default logger for `MLFlow `_. You can install MLFlow via ``pip install mlflow``. An example can be found `mlflow_example.py `__. Note that this currently does not include artifact logging support. For this, you can use the native MLFlow APIs inside your Trainable definition. + +.. autoclass:: ray.tune.logger.MLFLowLogger diff --git a/doc/source/tune/api_docs/overview.rst b/doc/source/tune/api_docs/overview.rst index 73c009b81..66946bf7a 100644 --- a/doc/source/tune/api_docs/overview.rst +++ b/doc/source/tune/api_docs/overview.rst @@ -1,3 +1,5 @@ +.. _tune-api-ref: + Tune API Reference ================== @@ -16,6 +18,7 @@ on `Github`_. grid_random.rst suggestion.rst schedulers.rst + logging.rst internals.rst client.rst cli.rst diff --git a/doc/source/tune/api_docs/reporters.rst b/doc/source/tune/api_docs/reporters.rst index 4b784aa67..a5e41fd71 100644 --- a/doc/source/tune/api_docs/reporters.rst +++ b/doc/source/tune/api_docs/reporters.rst @@ -1,3 +1,5 @@ +.. _tune-reporter-doc: + Console Output (Reporters) ========================== @@ -73,20 +75,22 @@ The default reporting style can also be overriden more broadly by extending the tune.run(my_trainable, progress_reporter=CustomReporter()) -ProgressReporter ----------------- - -.. autoclass:: ray.tune.ProgressReporter - :members: CLIReporter ----------- .. autoclass:: ray.tune.CLIReporter + :members: add_metric_column JupyterNotebookReporter ----------------------- .. autoclass:: ray.tune.JupyterNotebookReporter + :members: add_metric_column +ProgressReporter +---------------- + +.. 
autoclass:: ray.tune.ProgressReporter + :members: diff --git a/doc/source/tune/api_docs/schedulers.rst b/doc/source/tune/api_docs/schedulers.rst index cc06f51b9..a67bbceed 100644 --- a/doc/source/tune/api_docs/schedulers.rst +++ b/doc/source/tune/api_docs/schedulers.rst @@ -1,5 +1,7 @@ -Schedulers (tune.schedulers) -============================ +.. _schedulers-ref: + +Trial Schedulers (tune.schedulers) +================================== FIFOScheduler ~~~~~~~~~~~~~ diff --git a/doc/source/tune/api_docs/suggestion.rst b/doc/source/tune/api_docs/suggestion.rst index 47c10d6a2..e422921fa 100644 --- a/doc/source/tune/api_docs/suggestion.rst +++ b/doc/source/tune/api_docs/suggestion.rst @@ -1,3 +1,5 @@ +.. _searchalg-ref: + Search Algorithms (tune.suggest) ================================ diff --git a/doc/source/tune/api_docs/trainable.rst b/doc/source/tune/api_docs/trainable.rst index 3dabdcdce..0a23165c1 100644 --- a/doc/source/tune/api_docs/trainable.rst +++ b/doc/source/tune/api_docs/trainable.rst @@ -3,7 +3,7 @@ Training (tune.Trainable, tune.track) ===================================== -Training can be done with either a **Class API** (``tune.Trainable``) < or **function-based API** (``track.log``). +Training can be done with either a **Class API** (``tune.Trainable``) or **function-based API** (``track.log``). You can use the **function-based API** for fast prototyping. On the other hand, the ``tune.Trainable`` interface supports checkpoint/restore functionality and provides more control for advanced algorithms. 
@@ -41,26 +41,26 @@ The Trainable **class API** will require users to subclass ``ray.tune.Trainable` from ray import tune class Guesser(tune.Trainable): - """Randomly picks 10 number from [1, 10000) to find the password.""" + """Randomly picks a number from [1, 10000) to find the password.""" def _setup(self, config): - self.config = config + self.guess = config["guess"] + self.iter = 0 self.password = 1024 def _train(self): - """Execute one step of 'training'.""" - result_dict = {"diff": abs(self.config['guess'] - self.password)} - return result_dict + """Execute one step of 'training'. This function will be called iteratively""" + self.iter += 1 + self.guess += 1 + return { + "accuracy": abs(self.guess - self.password), + "training_iteration": self.iter # Tune will automatically provide this. + } - def _stop(self): - # perform any cleanup necessary. - pass analysis = tune.run( Guesser, - stop={ - "training_iteration": 1, - }, + stop={"training_iteration": 10}, num_samples=10, config={ "guess": tune.randint(1, 10000) @@ -109,6 +109,28 @@ Use ``validate_save_restore`` to catch ``_save``/``_restore`` errors before exec validate_save_restore(MyTrainableClass) validate_save_restore(MyTrainableClass, use_object_store=True) + +Advanced Resource Allocation +---------------------------- + +Trainables can themselves be distributed. If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will want to set ``extra_cpu`` or ``extra_gpu`` inside ``tune.run`` to reserve extra resource slots. For example, if a trainable class requires 1 GPU itself, but also launches 4 actors, each using another GPU, then you should set ``"gpu": 1, "extra_gpu": 4``. + +.. 
code-block:: python + :emphasize-lines: 4-8 + + tune.run( + my_trainable, + name="my_trainable", + resources_per_trial={ + "cpu": 1, + "gpu": 1, + "extra_gpu": 4 + } + ) + +The ``Trainable`` also provides the ``default_resource_requests`` interface to automatically declare the ``resources_per_trial`` based on the given configuration. + + Advanced: Reusing Actors ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/tune/guides/overview.rst b/doc/source/tune/guides/overview.rst deleted file mode 100644 index 9b0ef243e..000000000 --- a/doc/source/tune/guides/overview.rst +++ /dev/null @@ -1,38 +0,0 @@ -Tune Guides and Tutorials -========================= - - -Tune takes a user-defined Python function or class and evaluates it on a set of hyperparameter configurations. - -Each hyperparameter configuration evaluation is called a *trial*, and multiple trials are run in parallel. Configurations are either generated by Tune or drawn from a user-specified **search algorithm**. The trials are scheduled and managed by a **trial scheduler**. - -.. image:: /images/tune-api.svg - - -.. customgalleryitem:: - :tooltip: Getting started with Tune. - :figure: /images/tune.png - :description: :doc:`plot_tune-tutorial` - -.. customgalleryitem:: - :tooltip: A simple guide to Population-based Training - :figure: /images/tune-pbt-small.png - :description: :doc:`plot_tune-advanced-tutorial` - -.. customgalleryitem:: - :tooltip: Distributed Tuning - :figure: /images/tune.png - :description: :doc:`plot_tune-distributed` - - -.. toctree:: - :hidden: - - plot_tune-tutorial.rst - plot_tune-advanced-tutorial.rst - plot_tune-distributed.rst - - - -.. :figure: /images/param_actor.png -