mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 16:58:23 +08:00
[Tune] Added XGBoost tutorial and template (#9060)
* Added XGBoost tutorial and template * XGBoost tutorial: Cut some clutter * Apply suggestions from code review Co-authored-by: Richard Liaw <rliaw@berkeley.edu> * Added XGboost logo * Fixed further references Co-authored-by: Kai Fricke <kai@anyscale.com> Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 115 KiB |
@@ -0,0 +1,680 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="211.64905mm"
|
||||
height="56.237804mm"
|
||||
viewBox="0 0 211.64905 56.237804"
|
||||
version="1.1"
|
||||
id="svg1175"
|
||||
inkscape:version="1.0 (4035a4f, 2020-05-01)"
|
||||
sodipodi:docname="tune-xgboost-ensemble.svg">
|
||||
<defs
|
||||
id="defs1169" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="1.5487054"
|
||||
inkscape:cx="324.93356"
|
||||
inkscape:cy="167.26118"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
inkscape:document-rotation="0"
|
||||
showgrid="false"
|
||||
inkscape:window-width="1745"
|
||||
inkscape:window-height="943"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="23"
|
||||
inkscape:window-maximized="0" />
|
||||
<metadata
|
||||
id="metadata1172">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title />
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
transform="translate(-8.2646953,-8.2003976)"
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1">
|
||||
<g
|
||||
style="fill:#646464;fill-opacity:1"
|
||||
id="g981"
|
||||
transform="translate(-20.914502,12.407242)">
|
||||
<rect
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="47.443779"
|
||||
y="10.443781" />
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 52.112562,18 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931" />
|
||||
</g>
|
||||
<rect
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-2"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="38.529278"
|
||||
y="32.851025" />
|
||||
<path
|
||||
id="path931-6-1"
|
||||
d="m 43.198061,31.407242 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-3"
|
||||
d="m 43.198061,40.407242 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<g
|
||||
style="fill:#646464;fill-opacity:1"
|
||||
id="g1118"
|
||||
transform="translate(-20.914502,12.407242)">
|
||||
<g
|
||||
style="fill:#646464;fill-opacity:1"
|
||||
id="g977"
|
||||
transform="translate(-4.9999985)">
|
||||
<rect
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="40.443779"
|
||||
y="20.443781" />
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.1125,19 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6" />
|
||||
</g>
|
||||
<path
|
||||
id="path931-3-6"
|
||||
d="m 40.112562,28 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<rect
|
||||
y="42.851021"
|
||||
x="8.5292788"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7"
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8"
|
||||
d="m 13.197999,41.407242 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-6"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="20.529278"
|
||||
y="42.851021" />
|
||||
<path
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 25.197998,41.407242 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-9" />
|
||||
<rect
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-3"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="32.529278"
|
||||
y="42.851021" />
|
||||
<path
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 37.197999,41.407242 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-6" />
|
||||
<rect
|
||||
y="42.851021"
|
||||
x="44.529278"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7-3-4"
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8-6-1"
|
||||
d="m 49.198,41.407242 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path1125"
|
||||
d="M 19.197998,31.407242 H 43.198061"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path1129"
|
||||
d="m 13.197999,41.407242 h 12"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path1131"
|
||||
d="m 37.197999,41.407242 h 12"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<g
|
||||
id="g1853"
|
||||
transform="translate(-15.342183,2.407242)">
|
||||
<rect
|
||||
style="fill:#b4b464;fill-opacity:1;stroke:#141414;stroke-width:0.853376;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none"
|
||||
id="rect1837"
|
||||
width="19.146624"
|
||||
height="9.1466246"
|
||||
x="37.033566"
|
||||
y="6.4266882" />
|
||||
<text
|
||||
id="text1848"
|
||||
y="13.511477"
|
||||
x="38.212898"
|
||||
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
|
||||
xml:space="preserve"><tspan
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.264583"
|
||||
y="13.511477"
|
||||
x="38.212898"
|
||||
id="tspan1846"
|
||||
sodipodi:role="line">Data</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 31.217986,17.793242 v 4.901646"
|
||||
id="path1857" />
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#141414;stroke-width:0.888;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none"
|
||||
id="rect1837-9"
|
||||
width="44.726048"
|
||||
height="8.7260494"
|
||||
x="8.7395678"
|
||||
y="54.936249" />
|
||||
<text
|
||||
id="text1848-8"
|
||||
y="61.810753"
|
||||
x="19.842289"
|
||||
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
|
||||
xml:space="preserve"><tspan
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.264583"
|
||||
y="61.810753"
|
||||
x="19.842289"
|
||||
id="tspan1846-5"
|
||||
sodipodi:role="line">acc: 0.7</tspan></text>
|
||||
<g
|
||||
id="g2224"
|
||||
transform="translate(0,-0.29521561)">
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#141414;stroke-width:0.888;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none"
|
||||
id="rect1837-9-0"
|
||||
width="151.43695"
|
||||
height="9.0579548"
|
||||
x="67.992767"
|
||||
y="55.231464" />
|
||||
<text
|
||||
id="text1848-8-5"
|
||||
y="62.387329"
|
||||
x="132.45094"
|
||||
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
|
||||
xml:space="preserve"><tspan
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.264583"
|
||||
y="62.387329"
|
||||
x="132.45094"
|
||||
id="tspan1846-5-1"
|
||||
sodipodi:role="line">acc: 0.9</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g1886-8"
|
||||
transform="translate(47.534944,2.407242)">
|
||||
<g
|
||||
style="fill:#646464;fill-opacity:1"
|
||||
id="g981-8"
|
||||
transform="translate(-9.179197,9.9999996)">
|
||||
<rect
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-4"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="47.443779"
|
||||
y="10.443781" />
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 52.112562,18 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-2" />
|
||||
</g>
|
||||
<rect
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-2-0"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="50.264584"
|
||||
y="30.443781" />
|
||||
<path
|
||||
id="path931-6-1-0"
|
||||
d="m 54.933366,29 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-3-64"
|
||||
d="m 54.933366,38 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<g
|
||||
style="fill:#646464;fill-opacity:1"
|
||||
id="g1118-5"
|
||||
transform="translate(-9.179197,9.9999996)">
|
||||
<g
|
||||
style="fill:#646464;fill-opacity:1"
|
||||
id="g977-5"
|
||||
transform="translate(-4.9999985)">
|
||||
<rect
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-6"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="40.443779"
|
||||
y="20.443781" />
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.1125,19 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-4" />
|
||||
</g>
|
||||
<path
|
||||
id="path931-3-6-5"
|
||||
d="m 40.112562,28 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<rect
|
||||
y="40.443779"
|
||||
x="20.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7-7"
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8-3"
|
||||
d="m 24.933304,39 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-6-9"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="32.264584"
|
||||
y="40.443779" />
|
||||
<path
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 36.933303,39 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-9-1" />
|
||||
<rect
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-3-40"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="44.264584"
|
||||
y="40.443779" />
|
||||
<path
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.933304,39 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-6-0" />
|
||||
<rect
|
||||
y="40.443779"
|
||||
x="56.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7-3-4-5"
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8-6-1-7"
|
||||
d="m 60.933305,39 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path1125-9"
|
||||
d="M 30.933303,29 H 54.933366"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path1129-6"
|
||||
d="m 24.933304,39 h 12"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
id="path1131-8"
|
||||
d="m 48.933304,39 h 12"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<g
|
||||
id="g1853-8"
|
||||
transform="translate(-3.6068775)">
|
||||
<rect
|
||||
style="fill:#b4b464;fill-opacity:1;stroke:#141414;stroke-width:0.853376;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none"
|
||||
id="rect1837-2"
|
||||
width="19.146624"
|
||||
height="9.1466246"
|
||||
x="37.033566"
|
||||
y="6.4266882" />
|
||||
<text
|
||||
id="text1848-6"
|
||||
y="13.511477"
|
||||
x="38.212898"
|
||||
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
|
||||
xml:space="preserve"><tspan
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.264583"
|
||||
y="13.511477"
|
||||
x="38.212898"
|
||||
id="tspan1846-7"
|
||||
sodipodi:role="line">Data</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 42.953291,15.386 v 4.901646"
|
||||
id="path1857-3" />
|
||||
</g>
|
||||
<g
|
||||
transform="translate(100.90353,2.407242)"
|
||||
id="g1886-8-2">
|
||||
<g
|
||||
transform="translate(-9.179197,9.9999996)"
|
||||
id="g981-8-2"
|
||||
style="fill:#646464;fill-opacity:1">
|
||||
<rect
|
||||
y="10.443781"
|
||||
x="47.443779"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-4-2"
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-2-4"
|
||||
d="m 52.112562,18 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<rect
|
||||
y="30.443781"
|
||||
x="50.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-2-0-3"
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.888;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 54.933366,29 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-1-0-9" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 54.933366,38 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-3-64-2" />
|
||||
<g
|
||||
transform="translate(-9.179197,9.9999996)"
|
||||
id="g1118-5-5"
|
||||
style="fill:#646464;fill-opacity:1">
|
||||
<g
|
||||
transform="translate(-4.9999985)"
|
||||
id="g977-5-3"
|
||||
style="fill:#646464;fill-opacity:1">
|
||||
<rect
|
||||
y="20.443781"
|
||||
x="40.443779"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-6-6"
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-4-3"
|
||||
d="m 45.1125,19 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 40.112562,28 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-3-6-5-2" />
|
||||
</g>
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-7-1"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="20.264584"
|
||||
y="40.443779" />
|
||||
<path
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 24.933304,39 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-3-8" />
|
||||
<rect
|
||||
y="40.443779"
|
||||
x="32.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7-6-9-9"
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8-9-1-6"
|
||||
d="m 36.933303,39 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<rect
|
||||
y="40.443779"
|
||||
x="44.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7-3-40-0"
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.888;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8-6-0-6"
|
||||
d="m 48.933304,39 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-3-4-5-0"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="56.264584"
|
||||
y="40.443779" />
|
||||
<path
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 60.933305,39 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-6-1-7-7" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 30.933303,29 H 54.933366"
|
||||
id="path1125-9-4" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 24.933304,39 h 12"
|
||||
id="path1129-6-4" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.933304,39 h 12"
|
||||
id="path1131-8-1" />
|
||||
<g
|
||||
transform="translate(-3.6068775)"
|
||||
id="g1853-8-6">
|
||||
<rect
|
||||
y="6.4266882"
|
||||
x="37.033566"
|
||||
height="9.1466246"
|
||||
width="19.146624"
|
||||
id="rect1837-2-6"
|
||||
style="fill:#b4b464;fill-opacity:1;stroke:#141414;stroke-width:0.853376;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
|
||||
x="38.212898"
|
||||
y="13.511477"
|
||||
id="text1848-6-4"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan1846-7-0"
|
||||
x="38.212898"
|
||||
y="13.511477"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.264583">Data</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path1857-3-8"
|
||||
d="m 42.953291,15.386 v 4.901646"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<g
|
||||
transform="translate(154.27214,2.407242)"
|
||||
id="g1886-8-9">
|
||||
<g
|
||||
transform="translate(-9.179197,9.9999996)"
|
||||
id="g981-8-7"
|
||||
style="fill:#646464;fill-opacity:1">
|
||||
<rect
|
||||
y="10.443781"
|
||||
x="47.443779"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-4-23"
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-2-0"
|
||||
d="m 52.112562,18 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<rect
|
||||
y="30.443781"
|
||||
x="50.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-2-0-5"
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 54.933366,29 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-1-0-8" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 54.933366,38 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-3-64-8" />
|
||||
<g
|
||||
transform="translate(-9.179197,9.9999996)"
|
||||
id="g1118-5-6"
|
||||
style="fill:#646464;fill-opacity:1">
|
||||
<g
|
||||
transform="translate(-4.9999985)"
|
||||
id="g977-5-4"
|
||||
style="fill:#646464;fill-opacity:1">
|
||||
<rect
|
||||
y="20.443781"
|
||||
x="40.443779"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-6-1"
|
||||
style="fill:#646464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.887561;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-4-9"
|
||||
d="m 45.1125,19 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<path
|
||||
style="fill:#646464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 40.112562,28 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-3-6-5-0" />
|
||||
</g>
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.888;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-7-5"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="20.264584"
|
||||
y="40.443779" />
|
||||
<path
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 24.933304,39 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-3-2" />
|
||||
<rect
|
||||
y="40.443779"
|
||||
x="32.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7-6-9-90"
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8-9-1-3"
|
||||
d="m 36.933303,39 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<rect
|
||||
y="40.443779"
|
||||
x="44.264584"
|
||||
height="7.1124392"
|
||||
width="9.1124344"
|
||||
id="rect10-6-7-3-40-2"
|
||||
style="fill:#b46464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1" />
|
||||
<path
|
||||
id="path931-6-8-6-0-2"
|
||||
d="m 48.933304,39 c 0,0.333333 0,0.666666 0,1"
|
||||
style="fill:#b46464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<rect
|
||||
style="fill:#64b464;fill-opacity:1;fill-rule:evenodd;stroke:#141414;stroke-width:0.529167;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect10-6-7-3-4-5-7"
|
||||
width="9.1124344"
|
||||
height="7.1124392"
|
||||
x="56.264584"
|
||||
y="40.443779" />
|
||||
<path
|
||||
style="fill:#64b464;fill-opacity:1;stroke:#000000;stroke-width:0.225123px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 60.933305,39 c 0,0.333333 0,0.666666 0,1"
|
||||
id="path931-6-8-6-1-7-5" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 30.933303,29 H 54.933366"
|
||||
id="path1125-9-1" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 24.933304,39 h 12"
|
||||
id="path1129-6-1" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:round;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.933304,39 h 12"
|
||||
id="path1131-8-0" />
|
||||
<g
|
||||
transform="translate(-3.6068775)"
|
||||
id="g1853-8-62">
|
||||
<rect
|
||||
y="6.4266882"
|
||||
x="37.033566"
|
||||
height="9.1466246"
|
||||
width="19.146624"
|
||||
id="rect1837-2-2"
|
||||
style="fill:#b4b464;fill-opacity:1;stroke:#141414;stroke-width:0.853376;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:80;stroke-dasharray:none" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
|
||||
x="38.212898"
|
||||
y="13.511477"
|
||||
id="text1848-6-8"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan1846-7-09"
|
||||
x="38.212898"
|
||||
y="13.511477"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.264583">Data</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path1857-3-6"
|
||||
d="m 42.953291,15.386 v 4.901646"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 25.217291,49.858659 v 4.782606"
|
||||
id="path2244" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 84.532546,49.739094 V 54.5217"
|
||||
id="path2246" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 149.85629,49.739094 v 4.902171"
|
||||
id="path2248" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="M 179.22529,50.09779 V 54.5217"
|
||||
id="path2250" />
|
||||
<path
|
||||
style="fill:none;stroke:#000000;stroke-width:0.888;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="M 60.665415,64.065607 V 8.2003976"
|
||||
id="path2252" />
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 32 KiB |
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 116 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
@@ -25,6 +25,11 @@ Take a look at any of the below tutorials to get started with Tune.
|
||||
:figure: /images/tune.png
|
||||
:description: :doc:`A walkthrough to setup your first Tune experiment <tune-tutorial>`
|
||||
|
||||
.. customgalleryitem::
|
||||
:tooltip: Tuning XGBoost parameters.
|
||||
:figure: /images/xgboost_logo.png
|
||||
:description: :doc:`A guide to tuning XGBoost parameters with Tune <tune-xgboost>`
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
@@ -34,6 +39,7 @@ Take a look at any of the below tutorials to get started with Tune.
|
||||
|
||||
tune-60-seconds.rst
|
||||
tune-tutorial.rst
|
||||
tune-xgboost.rst
|
||||
|
||||
|
||||
User Guides
|
||||
@@ -161,6 +167,7 @@ PyTorch Examples
|
||||
XGBoost Example
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
- :ref:`XGBoost tutorial <tune-xgboost>`: A guide to tuning XGBoost parameters with Tune.
|
||||
- `xgboost_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/xgboost_example.py>`__: Trains a basic XGBoost model with Tune with the function-based API and an XGBoost callback.
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,518 @@
|
||||
.. _tune-xgboost:
|
||||
|
||||
Tuning XGBoost parameters
|
||||
=========================
|
||||
|
||||
XGBoost is currently one of the most popular machine learning algorithms. It performs
|
||||
very well on a large selection of tasks, and was the key to success in many Kaggle
|
||||
competitions.
|
||||
|
||||
.. image:: /images/xgboost_logo.png
|
||||
:width: 200px
|
||||
:alt: XGBoost
|
||||
:align: center
|
||||
:target: https://xgboost.readthedocs.io/en/latest/
|
||||
|
||||
|
||||
This tutorial will give you a quick introduction to XGBoost, show you how
|
||||
to train an XGBoost model, and then guide you on how to optimize XGBoost
|
||||
parameters using Tune to get the best performance. We tackle the following topics:
|
||||
|
||||
.. contents:: Table of contents
|
||||
:depth: 2
|
||||
|
||||
.. note::
|
||||
|
||||
To run this tutorial, you will need to install the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ pip install xgboost
|
||||
|
||||
What is XGBoost
|
||||
---------------
|
||||
|
||||
XGBoost is an acronym for e\ **X**\ treme **G**\ radient **Boost**\ ing. Internally,
|
||||
XGBoost uses `decision trees <https://en.wikipedia.org/wiki/Decision_tree>`_. Instead
|
||||
of training just one large decision tree, XGBoost and other related algorithms train
|
||||
many small decision trees. The intuition behind this is that even though single
|
||||
decision trees can be inaccurate and suffer from high variance,
|
||||
combining the output of a large number of these weak learners can actually lead to
|
||||
a strong learner, resulting in better predictions and less variance.
|
||||
|
||||
.. figure:: /images/tune-xgboost-ensemble.svg
|
||||
:alt: Single vs. ensemble learning
|
||||
|
||||
A single decision tree (left) might be able to get to an accuracy of 70%
|
||||
for a binary classification task. By combining the output of several small
|
||||
decision trees, an ensemble learner (right) might end up with a higher accuracy
|
||||
of 90%.
|
||||
|
||||
Boosting algorithms start with a single small decision tree and evaluate how well
|
||||
it predicts the given examples. When building the next tree, those samples that have
|
||||
been misclassified before have a higher chance of being used to generate the tree.
|
||||
This is useful because it avoids overfitting to samples that can be easily classified
|
||||
and instead tries to come up with models that are able to classify hard examples, too.
|
||||
Please see `here for a more thorough introduction to bagging and boosting algorithms
|
||||
<https://towardsdatascience.com/ensemble-methods-bagging-boosting-and-stacking-c9214a10a205>`_.
|
||||
|
||||
There are many boosting algorithms. At their core, they are all very similar. XGBoost
|
||||
uses second-order derivatives to find splits that maximize the *gain* (the inverse of
|
||||
the *loss*) - hence the name. In practice, there really is no drawback in using
|
||||
XGBoost over other boosting algorithms - in fact, it usually shows the best performance.
|
||||
|
||||
Training a simple XGBoost classifier
|
||||
------------------------------------
|
||||
|
||||
Let's first see how a simple XGBoost classifier can be trained. We'll use the
|
||||
``breast_cancer``-Dataset included in the ``sklearn`` dataset collection. This is
|
||||
a binary classification dataset. Given 30 different input features, our task is to
|
||||
learn to identify subjects with breast cancer and those without.
|
||||
|
||||
Here is the full code to train a simple XGBoost model:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import numpy as np
|
||||
import sklearn.datasets
|
||||
import sklearn.metrics
|
||||
from sklearn.model_selection import train_test_split
|
||||
import xgboost as xgb
|
||||
|
||||
|
||||
def train_breast_cancer(config):
|
||||
# Load dataset
|
||||
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
|
||||
# Split into train and test set
|
||||
train_x, test_x, train_y, test_y = train_test_split(
|
||||
data, labels, test_size=0.25)
|
||||
# Build input matrices for XGBoost
|
||||
train_set = xgb.DMatrix(train_x, label=train_y)
|
||||
test_set = xgb.DMatrix(test_x, label=test_y)
|
||||
# Train the classifier
|
||||
bst = xgb.train(config, train_set, evals=[(test_set, "eval")], verbose_eval=False)
|
||||
# Predict labels for the test set
|
||||
preds = bst.predict(test_set)
|
||||
pred_labels = np.rint(preds)
|
||||
# Return prediction accuracy
|
||||
accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
|
||||
return accuracy
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
accuracy = train_breast_cancer({
|
||||
"objective": "binary:logistic"
|
||||
})
|
||||
print("Accuracy: {:.2f}".format(accuracy))
|
||||
|
||||
As you can see, the code is quite simple. First, the dataset is loaded and split
|
||||
into a ``test`` and ``train`` set. The XGBoost model is trained with ``xgb.train()``
|
||||
and the predictions for the test set are obtained with ``bst.predict()``. Lastly, we
|
||||
return the accuracy of our predictions. Even in this simple example, most runs result
|
||||
in a good accuracy of over ``0.90``.
|
||||
|
||||
Maybe you have noticed the ``config`` parameter we pass to the XGBoost algorithm. This
|
||||
is a ``dict`` in which you can specify parameters for the XGBoost algorithm. In this
|
||||
simple example, the only parameter we passed is the ``objective`` parameter. The value
|
||||
``binary:logistic`` tells XGBoost that we aim to train a logistic regression model for
|
||||
a binary classification task. You can find an overview of all valid objectives
|
||||
`here in the XGBoost documentation <https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters>`_.
|
||||
|
||||
XGBoost Hyperparameters
|
||||
-----------------------
|
||||
Even with the default settings, XGBoost was able to get to a good accuracy on the
|
||||
breast cancer dataset. However, as in many machine learning algorithms, there are
|
||||
many knobs to tune which might lead to even better performance. Let's explore some of
|
||||
them below.
|
||||
|
||||
Maximum tree depth
|
||||
..................
|
||||
Remember that XGBoost internally uses many decision tree models to come up with
|
||||
predictions. When training a decision tree, we need to tell the algorithm how
|
||||
large the tree may get. The parameter for this is called the tree *depth*.
|
||||
|
||||
.. figure:: /images/tune-xgboost-depth.svg
|
||||
:alt: Decision tree depth
|
||||
:align: center
|
||||
|
||||
In this image, the left tree has a depth of 2, and the right tree a depth of 3.
|
||||
Note that with each level, :math:`2^{(d-1)}` splits are added, where *d* is the depth
|
||||
of the tree.
|
||||
|
||||
Tree depth is a property that concerns the model complexity. If you only allow short
|
||||
trees, the models are likely not very precise - they underfit the data. If you allow
|
||||
very large trees, the single models are likely to overfit to the data. In practice,
|
||||
a number between ``2`` and ``6`` is often a good starting point for this parameter.
|
||||
|
||||
XGBoost's default value is ``6``.
|
||||
|
||||
Minimum child weight
|
||||
....................
|
||||
When a decision tree creates new leaves, it splits up the remaining data at one node
|
||||
into two groups. If there are only few samples in one of these groups, it often
|
||||
doesn't make sense to split it further. One of the reasons for this is that the
|
||||
model is harder to train when we have fewer samples.
|
||||
|
||||
.. figure:: /images/tune-xgboost-weight.svg
|
||||
:alt: Minimum child weight
|
||||
:align: center
|
||||
|
||||
In this example, we start with 100 examples. At the first node, they are split
|
||||
into 4 and 96 samples, respectively. In the next step, our model might find
|
||||
that it doesn't make sense to split the 4 examples more. It thus only continues
|
||||
to add leaves on the right side.
|
||||
|
||||
The parameter used by the model to decide if it makes sense to split a node is called
|
||||
the *minimum child weight*. In the case of linear regression, this is just the absolute
|
||||
number of samples required in each child. In other objectives, this value is determined
|
||||
using the weights of the examples, hence the name.
|
||||
|
||||
The larger the value, the more constrained the trees are and the less deep they will be.
|
||||
This parameter thus also affects the model complexity. Values can range between 0
|
||||
and infinity and are dependent on the sample size. For our ca. 500 examples in the
|
||||
breast cancer dataset, values between ``0`` and ``10`` should be sensible.
|
||||
|
||||
XGBoost's default value is ``1``.
|
||||
|
||||
Subsample size
|
||||
..............
|
||||
Each decision tree we add is trained on a subsample of the total training dataset.
|
||||
The probabilities for the samples are weighted according to the XGBoost algorithm,
|
||||
but we can decide on which fraction of the samples we want to train each decision
|
||||
tree on.
|
||||
|
||||
Setting this value to ``0.7`` would mean that we randomly sample ``70%`` of the
|
||||
training dataset before each training iteration.
|
||||
|
||||
XGBoost's default value is ``1``.
|
||||
|
||||
Learning rate / Eta
|
||||
...................
|
||||
Remember that XGBoost sequentially trains many decision trees, and that later trees
|
||||
are more likely trained on data that has been misclassified by prior trees. In effect
|
||||
this means that earlier trees make decisions for easy samples (i.e. those samples that
|
||||
can easily be classified) and later trees make decisions for harder samples. It is then
|
||||
sensible to assume that the later trees are less accurate than earlier trees.
|
||||
|
||||
To address this fact, XGBoost uses a parameter called *Eta*, which is sometimes called
|
||||
the *learning rate*. Don't confuse this with learning rates from gradient descent!
|
||||
The original `paper on stochastic gradient boosting <https://www.sciencedirect.com/science/article/abs/pii/S0167947301000652>`_
|
||||
introduces this parameter like so:
|
||||
|
||||
.. math::
|
||||
F_m(x) = F_{m-1}(x) + \eta \cdot \gamma_{lm} \textbf{1}(x \in R_{lm})
|
||||
|
||||
This is just a complicated way to say that when we train a new decision tree,
|
||||
represented by :math:`\gamma_{lm} \textbf{1}(x \in R_{lm})`, we want to dampen
|
||||
its effect on the previous prediction :math:`F_{m-1}(x)` with a factor
|
||||
:math:`\eta`.
|
||||
|
||||
Typical values for this parameter are between ``0.01`` and ``0.3``.
|
||||
|
||||
XGBoost's default value is ``0.3``.
|
||||
|
||||
Number of boost rounds
|
||||
......................
|
||||
Lastly, we can decide on how many boosting rounds we perform, which means how
|
||||
many decision trees we ultimately train. When we do heavy subsampling or use a small
|
||||
learning rate, it might make sense to increase the number of boosting rounds.
|
||||
|
||||
XGBoost's default value is ``10``.
|
||||
|
||||
Putting it together
|
||||
...................
|
||||
Let's see what this looks like in code! We just need to adjust our ``config`` dict:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
if __name__ == "__main__":
|
||||
config = {
|
||||
"objective": "binary:logistic",
|
||||
"max_depth": 2,
|
||||
"min_child_weight": 0,
|
||||
"subsample": 0.8,
|
||||
"eta": 0.2
|
||||
}
|
||||
accuracy = train_breast_cancer(config)
|
||||
print("Accuracy: {:.2f}".format(accuracy))
|
||||
|
||||
The rest stays the same. Please note that we do not adjust the ``num_boost_round`` here.
|
||||
The result should also show a high accuracy of over 90%.
|
||||
|
||||
Tuning the configuration parameters
|
||||
-----------------------------------
|
||||
XGBoost's default parameters already lead to a good accuracy, and even our guesses in the
|
||||
last section should result in accuracies well above 90%. However, our guesses were
|
||||
just that: guesses. Often we do not know what combination of parameters would actually
|
||||
lead to the best results on a machine learning task.
|
||||
|
||||
Unfortunately, there are infinitely many combinations of hyperparameters we could try
|
||||
out. Should we combine ``max_depth=3`` with ``subsample=0.8`` or with ``subsample=0.9``?
|
||||
What about the other parameters?
|
||||
|
||||
This is where hyperparameter tuning comes into play. By using tuning libraries such as
|
||||
Ray Tune we can try out combinations of hyperparameters. Using sophisticated search
|
||||
strategies, these parameters can be selected so that they are likely to lead to good
|
||||
results (avoiding an expensive *exhaustive search*). Also, trials that do not perform
|
||||
well can be preemptively stopped to reduce waste of computing resources. Lastly, Ray Tune
|
||||
also takes care of training these runs in parallel, greatly increasing search speed.
|
||||
|
||||
Let's start with a basic example on how to use Tune for this. We just need to make
|
||||
a few changes to our code-block:
|
||||
|
||||
.. code-block:: python
|
||||
:emphasize-lines: 26,32,33,34,35,37,38,39,40,41
|
||||
|
||||
import numpy as np
|
||||
import sklearn.datasets
|
||||
import sklearn.metrics
|
||||
from sklearn.model_selection import train_test_split
|
||||
import xgboost as xgb
|
||||
|
||||
from ray import tune
|
||||
|
||||
|
||||
def train_breast_cancer(config):
|
||||
# Load dataset
|
||||
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
|
||||
# Split into train and test set
|
||||
train_x, test_x, train_y, test_y = train_test_split(
|
||||
data, labels, test_size=0.25)
|
||||
# Build input matrices for XGBoost
|
||||
train_set = xgb.DMatrix(train_x, label=train_y)
|
||||
test_set = xgb.DMatrix(test_x, label=test_y)
|
||||
# Train the classifier
|
||||
bst = xgb.train(config, train_set, evals=[(test_set, "eval")], verbose_eval=False)
|
||||
# Predict labels for the test set
|
||||
preds = bst.predict(test_set)
|
||||
pred_labels = np.rint(preds)
|
||||
# Return prediction accuracy
|
||||
accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
|
||||
tune.report(mean_accuracy=accuracy, done=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
config = {
|
||||
"objective": "binary:logistic",
|
||||
"max_depth": tune.randint(1, 9),
|
||||
"min_child_weight": tune.choice([1, 2, 3]),
|
||||
"subsample": tune.uniform(0.5, 1.0),
|
||||
"eta": tune.loguniform(1e-4, 1e-1)
|
||||
}
|
||||
tune.run(
|
||||
train_breast_cancer,
|
||||
resources_per_trial={"cpu": 1},
|
||||
config=config,
|
||||
num_samples=10)
|
||||
|
||||
As you can see, the changes in the actual training function are minimal. Instead of
|
||||
returning the accuracy value, we report it back to Tune using ``tune.report()``.
|
||||
Our ``config`` dictionary only changed slightly. Instead of passing hard-coded
|
||||
parameters, we tell Tune to choose values from a range of valid options. There are
|
||||
a number of options we have here, all of which are explained in
|
||||
:ref:`the Tune docs <tune-sample-docs>`.
|
||||
|
||||
For a brief explanation, this is what they do:
|
||||
|
||||
* ``tune.randint(min, max)`` chooses a random integer value between *min* and *max*.
|
||||
Note that *max* is exclusive, so it will not be sampled.
|
||||
* ``tune.choice([a, b, c])`` chooses one of the items of the list at random. Each item
|
||||
has the same chance to be sampled.
|
||||
* ``tune.uniform(min, max)`` samples a floating point number between *min* and *max*.
|
||||
Note that *max* is exclusive here, too.
|
||||
* ``tune.loguniform(min, max, base=10)`` samples a floating point number between *min* and *max*,
|
||||
but applies a logarithmic transformation to these boundaries first. Thus, this makes
|
||||
it easy to sample values from different orders of magnitude.
|
||||
|
||||
|
||||
|
||||
The ``num_samples=10`` option we pass to ``tune.run()`` means that we sample 10 different
|
||||
hyperparameter configurations from this search space.
|
||||
|
||||
The output of our training run could look like this:
|
||||
|
||||
.. code-block::
|
||||
:emphasize-lines: 6
|
||||
|
||||
+---------------------------------+------------+-------+-------------+-------------+--------------------+-------------+----------+--------+------------------+
|
||||
| Trial name | status | loc | eta | max_depth | min_child_weight | subsample | acc | iter | total time (s) |
|
||||
|---------------------------------+------------+-------+-------------+-------------+--------------------+-------------+----------+--------+------------------|
|
||||
| train_breast_cancer_c817a_00000 | TERMINATED | | 0.00334038 | 8 | 1 | 0.640256 | 0.93007 | 1 | 0.050081 |
|
||||
| train_breast_cancer_c817a_00001 | TERMINATED | | 0.00285335 | 4 | 3 | 0.951621 | 0.93007 | 1 | 0.0453899 |
|
||||
| train_breast_cancer_c817a_00002 | TERMINATED | | 0.0597631 | 5 | 2 | 0.96479 | 0.986014 | 1 | 0.0503612 |
|
||||
| train_breast_cancer_c817a_00003 | TERMINATED | | 0.000650095 | 6 | 2 | 0.923812 | 0.951049 | 1 | 0.0588872 |
|
||||
| train_breast_cancer_c817a_00004 | TERMINATED | | 0.00753275 | 1 | 1 | 0.973499 | 0.881119 | 1 | 0.0347321 |
|
||||
| train_breast_cancer_c817a_00005 | TERMINATED | | 0.000411214 | 5 | 1 | 0.672503 | 0.958042 | 1 | 0.0477931 |
|
||||
| train_breast_cancer_c817a_00006 | TERMINATED | | 0.0940201 | 5 | 2 | 0.711124 | 0.972028 | 1 | 0.069901 |
|
||||
| train_breast_cancer_c817a_00007 | TERMINATED | | 0.0372492 | 1 | 1 | 0.76303 | 0.895105 | 1 | 0.0496318 |
|
||||
| train_breast_cancer_c817a_00008 | TERMINATED | | 0.000140322 | 1 | 2 | 0.885415 | 0.909091 | 1 | 0.045424 |
|
||||
| train_breast_cancer_c817a_00009 | TERMINATED | | 0.000341654 | 5 | 3 | 0.720523 | 0.937063 | 1 | 0.0657773 |
|
||||
+---------------------------------+------------+-------+-------------+-------------+--------------------+-------------+----------+--------+------------------+
|
||||
|
||||
The best configuration we found used ``eta=0.0597631``, ``max_depth=5``,
|
||||
``min_child_weight=2``, ``subsample=0.96479`` and reached an accuracy of
|
||||
``0.986014``.
|
||||
|
||||
Early stopping
|
||||
--------------
|
||||
Currently, Tune samples 10 different hyperparameter configurations and trains a full
|
||||
XGBoost on all of them. In our small example, training is very fast. However,
|
||||
if training takes longer, a significant amount of computer resources is spent on trials
|
||||
that will eventually show a bad performance, e.g. a low accuracy. It would be good
|
||||
if we could identify these trials early and stop them, so we don't waste any resources.
|
||||
|
||||
This is where Tune's *Schedulers* shine. A Tune ``TrialScheduler`` is responsible
|
||||
for starting and stopping trials. Tune implements a number of different schedulers, each
|
||||
described :ref:`in the Tune documentation <tune-schedulers>`.
|
||||
For our example, we will use the ``AsyncHyperBandScheduler`` or ``ASHAScheduler``.
|
||||
|
||||
The basic idea of this scheduler: We sample a number of hyperparameter configurations.
|
||||
Each of these configurations is trained for a specific number of iterations.
|
||||
After these iterations, only the best performing hyperparameters are retained. These
|
||||
are selected according to some loss metric, usually an evaluation loss. This cycle is
|
||||
repeated until we end up with the best configuration.
|
||||
|
||||
The ``ASHAScheduler`` needs to know three things:
|
||||
|
||||
1. Which metric should be used to identify badly performing trials?
|
||||
2. Should this metric be maximized or minimized?
|
||||
3. How many iterations does each trial train for?
|
||||
|
||||
There are more parameters, which are explained in the
|
||||
:ref:`documentation <tune-scheduler-hyperband>`.
|
||||
|
||||
Lastly, we have to report the loss metric to Tune. We do this with a ``Callback`` that
|
||||
XGBoost accepts and calls after each training iteration. We also tell XGBoost which
|
||||
loss metrics to calculate in the ``eval_metric`` parameter. These are the metrics
|
||||
available in ``env.evaluation_result_list`` below.
|
||||
|
||||
.. code-block:: python
|
||||
:emphasize-lines: 11,12,13,26,42,44,45,46,47,48,49
|
||||
|
||||
import numpy as np
|
||||
import sklearn.datasets
|
||||
import sklearn.metrics
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
from sklearn.model_selection import train_test_split
|
||||
import xgboost as xgb
|
||||
|
||||
from ray import tune
|
||||
|
||||
|
||||
def XGBCallback(env):
|
||||
# After every training iteration, report loss to Tune
|
||||
tune.report(**dict(env.evaluation_result_list))
|
||||
|
||||
|
||||
def train_breast_cancer(config):
|
||||
# Load dataset
|
||||
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
|
||||
# Split into train and test set
|
||||
train_x, test_x, train_y, test_y = train_test_split(
|
||||
data, labels, test_size=0.25)
|
||||
# Build input matrices for XGBoost
|
||||
train_set = xgb.DMatrix(train_x, label=train_y)
|
||||
test_set = xgb.DMatrix(test_x, label=test_y)
|
||||
# Train the classifier
|
||||
bst = xgb.train(config, train_set, evals=[(test_set, "eval")], verbose_eval=False, callbacks=[XGBCallback])
|
||||
# Predict labels for the test set
|
||||
preds = bst.predict(test_set)
|
||||
pred_labels = np.rint(preds)
|
||||
# Return prediction accuracy
|
||||
accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
|
||||
tune.report(mean_accuracy=accuracy, done=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
config = {
|
||||
"objective": "binary:logistic",
|
||||
"max_depth": tune.randint(1, 9),
|
||||
"min_child_weight": tune.choice([1, 2, 3]),
|
||||
"subsample": tune.uniform(0.5, 1.0),
|
||||
"eta": tune.loguniform(1e-4, 1e-1),
|
||||
"eval_metric": ["auc", "ams@0", "logloss"]
|
||||
}
|
||||
scheduler = ASHAScheduler(
|
||||
metric="eval-logloss", # The `eval` prefix is defined in xgb.train
|
||||
mode="min", # Retain configurations with a low logloss
|
||||
max_t=11, # 10 training iterations + 1 final evaluation
|
||||
grace_period=1, # Number of minimum iterations for each trial
|
||||
reduction_factor=2) # How aggressively to stop trials
|
||||
tune.run(
|
||||
train_breast_cancer,
|
||||
resources_per_trial={"cpu": 1},
|
||||
config=config,
|
||||
num_samples=10,
|
||||
scheduler=scheduler)
|
||||
|
||||
The output of our run could look like this:
|
||||
|
||||
.. code-block::
|
||||
:emphasize-lines: 13
|
||||
|
||||
+---------------------------------+------------+-------+-------------+-------------+--------------------+-------------+----------+--------+------------------+
|
||||
| Trial name | status | loc | eta | max_depth | min_child_weight | subsample | acc | iter | total time (s) |
|
||||
|---------------------------------+------------+-------+-------------+-------------+--------------------+-------------+----------+--------+------------------|
|
||||
| train_breast_cancer_806ea_00000 | TERMINATED | | 0.0371055 | 2 | 1 | 0.611729 | 0.951049 | 11 | 0.339279 |
|
||||
| train_breast_cancer_806ea_00001 | TERMINATED | | 0.0324613 | 3 | 2 | 0.643815 | | 4 | 0.230338 |
|
||||
| train_breast_cancer_806ea_00002 | TERMINATED | | 0.0100875 | 4 | 3 | 0.985147 | | 2 | 0.0661929 |
|
||||
| train_breast_cancer_806ea_00003 | TERMINATED | | 0.00124263 | 1 | 3 | 0.890299 | | 1 | 0.0201721 |
|
||||
| train_breast_cancer_806ea_00004 | TERMINATED | | 0.000230373 | 5 | 3 | 0.627611 | | 1 | 0.0265107 |
|
||||
| train_breast_cancer_806ea_00005 | TERMINATED | | 0.000186942 | 5 | 2 | 0.831801 | | 1 | 0.026082 |
|
||||
| train_breast_cancer_806ea_00006 | TERMINATED | | 0.00871051 | 2 | 3 | 0.721523 | 0.958042 | 11 | 0.299392 |
|
||||
| train_breast_cancer_806ea_00007 | TERMINATED | | 0.00440949 | 2 | 3 | 0.606252 | | 1 | 0.0210171 |
|
||||
| train_breast_cancer_806ea_00008 | TERMINATED | | 0.00948289 | 5 | 2 | 0.892979 | | 2 | 0.140424 |
|
||||
| train_breast_cancer_806ea_00009 | TERMINATED | | 0.0514017 | 2 | 1 | 0.859864 | 0.972028 | 11 | 0.365437 |
|
||||
+---------------------------------+------------+-------+-------------+-------------+--------------------+-------------+----------+--------+------------------+
|
||||
|
||||
As you can see, four trials have been stopped after just one iteration, two after two iterations,
|
||||
one after four iterations, and the three most promising configurations have been run for
|
||||
ten iterations. The 11 is due to the fact that we finally report the accuracy after
|
||||
training the full model, which is internally interpreted as another iteration.
|
||||
|
||||
Using fractional GPUs
|
||||
---------------------
|
||||
You can often accelerate your training by using GPUs in addition to CPUs. However,
|
||||
you usually don't have as many GPUs as you have trials to run. For instance, if you
|
||||
run 10 Tune trials in parallel, you usually don't have access to 10 separate GPUs.
|
||||
|
||||
Tune supports *fractional GPUs*. This means that each task is assigned a fraction
|
||||
of the GPU memory for training. For 10 tasks, this could look like this:
|
||||
|
||||
.. code-block:: python
|
||||
:emphasize-lines: 8,12
|
||||
|
||||
config = {
|
||||
"objective": "binary:logistic",
|
||||
"max_depth": tune.randint(1, 9),
|
||||
"min_child_weight": tune.choice([1, 2, 3]),
|
||||
"subsample": tune.uniform(0.5, 1.0),
|
||||
"eta": tune.loguniform(1e-4, 1e-1),
|
||||
"eval_metric": ["auc", "ams@0", "logloss"],
|
||||
"tree_method": "gpu_hist"
|
||||
}
|
||||
tune.run(
|
||||
train_breast_cancer,
|
||||
resources_per_trial={"cpu": 1, "gpu": 0.1},
|
||||
config=config,
|
||||
num_samples=10,
|
||||
scheduler=scheduler)
|
||||
|
||||
Each task thus works with 10% of the available GPU memory. You also have to tell
|
||||
XGBoost to use the ``gpu_hist`` tree method, so it knows it should use the GPU.
|
||||
|
||||
Conclusion
|
||||
----------
|
||||
You should now have a basic understanding of how to train XGBoost models and of how
|
||||
to tune the hyperparameters to yield the best results. In our simple example,
|
||||
tuning the parameters didn't make a huge difference for the accuracy.
|
||||
But in larger applications, intelligent hyperparameter tuning can make the
|
||||
difference between a model that doesn't seem to learn at all, and a model
|
||||
that outperforms all the other ones.
|
||||
|
||||
Further References
|
||||
------------------
|
||||
|
||||
* `XGBoost Hyperparameter Tuning - A Visual Guide <https://kevinvecmanis.io/machine%20learning/hyperparameter%20tuning/dataviz/python/2019/05/11/XGBoost-Tuning-Visual-Guide.html>`_
|
||||
* `Notes on XGBoost Parameter Tuning <https://xgboost.readthedocs.io/en/latest/tutorials/param_tuning.html>`_
|
||||
* `Doing XGBoost Hyperparameter Tuning the smart way <https://towardsdatascience.com/doing-xgboost-hyper-parameter-tuning-the-smart-way-part-1-of-2-f6d255a45dde>`_
|
||||
@@ -1,49 +1,61 @@
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
import sklearn.datasets
|
||||
import sklearn.metrics
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
from sklearn.model_selection import train_test_split
|
||||
import xgboost as xgb
|
||||
|
||||
from ray import tune
|
||||
|
||||
|
||||
def XGBCallback(env):
|
||||
# After every training iteration, report loss to Tune
|
||||
tune.report(**dict(env.evaluation_result_list))
|
||||
|
||||
|
||||
def train_breast_cancer(config):
|
||||
data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
|
||||
# Load dataset
|
||||
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
|
||||
# Split into train and test set
|
||||
train_x, test_x, train_y, test_y = train_test_split(
|
||||
data, target, test_size=0.25)
|
||||
data, labels, test_size=0.25)
|
||||
# Build input matrices for XGBoost
|
||||
train_set = xgb.DMatrix(train_x, label=train_y)
|
||||
test_set = xgb.DMatrix(test_x, label=test_y)
|
||||
# Train the classifier
|
||||
bst = xgb.train(
|
||||
config, train_set, evals=[(test_set, "eval")], callbacks=[XGBCallback])
|
||||
config,
|
||||
train_set,
|
||||
evals=[(test_set, "eval")],
|
||||
verbose_eval=False,
|
||||
callbacks=[XGBCallback])
|
||||
# Predict labels for the test set
|
||||
preds = bst.predict(test_set)
|
||||
pred_labels = np.rint(preds)
|
||||
tune.report(
|
||||
mean_accuracy=sklearn.metrics.accuracy_score(test_y, pred_labels),
|
||||
done=True)
|
||||
# Return prediction accuracy
|
||||
accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
|
||||
tune.report(mean_accuracy=accuracy, done=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
num_threads = 2
|
||||
config = {
|
||||
"verbosity": 0,
|
||||
"num_threads": num_threads,
|
||||
"objective": "binary:logistic",
|
||||
"booster": "gbtree",
|
||||
"eval_metric": ["auc", "ams@0", "logloss"],
|
||||
"max_depth": tune.randint(1, 9),
|
||||
"min_child_weight": tune.choice([1, 2, 3]),
|
||||
"subsample": tune.uniform(0.5, 1.0),
|
||||
"eta": tune.loguniform(1e-4, 1e-1),
|
||||
"gamma": tune.loguniform(1e-8, 1.0),
|
||||
"grow_policy": tune.choice(["depthwise", "lossguide"])
|
||||
"eval_metric": ["auc", "ams@0", "logloss"]
|
||||
}
|
||||
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
# The ASHAScheduler stops bad performing configurations early
|
||||
scheduler = ASHAScheduler(
|
||||
metric="eval-logloss", # The `eval` prefix is defined in xgb.train
|
||||
mode="min", # Retain configurations with a low logloss
|
||||
max_t=11, # 10 training iterations + 1 final evaluation
|
||||
grace_period=1, # Number of minimum iterations for each trial
|
||||
reduction_factor=2) # How aggressively to stop trials
|
||||
tune.run(
|
||||
train_breast_cancer,
|
||||
resources_per_trial={"cpu": num_threads},
|
||||
train_breast_cancer, # your training function
|
||||
resources_per_trial={"cpu": 1}, # You can add "gpu": 0.1 here
|
||||
config=config,
|
||||
num_samples=2,
|
||||
scheduler=ASHAScheduler(metric="eval-logloss", mode="min"))
|
||||
num_samples=10, # number of parameter configurations to try
|
||||
scheduler=scheduler)
|
||||
|
||||
Reference in New Issue
Block a user