[release-branch.go1.7] go1.7.2

Change-Id: I546e8b1aa4facdbf13bb80d386bf4839a3aff9d1 Reviewed-on: https://go-review.googlesource.com/31314 Run-TryBot: Chris Broadfoot <cbro@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Chris Broadfoot <cbro@golang.org>
[release-branch.go1.7] doc: document go1.7.2
2026-02-02 09:02:05 +03:00 · 2016-10-17 21:08:12 +00:00 · 2016-10-17 20:41:45 +00:00 · 2016-10-17 20:25:13 +00:00 · 2016-10-17 20:25:07 +00:00 · 2016-10-17 20:25:00 +00:00
253 changed files with 11683 additions and 98769 deletions
--- a/1
+++ b/1
@@ -0,0 +1 @@
+go1.7.2
--- a/doc/devel/release.html
+++ b/doc/devel/release.html
@@ -37,6 +37,26 @@ Go 1.7 is a major release of Go.
 Read the <a href="/doc/go1.7">Go 1.7 Release Notes</a> for more information.
 </p>

+<h3 id="go1.7.minor">Minor revisions</h3>
+
+<p>
+go1.7.1 (released 2016/09/07) includes fixes to the compiler, runtime,
+documentation, and the <code>compress/flate</code>, <code>hash/crc32</code>,
+<code>io</code>, <code>net</code>, <code>net/http</code>,
+<code>path/filepath</code>, <code>reflect</code>, and <code>syscall</code>
+packages.
+See the <a href="https://github.com/golang/go/issues?q=milestone%3AGo1.7.1">Go
+1.7.1 milestone</a> on our issue tracker for details.
+</p>
+
+<p>
+go1.7.2 (released 2016/10/17) includes fixes to the compiler, runtime,
+and the <code>crypto/cipher</code>, <code>crypto/tls</code>,
+<code>net/http</code>, and <code>strings</code> packages.
+See the <a href="https://github.com/golang/go/issues?q=milestone%3AGo1.7.2">Go
+1.7.2 milestone</a> on our issue tracker for details.
+</p>
+
 <h2 id="go1.6">go1.6 (released 2016/02/17)</h2>

 <p>
--- a/doc/go1.7.html
+++ b/doc/go1.7.html
@@ -358,7 +358,7 @@ the code generation changes alone typically reduce program CPU time by 5-35%.
 </p>

 <p>
-<!-- git log -''-grep '-[0-9][0-9]\.[0-9][0-9]%' go1.6.. -->
+<!-- git log &#45&#45grep '-[0-9][0-9]\.[0-9][0-9]%' go1.6.. -->
 There have been significant optimizations bringing more than 10% improvements
 to implementations in the
 <a href="/pkg/crypto/sha1/"><code>crypto/sha1</code></a>,
@@ -553,9 +553,10 @@ The

 <dd>
 <p>
-There are many performance optimizations throughout the package.
+As noted above,
+there are significant performance optimizations throughout the package.
 Decompression speed is improved by about 10%,
-while compression for <code>DefaultCompression</code> is twice as fast.
+while compression speed for <code>DefaultCompression</code> is roughly doubled.
 </p>

 <p>
@@ -909,7 +910,7 @@ For example, the address on which a request received is
 <p>
 The server's <a href="/pkg/net/http/#Server.Serve"><code>Serve</code></a> method
 now only enables HTTP/2 support if the <code>Server.TLSConfig</code> field is <code>nil</code>
-or includes <code>"h2"</code> in its <code>TLSConfig.NextProto</code>.
+or includes <code>"h2"</code> in its <code>TLSConfig.NextProtos</code>.
 </p>

 <p>
--- a/doc/gopher/favicon.svg
+++ b/doc/gopher/favicon.svg
@@ -0,0 +1,238 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="32"
+   height="32"
+   viewBox="0 0 32 32.000001"
+   id="svg4416"
+   version="1.1"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="favicon.svg"
+   inkscape:export-filename="../../favicon.png"
+   inkscape:export-xdpi="90"
+   inkscape:export-ydpi="90">
+  <defs
+     id="defs4418" />
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="15.839192"
+     inkscape:cx="17.966652"
+     inkscape:cy="9.2991824"
+     inkscape:document-units="px"
+     inkscape:current-layer="layer1"
+     showgrid="true"
+     units="px"
+     inkscape:snap-bbox="true"
+     inkscape:snap-bbox-edge-midpoints="false"
+     inkscape:bbox-nodes="true"
+     showguides="false"
+     inkscape:window-width="1920"
+     inkscape:window-height="1018"
+     inkscape:window-x="1912"
+     inkscape:window-y="-8"
+     inkscape:window-maximized="1"
+     inkscape:object-nodes="true"
+     inkscape:snap-smooth-nodes="true"
+     inkscape:snap-global="false">
+    <inkscape:grid
+       type="xygrid"
+       id="grid5148" />
+  </sodipodi:namedview>
+  <metadata
+     id="metadata4421">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="icon"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(0,-1020.3622)">
+    <ellipse
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#384e54;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="ellipse4216"
+       cx="-907.35657"
+       cy="479.90009"
+       rx="3.5793996"
+       ry="3.8207953"
+       transform="matrix(-0.49169095,-0.87076978,-0.87076978,0.49169095,0,0)"
+       inkscape:transform-center-x="0.67794294"
+       inkscape:transform-center-y="-2.3634048" />
+    <ellipse
+       inkscape:transform-center-y="-2.3633882"
+       inkscape:transform-center-x="-0.67793718"
+       transform="matrix(0.49169095,-0.87076978,0.87076978,0.49169095,0,0)"
+       ry="3.8207953"
+       rx="3.5793996"
+       cy="507.8461"
+       cx="-891.57654"
+       id="ellipse4463"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#384e54;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" />
+    <path
+       inkscape:connector-curvature="0"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#384e54;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 16.091693,1021.3642 c -1.105749,0.01 -2.210341,0.049 -3.31609,0.09 C 6.8422558,1021.6738 2,1026.3942 2,1032.3622 c 0,2.9786 0,13 0,20 l 28,0 c 0,-8 0,-16 0,-20 0,-5.9683 -4.667345,-10.4912 -10.59023,-10.908 -1.10575,-0.078 -2.212328,-0.099 -3.318077,-0.09 z"
+       id="path4465"
+       sodipodi:nodetypes="ccsccscc" />
+    <path
+       inkscape:transform-center-y="-1.3604657"
+       inkscape:transform-center-x="-0.98424303"
+       sodipodi:nodetypes="sssssss"
+       inkscape:connector-curvature="0"
+       id="path4469"
+       d="m 4.6078867,1025.0462 c 0.459564,0.2595 1.818262,1.2013 1.980983,1.648 0.183401,0.5035 0.159385,1.0657 -0.114614,1.551 -0.346627,0.6138 -1.005341,0.9487 -1.696421,0.9365 -0.339886,-0.01 -1.720283,-0.6372 -2.042561,-0.8192 -0.97754,-0.5519 -1.350795,-1.7418 -0.833686,-2.6576 0.517109,-0.9158 1.728749,-1.2107 2.706299,-0.6587 z"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#76e1fe;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" />
+    <rect
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:0.32850246;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="rect4473"
+       width="3.0866659"
+       height="3.5313663"
+       x="14.406213"
+       y="1035.6842"
+       ry="0.62426329" />
+    <path
+       inkscape:connector-curvature="0"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#76e1fe;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 16,1023.3622 c -9,0 -12,3.7153 -12,9 l 0,20 24,0 c -0.04889,-7.3562 0,-18 0,-20 0,-5.2848 -3,-9 -12,-9 z"
+       id="path4471"
+       sodipodi:nodetypes="zsccsz" />
+    <path
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#76e1fe;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 27.074073,1025.0462 c -0.45957,0.2595 -1.818257,1.2013 -1.980979,1.648 -0.183401,0.5035 -0.159384,1.0657 0.114614,1.551 0.346627,0.6138 1.005335,0.9487 1.696415,0.9365 0.33988,-0.01 1.72029,-0.6372 2.04256,-0.8192 0.97754,-0.5519 1.35079,-1.7418 0.83369,-2.6576 -0.51711,-0.9158 -1.72876,-1.2107 -2.7063,-0.6587 z"
+       id="path4481"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="sssssss"
+       inkscape:transform-center-x="0.98424094"
+       inkscape:transform-center-y="-1.3604657" />
+    <circle
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="circle4477"
+       cx="21.175734"
+       cy="1030.3542"
+       r="4.6537542"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <circle
+       r="4.8316345"
+       cy="1030.3542"
+       cx="10.339486"
+       id="circle4483"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <rect
+       inkscape:export-ydpi="90"
+       inkscape:export-xdpi="90"
+       inkscape:export-filename=".\rect4485.png"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:0.32941176;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="rect4246"
+       width="3.6673687"
+       height="4.1063409"
+       x="14.115863"
+       y="1035.9174"
+       ry="0.72590536" />
+    <rect
+       ry="0.72590536"
+       y="1035.2253"
+       x="14.115863"
+       height="4.1063409"
+       width="3.6673687"
+       id="rect4485"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#fffcfb;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <path
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:0.32941176;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 19.999735,1036.5289 c 0,0.838 -0.871228,1.2682 -2.144766,1.1659 -0.02366,0 -0.04795,-0.6004 -0.254147,-0.5832 -0.503669,0.042 -1.095902,-0.02 -1.685964,-0.02 -0.612939,0 -1.206342,0.1826 -1.68549,0.017 -0.110233,-0.038 -0.178298,0.5838 -0.261532,0.5816 -1.243685,-0.033 -2.078803,-0.3383 -2.078803,-1.1618 0,-1.2118 1.815635,-2.1941 4.055351,-2.1941 2.239704,0 4.055351,0.9823 4.055351,2.1941 z"
+       id="path4487"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="sssssssss"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <path
+       sodipodi:nodetypes="sssssssss"
+       inkscape:connector-curvature="0"
+       id="path4489"
+       d="m 19.977414,1035.7004 c 0,0.5685 -0.433659,0.8554 -1.138091,1.0001 -0.291933,0.06 -0.630371,0.096 -1.003719,0.1166 -0.56405,0.032 -1.207782,0.031 -1.89122,0.031 -0.672834,0 -1.307182,0 -1.864904,-0.029 -0.306268,-0.017 -0.589429,-0.043 -0.843164,-0.084 -0.813833,-0.1318 -1.324962,-0.417 -1.324962,-1.0344 0,-1.1601 1.805642,-2.1006 4.03303,-2.1006 2.227377,0 4.03303,0.9405 4.03303,2.1006 z"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#c38c74;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <ellipse
+       cy="1033.8501"
+       cx="15.944382"
+       id="ellipse4491"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#23201f;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       rx="2.0801733"
+       ry="1.343747"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <circle
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#171311;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="circle4493"
+       cx="12.414201"
+       cy="1030.3542"
+       r="1.9630634"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <circle
+       r="1.9630634"
+       cy="1030.3542"
+       cx="23.110121"
+       id="circle4495"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#171311;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <path
+       sodipodi:nodetypes="cc"
+       inkscape:connector-curvature="0"
+       id="path4497"
+       d="m 5.0055377,1027.2727 c -1.170435,-1.0835 -2.026973,-0.7721 -2.044172,-0.7463"
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+    <path
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 4.3852457,1026.9152 c -1.158557,0.036 -1.346704,0.6303 -1.33881,0.6523"
+       id="path4499"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 26.630533,1027.1724 c 1.17043,-1.0835 2.02697,-0.7721 2.04417,-0.7463"
+       id="path4501"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       inkscape:connector-curvature="0"
+       id="path4503"
+       d="m 27.321773,1026.673 c 1.15856,0.036 1.3467,0.6302 1.3388,0.6522"
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+  </g>
+</svg>
--- a/doc/install-source.html
+++ b/doc/install-source.html
@@ -203,7 +203,7 @@ To build without <code>cgo</code>, set the environment variable
 Change to the directory that will be its parent
 and make sure the <code>go</code> directory does not exist.
 Then clone the repository and check out the latest release tag
-(<code class="versionTag">go1.7</code>, for example):</p>
+(<code class="versionTag">go1.7.2</code>, for example):</p>

 <pre>
 $ git clone https://go.googlesource.com/go
@@ -391,7 +391,7 @@ New releases are announced on the
 <a href="//groups.google.com/group/golang-announce">golang-announce</a>
 mailing list.
 Each announcement mentions the latest release tag, for instance,
-<code class="versionTag">go1.7</code>.
+<code class="versionTag">go1.7.2</code>.
 </p>

 <p>
--- a/favicon.ico
+++ b/favicon.ico
--- a/misc/nacl/testzip.proto
+++ b/misc/nacl/testzip.proto
@@ -18,10 +18,6 @@ go	src=..
 					asm
 						testdata
 							+
-			compile
-				internal
-					syntax
-						parser.go
 			doc
 				main.go
 				pkg.go
--- a/src/archive/tar/writer.go
+++ b/src/archive/tar/writer.go
@@ -317,7 +317,7 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro
 	var buf bytes.Buffer

 	// Keys are sorted before writing to body to allow deterministic output.
-	keys := make([]string, 0, len(paxHeaders))
+	var keys []string
 	for k := range paxHeaders {
 		keys = append(keys, k)
 	}
--- a/src/bytes/example_test.go
+++ b/src/bytes/example_test.go
@@ -11,7 +11,6 @@ import (
 	"io"
 	"os"
 	"sort"
-	"unicode"
 )

 func ExampleBuffer() {
@@ -84,205 +83,3 @@ func ExampleTrimPrefix() {
 	fmt.Printf("Hello%s", b)
 	// Output: Hello, world!
 }
-
-func ExampleFields() {
-	fmt.Printf("Fields are: %q", bytes.Fields([]byte("  foo bar  baz   ")))
-	// Output: Fields are: ["foo" "bar" "baz"]
-}
-
-func ExampleFieldsFunc() {
-	f := func(c rune) bool {
-		return !unicode.IsLetter(c) && !unicode.IsNumber(c)
-	}
-	fmt.Printf("Fields are: %q", bytes.FieldsFunc([]byte("  foo1;bar2,baz3..."), f))
-	// Output: Fields are: ["foo1" "bar2" "baz3"]
-}
-
-func ExampleContains() {
-	fmt.Println(bytes.Contains([]byte("seafood"), []byte("foo")))
-	fmt.Println(bytes.Contains([]byte("seafood"), []byte("bar")))
-	fmt.Println(bytes.Contains([]byte("seafood"), []byte("")))
-	fmt.Println(bytes.Contains([]byte(""), []byte("")))
-	// Output:
-	// true
-	// false
-	// true
-	// true
-}
-
-func ExampleCount() {
-	fmt.Println(bytes.Count([]byte("cheese"), []byte("e")))
-	fmt.Println(bytes.Count([]byte("five"), []byte(""))) // before & after each rune
-	// Output:
-	// 3
-	// 5
-}
-
-func ExampleEqualFold() {
-	fmt.Println(bytes.EqualFold([]byte("Go"), []byte("go")))
-	// Output: true
-}
-
-func ExampleHasPrefix() {
-	fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("Go")))
-	fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("C")))
-	fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("")))
-	// Output:
-	// true
-	// false
-	// true
-}
-
-func ExampleHasSuffix() {
-	fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("go")))
-	fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("O")))
-	fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("Ami")))
-	fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("")))
-	// Output:
-	// true
-	// false
-	// false
-	// true
-}
-
-func ExampleIndex() {
-	fmt.Println(bytes.Index([]byte("chicken"), []byte("ken")))
-	fmt.Println(bytes.Index([]byte("chicken"), []byte("dmr")))
-	// Output:
-	// 4
-	// -1
-}
-
-func ExampleIndexFunc() {
-	f := func(c rune) bool {
-		return unicode.Is(unicode.Han, c)
-	}
-	fmt.Println(bytes.IndexFunc([]byte("Hello, 世界"), f))
-	fmt.Println(bytes.IndexFunc([]byte("Hello, world"), f))
-	// Output:
-	// 7
-	// -1
-}
-
-func ExampleIndexAny() {
-	fmt.Println(bytes.IndexAny([]byte("chicken"), "aeiouy"))
-	fmt.Println(bytes.IndexAny([]byte("crwth"), "aeiouy"))
-	// Output:
-	// 2
-	// -1
-}
-
-func ExampleIndexRune() {
-	fmt.Println(bytes.IndexRune([]byte("chicken"), 'k'))
-	fmt.Println(bytes.IndexRune([]byte("chicken"), 'd'))
-	// Output:
-	// 4
-	// -1
-}
-
-func ExampleLastIndex() {
-	fmt.Println(bytes.Index([]byte("go gopher"), []byte("go")))
-	fmt.Println(bytes.LastIndex([]byte("go gopher"), []byte("go")))
-	fmt.Println(bytes.LastIndex([]byte("go gopher"), []byte("rodent")))
-	// Output:
-	// 0
-	// 3
-	// -1
-}
-
-func ExampleJoin() {
-	s := [][]byte{[]byte("foo"), []byte("bar"), []byte("baz")}
-	fmt.Printf("%s", bytes.Join(s, []byte(", ")))
-	// Output: foo, bar, baz
-}
-
-func ExampleRepeat() {
-	fmt.Printf("ba%s", bytes.Repeat([]byte("na"), 2))
-	// Output: banana
-}
-
-func ExampleReplace() {
-	fmt.Printf("%s\n", bytes.Replace([]byte("oink oink oink"), []byte("k"), []byte("ky"), 2))
-	fmt.Printf("%s\n", bytes.Replace([]byte("oink oink oink"), []byte("oink"), []byte("moo"), -1))
-	// Output:
-	// oinky oinky oink
-	// moo moo moo
-}
-
-func ExampleSplit() {
-	fmt.Printf("%q\n", bytes.Split([]byte("a,b,c"), []byte(",")))
-	fmt.Printf("%q\n", bytes.Split([]byte("a man a plan a canal panama"), []byte("a ")))
-	fmt.Printf("%q\n", bytes.Split([]byte(" xyz "), []byte("")))
-	fmt.Printf("%q\n", bytes.Split([]byte(""), []byte("Bernardo O'Higgins")))
-	// Output:
-	// ["a" "b" "c"]
-	// ["" "man " "plan " "canal panama"]
-	// [" " "x" "y" "z" " "]
-	// [""]
-}
-
-func ExampleSplitN() {
-	fmt.Printf("%q\n", bytes.SplitN([]byte("a,b,c"), []byte(","), 2))
-	z := bytes.SplitN([]byte("a,b,c"), []byte(","), 0)
-	fmt.Printf("%q (nil = %v)\n", z, z == nil)
-	// Output:
-	// ["a" "b,c"]
-	// [] (nil = true)
-}
-
-func ExampleSplitAfter() {
-	fmt.Printf("%q\n", bytes.SplitAfter([]byte("a,b,c"), []byte(",")))
-	// Output: ["a," "b," "c"]
-}
-
-func ExampleSplitAfterN() {
-	fmt.Printf("%q\n", bytes.SplitAfterN([]byte("a,b,c"), []byte(","), 2))
-	// Output: ["a," "b,c"]
-}
-
-func ExampleTitle() {
-	fmt.Printf("%s", bytes.Title([]byte("her royal highness")))
-	// Output: Her Royal Highness
-}
-
-func ExampleToTitle() {
-	fmt.Printf("%s\n", bytes.ToTitle([]byte("loud noises")))
-	fmt.Printf("%s\n", bytes.ToTitle([]byte("хлеб")))
-	// Output:
-	// LOUD NOISES
-	// ХЛЕБ
-}
-
-func ExampleTrim() {
-	fmt.Printf("[%q]", bytes.Trim([]byte(" !!! Achtung! Achtung! !!! "), "! "))
-	// Output: ["Achtung! Achtung"]
-}
-
-func ExampleMap() {
-	rot13 := func(r rune) rune {
-		switch {
-		case r >= 'A' && r <= 'Z':
-			return 'A' + (r-'A'+13)%26
-		case r >= 'a' && r <= 'z':
-			return 'a' + (r-'a'+13)%26
-		}
-		return r
-	}
-	fmt.Printf("%s", bytes.Map(rot13, []byte("'Twas brillig and the slithy gopher...")))
-	// Output: 'Gjnf oevyyvt naq gur fyvgul tbcure...
-}
-
-func ExampleTrimSpace() {
-	fmt.Printf("%s", bytes.TrimSpace([]byte(" \t\n a lone gopher \n\t\r\n")))
-	// Output: a lone gopher
-}
-
-func ExampleToUpper() {
-	fmt.Printf("%s", bytes.ToUpper([]byte("Gopher")))
-	// Output: GOPHER
-}
-
-func ExampleToLower() {
-	fmt.Printf("%s", bytes.ToLower([]byte("Gopher")))
-	// Output: gopher
-}
--- a/src/cmd/asm/internal/asm/asm.go
+++ b/src/cmd/asm/internal/asm/asm.go
@@ -403,7 +403,7 @@ func (p *Parser) asmJump(op obj.As, cond string, a []obj.Addr) {

 		fallthrough
 	default:
-		p.errorf("wrong number of arguments to %s instruction", op)
+		p.errorf("wrong number of arguments to %s instruction", obj.Aconv(op))
 		return
 	}
 	switch {
@@ -476,7 +476,7 @@ func (p *Parser) branch(jmp, target *obj.Prog) {
 // asmInstruction assembles an instruction.
 // MOVW R9, (R10)
 func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
-	// fmt.Printf("%s %+v\n", op, a)
+	// fmt.Printf("%s %+v\n", obj.Aconv(op), a)
 	prog := &obj.Prog{
 		Ctxt:   p.ctxt,
 		Lineno: p.histLineNum,
@@ -525,7 +525,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 					prog.To = a[1]
 					break
 				}
-				p.errorf("unrecognized addressing for %s", op)
+				p.errorf("unrecognized addressing for %s", obj.Aconv(op))
 				return
 			}
 			if arch.IsARMFloatCmp(op) {
@@ -572,7 +572,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 			// Catch missing operand here, because we store immediate as part of From3, and can't distinguish
 			// missing operand from legal value 0 in obj/x86/asm6.
 			if arch.IsAMD4OP(op) {
-				p.errorf("4 operands required, but only 3 are provided for %s instruction", op)
+				p.errorf("4 operands required, but only 3 are provided for %s instruction", obj.Aconv(op))
 			}
 			prog.From = a[0]
 			prog.From3 = newAddr(a[1])
@@ -583,7 +583,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 				prog.From = a[0]
 				prog.To = a[1]
 				if a[2].Type != obj.TYPE_REG {
-					p.errorf("invalid addressing modes for third operand to %s instruction, must be register", op)
+					p.errorf("invalid addressing modes for third operand to %s instruction, must be register", obj.Aconv(op))
 					return
 				}
 				prog.RegTo2 = a[2].Reg
@@ -619,7 +619,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 				prog.From3 = newAddr(a[1])
 				prog.To = a[2]
 			default:
-				p.errorf("invalid addressing modes for %s instruction", op)
+				p.errorf("invalid addressing modes for %s instruction", obj.Aconv(op))
 				return
 			}
 		case sys.S390X:
@@ -656,10 +656,10 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 			prog.From = a[1]
 			prog.From3 = newAddr(a[2])
 			if a[0].Type != obj.TYPE_CONST {
-				p.errorf("first operand must be an immediate in %s instruction", op)
+				p.errorf("first operand must be an immediate in %s instruction", obj.Aconv(op))
 			}
 			if prog.From3.Type != obj.TYPE_REG {
-				p.errorf("third operand must be a register in %s instruction", op)
+				p.errorf("third operand must be a register in %s instruction", obj.Aconv(op))
 			}
 			prog.From3.Offset = int64(p.getImmediate(prog, op, &a[0]))
 			prog.To = a[3]
@@ -690,7 +690,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 			prog.To = a[3]
 			break
 		}
-		p.errorf("can't handle %s instruction with 4 operands", op)
+		p.errorf("can't handle %s instruction with 4 operands", obj.Aconv(op))
 		return
 	case 5:
 		if p.arch.Family == sys.PPC64 && arch.IsPPC64RLD(op) {
@@ -712,7 +712,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 			prog.To = a[4]
 			break
 		}
-		p.errorf("can't handle %s instruction with 5 operands", op)
+		p.errorf("can't handle %s instruction with 5 operands", obj.Aconv(op))
 		return
 	case 6:
 		if p.arch.Family == sys.ARM && arch.IsARMMRC(op) {
@@ -736,7 +736,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
 		}
 		fallthrough
 	default:
-		p.errorf("can't handle %s instruction with %d operands", op, len(a))
+		p.errorf("can't handle %s instruction with %d operands", obj.Aconv(op), len(a))
 		return
 	}

@@ -771,7 +771,7 @@ func (p *Parser) getConstantPseudo(pseudo string, addr *obj.Addr) int64 {
 // getConstant checks that addr represents a plain constant and returns its value.
 func (p *Parser) getConstant(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
 	if addr.Type != obj.TYPE_MEM || addr.Name != 0 || addr.Reg != 0 || addr.Index != 0 {
-		p.errorf("%s: expected integer constant; found %s", op, obj.Dconv(prog, addr))
+		p.errorf("%s: expected integer constant; found %s", obj.Aconv(op), obj.Dconv(prog, addr))
 	}
 	return addr.Offset
 }
@@ -779,7 +779,7 @@ func (p *Parser) getConstant(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
 // getImmediate checks that addr represents an immediate constant and returns its value.
 func (p *Parser) getImmediate(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
 	if addr.Type != obj.TYPE_CONST || addr.Name != 0 || addr.Reg != 0 || addr.Index != 0 {
-		p.errorf("%s: expected immediate constant; found %s", op, obj.Dconv(prog, addr))
+		p.errorf("%s: expected immediate constant; found %s", obj.Aconv(op), obj.Dconv(prog, addr))
 	}
 	return addr.Offset
 }
@@ -787,7 +787,7 @@ func (p *Parser) getImmediate(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
 // getRegister checks that addr represents a register and returns its value.
 func (p *Parser) getRegister(prog *obj.Prog, op obj.As, addr *obj.Addr) int16 {
 	if addr.Type != obj.TYPE_REG || addr.Offset != 0 || addr.Name != 0 || addr.Index != 0 {
-		p.errorf("%s: expected register; found %s", op, obj.Dconv(prog, addr))
+		p.errorf("%s: expected register; found %s", obj.Aconv(op), obj.Dconv(prog, addr))
 	}
 	return addr.Reg
 }
--- a/src/cmd/asm/internal/asm/operand_test.go
+++ b/src/cmd/asm/internal/asm/operand_test.go
@@ -17,7 +17,6 @@ import (

 func setArch(goarch string) (*arch.Arch, *obj.Link) {
 	os.Setenv("GOOS", "linux") // obj can handle this OS for all architectures.
-	os.Setenv("GOARCH", goarch)
 	architecture := arch.Set(goarch)
 	if architecture == nil {
 		panic("asm: unrecognized architecture " + goarch)
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -239,87 +239,89 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		}
 		opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))

-	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
-		// Arg[0] (the dividend) is in AX.
-		// Arg[1] (the divisor) can be in any other register.
-		// Result[0] (the quotient) is in AX.
-		// Result[1] (the remainder) is in DX.
-		r := gc.SSARegNum(v.Args[1])
+	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
+		ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU,
+		ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW,
+		ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU:

-		// Zero extend dividend.
-		c := gc.Prog(x86.AXORL)
-		c.From.Type = obj.TYPE_REG
-		c.From.Reg = x86.REG_DX
-		c.To.Type = obj.TYPE_REG
-		c.To.Reg = x86.REG_DX
+		// Arg[0] is already in AX as it's the only register we allow
+		// and AX is the only output
+		x := gc.SSARegNum(v.Args[1])

-		// Issue divide.
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = r
+		// CPU faults upon signed overflow, which occurs when most
+		// negative int is divided by -1.
+		var j *obj.Prog
+		if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
+			v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ ||
+			v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW {

-	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
-		// Arg[0] (the dividend) is in AX.
-		// Arg[1] (the divisor) can be in any other register.
-		// Result[0] (the quotient) is in AX.
-		// Result[1] (the remainder) is in DX.
-		r := gc.SSARegNum(v.Args[1])
+			var c *obj.Prog
+			switch v.Op {
+			case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ:
+				c = gc.Prog(x86.ACMPQ)
+				j = gc.Prog(x86.AJEQ)
+				// go ahead and sign extend to save doing it later
+				gc.Prog(x86.ACQO)

-		// CPU faults upon signed overflow, which occurs when the most
-		// negative int is divided by -1. Handle divide by -1 as a special case.
-		var c *obj.Prog
-		switch v.Op {
-		case ssa.OpAMD64DIVQ:
-			c = gc.Prog(x86.ACMPQ)
-		case ssa.OpAMD64DIVL:
-			c = gc.Prog(x86.ACMPL)
-		case ssa.OpAMD64DIVW:
-			c = gc.Prog(x86.ACMPW)
-		}
-		c.From.Type = obj.TYPE_REG
-		c.From.Reg = r
-		c.To.Type = obj.TYPE_CONST
-		c.To.Offset = -1
-		j1 := gc.Prog(x86.AJEQ)
-		j1.To.Type = obj.TYPE_BRANCH
+			case ssa.OpAMD64DIVL, ssa.OpAMD64MODL:
+				c = gc.Prog(x86.ACMPL)
+				j = gc.Prog(x86.AJEQ)
+				gc.Prog(x86.ACDQ)
+
+			case ssa.OpAMD64DIVW, ssa.OpAMD64MODW:
+				c = gc.Prog(x86.ACMPW)
+				j = gc.Prog(x86.AJEQ)
+				gc.Prog(x86.ACWD)
+			}
+			c.From.Type = obj.TYPE_REG
+			c.From.Reg = x
+			c.To.Type = obj.TYPE_CONST
+			c.To.Offset = -1
+
+			j.To.Type = obj.TYPE_BRANCH

-		// Sign extend dividend.
-		switch v.Op {
-		case ssa.OpAMD64DIVQ:
-			gc.Prog(x86.ACQO)
-		case ssa.OpAMD64DIVL:
-			gc.Prog(x86.ACDQ)
-		case ssa.OpAMD64DIVW:
-			gc.Prog(x86.ACWD)
 		}

-		// Issue divide.
+		// for unsigned ints, we sign extend by setting DX = 0
+		// signed ints were sign extended above
+		if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU ||
+			v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU ||
+			v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU {
+			c := gc.Prog(x86.AXORQ)
+			c.From.Type = obj.TYPE_REG
+			c.From.Reg = x86.REG_DX
+			c.To.Type = obj.TYPE_REG
+			c.To.Reg = x86.REG_DX
+		}
+
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
-		p.From.Reg = r
+		p.From.Reg = x

-		// Skip over -1 fixup code.
-		j2 := gc.Prog(obj.AJMP)
-		j2.To.Type = obj.TYPE_BRANCH
+		// signed division, rest of the check for -1 case
+		if j != nil {
+			j2 := gc.Prog(obj.AJMP)
+			j2.To.Type = obj.TYPE_BRANCH

-		// Issue -1 fixup code.
-		// n / -1 = -n
-		n1 := gc.Prog(x86.ANEGQ)
-		n1.To.Type = obj.TYPE_REG
-		n1.To.Reg = x86.REG_AX
+			var n *obj.Prog
+			if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
+				v.Op == ssa.OpAMD64DIVW {
+				// n * -1 = -n
+				n = gc.Prog(x86.ANEGQ)
+				n.To.Type = obj.TYPE_REG
+				n.To.Reg = x86.REG_AX
+			} else {
+				// n % -1 == 0
+				n = gc.Prog(x86.AXORQ)
+				n.From.Type = obj.TYPE_REG
+				n.From.Reg = x86.REG_DX
+				n.To.Type = obj.TYPE_REG
+				n.To.Reg = x86.REG_DX
+			}

-		// n % -1 == 0
-		n2 := gc.Prog(x86.AXORL)
-		n2.From.Type = obj.TYPE_REG
-		n2.From.Reg = x86.REG_DX
-		n2.To.Type = obj.TYPE_REG
-		n2.To.Reg = x86.REG_DX
-
-		// TODO(khr): issue only the -1 fixup code we need.
-		// For instance, if only the quotient is used, no point in zeroing the remainder.
-
-		j1.To.Val = n1
-		j2.To.Val = s.Pc()
+			j.To.Val = n
+			j2.To.Val = s.Pc()
+		}

 	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
 		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
@@ -498,8 +500,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		gc.AddAux(&p.From, v)
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
-		p := gc.Prog(v.Op.Asm())
+	case ssa.OpAMD64LEAQ:
+		p := gc.Prog(x86.ALEAQ)
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = gc.SSARegNum(v.Args[0])
 		gc.AddAux(&p.From, v)
@@ -703,7 +705,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
 		p.To.Offset = v.AuxInt

-	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
+	case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
 		if v.Type.IsMemory() {
 			return
 		}
@@ -752,14 +754,27 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 			p.To.Name = obj.NAME_AUTO
 		}
 	case ssa.OpPhi:
-		gc.CheckLoweredPhi(v)
+		// just check to make sure regalloc and stackalloc did it right
+		if v.Type.IsMemory() {
+			return
+		}
+		f := v.Block.Func
+		loc := f.RegAlloc[v.ID]
+		for _, a := range v.Args {
+			if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead?
+				v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
+			}
+		}
 	case ssa.OpInitMem:
 		// memory arg needs no code
 	case ssa.OpArg:
 		// input args need no code
 	case ssa.OpAMD64LoweredGetClosurePtr:
-		// Closure pointer is DX.
-		gc.CheckLoweredGetClosurePtr(v)
+		// Output is hardwired to DX only,
+		// and DX contains the closure pointer on
+		// closure entry, and this "instruction"
+		// is scheduled to the very beginning
+		// of the entry block.
 	case ssa.OpAMD64LoweredGetG:
 		r := gc.SSARegNum(v)
 		// See the comments in cmd/internal/obj/x86/obj6.go
@@ -856,8 +871,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Reg = gc.SSARegNum(v)
 	case ssa.OpSP, ssa.OpSB:
 		// nothing to do
-	case ssa.OpSelect0, ssa.OpSelect1:
-		// nothing to do
 	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
 		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
 		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
--- a/src/cmd/compile/internal/arm/peep.go
+++ b/src/cmd/compile/internal/arm/peep.go
@@ -538,7 +538,7 @@ gotit:
 	}

 	if gc.Debug['P'] != 0 {
-		fmt.Printf(" => %v\n", p.As)
+		fmt.Printf(" => %v\n", obj.Aconv(p.As))
 	}
 	return true
 }
@@ -1036,7 +1036,7 @@ func xtramodes(g *gc.Graph, r *gc.Flow, a *obj.Addr) bool {
 func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
 	switch p.As {
 	default:
-		fmt.Printf("copyu: can't find %v\n", p.As)
+		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
 		return 2

 	case arm.AMOVM:
--- a/src/cmd/compile/internal/arm/prog.go
+++ b/src/cmd/compile/internal/arm/prog.go
@@ -79,8 +79,6 @@ var progtable = [arm.ALAST & obj.AMask]obj.ProgInfo{
 	arm.AMULF & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
 	arm.ASUBD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
 	arm.ASUBF & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
-	arm.ANEGD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
-	arm.ANEGF & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
 	arm.ASQRTD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},

 	// Conversions.
--- a/src/cmd/compile/internal/arm/ssa.go
+++ b/src/cmd/compile/internal/arm/ssa.go
--- a/src/cmd/compile/internal/arm64/galign.go
+++ b/src/cmd/compile/internal/arm64/galign.go
@@ -6,7 +6,6 @@ package arm64

 import (
 	"cmd/compile/internal/gc"
-	"cmd/compile/internal/ssa"
 	"cmd/internal/obj/arm64"
 )

@@ -62,11 +61,6 @@ func Main() {
 	gc.Thearch.Doregbits = doregbits
 	gc.Thearch.Regnames = regnames

-	gc.Thearch.SSARegToReg = ssaRegToReg
-	gc.Thearch.SSAMarkMoves = func(s *gc.SSAGenState, b *ssa.Block) {}
-	gc.Thearch.SSAGenValue = ssaGenValue
-	gc.Thearch.SSAGenBlock = ssaGenBlock
-
 	gc.Main()
 	gc.Exit(0)
 }
--- a/src/cmd/compile/internal/arm64/peep.go
+++ b/src/cmd/compile/internal/arm64/peep.go
@@ -162,7 +162,7 @@ loop1:
 			continue
 		}
 		if gc.Debug['P'] != 0 {
-			fmt.Printf("encoding $%d directly into %v in:\n%v\n%v\n", p.From.Offset, p1.As, p, p1)
+			fmt.Printf("encoding $%d directly into %v in:\n%v\n%v\n", p.From.Offset, obj.Aconv(p1.As), p, p1)
 		}
 		p1.From.Type = obj.TYPE_CONST
 		p1.From = p.From
@@ -423,7 +423,7 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {

 	switch p.As {
 	default:
-		fmt.Printf("copyu: can't find %v\n", p.As)
+		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
 		return 2

 	case obj.ANOP, /* read p->from, write p->to */
--- a/src/cmd/compile/internal/arm64/prog.go
+++ b/src/cmd/compile/internal/arm64/prog.go
@@ -44,37 +44,24 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
 	// Integer
 	arm64.AADD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ASUB & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ANEG & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite}, // why RegRead? revisit once the old backend gone
+	arm64.ANEG & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AAND & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AORR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AEOR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ABIC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AMVN & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite},
 	arm64.AMUL & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AMULW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ASMULL & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AUMULL & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ASMULH & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUMULH & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
+	arm64.ASMULH & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
+	arm64.AUMULH & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ASDIV & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AUDIV & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ASDIVW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUDIVW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AREM & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUREM & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AREMW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUREMW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ALSL & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ALSR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AASR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ACMP & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead},
-	arm64.ACMPW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead},
 	arm64.AADC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
 	arm64.AROR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ARORW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AADDS & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
-	arm64.ACSET & obj.AMask:  {Flags: gc.SizeQ | gc.RightWrite},
-	arm64.ACSEL & obj.AMask:  {Flags: gc.SizeQ | gc.RegRead | gc.RightWrite},

 	// Floating point.
 	arm64.AFADDD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -1,865 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package arm64
-
-import (
-	"math"
-
-	"cmd/compile/internal/gc"
-	"cmd/compile/internal/ssa"
-	"cmd/internal/obj"
-	"cmd/internal/obj/arm64"
-)
-
-var ssaRegToReg = []int16{
-	arm64.REG_R0,
-	arm64.REG_R1,
-	arm64.REG_R2,
-	arm64.REG_R3,
-	arm64.REG_R4,
-	arm64.REG_R5,
-	arm64.REG_R6,
-	arm64.REG_R7,
-	arm64.REG_R8,
-	arm64.REG_R9,
-	arm64.REG_R10,
-	arm64.REG_R11,
-	arm64.REG_R12,
-	arm64.REG_R13,
-	arm64.REG_R14,
-	arm64.REG_R15,
-	arm64.REG_R16,
-	arm64.REG_R17,
-	arm64.REG_R18, // platform register, not used
-	arm64.REG_R19,
-	arm64.REG_R20,
-	arm64.REG_R21,
-	arm64.REG_R22,
-	arm64.REG_R23,
-	arm64.REG_R24,
-	arm64.REG_R25,
-	arm64.REG_R26,
-	// R27 = REGTMP not used in regalloc
-	arm64.REGG,    // R28
-	arm64.REG_R29, // frame pointer, not used
-	// R30 = REGLINK not used in regalloc
-	arm64.REGSP, // R31
-
-	arm64.REG_F0,
-	arm64.REG_F1,
-	arm64.REG_F2,
-	arm64.REG_F3,
-	arm64.REG_F4,
-	arm64.REG_F5,
-	arm64.REG_F6,
-	arm64.REG_F7,
-	arm64.REG_F8,
-	arm64.REG_F9,
-	arm64.REG_F10,
-	arm64.REG_F11,
-	arm64.REG_F12,
-	arm64.REG_F13,
-	arm64.REG_F14,
-	arm64.REG_F15,
-	arm64.REG_F16,
-	arm64.REG_F17,
-	arm64.REG_F18,
-	arm64.REG_F19,
-	arm64.REG_F20,
-	arm64.REG_F21,
-	arm64.REG_F22,
-	arm64.REG_F23,
-	arm64.REG_F24,
-	arm64.REG_F25,
-	arm64.REG_F26,
-	arm64.REG_F27,
-	arm64.REG_F28,
-	arm64.REG_F29,
-	arm64.REG_F30,
-	arm64.REG_F31,
-
-	arm64.REG_NZCV, // flag
-	0,              // SB isn't a real register.  We fill an Addr.Reg field with 0 in this case.
-}
-
-// Smallest possible faulting page at address zero,
-// see ../../../../runtime/mheap.go:/minPhysPageSize
-const minZeroPage = 4096
-
-// loadByType returns the load instruction of the given type.
-func loadByType(t ssa.Type) obj.As {
-	if t.IsFloat() {
-		switch t.Size() {
-		case 4:
-			return arm64.AFMOVS
-		case 8:
-			return arm64.AFMOVD
-		}
-	} else {
-		switch t.Size() {
-		case 1:
-			if t.IsSigned() {
-				return arm64.AMOVB
-			} else {
-				return arm64.AMOVBU
-			}
-		case 2:
-			if t.IsSigned() {
-				return arm64.AMOVH
-			} else {
-				return arm64.AMOVHU
-			}
-		case 4:
-			if t.IsSigned() {
-				return arm64.AMOVW
-			} else {
-				return arm64.AMOVWU
-			}
-		case 8:
-			return arm64.AMOVD
-		}
-	}
-	panic("bad load type")
-}
-
-// storeByType returns the store instruction of the given type.
-func storeByType(t ssa.Type) obj.As {
-	if t.IsFloat() {
-		switch t.Size() {
-		case 4:
-			return arm64.AFMOVS
-		case 8:
-			return arm64.AFMOVD
-		}
-	} else {
-		switch t.Size() {
-		case 1:
-			return arm64.AMOVB
-		case 2:
-			return arm64.AMOVH
-		case 4:
-			return arm64.AMOVW
-		case 8:
-			return arm64.AMOVD
-		}
-	}
-	panic("bad store type")
-}
-
-// makeshift encodes a register shifted by a constant, used as an Offset in Prog
-func makeshift(reg int16, typ int64, s int64) int64 {
-	return int64(reg&31)<<16 | typ | (s&63)<<10
-}
-
-// genshift generates a Prog for r = r0 op (r1 shifted by s)
-func genshift(as obj.As, r0, r1, r int16, typ int64, s int64) *obj.Prog {
-	p := gc.Prog(as)
-	p.From.Type = obj.TYPE_SHIFT
-	p.From.Offset = makeshift(r1, typ, s)
-	p.Reg = r0
-	if r != 0 {
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = r
-	}
-	return p
-}
-
-func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
-	s.SetLineno(v.Line)
-	switch v.Op {
-	case ssa.OpInitMem:
-		// memory arg needs no code
-	case ssa.OpArg:
-		// input args need no code
-	case ssa.OpSP, ssa.OpSB, ssa.OpGetG:
-		// nothing to do
-	case ssa.OpCopy, ssa.OpARM64MOVDconvert, ssa.OpARM64MOVDreg:
-		if v.Type.IsMemory() {
-			return
-		}
-		x := gc.SSARegNum(v.Args[0])
-		y := gc.SSARegNum(v)
-		if x == y {
-			return
-		}
-		as := arm64.AMOVD
-		if v.Type.IsFloat() {
-			switch v.Type.Size() {
-			case 4:
-				as = arm64.AFMOVS
-			case 8:
-				as = arm64.AFMOVD
-			default:
-				panic("bad float size")
-			}
-		}
-		p := gc.Prog(as)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = y
-	case ssa.OpARM64MOVDnop:
-		if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) {
-			v.Fatalf("input[0] and output not in same register %s", v.LongString())
-		}
-		// nothing to do
-	case ssa.OpLoadReg:
-		if v.Type.IsFlags() {
-			v.Unimplementedf("load flags not implemented: %v", v.LongString())
-			return
-		}
-		p := gc.Prog(loadByType(v.Type))
-		n, off := gc.AutoVar(v.Args[0])
-		p.From.Type = obj.TYPE_MEM
-		p.From.Node = n
-		p.From.Sym = gc.Linksym(n.Sym)
-		p.From.Offset = off
-		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
-			p.From.Name = obj.NAME_PARAM
-			p.From.Offset += n.Xoffset
-		} else {
-			p.From.Name = obj.NAME_AUTO
-		}
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpPhi:
-		gc.CheckLoweredPhi(v)
-	case ssa.OpStoreReg:
-		if v.Type.IsFlags() {
-			v.Unimplementedf("store flags not implemented: %v", v.LongString())
-			return
-		}
-		p := gc.Prog(storeByType(v.Type))
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		n, off := gc.AutoVar(v)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Node = n
-		p.To.Sym = gc.Linksym(n.Sym)
-		p.To.Offset = off
-		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
-			p.To.Name = obj.NAME_PARAM
-			p.To.Offset += n.Xoffset
-		} else {
-			p.To.Name = obj.NAME_AUTO
-		}
-	case ssa.OpARM64ADD,
-		ssa.OpARM64SUB,
-		ssa.OpARM64AND,
-		ssa.OpARM64OR,
-		ssa.OpARM64XOR,
-		ssa.OpARM64BIC,
-		ssa.OpARM64MUL,
-		ssa.OpARM64MULW,
-		ssa.OpARM64MULH,
-		ssa.OpARM64UMULH,
-		ssa.OpARM64MULL,
-		ssa.OpARM64UMULL,
-		ssa.OpARM64DIV,
-		ssa.OpARM64UDIV,
-		ssa.OpARM64DIVW,
-		ssa.OpARM64UDIVW,
-		ssa.OpARM64MOD,
-		ssa.OpARM64UMOD,
-		ssa.OpARM64MODW,
-		ssa.OpARM64UMODW,
-		ssa.OpARM64SLL,
-		ssa.OpARM64SRL,
-		ssa.OpARM64SRA,
-		ssa.OpARM64FADDS,
-		ssa.OpARM64FADDD,
-		ssa.OpARM64FSUBS,
-		ssa.OpARM64FSUBD,
-		ssa.OpARM64FMULS,
-		ssa.OpARM64FMULD,
-		ssa.OpARM64FDIVS,
-		ssa.OpARM64FDIVD:
-		r := gc.SSARegNum(v)
-		r1 := gc.SSARegNum(v.Args[0])
-		r2 := gc.SSARegNum(v.Args[1])
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = r2
-		p.Reg = r1
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = r
-	case ssa.OpARM64ADDconst,
-		ssa.OpARM64SUBconst,
-		ssa.OpARM64ANDconst,
-		ssa.OpARM64ORconst,
-		ssa.OpARM64XORconst,
-		ssa.OpARM64BICconst,
-		ssa.OpARM64SLLconst,
-		ssa.OpARM64SRLconst,
-		ssa.OpARM64SRAconst,
-		ssa.OpARM64RORconst,
-		ssa.OpARM64RORWconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = v.AuxInt
-		p.Reg = gc.SSARegNum(v.Args[0])
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64ADDshiftLL,
-		ssa.OpARM64SUBshiftLL,
-		ssa.OpARM64ANDshiftLL,
-		ssa.OpARM64ORshiftLL,
-		ssa.OpARM64XORshiftLL,
-		ssa.OpARM64BICshiftLL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LL, v.AuxInt)
-	case ssa.OpARM64ADDshiftRL,
-		ssa.OpARM64SUBshiftRL,
-		ssa.OpARM64ANDshiftRL,
-		ssa.OpARM64ORshiftRL,
-		ssa.OpARM64XORshiftRL,
-		ssa.OpARM64BICshiftRL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LR, v.AuxInt)
-	case ssa.OpARM64ADDshiftRA,
-		ssa.OpARM64SUBshiftRA,
-		ssa.OpARM64ANDshiftRA,
-		ssa.OpARM64ORshiftRA,
-		ssa.OpARM64XORshiftRA,
-		ssa.OpARM64BICshiftRA:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_AR, v.AuxInt)
-	case ssa.OpARM64MOVDconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = v.AuxInt
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64FMOVSconst,
-		ssa.OpARM64FMOVDconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_FCONST
-		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64CMP,
-		ssa.OpARM64CMPW,
-		ssa.OpARM64CMN,
-		ssa.OpARM64CMNW,
-		ssa.OpARM64FCMPS,
-		ssa.OpARM64FCMPD:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[1])
-		p.Reg = gc.SSARegNum(v.Args[0])
-	case ssa.OpARM64CMPconst,
-		ssa.OpARM64CMPWconst,
-		ssa.OpARM64CMNconst,
-		ssa.OpARM64CMNWconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = v.AuxInt
-		p.Reg = gc.SSARegNum(v.Args[0])
-	case ssa.OpARM64CMPshiftLL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LL, v.AuxInt)
-	case ssa.OpARM64CMPshiftRL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LR, v.AuxInt)
-	case ssa.OpARM64CMPshiftRA:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_AR, v.AuxInt)
-	case ssa.OpARM64MOVDaddr:
-		p := gc.Prog(arm64.AMOVD)
-		p.From.Type = obj.TYPE_ADDR
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-
-		var wantreg string
-		// MOVD $sym+off(base), R
-		// the assembler expands it as the following:
-		// - base is SP: add constant offset to SP (R13)
-		//               when constant is large, tmp register (R11) may be used
-		// - base is SB: load external address from constant pool (use relocation)
-		switch v.Aux.(type) {
-		default:
-			v.Fatalf("aux is of unknown type %T", v.Aux)
-		case *ssa.ExternSymbol:
-			wantreg = "SB"
-			gc.AddAux(&p.From, v)
-		case *ssa.ArgSymbol, *ssa.AutoSymbol:
-			wantreg = "SP"
-			gc.AddAux(&p.From, v)
-		case nil:
-			// No sym, just MOVD $off(SP), R
-			wantreg = "SP"
-			p.From.Reg = arm64.REGSP
-			p.From.Offset = v.AuxInt
-		}
-		if reg := gc.SSAReg(v.Args[0]); reg.Name() != wantreg {
-			v.Fatalf("bad reg %s for symbol type %T, want %s", reg.Name(), v.Aux, wantreg)
-		}
-	case ssa.OpARM64MOVBload,
-		ssa.OpARM64MOVBUload,
-		ssa.OpARM64MOVHload,
-		ssa.OpARM64MOVHUload,
-		ssa.OpARM64MOVWload,
-		ssa.OpARM64MOVWUload,
-		ssa.OpARM64MOVDload,
-		ssa.OpARM64FMOVSload,
-		ssa.OpARM64FMOVDload:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.From, v)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64MOVBstore,
-		ssa.OpARM64MOVHstore,
-		ssa.OpARM64MOVWstore,
-		ssa.OpARM64MOVDstore,
-		ssa.OpARM64FMOVSstore,
-		ssa.OpARM64FMOVDstore:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[1])
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.To, v)
-	case ssa.OpARM64MOVBstorezero,
-		ssa.OpARM64MOVHstorezero,
-		ssa.OpARM64MOVWstorezero,
-		ssa.OpARM64MOVDstorezero:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = arm64.REGZERO
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.To, v)
-	case ssa.OpARM64MOVBreg,
-		ssa.OpARM64MOVBUreg,
-		ssa.OpARM64MOVHreg,
-		ssa.OpARM64MOVHUreg,
-		ssa.OpARM64MOVWreg,
-		ssa.OpARM64MOVWUreg:
-		a := v.Args[0]
-		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
-			a = a.Args[0]
-		}
-		if a.Op == ssa.OpLoadReg {
-			t := a.Type
-			switch {
-			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
-				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
-				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
-				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
-				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
-				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
-				// arg is a proper-typed load, already zero/sign-extended, don't extend again
-				if gc.SSARegNum(v) == gc.SSARegNum(v.Args[0]) {
-					return
-				}
-				p := gc.Prog(arm64.AMOVD)
-				p.From.Type = obj.TYPE_REG
-				p.From.Reg = gc.SSARegNum(v.Args[0])
-				p.To.Type = obj.TYPE_REG
-				p.To.Reg = gc.SSARegNum(v)
-				return
-			default:
-			}
-		}
-		fallthrough
-	case ssa.OpARM64MVN,
-		ssa.OpARM64NEG,
-		ssa.OpARM64FNEGS,
-		ssa.OpARM64FNEGD,
-		ssa.OpARM64FSQRTD,
-		ssa.OpARM64FCVTZSSW,
-		ssa.OpARM64FCVTZSDW,
-		ssa.OpARM64FCVTZUSW,
-		ssa.OpARM64FCVTZUDW,
-		ssa.OpARM64FCVTZSS,
-		ssa.OpARM64FCVTZSD,
-		ssa.OpARM64FCVTZUS,
-		ssa.OpARM64FCVTZUD,
-		ssa.OpARM64SCVTFWS,
-		ssa.OpARM64SCVTFWD,
-		ssa.OpARM64SCVTFS,
-		ssa.OpARM64SCVTFD,
-		ssa.OpARM64UCVTFWS,
-		ssa.OpARM64UCVTFWD,
-		ssa.OpARM64UCVTFS,
-		ssa.OpARM64UCVTFD,
-		ssa.OpARM64FCVTSD,
-		ssa.OpARM64FCVTDS:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64CSELULT,
-		ssa.OpARM64CSELULT0:
-		r1 := int16(arm64.REGZERO)
-		if v.Op == ssa.OpARM64CSELULT {
-			r1 = gc.SSARegNum(v.Args[1])
-		}
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
-		p.From.Reg = arm64.COND_LO
-		p.Reg = gc.SSARegNum(v.Args[0])
-		p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1}
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64DUFFZERO:
-		// runtime.duffzero expects start address - 8 in R16
-		p := gc.Prog(arm64.ASUB)
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = 8
-		p.Reg = gc.SSARegNum(v.Args[0])
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = arm64.REG_R16
-		p = gc.Prog(obj.ADUFFZERO)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
-		p.To.Offset = v.AuxInt
-	case ssa.OpARM64LoweredZero:
-		// MOVD.P	ZR, 8(R16)
-		// CMP	Rarg1, R16
-		// BLE	-2(PC)
-		// arg1 is the address of the last element to zero
-		// auxint is alignment
-		var sz int64
-		var mov obj.As
-		switch {
-		case v.AuxInt%8 == 0:
-			sz = 8
-			mov = arm64.AMOVD
-		case v.AuxInt%4 == 0:
-			sz = 4
-			mov = arm64.AMOVW
-		case v.AuxInt%2 == 0:
-			sz = 2
-			mov = arm64.AMOVH
-		default:
-			sz = 1
-			mov = arm64.AMOVB
-		}
-		p := gc.Prog(mov)
-		p.Scond = arm64.C_XPOST
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = arm64.REGZERO
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = arm64.REG_R16
-		p.To.Offset = sz
-		p2 := gc.Prog(arm64.ACMP)
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = gc.SSARegNum(v.Args[1])
-		p2.Reg = arm64.REG_R16
-		p3 := gc.Prog(arm64.ABLE)
-		p3.To.Type = obj.TYPE_BRANCH
-		gc.Patch(p3, p)
-	case ssa.OpARM64LoweredMove:
-		// MOVD.P	8(R16), Rtmp
-		// MOVD.P	Rtmp, 8(R17)
-		// CMP	Rarg2, R16
-		// BLE	-3(PC)
-		// arg2 is the address of the last element of src
-		// auxint is alignment
-		var sz int64
-		var mov obj.As
-		switch {
-		case v.AuxInt%8 == 0:
-			sz = 8
-			mov = arm64.AMOVD
-		case v.AuxInt%4 == 0:
-			sz = 4
-			mov = arm64.AMOVW
-		case v.AuxInt%2 == 0:
-			sz = 2
-			mov = arm64.AMOVH
-		default:
-			sz = 1
-			mov = arm64.AMOVB
-		}
-		p := gc.Prog(mov)
-		p.Scond = arm64.C_XPOST
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = arm64.REG_R16
-		p.From.Offset = sz
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = arm64.REGTMP
-		p2 := gc.Prog(mov)
-		p2.Scond = arm64.C_XPOST
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = arm64.REGTMP
-		p2.To.Type = obj.TYPE_MEM
-		p2.To.Reg = arm64.REG_R17
-		p2.To.Offset = sz
-		p3 := gc.Prog(arm64.ACMP)
-		p3.From.Type = obj.TYPE_REG
-		p3.From.Reg = gc.SSARegNum(v.Args[2])
-		p3.Reg = arm64.REG_R16
-		p4 := gc.Prog(arm64.ABLE)
-		p4.To.Type = obj.TYPE_BRANCH
-		gc.Patch(p4, p)
-	case ssa.OpARM64CALLstatic:
-		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
-			// Deferred calls will appear to be returning to
-			// the CALL deferreturn(SB) that we are about to emit.
-			// However, the stack trace code will show the line
-			// of the instruction byte before the return PC.
-			// To avoid that being an unrelated instruction,
-			// insert an actual hardware NOP that will have the right line number.
-			// This is different from obj.ANOP, which is a virtual no-op
-			// that doesn't make it into the instruction stream.
-			ginsnop()
-		}
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLclosure:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Offset = 0
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLdefer:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLgo:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLinter:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Offset = 0
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64LoweredNilCheck:
-		// Optimization - if the subsequent block has a load or store
-		// at the same address, we don't need to issue this instruction.
-		mem := v.Args[1]
-		for _, w := range v.Block.Succs[0].Block().Values {
-			if w.Op == ssa.OpPhi {
-				if w.Type.IsMemory() {
-					mem = w
-				}
-				continue
-			}
-			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
-				// w doesn't use a store - can't be a memory op.
-				continue
-			}
-			if w.Args[len(w.Args)-1] != mem {
-				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
-			}
-			switch w.Op {
-			case ssa.OpARM64MOVBload, ssa.OpARM64MOVBUload, ssa.OpARM64MOVHload, ssa.OpARM64MOVHUload,
-				ssa.OpARM64MOVWload, ssa.OpARM64MOVWUload, ssa.OpARM64MOVDload,
-				ssa.OpARM64FMOVSload, ssa.OpARM64FMOVDload,
-				ssa.OpARM64MOVBstore, ssa.OpARM64MOVHstore, ssa.OpARM64MOVWstore, ssa.OpARM64MOVDstore,
-				ssa.OpARM64FMOVSstore, ssa.OpARM64FMOVDstore:
-				// arg0 is ptr, auxint is offset
-				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
-					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
-						gc.Warnl(v.Line, "removed nil check")
-					}
-					return
-				}
-			case ssa.OpARM64DUFFZERO, ssa.OpARM64LoweredZero:
-				// arg0 is ptr
-				if w.Args[0] == v.Args[0] {
-					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
-						gc.Warnl(v.Line, "removed nil check")
-					}
-					return
-				}
-			case ssa.OpARM64LoweredMove:
-				// arg0 is dst ptr, arg1 is src ptr
-				if w.Args[0] == v.Args[0] || w.Args[1] == v.Args[0] {
-					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
-						gc.Warnl(v.Line, "removed nil check")
-					}
-					return
-				}
-			default:
-			}
-			if w.Type.IsMemory() {
-				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
-					// these ops are OK
-					mem = w
-					continue
-				}
-				// We can't delay the nil check past the next store.
-				break
-			}
-		}
-		// Issue a load which will fault if arg is nil.
-		p := gc.Prog(arm64.AMOVB)
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.From, v)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = arm64.REGTMP
-		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
-			gc.Warnl(v.Line, "generated nil check")
-		}
-	case ssa.OpVarDef:
-		gc.Gvardef(v.Aux.(*gc.Node))
-	case ssa.OpVarKill:
-		gc.Gvarkill(v.Aux.(*gc.Node))
-	case ssa.OpVarLive:
-		gc.Gvarlive(v.Aux.(*gc.Node))
-	case ssa.OpKeepAlive:
-		if !v.Args[0].Type.IsPtrShaped() {
-			v.Fatalf("keeping non-pointer alive %v", v.Args[0])
-		}
-		n, off := gc.AutoVar(v.Args[0])
-		if n == nil {
-			v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0])
-		}
-		if off != 0 {
-			v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off)
-		}
-		gc.Gvarlive(n)
-	case ssa.OpARM64Equal,
-		ssa.OpARM64NotEqual,
-		ssa.OpARM64LessThan,
-		ssa.OpARM64LessEqual,
-		ssa.OpARM64GreaterThan,
-		ssa.OpARM64GreaterEqual,
-		ssa.OpARM64LessThanU,
-		ssa.OpARM64LessEqualU,
-		ssa.OpARM64GreaterThanU,
-		ssa.OpARM64GreaterEqualU:
-		// generate boolean values using CSET
-		p := gc.Prog(arm64.ACSET)
-		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
-		p.From.Reg = condBits[v.Op]
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpSelect0, ssa.OpSelect1:
-		// nothing to do
-	case ssa.OpARM64LoweredGetClosurePtr:
-		// Closure pointer is R26 (arm64.REGCTXT).
-		gc.CheckLoweredGetClosurePtr(v)
-	case ssa.OpARM64FlagEQ,
-		ssa.OpARM64FlagLT_ULT,
-		ssa.OpARM64FlagLT_UGT,
-		ssa.OpARM64FlagGT_ULT,
-		ssa.OpARM64FlagGT_UGT:
-		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
-	case ssa.OpARM64InvertFlags:
-		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
-	default:
-		v.Unimplementedf("genValue not implemented: %s", v.LongString())
-	}
-}
-
-var condBits = map[ssa.Op]int16{
-	ssa.OpARM64Equal:         arm64.COND_EQ,
-	ssa.OpARM64NotEqual:      arm64.COND_NE,
-	ssa.OpARM64LessThan:      arm64.COND_LT,
-	ssa.OpARM64LessThanU:     arm64.COND_LO,
-	ssa.OpARM64LessEqual:     arm64.COND_LE,
-	ssa.OpARM64LessEqualU:    arm64.COND_LS,
-	ssa.OpARM64GreaterThan:   arm64.COND_GT,
-	ssa.OpARM64GreaterThanU:  arm64.COND_HI,
-	ssa.OpARM64GreaterEqual:  arm64.COND_GE,
-	ssa.OpARM64GreaterEqualU: arm64.COND_HS,
-}
-
-var blockJump = map[ssa.BlockKind]struct {
-	asm, invasm obj.As
-}{
-	ssa.BlockARM64EQ:  {arm64.ABEQ, arm64.ABNE},
-	ssa.BlockARM64NE:  {arm64.ABNE, arm64.ABEQ},
-	ssa.BlockARM64LT:  {arm64.ABLT, arm64.ABGE},
-	ssa.BlockARM64GE:  {arm64.ABGE, arm64.ABLT},
-	ssa.BlockARM64LE:  {arm64.ABLE, arm64.ABGT},
-	ssa.BlockARM64GT:  {arm64.ABGT, arm64.ABLE},
-	ssa.BlockARM64ULT: {arm64.ABLO, arm64.ABHS},
-	ssa.BlockARM64UGE: {arm64.ABHS, arm64.ABLO},
-	ssa.BlockARM64UGT: {arm64.ABHI, arm64.ABLS},
-	ssa.BlockARM64ULE: {arm64.ABLS, arm64.ABHI},
-}
-
-func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
-	s.SetLineno(b.Line)
-
-	switch b.Kind {
-	case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
-		if b.Succs[0].Block() != next {
-			p := gc.Prog(obj.AJMP)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-		}
-
-	case ssa.BlockDefer:
-		// defer returns in R0:
-		// 0 if we should continue executing
-		// 1 if we should jump to deferreturn call
-		p := gc.Prog(arm64.ACMP)
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = 0
-		p.Reg = arm64.REG_R0
-		p = gc.Prog(arm64.ABNE)
-		p.To.Type = obj.TYPE_BRANCH
-		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
-		if b.Succs[0].Block() != next {
-			p := gc.Prog(obj.AJMP)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-		}
-
-	case ssa.BlockExit:
-		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
-
-	case ssa.BlockRet:
-		gc.Prog(obj.ARET)
-
-	case ssa.BlockRetJmp:
-		p := gc.Prog(obj.ARET)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))
-
-	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
-		ssa.BlockARM64LT, ssa.BlockARM64GE,
-		ssa.BlockARM64LE, ssa.BlockARM64GT,
-		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
-		ssa.BlockARM64ULE, ssa.BlockARM64UGE:
-		jmp := blockJump[b.Kind]
-		var p *obj.Prog
-		switch next {
-		case b.Succs[0].Block():
-			p = gc.Prog(jmp.invasm)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
-		case b.Succs[1].Block():
-			p = gc.Prog(jmp.asm)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-		default:
-			p = gc.Prog(jmp.asm)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-			q := gc.Prog(obj.AJMP)
-			q.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
-		}
-
-	default:
-		b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
-	}
-}
--- a/src/cmd/compile/internal/big/arith_test.go
+++ b/src/cmd/compile/internal/big/arith_test.go
@@ -5,7 +5,6 @@
 package big

 import (
-	"fmt"
 	"math/rand"
 	"testing"
 )
@@ -119,22 +118,28 @@ func rndV(n int) []Word {
 	return v
 }

-var benchSizes = []int{1, 2, 3, 4, 5, 1e1, 1e2, 1e3, 1e4, 1e5}
-
-func BenchmarkAddVV(b *testing.B) {
-	for _, n := range benchSizes {
-		x := rndV(n)
-		y := rndV(n)
-		z := make([]Word, n)
-		b.Run(fmt.Sprint(n), func(b *testing.B) {
-			b.SetBytes(int64(n * _W))
-			for i := 0; i < b.N; i++ {
-				addVV(z, x, y)
-			}
-		})
+func benchmarkFunVV(b *testing.B, f funVV, n int) {
+	x := rndV(n)
+	y := rndV(n)
+	z := make([]Word, n)
+	b.SetBytes(int64(n * _W))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		f(z, x, y)
 	}
 }

+func BenchmarkAddVV_1(b *testing.B)   { benchmarkFunVV(b, addVV, 1) }
+func BenchmarkAddVV_2(b *testing.B)   { benchmarkFunVV(b, addVV, 2) }
+func BenchmarkAddVV_3(b *testing.B)   { benchmarkFunVV(b, addVV, 3) }
+func BenchmarkAddVV_4(b *testing.B)   { benchmarkFunVV(b, addVV, 4) }
+func BenchmarkAddVV_5(b *testing.B)   { benchmarkFunVV(b, addVV, 5) }
+func BenchmarkAddVV_1e1(b *testing.B) { benchmarkFunVV(b, addVV, 1e1) }
+func BenchmarkAddVV_1e2(b *testing.B) { benchmarkFunVV(b, addVV, 1e2) }
+func BenchmarkAddVV_1e3(b *testing.B) { benchmarkFunVV(b, addVV, 1e3) }
+func BenchmarkAddVV_1e4(b *testing.B) { benchmarkFunVV(b, addVV, 1e4) }
+func BenchmarkAddVV_1e5(b *testing.B) { benchmarkFunVV(b, addVV, 1e5) }
+
 type funVW func(z, x []Word, y Word) (c Word)
 type argVW struct {
 	z, x nat
@@ -231,20 +236,28 @@ func TestFunVW(t *testing.T) {
 	}
 }

-func BenchmarkAddVW(b *testing.B) {
-	for _, n := range benchSizes {
-		x := rndV(n)
-		y := rndW()
-		z := make([]Word, n)
-		b.Run(fmt.Sprint(n), func(b *testing.B) {
-			b.SetBytes(int64(n * _S))
-			for i := 0; i < b.N; i++ {
-				addVW(z, x, y)
-			}
-		})
+func benchmarkFunVW(b *testing.B, f funVW, n int) {
+	x := rndV(n)
+	y := rndW()
+	z := make([]Word, n)
+	b.SetBytes(int64(n * _S))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		f(z, x, y)
 	}
 }

+func BenchmarkAddVW_1(b *testing.B)   { benchmarkFunVW(b, addVW, 1) }
+func BenchmarkAddVW_2(b *testing.B)   { benchmarkFunVW(b, addVW, 2) }
+func BenchmarkAddVW_3(b *testing.B)   { benchmarkFunVW(b, addVW, 3) }
+func BenchmarkAddVW_4(b *testing.B)   { benchmarkFunVW(b, addVW, 4) }
+func BenchmarkAddVW_5(b *testing.B)   { benchmarkFunVW(b, addVW, 5) }
+func BenchmarkAddVW_1e1(b *testing.B) { benchmarkFunVW(b, addVW, 1e1) }
+func BenchmarkAddVW_1e2(b *testing.B) { benchmarkFunVW(b, addVW, 1e2) }
+func BenchmarkAddVW_1e3(b *testing.B) { benchmarkFunVW(b, addVW, 1e3) }
+func BenchmarkAddVW_1e4(b *testing.B) { benchmarkFunVW(b, addVW, 1e4) }
+func BenchmarkAddVW_1e5(b *testing.B) { benchmarkFunVW(b, addVW, 1e5) }
+
 type funVWW func(z, x []Word, y, r Word) (c Word)
 type argVWW struct {
 	z, x nat
@@ -369,20 +382,28 @@ func TestMulAddWWW(t *testing.T) {
 	}
 }

-func BenchmarkAddMulVVW(b *testing.B) {
-	for _, n := range benchSizes {
-		x := rndV(n)
-		y := rndW()
-		z := make([]Word, n)
-		b.Run(fmt.Sprint(n), func(b *testing.B) {
-			b.SetBytes(int64(n * _W))
-			for i := 0; i < b.N; i++ {
-				addMulVVW(z, x, y)
-			}
-		})
+func benchmarkAddMulVVW(b *testing.B, n int) {
+	x := rndV(n)
+	y := rndW()
+	z := make([]Word, n)
+	b.SetBytes(int64(n * _W))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		addMulVVW(z, x, y)
 	}
 }

+func BenchmarkAddMulVVW_1(b *testing.B)   { benchmarkAddMulVVW(b, 1) }
+func BenchmarkAddMulVVW_2(b *testing.B)   { benchmarkAddMulVVW(b, 2) }
+func BenchmarkAddMulVVW_3(b *testing.B)   { benchmarkAddMulVVW(b, 3) }
+func BenchmarkAddMulVVW_4(b *testing.B)   { benchmarkAddMulVVW(b, 4) }
+func BenchmarkAddMulVVW_5(b *testing.B)   { benchmarkAddMulVVW(b, 5) }
+func BenchmarkAddMulVVW_1e1(b *testing.B) { benchmarkAddMulVVW(b, 1e1) }
+func BenchmarkAddMulVVW_1e2(b *testing.B) { benchmarkAddMulVVW(b, 1e2) }
+func BenchmarkAddMulVVW_1e3(b *testing.B) { benchmarkAddMulVVW(b, 1e3) }
+func BenchmarkAddMulVVW_1e4(b *testing.B) { benchmarkAddMulVVW(b, 1e4) }
+func BenchmarkAddMulVVW_1e5(b *testing.B) { benchmarkAddMulVVW(b, 1e5) }
+
 func testWordBitLen(t *testing.T, fname string, f func(Word) int) {
 	for i := 0; i <= _W; i++ {
 		x := Word(1) << uint(i-1) // i == 0 => x == 0
@@ -399,15 +420,23 @@ func TestWordBitLen(t *testing.T) {
 }

 // runs b.N iterations of bitLen called on a Word containing (1 << nbits)-1.
-func BenchmarkBitLen(b *testing.B) {
-	// Individual bitLen tests. Numbers chosen to examine both sides
-	// of powers-of-two boundaries.
-	for _, nbits := range []uint{0, 1, 2, 3, 4, 5, 8, 9, 16, 17, 31} {
-		testword := Word((uint64(1) << nbits) - 1)
-		b.Run(fmt.Sprint(nbits), func(b *testing.B) {
-			for i := 0; i < b.N; i++ {
-				bitLen(testword)
-			}
-		})
+func benchmarkBitLenN(b *testing.B, nbits uint) {
+	testword := Word((uint64(1) << nbits) - 1)
+	for i := 0; i < b.N; i++ {
+		bitLen(testword)
 	}
 }
+
+// Individual bitLen tests. Numbers chosen to examine both sides
+// of powers-of-two boundaries.
+func BenchmarkBitLen0(b *testing.B)  { benchmarkBitLenN(b, 0) }
+func BenchmarkBitLen1(b *testing.B)  { benchmarkBitLenN(b, 1) }
+func BenchmarkBitLen2(b *testing.B)  { benchmarkBitLenN(b, 2) }
+func BenchmarkBitLen3(b *testing.B)  { benchmarkBitLenN(b, 3) }
+func BenchmarkBitLen4(b *testing.B)  { benchmarkBitLenN(b, 4) }
+func BenchmarkBitLen5(b *testing.B)  { benchmarkBitLenN(b, 5) }
+func BenchmarkBitLen8(b *testing.B)  { benchmarkBitLenN(b, 8) }
+func BenchmarkBitLen9(b *testing.B)  { benchmarkBitLenN(b, 9) }
+func BenchmarkBitLen16(b *testing.B) { benchmarkBitLenN(b, 16) }
+func BenchmarkBitLen17(b *testing.B) { benchmarkBitLenN(b, 17) }
+func BenchmarkBitLen31(b *testing.B) { benchmarkBitLenN(b, 31) }
--- a/src/cmd/compile/internal/big/decimal_test.go
+++ b/src/cmd/compile/internal/big/decimal_test.go
@@ -105,14 +105,12 @@ func TestDecimalRounding(t *testing.T) {
 	}
 }

-var sink string
-
 func BenchmarkDecimalConversion(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		for shift := -100; shift <= +100; shift++ {
 			var d decimal
 			d.init(natOne, shift)
-			sink = d.String()
+			d.String()
 		}
 	}
 }
--- a/src/cmd/compile/internal/big/doc.go
+++ b/src/cmd/compile/internal/big/doc.go
@@ -1,99 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package big implements arbitrary-precision arithmetic (big numbers).
-The following numeric types are supported:
-
-	Int    signed integers
-	Rat    rational numbers
-	Float  floating-point numbers
-
-The zero value for an Int, Rat, or Float correspond to 0. Thus, new
-values can be declared in the usual ways and denote 0 without further
-initialization:
-
-	var x Int        // &x is an *Int of value 0
-	var r = &Rat{}   // r is a *Rat of value 0
-	y := new(Float)  // y is a *Float of value 0
-
-Alternatively, new values can be allocated and initialized with factory
-functions of the form:
-
-	func NewT(v V) *T
-
-For instance, NewInt(x) returns an *Int set to the value of the int64
-argument x, NewRat(a, b) returns a *Rat set to the fraction a/b where
-a and b are int64 values, and NewFloat(f) returns a *Float initialized
-to the float64 argument f. More flexibility is provided with explicit
-setters, for instance:
-
-	var z1 Int
-	z1.SetUint64(123)                 // z1 := 123
-	z2 := new(Rat).SetFloat64(1.2)    // z2 := 6/5
-	z3 := new(Float).SetInt(z1)       // z3 := 123.0
-
-Setters, numeric operations and predicates are represented as methods of
-the form:
-
-	func (z *T) SetV(v V) *T          // z = v
-	func (z *T) Unary(x *T) *T        // z = unary x
-	func (z *T) Binary(x, y *T) *T    // z = x binary y
-	func (x *T) Pred() P              // p = pred(x)
-
-with T one of Int, Rat, or Float. For unary and binary operations, the
-result is the receiver (usually named z in that case; see below); if it
-is one of the operands x or y it may be safely overwritten (and its memory
-reused).
-
-Arithmetic expressions are typically written as a sequence of individual
-method calls, with each call corresponding to an operation. The receiver
-denotes the result and the method arguments are the operation's operands.
-For instance, given three *Int values a, b and c, the invocation
-
-	c.Add(a, b)
-
-computes the sum a + b and stores the result in c, overwriting whatever
-value was held in c before. Unless specified otherwise, operations permit
-aliasing of parameters, so it is perfectly ok to write
-
-	sum.Add(sum, x)
-
-to accumulate values x in a sum.
-
-(By always passing in a result value via the receiver, memory use can be
-much better controlled. Instead of having to allocate new memory for each
-result, an operation can reuse the space allocated for the result value,
-and overwrite that value with the new result in the process.)
-
-Notational convention: Incoming method parameters (including the receiver)
-are named consistently in the API to clarify their use. Incoming operands
-are usually named x, y, a, b, and so on, but never z. A parameter specifying
-the result is named z (typically the receiver).
-
-For instance, the arguments for (*Int).Add are named x and y, and because
-the receiver specifies the result destination, it is called z:
-
-	func (z *Int) Add(x, y *Int) *Int
-
-Methods of this form typically return the incoming receiver as well, to
-enable simple call chaining.
-
-Methods which don't require a result value to be passed in (for instance,
-Int.Sign), simply return the result. In this case, the receiver is typically
-the first operand, named x:
-
-	func (x *Int) Sign() int
-
-Various methods support conversions between strings and corresponding
-numeric values, and vice versa: *Int, *Rat, and *Float values implement
-the Stringer interface for a (default) string representation of the value,
-but also provide SetString methods to initialize a value from a string in
-a variety of supported formats (see the respective SetString documentation).
-
-Finally, *Int, *Rat, and *Float satisfy the fmt package's Scanner interface
-for scanning and (except for *Rat) the Formatter interface for formatted
-printing.
-*/
-package big
--- a/src/cmd/compile/internal/big/float.go
+++ b/src/cmd/compile/internal/big/float.go
@@ -1008,9 +1008,9 @@ func (x *Float) Float64() (float64, Accuracy) {
 		if r.form == inf || e > emax {
 			// overflow
 			if x.neg {
-				return math.Inf(-1), Below
+				return float64(math.Inf(-1)), Below
 			}
-			return math.Inf(+1), Above
+			return float64(math.Inf(+1)), Above
 		}
 		// e <= emax

--- a/src/cmd/compile/internal/big/floatconv_test.go
+++ b/src/cmd/compile/internal/big/floatconv_test.go
@@ -290,11 +290,6 @@ func TestFloat64Text(t *testing.T) {
 		// Issue 2625.
 		{383260575764816448, 'f', 0, "383260575764816448"},
 		{383260575764816448, 'g', -1, "3.8326057576481645e+17"},
-
-		// Issue 15918.
-		{1, 'f', -10, "1"},
-		{1, 'f', -11, "1"},
-		{1, 'f', -12, "1"},
 	} {
 		// The test cases are from the strconv package which tests float64 values.
 		// When formatting values with prec = -1 (shortest representation),
--- a/src/cmd/compile/internal/big/floatexample_test.go
+++ b/src/cmd/compile/internal/big/floatexample_test.go
@@ -11,7 +11,7 @@ import (
 )

 func ExampleFloat_Add() {
-	// Operate on numbers of different precision.
+	// Operating on numbers of different precision.
 	var x, y, z big.Float
 	x.SetInt64(1000)          // x is automatically set to 64bit precision
 	y.SetFloat64(2.718281828) // y is automatically set to 53bit precision
@@ -26,8 +26,8 @@ func ExampleFloat_Add() {
 	// z = 1002.718282 (0x.faadf854p+10, prec = 32, acc = Below)
 }

-func ExampleFloat_shift() {
-	// Implement Float "shift" by modifying the (binary) exponents directly.
+func Example_Shift() {
+	// Implementing Float "shift" by modifying the (binary) exponents directly.
 	for s := -5; s <= 5; s++ {
 		x := big.NewFloat(0.5)
 		x.SetMantExp(x, x.MantExp(nil)+s) // shift x by s
--- a/src/cmd/compile/internal/big/floatmarsh.go
+++ b/src/cmd/compile/internal/big/floatmarsh.go
@@ -6,94 +6,7 @@

 package big

-import (
-	"encoding/binary"
-	"fmt"
-)
-
-// Gob codec version. Permits backward-compatible changes to the encoding.
-const floatGobVersion byte = 1
-
-// GobEncode implements the gob.GobEncoder interface.
-// The Float value and all its attributes (precision,
-// rounding mode, accuracy) are marshalled.
-func (x *Float) GobEncode() ([]byte, error) {
-	if x == nil {
-		return nil, nil
-	}
-
-	// determine max. space (bytes) required for encoding
-	sz := 1 + 1 + 4 // version + mode|acc|form|neg (3+2+2+1bit) + prec
-	n := 0          // number of mantissa words
-	if x.form == finite {
-		// add space for mantissa and exponent
-		n = int((x.prec + (_W - 1)) / _W) // required mantissa length in words for given precision
-		// actual mantissa slice could be shorter (trailing 0's) or longer (unused bits):
-		// - if shorter, only encode the words present
-		// - if longer, cut off unused words when encoding in bytes
-		//   (in practice, this should never happen since rounding
-		//   takes care of it, but be safe and do it always)
-		if len(x.mant) < n {
-			n = len(x.mant)
-		}
-		// len(x.mant) >= n
-		sz += 4 + n*_S // exp + mant
-	}
-	buf := make([]byte, sz)
-
-	buf[0] = floatGobVersion
-	b := byte(x.mode&7)<<5 | byte((x.acc+1)&3)<<3 | byte(x.form&3)<<1
-	if x.neg {
-		b |= 1
-	}
-	buf[1] = b
-	binary.BigEndian.PutUint32(buf[2:], x.prec)
-
-	if x.form == finite {
-		binary.BigEndian.PutUint32(buf[6:], uint32(x.exp))
-		x.mant[len(x.mant)-n:].bytes(buf[10:]) // cut off unused trailing words
-	}
-
-	return buf, nil
-}
-
-// GobDecode implements the gob.GobDecoder interface.
-// The result is rounded per the precision and rounding mode of
-// z unless z's precision is 0, in which case z is set exactly
-// to the decoded value.
-func (z *Float) GobDecode(buf []byte) error {
-	if len(buf) == 0 {
-		// Other side sent a nil or default value.
-		*z = Float{}
-		return nil
-	}
-
-	if buf[0] != floatGobVersion {
-		return fmt.Errorf("Float.GobDecode: encoding version %d not supported", buf[0])
-	}
-
-	oldPrec := z.prec
-	oldMode := z.mode
-
-	b := buf[1]
-	z.mode = RoundingMode((b >> 5) & 7)
-	z.acc = Accuracy((b>>3)&3) - 1
-	z.form = form((b >> 1) & 3)
-	z.neg = b&1 != 0
-	z.prec = binary.BigEndian.Uint32(buf[2:])
-
-	if z.form == finite {
-		z.exp = int32(binary.BigEndian.Uint32(buf[6:]))
-		z.mant = z.mant.setBytes(buf[10:])
-	}
-
-	if oldPrec != 0 {
-		z.mode = oldMode
-		z.SetPrec(uint(oldPrec))
-	}
-
-	return nil
-}
+import "fmt"

 // MarshalText implements the encoding.TextMarshaler interface.
 // Only the Float value is marshaled (in full precision), other
--- a/src/cmd/compile/internal/big/floatmarsh_test.go
+++ b/src/cmd/compile/internal/big/floatmarsh_test.go
@@ -5,10 +5,7 @@
 package big

 import (
-	"bytes"
-	"encoding/gob"
 	"encoding/json"
-	"io"
 	"testing"
 )

@@ -26,85 +23,6 @@ var floatVals = []string{
 	"Inf",
 }

-func TestFloatGobEncoding(t *testing.T) {
-	var medium bytes.Buffer
-	enc := gob.NewEncoder(&medium)
-	dec := gob.NewDecoder(&medium)
-	for _, test := range floatVals {
-		for _, sign := range []string{"", "+", "-"} {
-			for _, prec := range []uint{0, 1, 2, 10, 53, 64, 100, 1000} {
-				for _, mode := range []RoundingMode{ToNearestEven, ToNearestAway, ToZero, AwayFromZero, ToNegativeInf, ToPositiveInf} {
-					medium.Reset() // empty buffer for each test case (in case of failures)
-					x := sign + test
-
-					var tx Float
-					_, _, err := tx.SetPrec(prec).SetMode(mode).Parse(x, 0)
-					if err != nil {
-						t.Errorf("parsing of %s (%dbits, %v) failed (invalid test case): %v", x, prec, mode, err)
-						continue
-					}
-
-					// If tx was set to prec == 0, tx.Parse(x, 0) assumes precision 64. Correct it.
-					if prec == 0 {
-						tx.SetPrec(0)
-					}
-
-					if err := enc.Encode(&tx); err != nil {
-						t.Errorf("encoding of %v (%dbits, %v) failed: %v", &tx, prec, mode, err)
-						continue
-					}
-
-					var rx Float
-					if err := dec.Decode(&rx); err != nil {
-						t.Errorf("decoding of %v (%dbits, %v) failed: %v", &tx, prec, mode, err)
-						continue
-					}
-
-					if rx.Cmp(&tx) != 0 {
-						t.Errorf("transmission of %s failed: got %s want %s", x, rx.String(), tx.String())
-						continue
-					}
-
-					if rx.Prec() != prec {
-						t.Errorf("transmission of %s's prec failed: got %d want %d", x, rx.Prec(), prec)
-					}
-
-					if rx.Mode() != mode {
-						t.Errorf("transmission of %s's mode failed: got %s want %s", x, rx.Mode(), mode)
-					}
-
-					if rx.Acc() != tx.Acc() {
-						t.Errorf("transmission of %s's accuracy failed: got %s want %s", x, rx.Acc(), tx.Acc())
-					}
-				}
-			}
-		}
-	}
-}
-
-func TestFloatCorruptGob(t *testing.T) {
-	var buf bytes.Buffer
-	tx := NewFloat(4 / 3).SetPrec(1000).SetMode(ToPositiveInf)
-	if err := gob.NewEncoder(&buf).Encode(tx); err != nil {
-		t.Fatal(err)
-	}
-	b := buf.Bytes()
-
-	var rx Float
-	if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := gob.NewDecoder(bytes.NewReader(b[:10])).Decode(&rx); err != io.ErrUnexpectedEOF {
-		t.Errorf("got %v want EOF", err)
-	}
-
-	b[1] = 0
-	if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err == nil {
-		t.Fatal("got nil want version error")
-	}
-}
-
 func TestFloatJSONEncoding(t *testing.T) {
 	for _, test := range floatVals {
 		for _, sign := range []string{"", "+", "-"} {
--- a/src/cmd/compile/internal/big/ftoa.go
+++ b/src/cmd/compile/internal/big/ftoa.go
@@ -41,11 +41,8 @@ import (
 // x.Prec() mantissa bits.
 // The prec value is ignored for the 'b' or 'p' format.
 func (x *Float) Text(format byte, prec int) string {
-	cap := 10 // TODO(gri) determine a good/better value here
-	if prec > 0 {
-		cap += prec
-	}
-	return string(x.Append(make([]byte, 0, cap), format, prec))
+	const extra = 10 // TODO(gri) determine a good/better value here
+	return string(x.Append(make([]byte, 0, prec+extra), format, prec))
 }

 // String formats x like x.Text('g', 10).
--- a/src/cmd/compile/internal/big/gcd_test.go
+++ b/src/cmd/compile/internal/big/gcd_test.go
@@ -20,27 +20,13 @@ func randInt(r *rand.Rand, size uint) *Int {
 }

 func runGCD(b *testing.B, aSize, bSize uint) {
-	b.Run("WithoutXY", func(b *testing.B) {
-		runGCDExt(b, aSize, bSize, false)
-	})
-	b.Run("WithXY", func(b *testing.B) {
-		runGCDExt(b, aSize, bSize, true)
-	})
-}
-
-func runGCDExt(b *testing.B, aSize, bSize uint, calcXY bool) {
 	b.StopTimer()
 	var r = rand.New(rand.NewSource(1234))
 	aa := randInt(r, aSize)
 	bb := randInt(r, bSize)
-	var x, y *Int
-	if calcXY {
-		x = new(Int)
-		y = new(Int)
-	}
 	b.StartTimer()
 	for i := 0; i < b.N; i++ {
-		new(Int).GCD(x, y, aa, bb)
+		new(Int).GCD(nil, nil, aa, bb)
 	}
 }

--- a/src/cmd/compile/internal/big/int.go
+++ b/src/cmd/compile/internal/big/int.go
@@ -459,11 +459,11 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
 	q := new(Int)
 	temp := new(Int)

-	r := new(Int)
 	for len(B.abs) > 0 {
+		r := new(Int)
 		q, r = q.QuoRem(A, B, r)

-		A, B, r = B, r, A
+		A, B = B, r

 		temp.Set(X)
 		X.Mul(X, q)
--- a/src/cmd/compile/internal/big/nat.go
+++ b/src/cmd/compile/internal/big/nat.go
@@ -8,10 +8,7 @@

 package big

-import (
-	"math/rand"
-	"sync"
-)
+import "math/rand"

 // An unsigned integer x of the form
 //
@@ -542,21 +539,6 @@ func (z nat) div(z2, u, v nat) (q, r nat) {
 	return
 }

-// getNat returns a nat of len n. The contents may not be zero.
-func getNat(n int) nat {
-	var z nat
-	if v := natPool.Get(); v != nil {
-		z = v.(nat)
-	}
-	return z.make(n)
-}
-
-func putNat(x nat) {
-	natPool.Put(x)
-}
-
-var natPool sync.Pool
-
 // q = (uIn-r)/v, with 0 <= r < y
 // Uses z as storage for q, and u as storage for r if possible.
 // See Knuth, Volume 2, section 4.3.1, Algorithm D.
@@ -575,7 +557,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
 	}
 	q = z.make(m + 1)

-	qhatv := getNat(n + 1)
+	qhatv := make(nat, n+1)
 	if alias(u, uIn) || alias(u, v) {
 		u = nil // u is an alias for uIn or v - cannot reuse
 	}
@@ -583,11 +565,10 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
 	u.clear() // TODO(gri) no need to clear if we allocated a new u

 	// D1.
-	var v1 nat
 	shift := nlz(v[n-1])
 	if shift > 0 {
 		// do not modify v, it may be used by another goroutine simultaneously
-		v1 = getNat(n)
+		v1 := make(nat, n)
 		shlVU(v1, v, shift)
 		v = v1
 	}
@@ -628,10 +609,6 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {

 		q[j] = qhat
 	}
-	if v1 != nil {
-		putNat(v1)
-	}
-	putNat(qhatv)

 	q = q.norm()
 	shrVU(u, u, shift)
--- a/src/cmd/compile/internal/big/nat_test.go
+++ b/src/cmd/compile/internal/big/nat_test.go
@@ -5,7 +5,6 @@
 package big

 import (
-	"fmt"
 	"runtime"
 	"strings"
 	"testing"
@@ -510,20 +509,24 @@ func TestExpNN(t *testing.T) {
 	}
 }

-func BenchmarkExp3Power(b *testing.B) {
-	const x = 3
-	for _, y := range []Word{
-		0x10, 0x40, 0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x40000, 0x100000, 0x400000,
-	} {
-		b.Run(fmt.Sprintf("%#x", y), func(b *testing.B) {
-			var z nat
-			for i := 0; i < b.N; i++ {
-				z.expWW(x, y)
-			}
-		})
+func ExpHelper(b *testing.B, x, y Word) {
+	var z nat
+	for i := 0; i < b.N; i++ {
+		z.expWW(x, y)
 	}
 }

+func BenchmarkExp3Power0x10(b *testing.B)     { ExpHelper(b, 3, 0x10) }
+func BenchmarkExp3Power0x40(b *testing.B)     { ExpHelper(b, 3, 0x40) }
+func BenchmarkExp3Power0x100(b *testing.B)    { ExpHelper(b, 3, 0x100) }
+func BenchmarkExp3Power0x400(b *testing.B)    { ExpHelper(b, 3, 0x400) }
+func BenchmarkExp3Power0x1000(b *testing.B)   { ExpHelper(b, 3, 0x1000) }
+func BenchmarkExp3Power0x4000(b *testing.B)   { ExpHelper(b, 3, 0x4000) }
+func BenchmarkExp3Power0x10000(b *testing.B)  { ExpHelper(b, 3, 0x10000) }
+func BenchmarkExp3Power0x40000(b *testing.B)  { ExpHelper(b, 3, 0x40000) }
+func BenchmarkExp3Power0x100000(b *testing.B) { ExpHelper(b, 3, 0x100000) }
+func BenchmarkExp3Power0x400000(b *testing.B) { ExpHelper(b, 3, 0x400000) }
+
 func fibo(n int) nat {
 	switch n {
 	case 0:
--- a/src/cmd/compile/internal/big/natconv.go
+++ b/src/cmd/compile/internal/big/natconv.go
@@ -302,7 +302,7 @@ func (x nat) itoa(neg bool, base int) []byte {
 		}

 	} else {
-		bb, ndigits := maxPow(b)
+		bb, ndigits := maxPow(Word(b))

 		// construct table of successive squares of bb*leafSize to use in subdivisions
 		// result (table != nil) <=> (len(x) > leafSize > 0)
@@ -391,7 +391,7 @@ func (q nat) convertWords(s []byte, b Word, ndigits int, bb Word, table []diviso
 				// this appears to be faster for BenchmarkString10000Base10
 				// and smaller strings (but a bit slower for larger ones)
 				t := r / 10
-				s[i] = '0' + byte(r-t*10)
+				s[i] = '0' + byte(r-t<<3-t-t) // TODO(gri) replace w/ t*10 once compiler produces better code
 				r = t
 			}
 		}
--- a/src/cmd/compile/internal/big/natconv_test.go
+++ b/src/cmd/compile/internal/big/natconv_test.go
@@ -6,7 +6,6 @@ package big

 import (
 	"bytes"
-	"fmt"
 	"io"
 	"strings"
 	"testing"
@@ -274,57 +273,101 @@ func BenchmarkStringPiParallel(b *testing.B) {
 	})
 }

-func BenchmarkScan(b *testing.B) {
-	const x = 10
-	for _, base := range []int{2, 8, 10, 16} {
-		for _, y := range []Word{10, 100, 1000, 10000, 100000} {
-			b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) {
-				b.StopTimer()
-				var z nat
-				z = z.expWW(x, y)
+func BenchmarkScan10Base2(b *testing.B)     { ScanHelper(b, 2, 10, 10) }
+func BenchmarkScan100Base2(b *testing.B)    { ScanHelper(b, 2, 10, 100) }
+func BenchmarkScan1000Base2(b *testing.B)   { ScanHelper(b, 2, 10, 1000) }
+func BenchmarkScan10000Base2(b *testing.B)  { ScanHelper(b, 2, 10, 10000) }
+func BenchmarkScan100000Base2(b *testing.B) { ScanHelper(b, 2, 10, 100000) }

-				s := z.utoa(base)
-				if t := itoa(z, base); !bytes.Equal(s, t) {
-					b.Fatalf("scanning: got %s; want %s", s, t)
-				}
-				b.StartTimer()
+func BenchmarkScan10Base8(b *testing.B)     { ScanHelper(b, 8, 10, 10) }
+func BenchmarkScan100Base8(b *testing.B)    { ScanHelper(b, 8, 10, 100) }
+func BenchmarkScan1000Base8(b *testing.B)   { ScanHelper(b, 8, 10, 1000) }
+func BenchmarkScan10000Base8(b *testing.B)  { ScanHelper(b, 8, 10, 10000) }
+func BenchmarkScan100000Base8(b *testing.B) { ScanHelper(b, 8, 10, 100000) }

-				for i := 0; i < b.N; i++ {
-					z.scan(bytes.NewReader(s), base, false)
-				}
-			})
-		}
+func BenchmarkScan10Base10(b *testing.B)     { ScanHelper(b, 10, 10, 10) }
+func BenchmarkScan100Base10(b *testing.B)    { ScanHelper(b, 10, 10, 100) }
+func BenchmarkScan1000Base10(b *testing.B)   { ScanHelper(b, 10, 10, 1000) }
+func BenchmarkScan10000Base10(b *testing.B)  { ScanHelper(b, 10, 10, 10000) }
+func BenchmarkScan100000Base10(b *testing.B) { ScanHelper(b, 10, 10, 100000) }
+
+func BenchmarkScan10Base16(b *testing.B)     { ScanHelper(b, 16, 10, 10) }
+func BenchmarkScan100Base16(b *testing.B)    { ScanHelper(b, 16, 10, 100) }
+func BenchmarkScan1000Base16(b *testing.B)   { ScanHelper(b, 16, 10, 1000) }
+func BenchmarkScan10000Base16(b *testing.B)  { ScanHelper(b, 16, 10, 10000) }
+func BenchmarkScan100000Base16(b *testing.B) { ScanHelper(b, 16, 10, 100000) }
+
+func ScanHelper(b *testing.B, base int, x, y Word) {
+	b.StopTimer()
+	var z nat
+	z = z.expWW(x, y)
+
+	s := z.utoa(base)
+	if t := itoa(z, base); !bytes.Equal(s, t) {
+		b.Fatalf("scanning: got %s; want %s", s, t)
+	}
+	b.StartTimer()
+
+	for i := 0; i < b.N; i++ {
+		z.scan(bytes.NewReader(s), base, false)
 	}
 }

-func BenchmarkString(b *testing.B) {
-	const x = 10
-	for _, base := range []int{2, 8, 10, 16} {
-		for _, y := range []Word{10, 100, 1000, 10000, 100000} {
-			b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) {
-				b.StopTimer()
-				var z nat
-				z = z.expWW(x, y)
-				z.utoa(base) // warm divisor cache
-				b.StartTimer()
+func BenchmarkString10Base2(b *testing.B)     { StringHelper(b, 2, 10, 10) }
+func BenchmarkString100Base2(b *testing.B)    { StringHelper(b, 2, 10, 100) }
+func BenchmarkString1000Base2(b *testing.B)   { StringHelper(b, 2, 10, 1000) }
+func BenchmarkString10000Base2(b *testing.B)  { StringHelper(b, 2, 10, 10000) }
+func BenchmarkString100000Base2(b *testing.B) { StringHelper(b, 2, 10, 100000) }

-				for i := 0; i < b.N; i++ {
-					_ = z.utoa(base)
-				}
-			})
-		}
+func BenchmarkString10Base8(b *testing.B)     { StringHelper(b, 8, 10, 10) }
+func BenchmarkString100Base8(b *testing.B)    { StringHelper(b, 8, 10, 100) }
+func BenchmarkString1000Base8(b *testing.B)   { StringHelper(b, 8, 10, 1000) }
+func BenchmarkString10000Base8(b *testing.B)  { StringHelper(b, 8, 10, 10000) }
+func BenchmarkString100000Base8(b *testing.B) { StringHelper(b, 8, 10, 100000) }
+
+func BenchmarkString10Base10(b *testing.B)     { StringHelper(b, 10, 10, 10) }
+func BenchmarkString100Base10(b *testing.B)    { StringHelper(b, 10, 10, 100) }
+func BenchmarkString1000Base10(b *testing.B)   { StringHelper(b, 10, 10, 1000) }
+func BenchmarkString10000Base10(b *testing.B)  { StringHelper(b, 10, 10, 10000) }
+func BenchmarkString100000Base10(b *testing.B) { StringHelper(b, 10, 10, 100000) }
+
+func BenchmarkString10Base16(b *testing.B)     { StringHelper(b, 16, 10, 10) }
+func BenchmarkString100Base16(b *testing.B)    { StringHelper(b, 16, 10, 100) }
+func BenchmarkString1000Base16(b *testing.B)   { StringHelper(b, 16, 10, 1000) }
+func BenchmarkString10000Base16(b *testing.B)  { StringHelper(b, 16, 10, 10000) }
+func BenchmarkString100000Base16(b *testing.B) { StringHelper(b, 16, 10, 100000) }
+
+func StringHelper(b *testing.B, base int, x, y Word) {
+	b.StopTimer()
+	var z nat
+	z = z.expWW(x, y)
+	z.utoa(base) // warm divisor cache
+	b.StartTimer()
+
+	for i := 0; i < b.N; i++ {
+		_ = z.utoa(base)
 	}
 }

-func BenchmarkLeafSize(b *testing.B) {
-	for n := 0; n <= 16; n++ {
-		b.Run(fmt.Sprint(n), func(b *testing.B) { LeafSizeHelper(b, 10, n) })
-	}
-	// Try some large lengths
-	for _, n := range []int{32, 64} {
-		b.Run(fmt.Sprint(n), func(b *testing.B) { LeafSizeHelper(b, 10, n) })
-	}
-}
+func BenchmarkLeafSize0(b *testing.B)  { LeafSizeHelper(b, 10, 0) } // test without splitting
+func BenchmarkLeafSize1(b *testing.B)  { LeafSizeHelper(b, 10, 1) }
+func BenchmarkLeafSize2(b *testing.B)  { LeafSizeHelper(b, 10, 2) }
+func BenchmarkLeafSize3(b *testing.B)  { LeafSizeHelper(b, 10, 3) }
+func BenchmarkLeafSize4(b *testing.B)  { LeafSizeHelper(b, 10, 4) }
+func BenchmarkLeafSize5(b *testing.B)  { LeafSizeHelper(b, 10, 5) }
+func BenchmarkLeafSize6(b *testing.B)  { LeafSizeHelper(b, 10, 6) }
+func BenchmarkLeafSize7(b *testing.B)  { LeafSizeHelper(b, 10, 7) }
+func BenchmarkLeafSize8(b *testing.B)  { LeafSizeHelper(b, 10, 8) }
+func BenchmarkLeafSize9(b *testing.B)  { LeafSizeHelper(b, 10, 9) }
+func BenchmarkLeafSize10(b *testing.B) { LeafSizeHelper(b, 10, 10) }
+func BenchmarkLeafSize11(b *testing.B) { LeafSizeHelper(b, 10, 11) }
+func BenchmarkLeafSize12(b *testing.B) { LeafSizeHelper(b, 10, 12) }
+func BenchmarkLeafSize13(b *testing.B) { LeafSizeHelper(b, 10, 13) }
+func BenchmarkLeafSize14(b *testing.B) { LeafSizeHelper(b, 10, 14) }
+func BenchmarkLeafSize15(b *testing.B) { LeafSizeHelper(b, 10, 15) }
+func BenchmarkLeafSize16(b *testing.B) { LeafSizeHelper(b, 10, 16) }
+func BenchmarkLeafSize32(b *testing.B) { LeafSizeHelper(b, 10, 32) } // try some large lengths
+func BenchmarkLeafSize64(b *testing.B) { LeafSizeHelper(b, 10, 64) }

 func LeafSizeHelper(b *testing.B, base, size int) {
 	b.StopTimer()
--- a/src/cmd/compile/internal/big/ratconv.go
+++ b/src/cmd/compile/internal/big/ratconv.go
@@ -88,12 +88,6 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
 		return nil, false
 	}

-	// special-case 0 (see also issue #16176)
-	if len(z.a.abs) == 0 {
-		return z, true
-	}
-	// len(z.a.abs) > 0
-
 	// correct exponent
 	if ecorr < 0 {
 		exp += int64(ecorr)
@@ -184,7 +178,7 @@ func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err err
 			}
 			break // i > 0
 		}
-		digits = append(digits, ch)
+		digits = append(digits, byte(ch))
 	}
 	// i > 0 => we have at least one digit

--- a/src/cmd/compile/internal/big/ratconv_test.go
+++ b/src/cmd/compile/internal/big/ratconv_test.go
@@ -48,7 +48,6 @@ var setStringTests = []StringTest{
 	{"53/70893980658822810696", "53/70893980658822810696", true},
 	{"106/141787961317645621392", "53/70893980658822810696", true},
 	{"204211327800791583.81095", "4084226556015831676219/20000", true},
-	{"0e9999999999", "0", true}, // issue #16176
 	{in: "1/0"},
 }

--- a/src/cmd/compile/internal/gc/bexport.go
+++ b/src/cmd/compile/internal/gc/bexport.go
@@ -197,9 +197,6 @@ type exporter struct {
 	written int // bytes written
 	indent  int // for p.trace
 	trace   bool
-
-	// work-around for issue #16369 only
-	nesting int // amount of "nesting" of interface types
 }

 // export writes the exportlist for localpkg to out and returns the number of bytes written.
@@ -793,39 +790,11 @@ func (p *exporter) typ(t *Type) {

 	case TINTER:
 		p.tag(interfaceTag)
+
 		// gc doesn't separate between embedded interfaces
 		// and methods declared explicitly with an interface
 		p.int(0) // no embedded interfaces
-
-		// Because the compiler flattens interfaces containing
-		// embedded interfaces, it is possible to create interface
-		// types that recur through an unnamed type.
-		// If trackAllTypes is disabled, such recursion is not
-		// detected, leading to a stack overflow during export
-		// (issue #16369).
-		// As a crude work-around we terminate deep recursion
-		// through interface types with an empty interface and
-		// report an error.
-		// This will catch endless recursion, but is unlikely
-		// to trigger for valid, deeply nested types given the
-		// high threshold.
-		// It would be ok to continue without reporting an error
-		// since the export format is valid. But a subsequent
-		// import would import an incorrect type. The textual
-		// exporter does not report an error but importing the
-		// resulting package will lead to a syntax error during
-		// import.
-		// TODO(gri) remove this once we have a permanent fix
-		// for the issue.
-		if p.nesting > 100 {
-			p.int(0) // 0 methods to indicate empty interface
-			yyerrorl(t.Lineno, "cannot export unnamed recursive interface")
-			break
-		}
-
-		p.nesting++
 		p.methodList(t)
-		p.nesting--

 	case TMAP:
 		p.tag(mapTag)
--- a/src/cmd/compile/internal/gc/builtin.go
+++ b/src/cmd/compile/internal/gc/builtin.go
@@ -95,15 +95,14 @@ const runtimeimport = "" +
 	"4div\x00\x03\n\x00\n\x00\x01\n\x00\t\x11uint64div\x00\x03\x14\x00\x14\x00\x01\x14\x00\t\x0fint64" +
 	"mod\x00\x03\n\x00\n\x00\x01\n\x00\t\x11uint64mod\x00\x03\x14\x00\x14\x00\x01\x14\x00\t\x1bfloat6" +
 	"4toint64\x00\x01\x1a\x00\x01\n\x00\t\x1dfloat64touint64\x00\x01\x1a\x00\x01\x14\x00\t" +
-	"\x1dfloat64touint32\x00\x01\x1a\x00\x01\x12\x00\t\x1bint64tofloat64\x00" +
-	"\x01\n\x00\x01\x1a\x00\t\x1duint64tofloat64\x00\x01\x14\x00\x01\x1a\x00\t\x1duint32to" +
-	"float64\x00\x01\x12\x00\x01\x1a\x00\t\x19complex128div\x00\x04\x1e\vnum·2\x00" +
-	"\x00\x1e\vden·3\x00\x00\x02\x1e\vquo·1\x00\x00\t\x19racefuncenter\x00\x01\x16" +
-	"d\x00\t\x17racefuncexit\x00\x00\x00\t\x0fraceread\x00\x01\x16d\x00\t\x11race" +
-	"write\x00\x01\x16d\x00\t\x19racereadrange\x00\x04\x16\raddr·1\x00d\x16\r" +
-	"size·2\x00d\x00\t\x1bracewriterange\x00\x04\x16\x98\x03\x00d\x16\x9a\x03\x00d\x00\t" +
-	"\x0fmsanread\x00\x04\x16\x98\x03\x00d\x16\x9a\x03\x00d\x00\t\x11msanwrite\x00\x04\x16\x98\x03\x00d" +
-	"\x16\x9a\x03\x00d\x00\v\xf8\x01\x02\v\x00\x01\x00\n$$\n"
+	"\x1bint64tofloat64\x00\x01\n\x00\x01\x1a\x00\t\x1duint64tofloat64\x00" +
+	"\x01\x14\x00\x01\x1a\x00\t\x19complex128div\x00\x04\x1e\vnum·2\x00\x00\x1e\vden·" +
+	"3\x00\x00\x02\x1e\vquo·1\x00\x00\t\x19racefuncenter\x00\x01\x16d\x00\t\x17race" +
+	"funcexit\x00\x00\x00\t\x0fraceread\x00\x01\x16d\x00\t\x11racewrite\x00\x01\x16" +
+	"d\x00\t\x19racereadrange\x00\x04\x16\raddr·1\x00d\x16\rsize·2\x00" +
+	"d\x00\t\x1bracewriterange\x00\x04\x16\x94\x03\x00d\x16\x96\x03\x00d\x00\t\x0fmsanrea" +
+	"d\x00\x04\x16\x94\x03\x00d\x16\x96\x03\x00d\x00\t\x11msanwrite\x00\x04\x16\x94\x03\x00d\x16\x96\x03\x00d\x00\v\xf4" +
+	"\x01\x02\v\x00\x01\x00\n$$\n"

 const unsafeimport = "" +
 	"cn\x00\x03v1\x01\vunsafe\x00\x05\r\rPointer\x00\x16\x00\t\x0fOffsetof\x00\x01" +
--- a/src/cmd/compile/internal/gc/builtin/runtime.go
+++ b/src/cmd/compile/internal/gc/builtin/runtime.go
@@ -150,10 +150,8 @@ func int64mod(int64, int64) int64
 func uint64mod(uint64, uint64) uint64
 func float64toint64(float64) int64
 func float64touint64(float64) uint64
-func float64touint32(float64) uint32
 func int64tofloat64(int64) float64
 func uint64tofloat64(uint64) float64
-func uint32tofloat64(uint32) float64

 func complex128div(num complex128, den complex128) (quo complex128)

--- a/src/cmd/compile/internal/gc/const.go
+++ b/src/cmd/compile/internal/gc/const.go
@@ -61,34 +61,6 @@ func (v Val) Ctype() Ctype {
 	}
 }

-func eqval(a, b Val) bool {
-	if a.Ctype() != b.Ctype() {
-		return false
-	}
-	switch x := a.U.(type) {
-	default:
-		Fatalf("unexpected Ctype for %T", a.U)
-		panic("not reached")
-	case *NilVal:
-		return true
-	case bool:
-		y := b.U.(bool)
-		return x == y
-	case *Mpint:
-		y := b.U.(*Mpint)
-		return x.Cmp(y) == 0
-	case *Mpflt:
-		y := b.U.(*Mpflt)
-		return x.Cmp(y) == 0
-	case *Mpcplx:
-		y := b.U.(*Mpcplx)
-		return x.Real.Cmp(&y.Real) == 0 && x.Imag.Cmp(&y.Imag) == 0
-	case string:
-		y := b.U.(string)
-		return x == y
-	}
-}
-
 type NilVal struct{}

 // IntLiteral returns the Node's literal value as an integer.
--- a/src/cmd/compile/internal/gc/constFold_test.go
+++ b/src/cmd/compile/internal/gc/constFold_test.go
--- a/src/cmd/compile/internal/gc/esc.go
+++ b/src/cmd/compile/internal/gc/esc.go
@@ -472,9 +472,29 @@ func escAnalyze(all []*Node, recursive bool) {

 	// visit the upstream of each dst, mark address nodes with
 	// addrescapes, mark parameters unsafe
+	escapes := make([]uint16, len(e.dsts))
+	for i, n := range e.dsts {
+		escapes[i] = n.Esc
+	}
 	for _, n := range e.dsts {
 		escflood(e, n)
 	}
+	for {
+		done := true
+		for i, n := range e.dsts {
+			if n.Esc != escapes[i] {
+				done = false
+				if Debug['m'] > 2 {
+					Warnl(n.Lineno, "Reflooding %v %S", e.curfnSym(n), n)
+				}
+				escapes[i] = n.Esc
+				escflood(e, n)
+			}
+		}
+		if done {
+			break
+		}
+	}

 	// for all top level functions, tag the typenodes corresponding to the param nodes
 	for _, n := range all {
@@ -1796,6 +1816,7 @@ func escwalkBody(e *EscState, level Level, dst *Node, src *Node, step *EscStep,
 	}

 	leaks = level.int() <= 0 && level.guaranteedDereference() <= 0 && dstE.Escloopdepth < modSrcLoopdepth
+	leaks = leaks || level.int() <= 0 && dst.Esc&EscMask == EscHeap

 	osrcesc = src.Esc
 	switch src.Op {
--- a/src/cmd/compile/internal/gc/float_test.go
+++ b/src/cmd/compile/internal/gc/float_test.go
@@ -74,27 +74,6 @@ func cvt8(a float32) int32 {
 	return int32(a)
 }

-// make sure to cover int, uint cases (issue #16738)
-//go:noinline
-func cvt9(a float64) int {
-	return int(a)
-}
-
-//go:noinline
-func cvt10(a float64) uint {
-	return uint(a)
-}
-
-//go:noinline
-func cvt11(a float32) int {
-	return int(a)
-}
-
-//go:noinline
-func cvt12(a float32) uint {
-	return uint(a)
-}
-
 func TestFloatConvert(t *testing.T) {
 	if got := cvt1(3.5); got != 3 {
 		t.Errorf("cvt1 got %d, wanted 3", got)
@@ -120,16 +99,4 @@ func TestFloatConvert(t *testing.T) {
 	if got := cvt8(3.5); got != 3 {
 		t.Errorf("cvt8 got %d, wanted 3", got)
 	}
-	if got := cvt9(3.5); got != 3 {
-		t.Errorf("cvt9 got %d, wanted 3", got)
-	}
-	if got := cvt10(3.5); got != 3 {
-		t.Errorf("cvt10 got %d, wanted 3", got)
-	}
-	if got := cvt11(3.5); got != 3 {
-		t.Errorf("cvt11 got %d, wanted 3", got)
-	}
-	if got := cvt12(3.5); got != 3 {
-		t.Errorf("cvt12 got %d, wanted 3", got)
-	}
 }
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -156,6 +156,8 @@ var Debug_typeassert int

 var localpkg *Pkg // package being compiled

+var autopkg *Pkg // fake package for allocating auto variables
+
 var importpkg *Pkg // package being imported

 var itabpkg *Pkg // fake pkg for itab entries
--- a/src/cmd/compile/internal/gc/inl.go
+++ b/src/cmd/compile/internal/gc/inl.go
@@ -766,9 +766,7 @@ func mkinlcall1(n *Node, fn *Node, isddd bool) *Node {
 		ninit.Append(as)
 	}

-	retlabel := autolabel(".i")
-	retlabel.Etype = 1 // flag 'safe' for escape analysis (no backjumps)
-
+	retlabel := newlabel_inl()
 	inlgen++

 	subst := inlsubst{
@@ -878,6 +876,15 @@ func argvar(t *Type, i int) *Node {
 	return n
 }

+var newlabel_inl_label int
+
+func newlabel_inl() *Node {
+	newlabel_inl_label++
+	n := newname(LookupN(".inlret", newlabel_inl_label))
+	n.Etype = 1 // flag 'safe' for escape analysis (no backjumps)
+	return n
+}
+
 // The inlsubst type implements the actual inlining of a single
 // function call.
 type inlsubst struct {
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -29,8 +29,6 @@ var (
 	goarch  string
 	goroot  string
 	buildid string
-
-	flag_newparser bool
 )

 var (
@@ -110,6 +108,8 @@ func Main() {

 	localpkg = mkpkg("")
 	localpkg.Prefix = "\"\""
+	autopkg = mkpkg("")
+	autopkg.Prefix = "\"\""

 	// pseudo-package, for scoping
 	builtinpkg = mkpkg("go.builtin")
@@ -184,7 +184,6 @@ func Main() {
 	obj.Flagcount("live", "debug liveness analysis", &debuglive)
 	obj.Flagcount("m", "print optimization decisions", &Debug['m'])
 	flag.BoolVar(&flag_msan, "msan", false, "build code compatible with C/C++ memory sanitizer")
-	flag.BoolVar(&flag_newparser, "newparser", false, "use new parser")
 	flag.BoolVar(&newexport, "newexport", true, "use new export format") // TODO(gri) remove eventually (issue 15323)
 	flag.BoolVar(&nolocalimports, "nolocalimports", false, "reject local (relative) imports")
 	flag.StringVar(&outfile, "o", "", "write output to `file`")
@@ -314,14 +313,25 @@ func Main() {
 		}

 		linehistpush(infile)
+
+		f, err := os.Open(infile)
+		if err != nil {
+			fmt.Printf("open %s: %v\n", infile, err)
+			errorexit()
+		}
+		bin := bufio.NewReader(f)
+
+		// Skip initial BOM if present.
+		if r, _, _ := bin.ReadRune(); r != BOM {
+			bin.UnreadRune()
+		}
+
 		block = 1
 		iota_ = -1000000
+
 		imported_unsafe = false
-		if flag_newparser {
-			parseFile(infile)
-		} else {
-			oldParseFile(infile)
-		}
+
+		parse_file(bin)
 		if nsyntaxerrors != 0 {
 			errorexit()
 		}
@@ -330,7 +340,9 @@ func Main() {
 		// for the line history to work, and which then has to be corrected elsewhere,
 		// just add a line here.
 		lexlineno++
+
 		linehistpop()
+		f.Close()
 	}

 	testdclstack()
--- a/src/cmd/compile/internal/gc/noder.go
+++ b/src/cmd/compile/internal/gc/noder.go
--- a/src/cmd/compile/internal/gc/parser.go
+++ b/src/cmd/compile/internal/gc/parser.go
@@ -15,7 +15,6 @@ package gc
 import (
 	"bufio"
 	"fmt"
-	"os"
 	"strconv"
 	"strings"
 )
@@ -27,20 +26,8 @@ func parse_import(bin *bufio.Reader, indent []byte) {
 	newparser(bin, indent).import_package()
 }

-// oldParseFile parses a single Go source file.
-func oldParseFile(infile string) {
-	f, err := os.Open(infile)
-	if err != nil {
-		fmt.Printf("open %s: %v\n", infile, err)
-		errorexit()
-	}
-	defer f.Close()
-	bin := bufio.NewReader(f)
-
-	// Skip initial BOM if present.
-	if r, _, _ := bin.ReadRune(); r != BOM {
-		bin.UnreadRune()
-	}
+// parse_file parses a single Go source file.
+func parse_file(bin *bufio.Reader) {
 	newparser(bin, nil).file()
 }

--- a/src/cmd/compile/internal/gc/sinit.go
+++ b/src/cmd/compile/internal/gc/sinit.go
@@ -866,7 +866,7 @@ func maplit(ctxt int, n *Node, var_ *Node, init *Nodes) {
 	nerr := nerrors

 	a := Nod(OMAKE, nil, nil)
-	a.List.Set2(typenod(n.Type), Nodintconst(int64(len(n.List.Slice()))))
+	a.List.Set1(typenod(n.Type))
 	litas(var_, a, init)

 	// count the initializers
--- a/src/cmd/compile/internal/gc/sizeof_test.go
+++ b/src/cmd/compile/internal/gc/sizeof_test.go
@@ -23,7 +23,7 @@ func TestSizeof(t *testing.T) {
 		_64bit uintptr     // size on 64bit platforms
 	}{
 		{Flow{}, 52, 88},
-		{Func{}, 96, 168},
+		{Func{}, 92, 160},
 		{Name{}, 52, 80},
 		{Node{}, 92, 144},
 		{Sym{}, 60, 112},
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -26,9 +26,6 @@ func initssa() *ssa.Config {
 	ssaExp.mustImplement = true
 	if ssaConfig == nil {
 		ssaConfig = ssa.NewConfig(Thearch.LinkArch.Name, &ssaExp, Ctxt, Debug['N'] == 0)
-		if Thearch.LinkArch.Name == "386" {
-			ssaConfig.Set387(Thearch.Use387)
-		}
 	}
 	return ssaConfig
 }
@@ -40,8 +37,8 @@ func shouldssa(fn *Node) bool {
 		if os.Getenv("SSATEST") == "" {
 			return false
 		}
-	case "amd64", "amd64p32", "arm", "386", "arm64":
 		// Generally available.
+	case "amd64":
 	}
 	if !ssaEnabled {
 		return false
@@ -1149,7 +1146,6 @@ var opToSSA = map[opAndType]ssa.Op{
 	opAndType{OEQ, TFUNC}:      ssa.OpEqPtr,
 	opAndType{OEQ, TMAP}:       ssa.OpEqPtr,
 	opAndType{OEQ, TCHAN}:      ssa.OpEqPtr,
-	opAndType{OEQ, TPTR32}:     ssa.OpEqPtr,
 	opAndType{OEQ, TPTR64}:     ssa.OpEqPtr,
 	opAndType{OEQ, TUINTPTR}:   ssa.OpEqPtr,
 	opAndType{OEQ, TUNSAFEPTR}: ssa.OpEqPtr,
@@ -1170,7 +1166,6 @@ var opToSSA = map[opAndType]ssa.Op{
 	opAndType{ONE, TFUNC}:      ssa.OpNeqPtr,
 	opAndType{ONE, TMAP}:       ssa.OpNeqPtr,
 	opAndType{ONE, TCHAN}:      ssa.OpNeqPtr,
-	opAndType{ONE, TPTR32}:     ssa.OpNeqPtr,
 	opAndType{ONE, TPTR64}:     ssa.OpNeqPtr,
 	opAndType{ONE, TUINTPTR}:   ssa.OpNeqPtr,
 	opAndType{ONE, TUNSAFEPTR}: ssa.OpNeqPtr,
@@ -1335,15 +1330,6 @@ var fpConvOpToSSA = map[twoTypes]twoOpsAndType{
 	twoTypes{TFLOAT32, TFLOAT64}: twoOpsAndType{ssa.OpCvt32Fto64F, ssa.OpCopy, TFLOAT64},
 }

-// this map is used only for 32-bit arch, and only includes the difference
-// on 32-bit arch, don't use int64<->float conversion for uint32
-var fpConvOpToSSA32 = map[twoTypes]twoOpsAndType{
-	twoTypes{TUINT32, TFLOAT32}: twoOpsAndType{ssa.OpCopy, ssa.OpCvt32Uto32F, TUINT32},
-	twoTypes{TUINT32, TFLOAT64}: twoOpsAndType{ssa.OpCopy, ssa.OpCvt32Uto64F, TUINT32},
-	twoTypes{TFLOAT32, TUINT32}: twoOpsAndType{ssa.OpCvt32Fto32U, ssa.OpCopy, TUINT32},
-	twoTypes{TFLOAT64, TUINT32}: twoOpsAndType{ssa.OpCvt64Fto32U, ssa.OpCopy, TUINT32},
-}
-
 var shiftOpToSSA = map[opAndTwoTypes]ssa.Op{
 	opAndTwoTypes{OLSH, TINT8, TUINT8}:   ssa.OpLsh8x8,
 	opAndTwoTypes{OLSH, TUINT8, TUINT8}:  ssa.OpLsh8x8,
@@ -1660,11 +1646,6 @@ func (s *state) expr(n *Node) *ssa.Value {

 		if ft.IsFloat() || tt.IsFloat() {
 			conv, ok := fpConvOpToSSA[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]
-			if s.config.IntSize == 4 && Thearch.LinkArch.Name != "amd64p32" {
-				if conv1, ok1 := fpConvOpToSSA32[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]; ok1 {
-					conv = conv1
-				}
-			}
 			if !ok {
 				s.Fatalf("weird float conversion %s -> %s", ft, tt)
 			}
@@ -1973,7 +1954,7 @@ func (s *state) expr(n *Node) *ssa.Value {
 		case n.Left.Type.IsString():
 			a := s.expr(n.Left)
 			i := s.expr(n.Right)
-			i = s.extendIndex(i, Panicindex)
+			i = s.extendIndex(i)
 			if !n.Bounded {
 				len := s.newValue1(ssa.OpStringLen, Types[TINT], a)
 				s.boundsCheck(i, len)
@@ -2062,13 +2043,13 @@ func (s *state) expr(n *Node) *ssa.Value {
 		var i, j, k *ssa.Value
 		low, high, max := n.SliceBounds()
 		if low != nil {
-			i = s.extendIndex(s.expr(low), panicslice)
+			i = s.extendIndex(s.expr(low))
 		}
 		if high != nil {
-			j = s.extendIndex(s.expr(high), panicslice)
+			j = s.extendIndex(s.expr(high))
 		}
 		if max != nil {
-			k = s.extendIndex(s.expr(max), panicslice)
+			k = s.extendIndex(s.expr(max))
 		}
 		p, l, c := s.slice(n.Left.Type, v, i, j, k)
 		return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c)
@@ -2078,10 +2059,10 @@ func (s *state) expr(n *Node) *ssa.Value {
 		var i, j *ssa.Value
 		low, high, _ := n.SliceBounds()
 		if low != nil {
-			i = s.extendIndex(s.expr(low), panicslice)
+			i = s.extendIndex(s.expr(low))
 		}
 		if high != nil {
-			j = s.extendIndex(s.expr(high), panicslice)
+			j = s.extendIndex(s.expr(high))
 		}
 		p, l, _ := s.slice(n.Left.Type, v, i, j, nil)
 		return s.newValue2(ssa.OpStringMake, n.Type, p, l)
@@ -2265,7 +2246,7 @@ func (s *state) append(n *Node, inplace bool) *ssa.Value {
 			if haspointers(et) {
 				s.insertWBmove(et, addr, arg.v, n.Lineno, arg.isVolatile)
 			} else {
-				s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(et), addr, arg.v, s.mem())
+				s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, et.Size(), addr, arg.v, s.mem())
 			}
 		}
 	}
@@ -2398,14 +2379,14 @@ func (s *state) assign(left *Node, right *ssa.Value, wb, deref bool, line int32,
 	if deref {
 		// Treat as a mem->mem move.
 		if right == nil {
-			s.vars[&memVar] = s.newValue2I(ssa.OpZero, ssa.TypeMem, SizeAlignAuxInt(t), addr, s.mem())
+			s.vars[&memVar] = s.newValue2I(ssa.OpZero, ssa.TypeMem, t.Size(), addr, s.mem())
 			return
 		}
 		if wb {
 			s.insertWBmove(t, addr, right, line, rightIsVolatile)
 			return
 		}
-		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(t), addr, right, s.mem())
+		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, t.Size(), addr, right, s.mem())
 		return
 	}
 	// Treat as a store.
@@ -2604,7 +2585,7 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
 			s.nilCheck(itab)
 		}
 		itabidx := fn.Xoffset + 3*int64(Widthptr) + 8 // offset of fun field in runtime.itab
-		itab = s.newValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), itabidx, itab)
+		itab = s.newValue1I(ssa.OpOffPtr, Types[TUINTPTR], itabidx, itab)
 		if k == callNormal {
 			codeptr = s.newValue2(ssa.OpLoad, Types[TUINTPTR], itab, s.mem())
 		} else {
@@ -2627,18 +2608,16 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
 		if k != callNormal {
 			argStart += int64(2 * Widthptr)
 		}
-		addr := s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), argStart, s.sp)
+		addr := s.entryNewValue1I(ssa.OpOffPtr, Types[TUINTPTR], argStart, s.sp)
 		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, int64(Widthptr), addr, rcvr, s.mem())
 	}

 	// Defer/go args
 	if k != callNormal {
 		// Write argsize and closure (args to Newproc/Deferproc).
-		argStart := Ctxt.FixedFrameSize()
 		argsize := s.constInt32(Types[TUINT32], int32(stksize))
-		addr := s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINT32]), argStart, s.sp)
-		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, 4, addr, argsize, s.mem())
-		addr = s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), argStart+int64(Widthptr), s.sp)
+		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, 4, s.sp, argsize, s.mem())
+		addr := s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), int64(Widthptr), s.sp)
 		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, int64(Widthptr), addr, closure, s.mem())
 		stksize += 2 * int64(Widthptr)
 	}
@@ -2783,7 +2762,7 @@ func (s *state) addr(n *Node, bounded bool) (*ssa.Value, bool) {
 		if n.Left.Type.IsSlice() {
 			a := s.expr(n.Left)
 			i := s.expr(n.Right)
-			i = s.extendIndex(i, Panicindex)
+			i = s.extendIndex(i)
 			len := s.newValue1(ssa.OpSliceLen, Types[TINT], a)
 			if !n.Bounded {
 				s.boundsCheck(i, len)
@@ -2793,7 +2772,7 @@ func (s *state) addr(n *Node, bounded bool) (*ssa.Value, bool) {
 		} else { // array
 			a, isVolatile := s.addr(n.Left, bounded)
 			i := s.expr(n.Right)
-			i = s.extendIndex(i, Panicindex)
+			i = s.extendIndex(i)
 			len := s.constInt(Types[TINT], n.Left.Type.NumElem())
 			if !n.Bounded {
 				s.boundsCheck(i, len)
@@ -2934,11 +2913,12 @@ func (s *state) nilCheck(ptr *ssa.Value) {

 // boundsCheck generates bounds checking code. Checks if 0 <= idx < len, branches to exit if not.
 // Starts a new block on return.
-// idx is already converted to full int width.
 func (s *state) boundsCheck(idx, len *ssa.Value) {
 	if Debug['B'] != 0 {
 		return
 	}
+	// TODO: convert index to full width?
+	// TODO: if index is 64-bit and we're compiling to 32-bit, check that high 32 bits are zero.

 	// bounds check
 	cmp := s.newValue2(ssa.OpIsInBounds, Types[TBOOL], idx, len)
@@ -2947,18 +2927,19 @@ func (s *state) boundsCheck(idx, len *ssa.Value) {

 // sliceBoundsCheck generates slice bounds checking code. Checks if 0 <= idx <= len, branches to exit if not.
 // Starts a new block on return.
-// idx and len are already converted to full int width.
 func (s *state) sliceBoundsCheck(idx, len *ssa.Value) {
 	if Debug['B'] != 0 {
 		return
 	}
+	// TODO: convert index to full width?
+	// TODO: if index is 64-bit and we're compiling to 32-bit, check that high 32 bits are zero.

 	// bounds check
 	cmp := s.newValue2(ssa.OpIsSliceInBounds, Types[TBOOL], idx, len)
 	s.check(cmp, panicslice)
 }

-// If cmp (a bool) is false, panic using the given function.
+// If cmp (a bool) is true, panic using the given function.
 func (s *state) check(cmp *ssa.Value, fn *Node) {
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
@@ -2988,23 +2969,19 @@ func (s *state) check(cmp *ssa.Value, fn *Node) {
 // is started to load the return values.
 func (s *state) rtcall(fn *Node, returns bool, results []*Type, args ...*ssa.Value) []*ssa.Value {
 	// Write args to the stack
-	off := Ctxt.FixedFrameSize()
+	var off int64 // TODO: arch-dependent starting offset?
 	for _, arg := range args {
 		t := arg.Type
 		off = Rnd(off, t.Alignment())
 		ptr := s.sp
 		if off != 0 {
-			ptr = s.newValue1I(ssa.OpOffPtr, t.PtrTo(), off, s.sp)
+			ptr = s.newValue1I(ssa.OpOffPtr, Types[TUINTPTR], off, s.sp)
 		}
 		size := t.Size()
 		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, size, ptr, arg, s.mem())
 		off += size
 	}
 	off = Rnd(off, int64(Widthptr))
-	if Thearch.LinkArch.Name == "amd64p32" {
-		// amd64p32 wants 8-byte alignment of the start of the return values.
-		off = Rnd(off, 8)
-	}

 	// Issue call
 	call := s.newValue1A(ssa.OpStaticCall, ssa.TypeMem, fn.Sym, s.mem())
@@ -3015,7 +2992,7 @@ func (s *state) rtcall(fn *Node, returns bool, results []*Type, args ...*ssa.Val
 	if !returns {
 		b.Kind = ssa.BlockExit
 		b.SetControl(call)
-		call.AuxInt = off - Ctxt.FixedFrameSize()
+		call.AuxInt = off
 		if len(results) > 0 {
 			Fatalf("panic call can't have results")
 		}
@@ -3038,7 +3015,7 @@ func (s *state) rtcall(fn *Node, returns bool, results []*Type, args ...*ssa.Val
 		off = Rnd(off, t.Alignment())
 		ptr := s.sp
 		if off != 0 {
-			ptr = s.newValue1I(ssa.OpOffPtr, Ptrto(t), off, s.sp)
+			ptr = s.newValue1I(ssa.OpOffPtr, Types[TUINTPTR], off, s.sp)
 		}
 		res[i] = s.newValue2(ssa.OpLoad, t, ptr, s.mem())
 		off += t.Size()
@@ -3072,9 +3049,10 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, line int32, rightI

 	aux := &ssa.ExternSymbol{Typ: Types[TBOOL], Sym: syslook("writeBarrier").Sym}
 	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TUINT32]), aux, s.sb)
-	// Load word, test word, avoiding partial register write from load byte.
+	// TODO: select the .enabled field. It is currently first, so not needed for now.
+	// Load word, test byte, avoiding partial register write from load byte.
 	flag := s.newValue2(ssa.OpLoad, Types[TUINT32], flagaddr, s.mem())
-	flag = s.newValue2(ssa.OpNeq32, Types[TBOOL], flag, s.constInt32(Types[TUINT32], 0))
+	flag = s.newValue1(ssa.OpTrunc64to8, Types[TBOOL], flag)
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
 	b.Likely = ssa.BranchUnlikely
@@ -3095,7 +3073,7 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, line int32, rightI
 		tmp := temp(t)
 		s.vars[&memVar] = s.newValue1A(ssa.OpVarDef, ssa.TypeMem, tmp, s.mem())
 		tmpaddr, _ := s.addr(tmp, true)
-		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(t), tmpaddr, right, s.mem())
+		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, t.Size(), tmpaddr, right, s.mem())
 		// Issue typedmemmove call.
 		taddr := s.newValue1A(ssa.OpAddr, Types[TUINTPTR], &ssa.ExternSymbol{Typ: Types[TUINTPTR], Sym: typenamesym(t)}, s.sb)
 		s.rtcall(typedmemmove, true, nil, taddr, left, tmpaddr)
@@ -3105,7 +3083,7 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, line int32, rightI
 	s.endBlock().AddEdgeTo(bEnd)

 	s.startBlock(bElse)
-	s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(t), left, right, s.mem())
+	s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, t.Size(), left, right, s.mem())
 	s.endBlock().AddEdgeTo(bEnd)

 	s.startBlock(bEnd)
@@ -3139,9 +3117,10 @@ func (s *state) insertWBstore(t *Type, left, right *ssa.Value, line int32, skip

 	aux := &ssa.ExternSymbol{Typ: Types[TBOOL], Sym: syslook("writeBarrier").Sym}
 	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TUINT32]), aux, s.sb)
-	// Load word, test word, avoiding partial register write from load byte.
+	// TODO: select the .enabled field. It is currently first, so not needed for now.
+	// Load word, test byte, avoiding partial register write from load byte.
 	flag := s.newValue2(ssa.OpLoad, Types[TUINT32], flagaddr, s.mem())
-	flag = s.newValue2(ssa.OpNeq32, Types[TBOOL], flag, s.constInt32(Types[TUINT32], 0))
+	flag = s.newValue1(ssa.OpTrunc64to8, Types[TBOOL], flag)
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
 	b.Likely = ssa.BranchUnlikely
@@ -3951,11 +3930,6 @@ type SSAGenState struct {

 	// bstart remembers where each block starts (indexed by block ID)
 	bstart []*obj.Prog
-
-	// 387 port: maps from SSE registers (REG_X?) to 387 registers (REG_F?)
-	SSEto387 map[int16]int16
-	// Some architectures require a 64-bit temporary for FP-related register shuffling. Examples include x86-387, PPC, and Sparc V8.
-	ScratchFpMem *Node
 }

 // Pc returns the current Prog.
@@ -3992,13 +3966,6 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
 		blockProgs[Pc] = f.Blocks[0]
 	}

-	if Thearch.Use387 {
-		s.SSEto387 = map[int16]int16{}
-	}
-	if f.Config.NeedsFpScratch {
-		s.ScratchFpMem = temp(Types[TUINT64])
-	}
-
 	// Emit basic blocks
 	for i, b := range f.Blocks {
 		s.bstart[b.ID] = Pc
@@ -4161,25 +4128,12 @@ func SSAGenFPJump(s *SSAGenState, b, next *ssa.Block, jumps *[2][2]FloatingEQNEJ
 	}
 }

-func AuxOffset(v *ssa.Value) (offset int64) {
-	if v.Aux == nil {
-		return 0
-	}
-	switch sym := v.Aux.(type) {
-
-	case *ssa.AutoSymbol:
-		n := sym.Node.(*Node)
-		return n.Xoffset
-	}
-	return 0
-}
-
 // AddAux adds the offset in the aux fields (AuxInt and Aux) of v to a.
 func AddAux(a *obj.Addr, v *ssa.Value) {
 	AddAux2(a, v, v.AuxInt)
 }
 func AddAux2(a *obj.Addr, v *ssa.Value, offset int64) {
-	if a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR {
+	if a.Type != obj.TYPE_MEM {
 		v.Fatalf("bad AddAux addr %v", a)
 	}
 	// add integer offset
@@ -4217,27 +4171,17 @@ func AddAux2(a *obj.Addr, v *ssa.Value, offset int64) {
 	}
 }

-// SizeAlignAuxInt returns an AuxInt encoding the size and alignment of type t.
-func SizeAlignAuxInt(t *Type) int64 {
-	return ssa.MakeSizeAndAlign(t.Size(), t.Alignment()).Int64()
-}
-
 // extendIndex extends v to a full int width.
-// panic using the given function if v does not fit in an int (only on 32-bit archs).
-func (s *state) extendIndex(v *ssa.Value, panicfn *Node) *ssa.Value {
+func (s *state) extendIndex(v *ssa.Value) *ssa.Value {
 	size := v.Type.Size()
 	if size == s.config.IntSize {
 		return v
 	}
 	if size > s.config.IntSize {
-		// truncate 64-bit indexes on 32-bit pointer archs. Test the
-		// high word and branch to out-of-bounds failure if it is not 0.
-		if Debug['B'] == 0 {
-			hi := s.newValue1(ssa.OpInt64Hi, Types[TUINT32], v)
-			cmp := s.newValue2(ssa.OpEq32, Types[TBOOL], hi, s.constInt32(Types[TUINT32], 0))
-			s.check(cmp, panicfn)
-		}
-		return s.newValue1(ssa.OpTrunc64to32, Types[TINT], v)
+		// TODO: truncate 64-bit indexes on 32-bit pointer archs. We'd need to test
+		// the high word and branch to out-of-bounds failure if it is not 0.
+		s.Unimplementedf("64->32 index truncation not implemented")
+		return v
 	}

 	// Extend value to the required size
@@ -4276,74 +4220,17 @@ func (s *state) extendIndex(v *ssa.Value, panicfn *Node) *ssa.Value {
 	return s.newValue1(op, Types[TINT], v)
 }

-// SSAReg returns the register to which v has been allocated.
-func SSAReg(v *ssa.Value) *ssa.Register {
+// SSARegNum returns the register (in cmd/internal/obj numbering) to
+// which v has been allocated. Panics if v is not assigned to a
+// register.
+// TODO: Make this panic again once it stops happening routinely.
+func SSARegNum(v *ssa.Value) int16 {
 	reg := v.Block.Func.RegAlloc[v.ID]
 	if reg == nil {
-		v.Fatalf("nil register for value: %s\n%s\n", v.LongString(), v.Block.Func)
-	}
-	return reg.(*ssa.Register)
-}
-
-// SSAReg0 returns the register to which the first output of v has been allocated.
-func SSAReg0(v *ssa.Value) *ssa.Register {
-	reg := v.Block.Func.RegAlloc[v.ID].(ssa.LocPair)[0]
-	if reg == nil {
-		v.Fatalf("nil first register for value: %s\n%s\n", v.LongString(), v.Block.Func)
-	}
-	return reg.(*ssa.Register)
-}
-
-// SSAReg1 returns the register to which the second output of v has been allocated.
-func SSAReg1(v *ssa.Value) *ssa.Register {
-	reg := v.Block.Func.RegAlloc[v.ID].(ssa.LocPair)[1]
-	if reg == nil {
-		v.Fatalf("nil second register for value: %s\n%s\n", v.LongString(), v.Block.Func)
-	}
-	return reg.(*ssa.Register)
-}
-
-// SSARegNum returns the register number (in cmd/internal/obj numbering) to which v has been allocated.
-func SSARegNum(v *ssa.Value) int16 {
-	return Thearch.SSARegToReg[SSAReg(v).Num]
-}
-
-// SSARegNum0 returns the register number (in cmd/internal/obj numbering) to which the first output of v has been allocated.
-func SSARegNum0(v *ssa.Value) int16 {
-	return Thearch.SSARegToReg[SSAReg0(v).Num]
-}
-
-// SSARegNum1 returns the register number (in cmd/internal/obj numbering) to which the second output of v has been allocated.
-func SSARegNum1(v *ssa.Value) int16 {
-	return Thearch.SSARegToReg[SSAReg1(v).Num]
-}
-
-// CheckLoweredPhi checks that regalloc and stackalloc correctly handled phi values.
-// Called during ssaGenValue.
-func CheckLoweredPhi(v *ssa.Value) {
-	if v.Op != ssa.OpPhi {
-		v.Fatalf("CheckLoweredPhi called with non-phi value: %v", v.LongString())
-	}
-	if v.Type.IsMemory() {
-		return
-	}
-	f := v.Block.Func
-	loc := f.RegAlloc[v.ID]
-	for _, a := range v.Args {
-		if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead?
-			v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
-		}
-	}
-}
-
-// CheckLoweredGetClosurePtr checks that v is the first instruction in the function's entry block.
-// The output of LoweredGetClosurePtr is generally hardwired to the correct register.
-// That register contains the closure pointer on closure entry.
-func CheckLoweredGetClosurePtr(v *ssa.Value) {
-	entry := v.Block.Func.Entry
-	if entry != v.Block || entry.Values[0] != v {
-		Fatalf("in %s, badly placed LoweredGetClosurePtr: %v %v", v.Block.Func.Name, v.Block, v)
+		v.Unimplementedf("nil regnum for value: %s\n%s\n", v.LongString(), v.Block.Func)
+		return 0
 	}
+	return Thearch.SSARegToReg[reg.(*ssa.Register).Num]
 }

 // AutoVar returns a *Node and int64 representing the auto variable and offset within it
@@ -4485,25 +4372,6 @@ func (e *ssaExport) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSl
 	return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + s}
 }

-func (e *ssaExport) SplitInt64(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
-	n := name.N.(*Node)
-	var t *Type
-	if name.Type.IsSigned() {
-		t = Types[TINT32]
-	} else {
-		t = Types[TUINT32]
-	}
-	if n.Class == PAUTO && !n.Addrtaken {
-		// Split this int64 up into two separate variables.
-		h := e.namedAuto(n.Sym.Name+".hi", t)
-		l := e.namedAuto(n.Sym.Name+".lo", Types[TUINT32])
-		return ssa.LocalSlot{N: h, Type: t, Off: 0}, ssa.LocalSlot{N: l, Type: Types[TUINT32], Off: 0}
-	}
-	// Return the two parts of the larger variable.
-	// Assuming little endian (we don't support big endian 32-bit architecture yet)
-	return ssa.LocalSlot{N: n, Type: t, Off: name.Off + 4}, ssa.LocalSlot{N: n, Type: Types[TUINT32], Off: name.Off}
-}
-
 func (e *ssaExport) SplitStruct(name ssa.LocalSlot, i int) ssa.LocalSlot {
 	n := name.N.(*Node)
 	st := name.Type
@@ -4521,7 +4389,7 @@ func (e *ssaExport) SplitStruct(name ssa.LocalSlot, i int) ssa.LocalSlot {
 // namedAuto returns a new AUTO variable with the given name and type.
 func (e *ssaExport) namedAuto(name string, typ ssa.Type) ssa.GCNode {
 	t := typ.(*Type)
-	s := &Sym{Name: name, Pkg: localpkg}
+	s := &Sym{Name: name, Pkg: autopkg}
 	n := Nod(ONAME, nil, nil)
 	s.Def = n
 	s.Def.Used = true
--- a/src/cmd/compile/internal/gc/subr.go
+++ b/src/cmd/compile/internal/gc/subr.go
@@ -246,25 +246,6 @@ func LookupN(prefix string, n int) *Sym {
 	return LookupBytes(b)
 }

-// autolabel generates a new Name node for use with
-// an automatically generated label.
-// prefix is a short mnemonic (e.g. ".s" for switch)
-// to help with debugging.
-// It should begin with "." to avoid conflicts with
-// user labels.
-func autolabel(prefix string) *Node {
-	if prefix[0] != '.' {
-		Fatalf("autolabel prefix must start with '.', have %q", prefix)
-	}
-	fn := Curfn
-	if Curfn == nil {
-		Fatalf("autolabel outside function")
-	}
-	n := fn.Func.Label
-	fn.Func.Label++
-	return newname(LookupN(prefix, int(n)))
-}
-
 var initSyms []*Sym

 var nopkg = &Pkg{
@@ -2317,16 +2298,6 @@ func isdirectiface(t *Type) bool {
 	return false
 }

-// itabType loads the _type field from a runtime.itab struct.
-func itabType(itab *Node) *Node {
-	typ := NodSym(ODOTPTR, itab, nil)
-	typ.Type = Ptrto(Types[TUINT8])
-	typ.Typecheck = 1
-	typ.Xoffset = int64(Widthptr) // offset of _type in runtime.itab
-	typ.Bounded = true            // guaranteed not to fault
-	return typ
-}
-
 // iet returns 'T' if t is a concrete type,
 // 'I' if t is an interface type, and 'E' if t is an empty interface type.
 // It is used to build calls to the conv* and assert* runtime routines.
--- a/src/cmd/compile/internal/gc/swt.go
+++ b/src/cmd/compile/internal/gc/swt.go
@@ -4,7 +4,10 @@

 package gc

-import "sort"
+import (
+	"sort"
+	"strconv"
+)

 const (
 	// expression switch
@@ -358,7 +361,7 @@ func casebody(sw *Node, typeswvar *Node) {
 		n.Op = OCASE
 		needvar := n.List.Len() != 1 || n.List.First().Op == OLITERAL

-		jmp := Nod(OGOTO, autolabel(".s"), nil)
+		jmp := Nod(OGOTO, newCaseLabel(), nil)
 		if n.List.Len() == 0 {
 			if def != nil {
 				Yyerror("more than one default case")
@@ -421,6 +424,16 @@ func casebody(sw *Node, typeswvar *Node) {
 	lineno = lno
 }

+// nSwitchLabel is the number of switch labels generated.
+// This should be per-function, but it is a global counter for now.
+var nSwitchLabel int
+
+func newCaseLabel() *Node {
+	label := strconv.Itoa(nSwitchLabel)
+	nSwitchLabel++
+	return newname(Lookup(label))
+}
+
 // caseClauses generates a slice of caseClauses
 // corresponding to the clauses in the switch statement sw.
 // Kind is the kind of switch statement.
@@ -577,7 +590,7 @@ func (s *typeSwitch) walk(sw *Node) {
 		i.Nbody.Set1(typenil)
 	} else {
 		// Jump to default case.
-		lbl := autolabel(".s")
+		lbl := newCaseLabel()
 		i.Nbody.Set1(Nod(OGOTO, lbl, nil))
 		// Wrap default case with label.
 		blk := Nod(OBLOCK, nil, nil)
@@ -589,7 +602,11 @@ func (s *typeSwitch) walk(sw *Node) {

 	if !cond.Right.Type.IsEmptyInterface() {
 		// Load type from itab.
-		typ = itabType(typ)
+		typ = NodSym(ODOTPTR, typ, nil)
+		typ.Type = Ptrto(Types[TUINT8])
+		typ.Typecheck = 1
+		typ.Xoffset = int64(Widthptr) // offset of _type in runtime.itab
+		typ.Bounded = true            // guaranteed not to fault
 	}
 	// Load hash from type.
 	h := NodSym(ODOTPTR, typ, nil)
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -290,8 +290,6 @@ type Func struct {
 	InlCost int32
 	Depth   int32

-	Label int32 // largest auto-generated label in this function
-
 	Endlineno int32
 	WBLineno  int32 // line number of first write barrier

@@ -545,11 +543,6 @@ func (n *Nodes) Set1(node *Node) {
 	n.slice = &[]*Node{node}
 }

-// Set2 sets n to a slice containing two nodes.
-func (n *Nodes) Set2(n1, n2 *Node) {
-	n.slice = &[]*Node{n1, n2}
-}
-
 // MoveNodes sets n to the contents of n2, then clears n2.
 func (n *Nodes) MoveNodes(n2 *Nodes) {
 	n.slice = n2.slice
--- a/src/cmd/compile/internal/gc/testdata/arith_ssa.go
+++ b/src/cmd/compile/internal/gc/testdata/arith_ssa.go
@@ -553,445 +553,6 @@ func testOrPhi() {
 	}
 }

-//go:noinline
-func addshiftLL_ssa(a, b uint32) uint32 {
-	return a + b<<3
-}
-
-//go:noinline
-func subshiftLL_ssa(a, b uint32) uint32 {
-	return a - b<<3
-}
-
-//go:noinline
-func rsbshiftLL_ssa(a, b uint32) uint32 {
-	return a<<3 - b
-}
-
-//go:noinline
-func andshiftLL_ssa(a, b uint32) uint32 {
-	return a & (b << 3)
-}
-
-//go:noinline
-func orshiftLL_ssa(a, b uint32) uint32 {
-	return a | b<<3
-}
-
-//go:noinline
-func xorshiftLL_ssa(a, b uint32) uint32 {
-	return a ^ b<<3
-}
-
-//go:noinline
-func bicshiftLL_ssa(a, b uint32) uint32 {
-	return a &^ (b << 3)
-}
-
-//go:noinline
-func notshiftLL_ssa(a uint32) uint32 {
-	return ^(a << 3)
-}
-
-//go:noinline
-func addshiftRL_ssa(a, b uint32) uint32 {
-	return a + b>>3
-}
-
-//go:noinline
-func subshiftRL_ssa(a, b uint32) uint32 {
-	return a - b>>3
-}
-
-//go:noinline
-func rsbshiftRL_ssa(a, b uint32) uint32 {
-	return a>>3 - b
-}
-
-//go:noinline
-func andshiftRL_ssa(a, b uint32) uint32 {
-	return a & (b >> 3)
-}
-
-//go:noinline
-func orshiftRL_ssa(a, b uint32) uint32 {
-	return a | b>>3
-}
-
-//go:noinline
-func xorshiftRL_ssa(a, b uint32) uint32 {
-	return a ^ b>>3
-}
-
-//go:noinline
-func bicshiftRL_ssa(a, b uint32) uint32 {
-	return a &^ (b >> 3)
-}
-
-//go:noinline
-func notshiftRL_ssa(a uint32) uint32 {
-	return ^(a >> 3)
-}
-
-//go:noinline
-func addshiftRA_ssa(a, b int32) int32 {
-	return a + b>>3
-}
-
-//go:noinline
-func subshiftRA_ssa(a, b int32) int32 {
-	return a - b>>3
-}
-
-//go:noinline
-func rsbshiftRA_ssa(a, b int32) int32 {
-	return a>>3 - b
-}
-
-//go:noinline
-func andshiftRA_ssa(a, b int32) int32 {
-	return a & (b >> 3)
-}
-
-//go:noinline
-func orshiftRA_ssa(a, b int32) int32 {
-	return a | b>>3
-}
-
-//go:noinline
-func xorshiftRA_ssa(a, b int32) int32 {
-	return a ^ b>>3
-}
-
-//go:noinline
-func bicshiftRA_ssa(a, b int32) int32 {
-	return a &^ (b >> 3)
-}
-
-//go:noinline
-func notshiftRA_ssa(a int32) int32 {
-	return ^(a >> 3)
-}
-
-//go:noinline
-func addshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a + b<<s
-}
-
-//go:noinline
-func subshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a - b<<s
-}
-
-//go:noinline
-func rsbshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a<<s - b
-}
-
-//go:noinline
-func andshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a & (b << s)
-}
-
-//go:noinline
-func orshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a | b<<s
-}
-
-//go:noinline
-func xorshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a ^ b<<s
-}
-
-//go:noinline
-func bicshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a &^ (b << s)
-}
-
-//go:noinline
-func notshiftLLreg_ssa(a uint32, s uint8) uint32 {
-	return ^(a << s)
-}
-
-//go:noinline
-func addshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a + b>>s
-}
-
-//go:noinline
-func subshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a - b>>s
-}
-
-//go:noinline
-func rsbshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a>>s - b
-}
-
-//go:noinline
-func andshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a & (b >> s)
-}
-
-//go:noinline
-func orshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a | b>>s
-}
-
-//go:noinline
-func xorshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a ^ b>>s
-}
-
-//go:noinline
-func bicshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a &^ (b >> s)
-}
-
-//go:noinline
-func notshiftRLreg_ssa(a uint32, s uint8) uint32 {
-	return ^(a >> s)
-}
-
-//go:noinline
-func addshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a + b>>s
-}
-
-//go:noinline
-func subshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a - b>>s
-}
-
-//go:noinline
-func rsbshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a>>s - b
-}
-
-//go:noinline
-func andshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a & (b >> s)
-}
-
-//go:noinline
-func orshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a | b>>s
-}
-
-//go:noinline
-func xorshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a ^ b>>s
-}
-
-//go:noinline
-func bicshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a &^ (b >> s)
-}
-
-//go:noinline
-func notshiftRAreg_ssa(a int32, s uint8) int32 {
-	return ^(a >> s)
-}
-
-// test ARM shifted ops
-func testShiftedOps() {
-	a, b := uint32(10), uint32(42)
-	if want, got := a+b<<3, addshiftLL_ssa(a, b); got != want {
-		println("addshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b<<3, subshiftLL_ssa(a, b); got != want {
-		println("subshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a<<3-b, rsbshiftLL_ssa(a, b); got != want {
-		println("rsbshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b<<3), andshiftLL_ssa(a, b); got != want {
-		println("andshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b<<3, orshiftLL_ssa(a, b); got != want {
-		println("orshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b<<3, xorshiftLL_ssa(a, b); got != want {
-		println("xorshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b<<3), bicshiftLL_ssa(a, b); got != want {
-		println("bicshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a << 3), notshiftLL_ssa(a); got != want {
-		println("notshiftLL_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a+b>>3, addshiftRL_ssa(a, b); got != want {
-		println("addshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b>>3, subshiftRL_ssa(a, b); got != want {
-		println("subshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a>>3-b, rsbshiftRL_ssa(a, b); got != want {
-		println("rsbshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b>>3), andshiftRL_ssa(a, b); got != want {
-		println("andshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b>>3, orshiftRL_ssa(a, b); got != want {
-		println("orshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b>>3, xorshiftRL_ssa(a, b); got != want {
-		println("xorshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b>>3), bicshiftRL_ssa(a, b); got != want {
-		println("bicshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a >> 3), notshiftRL_ssa(a); got != want {
-		println("notshiftRL_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	c, d := int32(10), int32(-42)
-	if want, got := c+d>>3, addshiftRA_ssa(c, d); got != want {
-		println("addshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c-d>>3, subshiftRA_ssa(c, d); got != want {
-		println("subshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c>>3-d, rsbshiftRA_ssa(c, d); got != want {
-		println("rsbshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&(d>>3), andshiftRA_ssa(c, d); got != want {
-		println("andshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c|d>>3, orshiftRA_ssa(c, d); got != want {
-		println("orshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c^d>>3, xorshiftRA_ssa(c, d); got != want {
-		println("xorshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&^(d>>3), bicshiftRA_ssa(c, d); got != want {
-		println("bicshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(d >> 3), notshiftRA_ssa(d); got != want {
-		println("notshiftRA_ssa(-42) =", got, " want ", want)
-		failed = true
-	}
-	s := uint8(3)
-	if want, got := a+b<<s, addshiftLLreg_ssa(a, b, s); got != want {
-		println("addshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b<<s, subshiftLLreg_ssa(a, b, s); got != want {
-		println("subshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a<<s-b, rsbshiftLLreg_ssa(a, b, s); got != want {
-		println("rsbshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b<<s), andshiftLLreg_ssa(a, b, s); got != want {
-		println("andshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b<<s, orshiftLLreg_ssa(a, b, s); got != want {
-		println("orshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b<<s, xorshiftLLreg_ssa(a, b, s); got != want {
-		println("xorshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b<<s), bicshiftLLreg_ssa(a, b, s); got != want {
-		println("bicshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a << s), notshiftLLreg_ssa(a, s); got != want {
-		println("notshiftLLreg_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a+b>>s, addshiftRLreg_ssa(a, b, s); got != want {
-		println("addshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b>>s, subshiftRLreg_ssa(a, b, s); got != want {
-		println("subshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a>>s-b, rsbshiftRLreg_ssa(a, b, s); got != want {
-		println("rsbshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b>>s), andshiftRLreg_ssa(a, b, s); got != want {
-		println("andshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b>>s, orshiftRLreg_ssa(a, b, s); got != want {
-		println("orshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b>>s, xorshiftRLreg_ssa(a, b, s); got != want {
-		println("xorshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b>>s), bicshiftRLreg_ssa(a, b, s); got != want {
-		println("bicshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a >> s), notshiftRLreg_ssa(a, s); got != want {
-		println("notshiftRLreg_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c+d>>s, addshiftRAreg_ssa(c, d, s); got != want {
-		println("addshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c-d>>s, subshiftRAreg_ssa(c, d, s); got != want {
-		println("subshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c>>s-d, rsbshiftRAreg_ssa(c, d, s); got != want {
-		println("rsbshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&(d>>s), andshiftRAreg_ssa(c, d, s); got != want {
-		println("andshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c|d>>s, orshiftRAreg_ssa(c, d, s); got != want {
-		println("orshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c^d>>s, xorshiftRAreg_ssa(c, d, s); got != want {
-		println("xorshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&^(d>>s), bicshiftRAreg_ssa(c, d, s); got != want {
-		println("bicshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(d >> s), notshiftRAreg_ssa(d, s); got != want {
-		println("notshiftRAreg_ssa(-42, 3) =", got, " want ", want)
-		failed = true
-	}
-}
-
 var failed = false

 func main() {
@@ -1012,7 +573,6 @@ func main() {
 	testLoadCombine()
 	testLoadSymCombine()
 	testShiftRemoval()
-	testShiftedOps()

 	if failed {
 		panic("failed")
--- a/src/cmd/compile/internal/gc/testdata/gen/constFoldGen.go
+++ b/src/cmd/compile/internal/gc/testdata/gen/constFoldGen.go
@@ -144,7 +144,7 @@ func main() {
 					fmt.Fprintf(w, "\tr = x %s y\n", o.symbol)
 					want := ansU(c, d, s.name, o.symbol)
 					fmt.Fprintf(w, "\tif r != %s {\n", want)
-					fmt.Fprintf(w, "\t\tt.Errorf(\"%d %%s %d = %%d, want %s\", %q, r)\n", c, d, want, o.symbol)
+					fmt.Fprintf(w, "\t\tt.Errorf(\"%d %s %d = %%d, want %s\", r)\n", c, o.symbol, d, want)
 					fmt.Fprintf(w, "\t}\n")
 				}
 			}
@@ -159,7 +159,7 @@ func main() {
 					fmt.Fprintf(w, "\tr = x %s y\n", o.symbol)
 					want := ansS(c, d, s.name, o.symbol)
 					fmt.Fprintf(w, "\tif r != %s {\n", want)
-					fmt.Fprintf(w, "\t\tt.Errorf(\"%d %%s %d = %%d, want %s\", %q, r)\n", c, d, want, o.symbol)
+					fmt.Fprintf(w, "\t\tt.Errorf(\"%d %s %d = %%d, want %s\", r)\n", c, o.symbol, d, want)
 					fmt.Fprintf(w, "\t}\n")
 				}
 			}
@@ -188,7 +188,7 @@ func main() {
 						fmt.Fprintf(w, "\tr = x %s y\n", o.symbol)
 						want := ansU(c, d, ls.name, o.symbol)
 						fmt.Fprintf(w, "\tif r != %s {\n", want)
-						fmt.Fprintf(w, "\t\tt.Errorf(\"%d %%s %d = %%d, want %s\", %q, r)\n", c, d, want, o.symbol)
+						fmt.Fprintf(w, "\t\tt.Errorf(\"%d %s %d = %%d, want %s\", r)\n", c, o.symbol, d, want)
 						fmt.Fprintf(w, "\t}\n")
 					}
 				}
@@ -200,7 +200,7 @@ func main() {
 						fmt.Fprintf(w, "\tr = x %s y\n", o.symbol)
 						want := ansS(c, int64(d), ls.name, o.symbol)
 						fmt.Fprintf(w, "\tif r != %s {\n", want)
-						fmt.Fprintf(w, "\t\tt.Errorf(\"%d %%s %d = %%d, want %s\", %q, r)\n", c, d, want, o.symbol)
+						fmt.Fprintf(w, "\t\tt.Errorf(\"%d %s %d = %%d, want %s\", r)\n", c, o.symbol, d, want)
 						fmt.Fprintf(w, "\t}\n")
 					}
 				}
--- a/src/cmd/compile/internal/gc/testdata/string_ssa.go
+++ b/src/cmd/compile/internal/gc/testdata/string_ssa.go
@@ -110,67 +110,6 @@ func testSmallIndexType() {
 	}
 }

-//go:noinline
-func testInt64Index_ssa(s string, i int64) byte {
-	return s[i]
-}
-
-//go:noinline
-func testInt64Slice_ssa(s string, i, j int64) string {
-	return s[i:j]
-}
-
-func testInt64Index() {
-	tests := []struct {
-		i int64
-		j int64
-		b byte
-		s string
-	}{
-		{0, 5, 'B', "Below"},
-		{5, 10, 'E', "Exact"},
-		{10, 15, 'A', "Above"},
-	}
-
-	str := "BelowExactAbove"
-	for i, t := range tests {
-		if got := testInt64Index_ssa(str, t.i); got != t.b {
-			println("#", i, "got ", got, ", wanted", t.b)
-			failed = true
-		}
-		if got := testInt64Slice_ssa(str, t.i, t.j); got != t.s {
-			println("#", i, "got ", got, ", wanted", t.s)
-			failed = true
-		}
-	}
-}
-
-func testInt64IndexPanic() {
-	defer func() {
-		if r := recover(); r != nil {
-			println("paniced as expected")
-		}
-	}()
-
-	str := "foobar"
-	println("got ", testInt64Index_ssa(str, 1<<32+1))
-	println("expected to panic, but didn't")
-	failed = true
-}
-
-func testInt64SlicePanic() {
-	defer func() {
-		if r := recover(); r != nil {
-			println("paniced as expected")
-		}
-	}()
-
-	str := "foobar"
-	println("got ", testInt64Slice_ssa(str, 1<<32, 1<<32+1))
-	println("expected to panic, but didn't")
-	failed = true
-}
-
 //go:noinline
 func testStringElem_ssa(s string, i int) byte {
 	return s[i]
@@ -214,9 +153,6 @@ func main() {
 	testSmallIndexType()
 	testStringElem()
 	testStringElemConst()
-	testInt64Index()
-	testInt64IndexPanic()
-	testInt64SlicePanic()

 	if failed {
 		panic("failed")
--- a/src/cmd/compile/internal/gc/type.go
+++ b/src/cmd/compile/internal/gc/type.go
@@ -1207,7 +1207,6 @@ func (t *Type) ChanDir() ChanDir {
 func (t *Type) IsMemory() bool { return false }
 func (t *Type) IsFlags() bool  { return false }
 func (t *Type) IsVoid() bool   { return false }
-func (t *Type) IsTuple() bool  { return false }

 // IsUntyped reports whether t is an untyped type.
 func (t *Type) IsUntyped() bool {
--- a/src/cmd/compile/internal/gc/typecheck.go
+++ b/src/cmd/compile/internal/gc/typecheck.go
@@ -3185,9 +3185,6 @@ func samesafeexpr(l *Node, r *Node) bool {

 	case OINDEX:
 		return samesafeexpr(l.Left, r.Left) && samesafeexpr(l.Right, r.Right)
-
-	case OLITERAL:
-		return eqval(l.Val(), r.Val())
 	}

 	return false
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -506,9 +506,7 @@ func walkexpr(n *Node, init *Nodes) *Node {
 	if n.Op == ONAME && n.Class == PAUTOHEAP {
 		nn := Nod(OIND, n.Name.Heapaddr, nil)
 		nn = typecheck(nn, Erv)
-		nn = walkexpr(nn, init)
-		nn.Left.NonNil = true
-		return nn
+		return walkexpr(nn, init)
 	}

 opswitch:
@@ -960,27 +958,19 @@ opswitch:
 		fromKind := from.Type.iet()
 		toKind := t.iet()

-		res := n.List.First()
-
 		// Avoid runtime calls in a few cases of the form _, ok := i.(T).
 		// This is faster and shorter and allows the corresponding assertX2X2
 		// routines to skip nil checks on their last argument.
-		if isblank(res) {
+		if isblank(n.List.First()) {
 			var fast *Node
-			switch toKind {
-			case 'T':
-				tab := Nod(OITAB, from, nil)
-				if fromKind == 'E' {
-					typ := Nod(OCONVNOP, typename(t), nil)
-					typ.Type = Ptrto(Types[TUINTPTR])
-					fast = Nod(OEQ, tab, typ)
-					break
-				}
-				fast = Nod(OANDAND,
-					Nod(ONE, nodnil(), tab),
-					Nod(OEQ, itabType(tab), typename(t)),
-				)
-			case 'E':
+			switch {
+			case fromKind == 'E' && toKind == 'T':
+				tab := Nod(OITAB, from, nil) // type:eface::tab:iface
+				typ := Nod(OCONVNOP, typename(t), nil)
+				typ.Type = Ptrto(Types[TUINTPTR])
+				fast = Nod(OEQ, tab, typ)
+			case fromKind == 'I' && toKind == 'E',
+				fromKind == 'E' && toKind == 'E':
 				tab := Nod(OITAB, from, nil)
 				fast = Nod(ONE, nodnil(), tab)
 			}
@@ -995,10 +985,10 @@ opswitch:
 		}

 		var resptr *Node // &res
-		if isblank(res) {
+		if isblank(n.List.First()) {
 			resptr = nodnil()
 		} else {
-			resptr = Nod(OADDR, res, nil)
+			resptr = Nod(OADDR, n.List.First(), nil)
 		}
 		resptr.Etype = 1 // addr does not escape

@@ -1104,45 +1094,12 @@ opswitch:

 			if n.Type.IsFloat() {
 				if n.Left.Type.Etype == TINT64 {
-					n = conv(mkcall("int64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TINT64])), n.Type)
+					n = mkcall("int64tofloat64", n.Type, init, conv(n.Left, Types[TINT64]))
 					break
 				}

 				if n.Left.Type.Etype == TUINT64 {
-					n = conv(mkcall("uint64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TUINT64])), n.Type)
-					break
-				}
-			}
-		}
-
-		if Thearch.LinkArch.Family == sys.I386 {
-			if n.Left.Type.IsFloat() {
-				if n.Type.Etype == TINT64 {
-					n = mkcall("float64toint64", n.Type, init, conv(n.Left, Types[TFLOAT64]))
-					break
-				}
-
-				if n.Type.Etype == TUINT64 {
-					n = mkcall("float64touint64", n.Type, init, conv(n.Left, Types[TFLOAT64]))
-					break
-				}
-				if n.Type.Etype == TUINT32 || n.Type.Etype == TUINT || n.Type.Etype == TUINTPTR {
-					n = mkcall("float64touint32", n.Type, init, conv(n.Left, Types[TFLOAT64]))
-					break
-				}
-			}
-			if n.Type.IsFloat() {
-				if n.Left.Type.Etype == TINT64 {
-					n = conv(mkcall("int64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TINT64])), n.Type)
-					break
-				}
-
-				if n.Left.Type.Etype == TUINT64 {
-					n = conv(mkcall("uint64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TUINT64])), n.Type)
-					break
-				}
-				if n.Left.Type.Etype == TUINT32 || n.Left.Type.Etype == TUINT || n.Left.Type.Etype == TUINTPTR {
-					n = conv(mkcall("uint32tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TUINT32])), n.Type)
+					n = mkcall("uint64tofloat64", n.Type, init, conv(n.Left, Types[TUINT64]))
 					break
 				}
 			}
@@ -3346,7 +3303,6 @@ func samecheap(a *Node, b *Node) bool {
 // The result of walkrotate MUST be assigned back to n, e.g.
 // 	n.Left = walkrotate(n.Left)
 func walkrotate(n *Node) *Node {
-	//TODO: enable LROT on ARM64 once the old backend is gone
 	if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM64, sys.PPC64) {
 		return n
 	}
@@ -3540,6 +3496,16 @@ func walkdiv(n *Node, init *Nodes) *Node {
 			goto ret
 		}

+		// TODO(zhongwei) Test shows that TUINT8, TINT8, TUINT16 and TINT16's "quick division" method
+		// on current arm64 backend is slower than hardware div instruction on ARM64 due to unnecessary
+		// data movement between registers. It could be enabled when generated code is good enough.
+		if Thearch.LinkArch.Family == sys.ARM64 {
+			switch Simtype[nl.Type.Etype] {
+			case TUINT8, TINT8, TUINT16, TINT16:
+				return n
+			}
+		}
+
 		switch Simtype[nl.Type.Etype] {
 		default:
 			return n
--- a/src/cmd/compile/internal/mips64/peep.go
+++ b/src/cmd/compile/internal/mips64/peep.go
@@ -402,7 +402,7 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {

 	switch p.As {
 	default:
-		fmt.Printf("copyu: can't find %v\n", p.As)
+		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
 		return 2

 	case obj.ANOP, /* read p->from, write p->to */
--- a/src/cmd/compile/internal/ppc64/galign.go
+++ b/src/cmd/compile/internal/ppc64/galign.go
@@ -66,11 +66,6 @@ func Main() {
 	gc.Thearch.Doregbits = doregbits
 	gc.Thearch.Regnames = regnames

-	gc.Thearch.SSARegToReg = ssaRegToReg
-	gc.Thearch.SSAMarkMoves = ssaMarkMoves
-	gc.Thearch.SSAGenValue = ssaGenValue
-	gc.Thearch.SSAGenBlock = ssaGenBlock
-
 	initvariants()
 	initproginfo()

--- a/src/cmd/compile/internal/ppc64/peep.go
+++ b/src/cmd/compile/internal/ppc64/peep.go
@@ -601,7 +601,7 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {

 	switch p.As {
 	default:
-		fmt.Printf("copyu: can't find %v\n", p.As)
+		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
 		return 2

 	case obj.ANOP, /* read p->from, write p->to */
--- a/src/cmd/compile/internal/ppc64/prog.go
+++ b/src/cmd/compile/internal/ppc64/prog.go
@@ -42,34 +42,22 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{

 	// Integer
 	ppc64.AADD & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AADDC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASUB & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AADDME & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ANEG & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AAND & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AANDN & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AOR & obj.AMask:     {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AORN & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AXOR & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AEQV & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULLD & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULLW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULHD & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULHDU & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ADIVD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ADIVDU & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ADIVW & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ADIVWU & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASLD & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASRD & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASRAD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ASLW & obj.AMask:    {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ASRW & obj.AMask:    {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ASRAW & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ACMP & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead},
 	ppc64.ACMPU & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead},
-	ppc64.ACMPW & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RightRead},
-	ppc64.ACMPWU & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RightRead},
 	ppc64.ATD & obj.AMask:     {Flags: gc.SizeQ | gc.RightRead},

 	// Floating point.
@@ -82,13 +70,11 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{
 	ppc64.AFDIV & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFDIVS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCTIDZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AFCTIWZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCFID & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCFIDU & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCMPU & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
 	ppc64.AFRSP & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv},
 	ppc64.AFSQRT & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},
-	ppc64.AFNEG & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},

 	// Moves
 	ppc64.AMOVB & obj.AMask:  {Flags: gc.SizeB | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
@@ -105,8 +91,6 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{
 	ppc64.AMOVD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move},
 	ppc64.AMOVDU & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move | gc.PostInc},
 	ppc64.AFMOVS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
-	ppc64.AFMOVSX & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
-	ppc64.AFMOVSZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
 	ppc64.AFMOVD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Move},

 	// Jumps
@@ -313,7 +297,7 @@ func as2variant(as obj.As) int {
 			return i
 		}
 	}
-	gc.Fatalf("as2variant: instruction %v is not a variant of itself", as&obj.AMask)
+	gc.Fatalf("as2variant: instruction %v is not a variant of itself", obj.Aconv(as&obj.AMask))
 	return 0
 }

--- a/src/cmd/compile/internal/ppc64/ssa.go
+++ b/src/cmd/compile/internal/ppc64/ssa.go
--- a/src/cmd/compile/internal/s390x/peep.go
+++ b/src/cmd/compile/internal/s390x/peep.go
@@ -419,7 +419,7 @@ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) usage {

 	switch p.As {
 	default:
-		fmt.Printf("copyu: can't find %v\n", p.As)
+		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
 		return _ReadWriteSame

 	case // read p.From, write p.To
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -20,18 +20,11 @@ type Config struct {
 	lowerBlock      func(*Block) bool          // lowering function
 	lowerValue      func(*Value, *Config) bool // lowering function
 	registers       []Register                 // machine registers
-	gpRegMask       regMask                    // general purpose integer register mask
-	fpRegMask       regMask                    // floating point register mask
-	FPReg           int8                       // register number of frame pointer, -1 if not used
-	hasGReg         bool                       // has hardware g register
 	fe              Frontend                   // callbacks into compiler frontend
 	HTML            *HTMLWriter                // html writer, for debugging
 	ctxt            *obj.Link                  // Generic arch information
 	optimize        bool                       // Do optimization
 	noDuffDevice    bool                       // Don't use Duff's device
-	nacl            bool                       // GOOS=nacl
-	use387          bool                       // GO386=387
-	NeedsFpScratch  bool                       // No direct move between GP and FP register sets
 	sparsePhiCutoff uint64                     // Sparse phi location algorithm used above this #blocks*#variables score
 	curFunc         *Func

@@ -113,7 +106,6 @@ type Frontend interface {
 	SplitSlice(LocalSlot) (LocalSlot, LocalSlot, LocalSlot)
 	SplitComplex(LocalSlot) (LocalSlot, LocalSlot)
 	SplitStruct(LocalSlot, int) LocalSlot
-	SplitInt64(LocalSlot) (LocalSlot, LocalSlot) // returns (hi, lo)

 	// Line returns a string describing the given line number.
 	Line(int32) string
@@ -136,88 +128,29 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
 		c.lowerBlock = rewriteBlockAMD64
 		c.lowerValue = rewriteValueAMD64
 		c.registers = registersAMD64[:]
-		c.gpRegMask = gpRegMaskAMD64
-		c.fpRegMask = fpRegMaskAMD64
-		c.FPReg = framepointerRegAMD64
-		c.hasGReg = false
-	case "amd64p32":
-		c.IntSize = 4
-		c.PtrSize = 4
-		c.lowerBlock = rewriteBlockAMD64
-		c.lowerValue = rewriteValueAMD64
-		c.registers = registersAMD64[:]
-		c.gpRegMask = gpRegMaskAMD64
-		c.fpRegMask = fpRegMaskAMD64
-		c.FPReg = framepointerRegAMD64
-		c.hasGReg = false
-		c.noDuffDevice = true
 	case "386":
 		c.IntSize = 4
 		c.PtrSize = 4
-		c.lowerBlock = rewriteBlock386
-		c.lowerValue = rewriteValue386
-		c.registers = registers386[:]
-		c.gpRegMask = gpRegMask386
-		c.fpRegMask = fpRegMask386
-		c.FPReg = framepointerReg386
-		c.hasGReg = false
+		c.lowerBlock = rewriteBlockAMD64
+		c.lowerValue = rewriteValueAMD64 // TODO(khr): full 32-bit support
 	case "arm":
 		c.IntSize = 4
 		c.PtrSize = 4
 		c.lowerBlock = rewriteBlockARM
 		c.lowerValue = rewriteValueARM
 		c.registers = registersARM[:]
-		c.gpRegMask = gpRegMaskARM
-		c.fpRegMask = fpRegMaskARM
-		c.FPReg = framepointerRegARM
-		c.hasGReg = true
-	case "arm64":
-		c.IntSize = 8
-		c.PtrSize = 8
-		c.lowerBlock = rewriteBlockARM64
-		c.lowerValue = rewriteValueARM64
-		c.registers = registersARM64[:]
-		c.gpRegMask = gpRegMaskARM64
-		c.fpRegMask = fpRegMaskARM64
-		c.FPReg = framepointerRegARM64
-		c.hasGReg = true
-		c.noDuffDevice = obj.Getgoos() == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend
-	case "ppc64le":
-		c.IntSize = 8
-		c.PtrSize = 8
-		c.lowerBlock = rewriteBlockPPC64
-		c.lowerValue = rewriteValuePPC64
-		c.registers = registersPPC64[:]
-		c.gpRegMask = gpRegMaskPPC64
-		c.fpRegMask = fpRegMaskPPC64
-		c.FPReg = framepointerRegPPC64
-		c.noDuffDevice = true // TODO: Resolve PPC64 DuffDevice (has zero, but not copy)
-		c.NeedsFpScratch = true
-		c.hasGReg = true
 	default:
 		fe.Unimplementedf(0, "arch %s not implemented", arch)
 	}
 	c.ctxt = ctxt
 	c.optimize = optimize
-	c.nacl = obj.Getgoos() == "nacl"

-	// Don't use Duff's device on Plan 9 AMD64, because floating
+	// Don't use Duff's device on Plan 9, because floating
 	// point operations are not allowed in note handler.
-	if obj.Getgoos() == "plan9" && arch == "amd64" {
+	if obj.Getgoos() == "plan9" {
 		c.noDuffDevice = true
 	}

-	if c.nacl {
-		c.noDuffDevice = true // Don't use Duff's device on NaCl
-
-		// ARM assembler rewrites DIV/MOD to runtime calls, which
-		// clobber R12 on nacl
-		opcodeTable[OpARMDIV].reg.clobbers |= 1 << 12  // R12
-		opcodeTable[OpARMDIVU].reg.clobbers |= 1 << 12 // R12
-		opcodeTable[OpARMMOD].reg.clobbers |= 1 << 12  // R12
-		opcodeTable[OpARMMODU].reg.clobbers |= 1 << 12 // R12
-	}
-
 	// Assign IDs to preallocated values/blocks.
 	for i := range c.values {
 		c.values[i].ID = ID(i)
@@ -247,11 +180,6 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
 	return c
 }

-func (c *Config) Set387(b bool) {
-	c.NeedsFpScratch = b
-	c.use387 = b
-}
-
 func (c *Config) Frontend() Frontend      { return c.fe }
 func (c *Config) SparsePhiCutoff() uint64 { return c.sparsePhiCutoff }

--- a/src/cmd/compile/internal/ssa/cse.go
+++ b/src/cmd/compile/internal/ssa/cse.go
@@ -163,29 +163,6 @@ func cse(f *Func) {
 		}
 	}

-	// if we rewrite a tuple generator to a new one in a different block,
-	// copy its selectors to the new generator's block, so tuple generator
-	// and selectors stay together.
-	for _, b := range f.Blocks {
-		for _, v := range b.Values {
-			if rewrite[v.ID] != nil {
-				continue
-			}
-			if v.Op != OpSelect0 && v.Op != OpSelect1 {
-				continue
-			}
-			if !v.Args[0].Type.IsTuple() {
-				f.Fatalf("arg of tuple selector %s is not a tuple: %s", v.String(), v.Args[0].LongString())
-			}
-			t := rewrite[v.Args[0].ID]
-			if t != nil && t.Block != b {
-				// v.Args[0] is tuple generator, CSE'd into a different block as t, v is left behind
-				c := v.copyInto(t.Block)
-				rewrite[v.ID] = c
-			}
-		}
-	}
-
 	rewrites := int64(0)

 	// Apply substitutions
--- a/src/cmd/compile/internal/ssa/deadstore.go
+++ b/src/cmd/compile/internal/ssa/deadstore.go
@@ -14,8 +14,7 @@ func dse(f *Func) {
 	defer f.retSparseSet(loadUse)
 	storeUse := f.newSparseSet(f.NumValues())
 	defer f.retSparseSet(storeUse)
-	shadowed := f.newSparseSet(f.NumValues())
-	defer f.retSparseSet(shadowed)
+	shadowed := newSparseMap(f.NumValues()) // TODO: cache
 	for _, b := range f.Blocks {
 		// Find all the stores in this block. Categorize their uses:
 		//  loadUse contains stores which are used by a subsequent load.
@@ -81,17 +80,18 @@ func dse(f *Func) {
 			shadowed.clear()
 		}
 		if v.Op == OpStore || v.Op == OpZero {
-			if shadowed.contains(v.Args[0].ID) {
+			sz := v.AuxInt
+			if shadowedSize := int64(shadowed.get(v.Args[0].ID)); shadowedSize != -1 && shadowedSize >= sz {
 				// Modify store into a copy
 				if v.Op == OpStore {
 					// store addr value mem
 					v.SetArgs1(v.Args[2])
 				} else {
 					// zero addr mem
-					sz := v.Args[0].Type.ElemType().Size()
-					if SizeAndAlign(v.AuxInt).Size() != sz {
+					typesz := v.Args[0].Type.ElemType().Size()
+					if sz != typesz {
 						f.Fatalf("mismatched zero/store sizes: %d and %d [%s]",
-							v.AuxInt, sz, v.LongString())
+							sz, typesz, v.LongString())
 					}
 					v.SetArgs1(v.Args[1])
 				}
@@ -99,7 +99,10 @@ func dse(f *Func) {
 				v.AuxInt = 0
 				v.Op = OpCopy
 			} else {
-				shadowed.add(v.Args[0].ID)
+				if sz > 0x7fffffff { // work around sparseMap's int32 value type
+					sz = 0x7fffffff
+				}
+				shadowed.set(v.Args[0].ID, int32(sz))
 			}
 		}
 		// walk to previous store
--- a/src/cmd/compile/internal/ssa/deadstore_test.go
+++ b/src/cmd/compile/internal/ssa/deadstore_test.go
@@ -8,7 +8,7 @@ import "testing"

 func TestDeadStore(t *testing.T) {
 	c := testConfig(t)
-	elemType := &TypeImpl{Size_: 8, Name: "testtype"}
+	elemType := &TypeImpl{Size_: 1, Name: "testtype"}
 	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr", Elem_: elemType} // dummy for testing
 	fun := Fun(c, "entry",
 		Bloc("entry",
@@ -18,7 +18,7 @@ func TestDeadStore(t *testing.T) {
 			Valu("addr1", OpAddr, ptrType, 0, nil, "sb"),
 			Valu("addr2", OpAddr, ptrType, 0, nil, "sb"),
 			Valu("addr3", OpAddr, ptrType, 0, nil, "sb"),
-			Valu("zero1", OpZero, TypeMem, 8, nil, "addr3", "start"),
+			Valu("zero1", OpZero, TypeMem, 1, nil, "addr3", "start"),
 			Valu("store1", OpStore, TypeMem, 1, nil, "addr1", "v", "zero1"),
 			Valu("store2", OpStore, TypeMem, 1, nil, "addr2", "v", "store1"),
 			Valu("store3", OpStore, TypeMem, 1, nil, "addr1", "v", "store2"),
@@ -95,3 +95,32 @@ func TestDeadStoreTypes(t *testing.T) {
 		t.Errorf("store %s incorrectly removed", v)
 	}
 }
+
+func TestDeadStoreUnsafe(t *testing.T) {
+	// Make sure a narrow store can't shadow a wider one. The test above
+	// covers the case of two different types, but unsafe pointer casting
+	// can get to a point where the size is changed but type unchanged.
+	c := testConfig(t)
+	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
+	fun := Fun(c, "entry",
+		Bloc("entry",
+			Valu("start", OpInitMem, TypeMem, 0, nil),
+			Valu("sb", OpSB, TypeInvalid, 0, nil),
+			Valu("v", OpConstBool, TypeBool, 1, nil),
+			Valu("addr1", OpAddr, ptrType, 0, nil, "sb"),
+			Valu("store1", OpStore, TypeMem, 8, nil, "addr1", "v", "start"),  // store 8 bytes
+			Valu("store2", OpStore, TypeMem, 1, nil, "addr1", "v", "store1"), // store 1 byte
+			Goto("exit")),
+		Bloc("exit",
+			Exit("store2")))
+
+	CheckFunc(fun.f)
+	cse(fun.f)
+	dse(fun.f)
+	CheckFunc(fun.f)
+
+	v := fun.values["store1"]
+	if v.Op == OpCopy {
+		t.Errorf("store %s incorrectly removed", v)
+	}
+}
--- a/src/cmd/compile/internal/ssa/decompose.go
+++ b/src/cmd/compile/internal/ssa/decompose.go
@@ -25,22 +25,6 @@ func decomposeBuiltIn(f *Func) {
 	for _, name := range f.Names {
 		t := name.Type
 		switch {
-		case t.IsInteger() && t.Size() == 8 && f.Config.IntSize == 4:
-			var elemType Type
-			if t.IsSigned() {
-				elemType = f.Config.fe.TypeInt32()
-			} else {
-				elemType = f.Config.fe.TypeUInt32()
-			}
-			hiName, loName := f.Config.fe.SplitInt64(name)
-			newNames = append(newNames, hiName, loName)
-			for _, v := range f.NamedValues[name] {
-				hi := v.Block.NewValue1(v.Line, OpInt64Hi, elemType, v)
-				lo := v.Block.NewValue1(v.Line, OpInt64Lo, f.Config.fe.TypeUInt32(), v)
-				f.NamedValues[hiName] = append(f.NamedValues[hiName], hi)
-				f.NamedValues[loName] = append(f.NamedValues[loName], lo)
-			}
-			delete(f.NamedValues, name)
 		case t.IsComplex():
 			var elemType Type
 			if t.Size() == 16 {
@@ -94,8 +78,6 @@ func decomposeBuiltIn(f *Func) {
 				f.NamedValues[dataName] = append(f.NamedValues[dataName], data)
 			}
 			delete(f.NamedValues, name)
-		case t.IsFloat():
-			// floats are never decomposed, even ones bigger than IntSize
 		case t.Size() > f.Config.IntSize:
 			f.Unimplementedf("undecomposed named type %s %s", name, t)
 		default:
@@ -106,13 +88,8 @@ func decomposeBuiltIn(f *Func) {
 }

 func decomposeBuiltInPhi(v *Value) {
+	// TODO: decompose 64-bit ops on 32-bit archs?
 	switch {
-	case v.Type.IsInteger() && v.Type.Size() == 8 && v.Block.Func.Config.IntSize == 4:
-		if v.Block.Func.Config.arch == "amd64p32" {
-			// Even though ints are 32 bits, we have 64-bit ops.
-			break
-		}
-		decomposeInt64Phi(v)
 	case v.Type.IsComplex():
 		decomposeComplexPhi(v)
 	case v.Type.IsString():
@@ -121,8 +98,6 @@ func decomposeBuiltInPhi(v *Value) {
 		decomposeSlicePhi(v)
 	case v.Type.IsInterface():
 		decomposeInterfacePhi(v)
-	case v.Type.IsFloat():
-		// floats are never decomposed, even ones bigger than IntSize
 	case v.Type.Size() > v.Block.Func.Config.IntSize:
 		v.Unimplementedf("undecomposed type %s", v.Type)
 	}
@@ -163,26 +138,6 @@ func decomposeSlicePhi(v *Value) {
 	v.AddArg(cap)
 }

-func decomposeInt64Phi(v *Value) {
-	fe := v.Block.Func.Config.fe
-	var partType Type
-	if v.Type.IsSigned() {
-		partType = fe.TypeInt32()
-	} else {
-		partType = fe.TypeUInt32()
-	}
-
-	hi := v.Block.NewValue0(v.Line, OpPhi, partType)
-	lo := v.Block.NewValue0(v.Line, OpPhi, fe.TypeUInt32())
-	for _, a := range v.Args {
-		hi.AddArg(a.Block.NewValue1(v.Line, OpInt64Hi, partType, a))
-		lo.AddArg(a.Block.NewValue1(v.Line, OpInt64Lo, fe.TypeUInt32(), a))
-	}
-	v.reset(OpInt64Make)
-	v.AddArg(hi)
-	v.AddArg(lo)
-}
-
 func decomposeComplexPhi(v *Value) {
 	fe := v.Block.Func.Config.fe
 	var partType Type
--- a/src/cmd/compile/internal/ssa/export_test.go
+++ b/src/cmd/compile/internal/ssa/export_test.go
@@ -49,12 +49,6 @@ func (d DummyFrontend) SplitComplex(s LocalSlot) (LocalSlot, LocalSlot) {
 	}
 	return LocalSlot{s.N, d.TypeFloat32(), s.Off}, LocalSlot{s.N, d.TypeFloat32(), s.Off + 4}
 }
-func (d DummyFrontend) SplitInt64(s LocalSlot) (LocalSlot, LocalSlot) {
-	if s.Type.IsSigned() {
-		return LocalSlot{s.N, d.TypeInt32(), s.Off + 4}, LocalSlot{s.N, d.TypeUInt32(), s.Off}
-	}
-	return LocalSlot{s.N, d.TypeUInt32(), s.Off + 4}, LocalSlot{s.N, d.TypeUInt32(), s.Off}
-}
 func (d DummyFrontend) SplitStruct(s LocalSlot, i int) LocalSlot {
 	return LocalSlot{s.N, s.Type.FieldType(i), s.Off + s.Type.FieldOff(i)}
 }
--- a/src/cmd/compile/internal/ssa/flagalloc.go
+++ b/src/cmd/compile/internal/ssa/flagalloc.go
@@ -4,6 +4,8 @@

 package ssa

+const flagRegMask = regMask(1) << 33 // TODO: arch-specific
+
 // flagalloc allocates the flag register among all the flag-generating
 // instructions. Flag values are recomputed if they need to be
 // spilled/restored.
@@ -31,7 +33,7 @@ func flagalloc(f *Func) {
 				if v == flag {
 					flag = nil
 				}
-				if opcodeTable[v.Op].clobberFlags {
+				if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
 					flag = nil
 				}
 				for _, a := range v.Args {
@@ -95,7 +97,7 @@ func flagalloc(f *Func) {
 					continue
 				}
 				// Recalculate a
-				c := copyFlags(a, b)
+				c := a.copyInto(b)
 				// Update v.
 				v.SetArg(i, c)
 				// Remember the most-recently computed flag value.
@@ -103,7 +105,7 @@ func flagalloc(f *Func) {
 			}
 			// Issue v.
 			b.Values = append(b.Values, v)
-			if opcodeTable[v.Op].clobberFlags {
+			if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
 				flag = nil
 			}
 			if v.Type.IsFlags() {
@@ -119,7 +121,7 @@ func flagalloc(f *Func) {
 		if v := end[b.ID]; v != nil && v != flag {
 			// Need to reissue flag generator for use by
 			// subsequent blocks.
-			copyFlags(v, b)
+			_ = v.copyInto(b)
 			// Note: this flag generator is not properly linked up
 			// with the flag users. This breaks the SSA representation.
 			// We could fix up the users with another pass, but for now
@@ -133,19 +135,3 @@ func flagalloc(f *Func) {
 		b.FlagsLiveAtEnd = end[b.ID] != nil
 	}
 }
-
-// copyFlags copies v (flag generator) into b, returns the copy.
-// If v's arg is also flags, copy recursively.
-func copyFlags(v *Value, b *Block) *Value {
-	flagsArgs := make(map[int]*Value)
-	for i, a := range v.Args {
-		if a.Type.IsFlags() || a.Type.IsTuple() {
-			flagsArgs[i] = copyFlags(a, b)
-		}
-	}
-	c := v.copyInto(b)
-	for i, a := range flagsArgs {
-		c.SetArg(i, a)
-	}
-	return c
-}
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
--- a/src/cmd/compile/internal/ssa/gen/386Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/386Ops.go
@@ -1,508 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import "strings"
-
-// Notes:
-//  - Integer types live in the low portion of registers. Upper portions are junk.
-//  - Boolean types use the low-order byte of a register. 0=false, 1=true.
-//    Upper bytes are junk.
-//  - Floating-point types live in the low natural slot of an sse2 register.
-//    Unused portions are junk.
-//  - We do not use AH,BH,CH,DH registers.
-//  - When doing sub-register operations, we try to write the whole
-//    destination register to avoid a partial-register write.
-//  - Unused portions of AuxInt (or the Val portion of ValAndOff) are
-//    filled by sign-extending the used portion.  Users of AuxInt which interpret
-//    AuxInt as unsigned (e.g. shifts) must be careful.
-
-// Suffixes encode the bit width of various instructions.
-// L (long word) = 32 bit
-// W (word)      = 16 bit
-// B (byte)      = 8 bit
-
-// copied from ../../x86/reg.go
-var regNames386 = []string{
-	"AX",
-	"CX",
-	"DX",
-	"BX",
-	"SP",
-	"BP",
-	"SI",
-	"DI",
-	"X0",
-	"X1",
-	"X2",
-	"X3",
-	"X4",
-	"X5",
-	"X6",
-	"X7",
-
-	// pseudo-registers
-	"SB",
-}
-
-// Notes on 387 support.
-//  - The 387 has a weird stack-register setup for floating-point registers.
-//    We use these registers when SSE registers are not available (when GO386=387).
-//  - We use the same register names (X0-X7) but they refer to the 387
-//    floating-point registers. That way, most of the SSA backend is unchanged.
-//  - The instruction generation pass maintains an SSE->387 register mapping.
-//    This mapping is updated whenever the FP stack is pushed or popped so that
-//    we can always find a given SSE register even when the TOS pointer has changed.
-//  - To facilitate the mapping from SSE to 387, we enforce that
-//    every basic block starts and ends with an empty floating-point stack.
-
-func init() {
-	// Make map from reg names to reg integers.
-	if len(regNames386) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNames386 {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	// Common individual register masks
-	var (
-		ax         = buildReg("AX")
-		cx         = buildReg("CX")
-		dx         = buildReg("DX")
-		gp         = buildReg("AX CX DX BX BP SI DI")
-		fp         = buildReg("X0 X1 X2 X3 X4 X5 X6 X7")
-		x7         = buildReg("X7")
-		gpsp       = gp | buildReg("SP")
-		gpspsb     = gpsp | buildReg("SB")
-		callerSave = gp | fp
-	)
-	// Common slices of register masks
-	var (
-		gponly = []regMask{gp}
-		fponly = []regMask{fp}
-	)
-
-	// Common regInfo
-	var (
-		gp01      = regInfo{inputs: nil, outputs: gponly}
-		gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly}
-		gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
-		gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
-		gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-		gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
-		gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{0, gp}}
-		gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
-		gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-		gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
-		gp21sb    = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
-		gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
-		gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx}
-		gp21hmul  = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
-		gp11mod   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax}
-		gp21mul   = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}
-
-		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
-		gp1flags = regInfo{inputs: []regMask{gpsp}}
-		flagsgp  = regInfo{inputs: nil, outputs: gponly}
-
-		readflags = regInfo{inputs: nil, outputs: gponly}
-		flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
-
-		gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
-		gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
-
-		gpstore         = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
-		gpstoreconst    = regInfo{inputs: []regMask{gpspsb, 0}}
-		gpstoreidx      = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
-		gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
-
-		fp01   = regInfo{inputs: nil, outputs: fponly}
-		fp21   = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
-		fp21x7 = regInfo{inputs: []regMask{fp &^ x7, fp &^ x7},
-			clobbers: x7, outputs: []regMask{fp &^ x7}}
-		fpgp     = regInfo{inputs: fponly, outputs: gponly}
-		gpfp     = regInfo{inputs: gponly, outputs: fponly}
-		fp11     = regInfo{inputs: fponly, outputs: fponly}
-		fp2flags = regInfo{inputs: []regMask{fp, fp}}
-
-		fpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly}
-		fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly}
-
-		fpstore    = regInfo{inputs: []regMask{gpspsb, fp, 0}}
-		fpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, fp, 0}}
-	)
-
-	var _386ops = []opData{
-		// fp ops
-		{name: "ADDSS", argLength: 2, reg: fp21, asm: "ADDSS", commutative: true, resultInArg0: true}, // fp32 add
-		{name: "ADDSD", argLength: 2, reg: fp21, asm: "ADDSD", commutative: true, resultInArg0: true}, // fp64 add
-		{name: "SUBSS", argLength: 2, reg: fp21x7, asm: "SUBSS", resultInArg0: true},                  // fp32 sub
-		{name: "SUBSD", argLength: 2, reg: fp21x7, asm: "SUBSD", resultInArg0: true},                  // fp64 sub
-		{name: "MULSS", argLength: 2, reg: fp21, asm: "MULSS", commutative: true, resultInArg0: true}, // fp32 mul
-		{name: "MULSD", argLength: 2, reg: fp21, asm: "MULSD", commutative: true, resultInArg0: true}, // fp64 mul
-		{name: "DIVSS", argLength: 2, reg: fp21x7, asm: "DIVSS", resultInArg0: true},                  // fp32 div
-		{name: "DIVSD", argLength: 2, reg: fp21x7, asm: "DIVSD", resultInArg0: true},                  // fp64 div
-
-		{name: "MOVSSload", argLength: 2, reg: fpload, asm: "MOVSS", aux: "SymOff"},            // fp32 load
-		{name: "MOVSDload", argLength: 2, reg: fpload, asm: "MOVSD", aux: "SymOff"},            // fp64 load
-		{name: "MOVSSconst", reg: fp01, asm: "MOVSS", aux: "Float32", rematerializeable: true}, // fp32 constant
-		{name: "MOVSDconst", reg: fp01, asm: "MOVSD", aux: "Float64", rematerializeable: true}, // fp64 constant
-		{name: "MOVSSloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSS", aux: "SymOff"},     // fp32 load indexed by i
-		{name: "MOVSSloadidx4", argLength: 3, reg: fploadidx, asm: "MOVSS", aux: "SymOff"},     // fp32 load indexed by 4*i
-		{name: "MOVSDloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSD", aux: "SymOff"},     // fp64 load indexed by i
-		{name: "MOVSDloadidx8", argLength: 3, reg: fploadidx, asm: "MOVSD", aux: "SymOff"},     // fp64 load indexed by 8*i
-
-		{name: "MOVSSstore", argLength: 3, reg: fpstore, asm: "MOVSS", aux: "SymOff"},        // fp32 store
-		{name: "MOVSDstore", argLength: 3, reg: fpstore, asm: "MOVSD", aux: "SymOff"},        // fp64 store
-		{name: "MOVSSstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSS", aux: "SymOff"}, // fp32 indexed by i store
-		{name: "MOVSSstoreidx4", argLength: 4, reg: fpstoreidx, asm: "MOVSS", aux: "SymOff"}, // fp32 indexed by 4i store
-		{name: "MOVSDstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by i store
-		{name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store
-
-		// binary ops
-		{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                // arg0 + arg1
-		{name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint
-
-		{name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true},                // arg0 + arg1, generates <carry,result> pair
-		{name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true},                // arg0 + auxint, generates <carry,result> pair
-		{name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags
-		{name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags
-
-		{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
-		{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
-
-		{name: "SUBLcarry", argLength: 2, reg: gp21carry, asm: "SUBL", resultInArg0: true},                                   // arg0-arg1, generates <borrow,result> pair
-		{name: "SUBLconstcarry", argLength: 1, reg: gp11carry, asm: "SUBL", aux: "Int32", resultInArg0: true},                // arg0-auxint, generates <borrow,result> pair
-		{name: "SBBL", argLength: 3, reg: gp2carry1, asm: "SBBL", resultInArg0: true, clobberFlags: true},                    // arg0-arg1-borrow(arg2), where arg2 is flags
-		{name: "SBBLconst", argLength: 2, reg: gp1carry1, asm: "SBBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0-auxint-borrow(arg1), where arg1 is flags
-
-		{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
-		{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
-
-		{name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULW", argLength: 2, reg: gp21hmul, asm: "IMULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULB", argLength: 2, reg: gp21hmul, asm: "IMULB", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULWU", argLength: 2, reg: gp21hmul, asm: "MULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULBU", argLength: 2, reg: gp21hmul, asm: "MULB", clobberFlags: true}, // (arg0 * arg1) >> width
-
-		{name: "MULLQU", argLength: 2, reg: gp21mul, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1]
-
-		{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL", clobberFlags: true}, // arg0 / arg1
-		{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW", clobberFlags: true}, // arg0 / arg1
-		{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL", clobberFlags: true}, // arg0 / arg1
-		{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW", clobberFlags: true}, // arg0 / arg1
-
-		{name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL", clobberFlags: true}, // arg0 % arg1
-		{name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW", clobberFlags: true}, // arg0 % arg1
-		{name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL", clobberFlags: true}, // arg0 % arg1
-		{name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW", clobberFlags: true}, // arg0 % arg1
-
-		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
-
-		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
-
-		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
-
-		{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPB", argLength: 2, reg: gp2flags, asm: "CMPB", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPLconst", argLength: 1, reg: gp1flags, asm: "CMPL", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint
-		{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int16"}, // arg0 compare to auxint
-		{name: "CMPBconst", argLength: 1, reg: gp1flags, asm: "CMPB", typ: "Flags", aux: "Int8"},  // arg0 compare to auxint
-
-		{name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, // arg0 compare to arg1, f32
-		{name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // arg0 compare to arg1, f64
-
-		{name: "TESTL", argLength: 2, reg: gp2flags, asm: "TESTL", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-		{name: "TESTW", argLength: 2, reg: gp2flags, asm: "TESTW", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-		{name: "TESTB", argLength: 2, reg: gp2flags, asm: "TESTB", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-		{name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0
-		{name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
-		{name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0
-
-		{name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 32
-		{name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
-		// Note: x86 is weird, the 16 and 8 byte shifts still use all 5 bits of shift amount!
-
-		{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // unsigned arg0 >> auxint, shift amount 0-31
-
-		{name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // signed arg0 >> auxint, shift amount 0-31
-
-		{name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
-		{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
-		{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // arg0 rotate left auxint, rotate amount 0-7
-
-		// unary ops
-		{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
-
-		{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true, clobberFlags: true}, // ^arg0
-
-		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-
-		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-
-		{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
-
-		{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
-
-		{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear.
-		// Note: SBBW and SBBB are subsumed by SBBL
-
-		{name: "SETEQ", argLength: 1, reg: readflags, asm: "SETEQ"}, // extract == condition from arg0
-		{name: "SETNE", argLength: 1, reg: readflags, asm: "SETNE"}, // extract != condition from arg0
-		{name: "SETL", argLength: 1, reg: readflags, asm: "SETLT"},  // extract signed < condition from arg0
-		{name: "SETLE", argLength: 1, reg: readflags, asm: "SETLE"}, // extract signed <= condition from arg0
-		{name: "SETG", argLength: 1, reg: readflags, asm: "SETGT"},  // extract signed > condition from arg0
-		{name: "SETGE", argLength: 1, reg: readflags, asm: "SETGE"}, // extract signed >= condition from arg0
-		{name: "SETB", argLength: 1, reg: readflags, asm: "SETCS"},  // extract unsigned < condition from arg0
-		{name: "SETBE", argLength: 1, reg: readflags, asm: "SETLS"}, // extract unsigned <= condition from arg0
-		{name: "SETA", argLength: 1, reg: readflags, asm: "SETHI"},  // extract unsigned > condition from arg0
-		{name: "SETAE", argLength: 1, reg: readflags, asm: "SETCC"}, // extract unsigned >= condition from arg0
-		// Need different opcodes for floating point conditions because
-		// any comparison involving a NaN is always FALSE and thus
-		// the patterns for inverting conditions cannot be used.
-		{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0
-		{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0
-		{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"},                       // extract "ordered" (No Nan present) condition from arg0
-		{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"},                       // extract "unordered" (Nan present) condition from arg0
-
-		{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"},  // extract floating > condition from arg0
-		{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0
-
-		{name: "MOVBLSX", argLength: 1, reg: gp11, asm: "MOVBLSX"}, // sign extend arg0 from int8 to int32
-		{name: "MOVBLZX", argLength: 1, reg: gp11, asm: "MOVBLZX"}, // zero extend arg0 from int8 to int32
-		{name: "MOVWLSX", argLength: 1, reg: gp11, asm: "MOVWLSX"}, // sign extend arg0 from int16 to int32
-		{name: "MOVWLZX", argLength: 1, reg: gp11, asm: "MOVWLZX"}, // zero extend arg0 from int16 to int32
-
-		{name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint
-
-		{name: "CVTTSD2SL", argLength: 1, reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32
-		{name: "CVTTSS2SL", argLength: 1, reg: fpgp, asm: "CVTTSS2SL"}, // convert float32 to int32
-		{name: "CVTSL2SS", argLength: 1, reg: gpfp, asm: "CVTSL2SS"},   // convert int32 to float32
-		{name: "CVTSL2SD", argLength: 1, reg: gpfp, asm: "CVTSL2SD"},   // convert int32 to float64
-		{name: "CVTSD2SS", argLength: 1, reg: fp11, asm: "CVTSD2SS"},   // convert float64 to float32
-		{name: "CVTSS2SD", argLength: 1, reg: fp11, asm: "CVTSS2SD"},   // convert float32 to float64
-
-		{name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
-
-		{name: "LEAL", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
-		{name: "LEAL1", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + arg1 + auxint + aux
-		{name: "LEAL2", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 2*arg1 + auxint + aux
-		{name: "LEAL4", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 4*arg1 + auxint + aux
-		{name: "LEAL8", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 8*arg1 + auxint + aux
-		// Note: LEAL{1,2,4,8} must not have OpSB as either argument.
-
-		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
-		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8"},  // load byte from arg0+auxint+aux. arg1=mem.  Zero extend.
-		{name: "MOVBLSXload", argLength: 2, reg: gpload, asm: "MOVBLSX", aux: "SymOff"},             // ditto, sign extend to int32
-		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16"}, // load 2 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
-		{name: "MOVWLSXload", argLength: 2, reg: gpload, asm: "MOVWLSX", aux: "SymOff"},             // ditto, sign extend to int32
-		{name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32"},    // load 4 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
-		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem"},     // store byte in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem"},     // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem"},     // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
-
-		// indexed loads/stores
-		{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
-		{name: "MOVWloadidx1", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-		{name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
-		{name: "MOVLloadidx1", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-		{name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff"},    // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
-		// TODO: sign-extending indexed loads
-		{name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-		{name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-		{name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
-		{name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-		{name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
-		// TODO: add size-mismatched indexed loads, like MOVBstoreidx4.
-
-		// For storeconst ops, the AuxInt field encodes both
-		// the value to store and an address offset of the store.
-		// Cast AuxInt to a ValAndOff to extract Val and Off fields.
-		{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux.  arg1=mem
-		{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ...
-		{name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ...
-
-		{name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVB", aux: "SymValAndOff", typ: "Mem"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+1*arg1+ValAndOff(AuxInt).Off()+aux.  arg2=mem
-		{name: "MOVWstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ... arg1 ...
-		{name: "MOVWstoreconstidx2", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ... 2*arg1 ...
-		{name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... arg1 ...
-		{name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... 4*arg1 ...
-
-		// arg0 = pointer to start of memory to zero
-		// arg1 = value to store (will always be zero)
-		// arg2 = mem
-		// auxint = offset into duffzero code to start executing
-		// returns mem
-		{
-			name:      "DUFFZERO",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("AX")},
-				clobbers: buildReg("DI CX"),
-				// Note: CX is only clobbered when dynamic linking.
-			},
-		},
-
-		// arg0 = address of memory to zero
-		// arg1 = # of 4-byte words to zero
-		// arg2 = value to store (will always be zero)
-		// arg3 = mem
-		// returns mem
-		{
-			name:      "REPSTOSL",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("CX"), buildReg("AX")},
-				clobbers: buildReg("DI CX"),
-			},
-		},
-
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                             // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("DX"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                               // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                  // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                        // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
-
-		// arg0 = destination pointer
-		// arg1 = source pointer
-		// arg2 = mem
-		// auxint = offset from duffcopy symbol to call
-		// returns memory
-		{
-			name:      "DUFFCOPY",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("SI")},
-				clobbers: buildReg("DI SI CX"), // uses CX as a temporary
-			},
-			clobberFlags: true,
-		},
-
-		// arg0 = destination pointer
-		// arg1 = source pointer
-		// arg2 = # of 8-byte words to copy
-		// arg3 = mem
-		// returns memory
-		{
-			name:      "REPMOVSL",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("SI"), buildReg("CX")},
-				clobbers: buildReg("DI SI CX"),
-			},
-		},
-
-		// (InvertFlags (CMPL a b)) == (CMPL b a)
-		// So if we want (SETL (CMPL a b)) but we can't do that because a is a constant,
-		// then we do (SETL (InvertFlags (CMPL b a))) instead.
-		// Rewrites will convert this to (SETG (CMPL b a)).
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
-
-		// Pseudo-ops
-		{name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of DX (the closure pointer)
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("DX")}}},
-		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true},
-
-		// MOVLconvert converts between pointers and integers.
-		// We have a special op for this so as to not confuse GC
-		// (particularly stack maps).  It takes a memory arg so it
-		// gets correctly ordered with respect to GC safepoints.
-		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVLconvert", argLength: 2, reg: gp11, asm: "MOVL"},
-
-		// Constant flag values. For any comparison, there are 5 possible
-		// outcomes: the three from the signed total order (<,==,>) and the
-		// three from the unsigned total order. The == cases overlap.
-		// Note: there's a sixth "unordered" outcome for floating-point
-		// comparisons, but we don't use such a beast yet.
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"},     // equal
-		{name: "FlagLT_ULT"}, // signed < and unsigned <
-		{name: "FlagLT_UGT"}, // signed < and unsigned >
-		{name: "FlagGT_UGT"}, // signed > and unsigned <
-		{name: "FlagGT_ULT"}, // signed > and unsigned >
-
-		// Special op for -x on 387
-		{name: "FCHS", argLength: 1, reg: fp11},
-
-		// Special ops for PIC floating-point constants.
-		// MOVSXconst1 loads the address of the constant-pool entry into a register.
-		// MOVSXconst2 loads the constant from that address.
-		// MOVSXconst1 returns a pointer, but we type it as uint32 because it can never point to the Go heap.
-		{name: "MOVSSconst1", reg: gp01, typ: "UInt32", aux: "Float32"},
-		{name: "MOVSDconst1", reg: gp01, typ: "UInt32", aux: "Float64"},
-		{name: "MOVSSconst2", argLength: 1, reg: gpfp, asm: "MOVSS"},
-		{name: "MOVSDconst2", argLength: 1, reg: gpfp, asm: "MOVSD"},
-	}
-
-	var _386blocks = []blockData{
-		{name: "EQ"},
-		{name: "NE"},
-		{name: "LT"},
-		{name: "LE"},
-		{name: "GT"},
-		{name: "GE"},
-		{name: "ULT"},
-		{name: "ULE"},
-		{name: "UGT"},
-		{name: "UGE"},
-		{name: "EQF"},
-		{name: "NEF"},
-		{name: "ORD"}, // FP, ordered comparison (parity zero)
-		{name: "NAN"}, // FP, unordered comparison (parity one)
-	}
-
-	archs = append(archs, arch{
-		name:            "386",
-		pkg:             "cmd/internal/obj/x86",
-		genfile:         "../../x86/ssa.go",
-		ops:             _386ops,
-		blocks:          _386blocks,
-		regnames:        regNames386,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: int8(num["BP"]),
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -4,8 +4,7 @@

 // Lowering arithmetic
 (Add64  x y) -> (ADDQ  x y)
-(AddPtr x y) && config.PtrSize == 8 -> (ADDQ x y)
-(AddPtr x y) && config.PtrSize == 4 -> (ADDL x y)
+(AddPtr x y) -> (ADDQ  x y)
 (Add32  x y) -> (ADDL  x y)
 (Add16  x y) -> (ADDL  x y)
 (Add8   x y) -> (ADDL  x y)
@@ -13,8 +12,7 @@
 (Add64F x y) -> (ADDSD x y)

 (Sub64  x y) -> (SUBQ  x y)
-(SubPtr x y) && config.PtrSize == 8 -> (SUBQ x y)
-(SubPtr x y) && config.PtrSize == 4 -> (SUBL x y)
+(SubPtr x y) -> (SUBQ  x y)
 (Sub32  x y) -> (SUBL  x y)
 (Sub16  x y) -> (SUBL  x y)
 (Sub8   x y) -> (SUBL  x y)
@@ -31,14 +29,14 @@
 (Div32F x y) -> (DIVSS x y)
 (Div64F x y) -> (DIVSD x y)

-(Div64  x y) -> (Select0 (DIVQ  x y))
-(Div64u x y) -> (Select0 (DIVQU x y))
-(Div32  x y) -> (Select0 (DIVL  x y))
-(Div32u x y) -> (Select0 (DIVLU x y))
-(Div16  x y) -> (Select0 (DIVW  x y))
-(Div16u x y) -> (Select0 (DIVWU x y))
-(Div8   x y) -> (Select0 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
-(Div8u  x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
+(Div64  x y) -> (DIVQ  x y)
+(Div64u x y) -> (DIVQU x y)
+(Div32  x y) -> (DIVL  x y)
+(Div32u x y) -> (DIVLU x y)
+(Div16  x y) -> (DIVW  x y)
+(Div16u x y) -> (DIVWU x y)
+(Div8   x y) -> (DIVW  (SignExt8to16 x) (SignExt8to16 y))
+(Div8u  x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))

 (Hmul64  x y) -> (HMULQ  x y)
 (Hmul64u x y) -> (HMULQU x y)
@@ -51,14 +49,14 @@

 (Avg64u x y) -> (AVGQU x y)

-(Mod64  x y) -> (Select1 (DIVQ  x y))
-(Mod64u x y) -> (Select1 (DIVQU x y))
-(Mod32  x y) -> (Select1 (DIVL  x y))
-(Mod32u x y) -> (Select1 (DIVLU x y))
-(Mod16  x y) -> (Select1 (DIVW  x y))
-(Mod16u x y) -> (Select1 (DIVWU x y))
-(Mod8   x y) -> (Select1 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
-(Mod8u  x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
+(Mod64  x y) -> (MODQ  x y)
+(Mod64u x y) -> (MODQU x y)
+(Mod32  x y) -> (MODL  x y)
+(Mod32u x y) -> (MODLU x y)
+(Mod16  x y) -> (MODW  x y)
+(Mod16u x y) -> (MODWU x y)
+(Mod8   x y) -> (MODW  (SignExt8to16 x) (SignExt8to16 y))
+(Mod8u  x y) -> (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))

 (And64 x y) -> (ANDQ x y)
 (And32 x y) -> (ANDL x y)
@@ -93,9 +91,8 @@
 (Not x) -> (XORLconst [1] x)

 // Lowering pointer arithmetic
-(OffPtr [off] ptr) && config.PtrSize == 8 && is32Bit(off) -> (ADDQconst [off] ptr)
-(OffPtr [off] ptr) && config.PtrSize == 8 -> (ADDQ (MOVQconst [off]) ptr)
-(OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr)
+(OffPtr [off] ptr) && is32Bit(off) -> (ADDQconst [off] ptr)
+(OffPtr [off] ptr) -> (ADDQ (MOVQconst [off]) ptr)

 // Lowering other arithmetic
 // TODO: CMPQconst 0 below is redundant because BSF sets Z but how to remove?
@@ -273,8 +270,7 @@
 (Eq16  x y) -> (SETEQ (CMPW x y))
 (Eq8   x y) -> (SETEQ (CMPB x y))
 (EqB   x y) -> (SETEQ (CMPB x y))
-(EqPtr x y) && config.PtrSize == 8 -> (SETEQ (CMPQ x y))
-(EqPtr x y) && config.PtrSize == 4 -> (SETEQ (CMPL x y))
+(EqPtr x y) -> (SETEQ (CMPQ x y))
 (Eq64F x y) -> (SETEQF (UCOMISD x y))
 (Eq32F x y) -> (SETEQF (UCOMISS x y))

@@ -283,16 +279,13 @@
 (Neq16  x y) -> (SETNE (CMPW x y))
 (Neq8   x y) -> (SETNE (CMPB x y))
 (NeqB   x y) -> (SETNE (CMPB x y))
-(NeqPtr x y) && config.PtrSize == 8 -> (SETNE (CMPQ x y))
-(NeqPtr x y) && config.PtrSize == 4 -> (SETNE (CMPL x y))
+(NeqPtr x y) -> (SETNE (CMPQ x y))
 (Neq64F x y) -> (SETNEF (UCOMISD x y))
 (Neq32F x y) -> (SETNEF (UCOMISS x y))

-(Int64Hi x) -> (SHRQconst [32] x) // needed for amd64p32
-
 // Lowering loads
-(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t) && config.PtrSize == 8) -> (MOVQload ptr mem)
-(Load <t> ptr mem) && (is32BitInt(t) || isPtr(t) && config.PtrSize == 4) -> (MOVLload ptr mem)
+(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem)
+(Load <t> ptr mem) && is32BitInt(t) -> (MOVLload ptr mem)
 (Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
 (Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
 (Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
@@ -309,47 +302,39 @@
 (Store [1] ptr val mem) -> (MOVBstore ptr val mem)

 // Lowering moves
-(Move [s] _ _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore dst (MOVBload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 -> (MOVWstore dst (MOVWload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 -> (MOVLstore dst (MOVLload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 -> (MOVQstore dst (MOVQload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 16 -> (MOVOstore dst (MOVOload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 3 ->
+(Move [0] _ _ mem) -> mem
+(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
+(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
+(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
+(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
+(Move [16] dst src mem) -> (MOVOstore dst (MOVOload src mem) mem)
+(Move [3] dst src mem) ->
 	(MOVBstore [2] dst (MOVBload [2] src mem)
 		(MOVWstore dst (MOVWload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 5 ->
+(Move [5] dst src mem) ->
 	(MOVBstore [4] dst (MOVBload [4] src mem)
 		(MOVLstore dst (MOVLload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 6 ->
+(Move [6] dst src mem) ->
 	(MOVWstore [4] dst (MOVWload [4] src mem)
 		(MOVLstore dst (MOVLload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 7 ->
+(Move [7] dst src mem) ->
 	(MOVLstore [3] dst (MOVLload [3] src mem)
 		(MOVLstore dst (MOVLload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() < 16 ->
-	(MOVQstore [SizeAndAlign(s).Size()-8] dst (MOVQload [SizeAndAlign(s).Size()-8] src mem)
+(Move [size] dst src mem) && size > 8 && size < 16 ->
+	(MOVQstore [size-8] dst (MOVQload [size-8] src mem)
 		(MOVQstore dst (MOVQload src mem) mem))

 // Adjust moves to be a multiple of 16 bytes.
-(Move [s] dst src mem)
-	&& SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 <= 8 ->
-	(Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16]
-		(OffPtr <dst.Type> dst [SizeAndAlign(s).Size()%16])
-		(OffPtr <src.Type> src [SizeAndAlign(s).Size()%16])
+(Move [size] dst src mem) && size > 16 && size%16 != 0 && size%16 <= 8 ->
+	(Move [size-size%16] (ADDQconst <dst.Type> dst [size%16]) (ADDQconst <src.Type> src [size%16])
 		(MOVQstore dst (MOVQload src mem) mem))
-(Move [s] dst src mem)
-	&& SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 > 8 ->
-	(Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16]
-		(OffPtr <dst.Type> dst [SizeAndAlign(s).Size()%16])
-		(OffPtr <src.Type> src [SizeAndAlign(s).Size()%16])
+(Move [size] dst src mem) && size > 16 && size%16 != 0 && size%16 > 8 ->
+	(Move [size-size%16] (ADDQconst <dst.Type> dst [size%16]) (ADDQconst <src.Type> src [size%16])
 		(MOVOstore dst (MOVOload src mem) mem))

 // Medium copying uses a duff device.
-(Move [s] dst src mem)
-	&& SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0
-	&& !config.noDuffDevice ->
-	(DUFFCOPY [14*(64-SizeAndAlign(s).Size()/16)] dst src mem)
+(Move [size] dst src mem) && size >= 32 && size <= 16*64 && size%16 == 0 && !config.noDuffDevice ->
+	(DUFFCOPY [14*(64-size/16)] dst src mem)
 // 14 and 64 are magic constants.  14 is the number of bytes to encode:
 //	MOVUPS	(SI), X0
 //	ADDQ	$16, SI
@@ -358,62 +343,57 @@
 // and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.

 // Large copying uses REP MOVSQ.
-(Move [s] dst src mem) && (SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0 ->
-	(REPMOVSQ dst src (MOVQconst [SizeAndAlign(s).Size()/8]) mem)
+(Move [size] dst src mem) && (size > 16*64 || config.noDuffDevice) && size%8 == 0 ->
+	(REPMOVSQ dst src (MOVQconst [size/8]) mem)

 // Lowering Zero instructions
-(Zero [s] _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstoreconst [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 2 -> (MOVWstoreconst [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 -> (MOVLstoreconst [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 -> (MOVQstoreconst [0] destptr mem)
+(Zero [0] _ mem) -> mem
+(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
+(Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
+(Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
+(Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)

-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 3 ->
+(Zero [3] destptr mem) ->
 	(MOVBstoreconst [makeValAndOff(0,2)] destptr
 		(MOVWstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 5 ->
+(Zero [5] destptr mem) ->
 	(MOVBstoreconst [makeValAndOff(0,4)] destptr
 		(MOVLstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 6 ->
+(Zero [6] destptr mem) ->
 	(MOVWstoreconst [makeValAndOff(0,4)] destptr
 		(MOVLstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 7 ->
+(Zero [7] destptr mem) ->
 	(MOVLstoreconst [makeValAndOff(0,3)] destptr
 		(MOVLstoreconst [0] destptr mem))

 // Strip off any fractional word zeroing.
-(Zero [s] destptr mem) && SizeAndAlign(s).Size()%8 != 0 && SizeAndAlign(s).Size() > 8 ->
-	(Zero [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8] (OffPtr <destptr.Type> destptr [SizeAndAlign(s).Size()%8])
+(Zero [size] destptr mem) && size%8 != 0 && size > 8 ->
+	(Zero [size-size%8] (ADDQconst destptr [size%8])
 		(MOVQstoreconst [0] destptr mem))

 // Zero small numbers of words directly.
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 16 ->
+(Zero [16] destptr mem) ->
 	(MOVQstoreconst [makeValAndOff(0,8)] destptr
 		(MOVQstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 24 ->
+(Zero [24] destptr mem) ->
 	(MOVQstoreconst [makeValAndOff(0,16)] destptr
 		(MOVQstoreconst [makeValAndOff(0,8)] destptr
 			(MOVQstoreconst [0] destptr mem)))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 32 ->
+(Zero [32] destptr mem) ->
 	(MOVQstoreconst [makeValAndOff(0,24)] destptr
 		(MOVQstoreconst [makeValAndOff(0,16)] destptr
 			(MOVQstoreconst [makeValAndOff(0,8)] destptr
 				(MOVQstoreconst [0] destptr mem))))

 // Medium zeroing uses a duff device.
-(Zero [s] destptr mem)
-	&& SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size()%16 != 0
-	&& !config.noDuffDevice ->
-	(Zero [SizeAndAlign(s).Size()-8] (OffPtr <destptr.Type> [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
-(Zero [s] destptr mem)
-	&& SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice ->
-	(DUFFZERO [SizeAndAlign(s).Size()] destptr (MOVOconst [0]) mem)
+(Zero [size] destptr mem) && size <= 1024 && size%8 == 0 && size%16 != 0 && !config.noDuffDevice ->
+	(Zero [size-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
+(Zero [size] destptr mem) && size <= 1024 && size%16 == 0 && !config.noDuffDevice ->
+	(DUFFZERO [size] destptr (MOVOconst [0]) mem)

 // Large zeroing uses REP STOSQ.
-(Zero [s] destptr mem)
-	&& (SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32))
-	&& SizeAndAlign(s).Size()%8 == 0 ->
-	(REPSTOSQ destptr (MOVQconst [SizeAndAlign(s).Size()/8]) (MOVQconst [0]) mem)
+(Zero [size] destptr mem) && (size > 1024 || (config.noDuffDevice && size > 32)) && size%8 == 0 ->
+	(REPSTOSQ destptr (MOVQconst [size/8]) (MOVQconst [0]) mem)

 // Lowering constants
 (Const8   [val]) -> (MOVLconst [val])
@@ -422,8 +402,7 @@
 (Const64  [val]) -> (MOVQconst [val])
 (Const32F [val]) -> (MOVSSconst [val])
 (Const64F [val]) -> (MOVSDconst [val])
-(ConstNil) && config.PtrSize == 8 -> (MOVQconst [0])
-(ConstNil) && config.PtrSize == 4 -> (MOVLconst [0])
+(ConstNil) -> (MOVQconst [0])
 (ConstBool [b]) -> (MOVLconst [b])

 // Lowering calls
@@ -434,17 +413,15 @@
 (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)

 // Miscellaneous
-(Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem)
-(Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem)
-(IsNonNil p) && config.PtrSize == 8 -> (SETNE (TESTQ p p))
-(IsNonNil p) && config.PtrSize == 4 -> (SETNE (TESTL p p))
+(Convert <t> x mem) -> (MOVQconvert <t> x mem)
+(IsNonNil p) -> (SETNE (TESTQ p p))
 (IsInBounds idx len) -> (SETB (CMPQ idx len))
 (IsSliceInBounds idx len) -> (SETBE (CMPQ idx len))
 (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
 (GetG mem) -> (LoweredGetG mem)
 (GetClosurePtr) -> (LoweredGetClosurePtr)
-(Addr {sym} base) && config.PtrSize == 8 -> (LEAQ {sym} base)
-(Addr {sym} base) && config.PtrSize == 4 -> (LEAL {sym} base)
+(Addr {sym} base) -> (LEAQ {sym} base)
+(ITab (Load ptr mem)) -> (MOVQload ptr mem)

 // block rewrites
 (If (SETL  cmp) yes no) -> (LT  cmp yes no)
@@ -518,12 +495,6 @@
 (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
 (ANDQconst [c] (ANDQconst [d] x)) -> (ANDQconst [c & d] x)

-(XORLconst [c] (XORLconst [d] x)) -> (XORLconst [c ^ d] x)
-(XORQconst [c] (XORQconst [d] x)) -> (XORQconst [c ^ d] x)
-
-(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
-(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
-
 (ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
 (ORQ (MOVQconst [c]) x) && is32Bit(c) -> (ORQconst [c] x)
 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
@@ -573,16 +544,6 @@
 (SHRL x (ANDLconst [31] y)) -> (SHRL x y)
 (SHRQ x (ANDQconst [63] y)) -> (SHRQ x y)

-(ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
-(ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
-(ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
-(ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
-
-(ROLQconst [0] x) -> x
-(ROLLconst [0] x) -> x
-(ROLWconst [0] x) -> x
-(ROLBconst [0] x) -> x
-
 // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
 // because the x86 instructions are defined to use all 5 bits of the shift even
 // for the small shifts. I don't think we'll ever generate a weird shift (e.g.
@@ -1603,53 +1564,3 @@
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
-
-// amd64p32 rules
-// same as the rules above, but with 32 instead of 64 bit pointer arithmetic.
-// LEAQ,ADDQ -> LEAL,ADDL
-(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
-(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
-
-(MOVQload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-
-(MOVQstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-
-(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-
-(MOVQload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQload  [off1+off2] {sym} ptr mem)
-(MOVLload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVLload  [off1+off2] {sym} ptr mem)
-(MOVWload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVWload  [off1+off2] {sym} ptr mem)
-(MOVBload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload  [off1+off2] {sym} ptr mem)
-(MOVQstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVQstore  [off1+off2] {sym} ptr val mem)
-(MOVLstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore  [off1+off2] {sym} ptr val mem)
-(MOVWstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore  [off1+off2] {sym} ptr val mem)
-(MOVBstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore  [off1+off2] {sym} ptr val mem)
-(MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -64,6 +64,7 @@ var regNamesAMD64 = []string{

 	// pseudo-registers
 	"SB",
+	"FLAGS",
 }

 func init() {
@@ -97,36 +98,43 @@ func init() {
 		fp         = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15")
 		gpsp       = gp | buildReg("SP")
 		gpspsb     = gpsp | buildReg("SB")
-		callerSave = gp | fp
+		flags      = buildReg("FLAGS")
+		callerSave = gp | fp | flags
 	)
 	// Common slices of register masks
 	var (
-		gponly = []regMask{gp}
-		fponly = []regMask{fp}
+		gponly    = []regMask{gp}
+		fponly    = []regMask{fp}
+		flagsonly = []regMask{flags}
 	)

 	// Common regInfo
 	var (
-		gp01      = regInfo{inputs: nil, outputs: gponly}
-		gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly}
-		gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
+		gp01      = regInfo{inputs: []regMask{}, outputs: gponly}
+		gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly, clobbers: flags}
+		gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags}
+		gp11nf    = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered
 		gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
-		gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-		gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
+		gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly, clobbers: flags}
+		gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly, clobbers: flags}
 		gp21sb    = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
-		gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
-		gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax, dx}}
-		gp21hmul  = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
+		gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}, clobbers: flags}
+		gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
+			clobbers: dx | flags}
+		gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
+			clobbers: ax | flags}
+		gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
+			clobbers: ax | flags}

-		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
-		gp1flags = regInfo{inputs: []regMask{gpsp}}
-		flagsgp  = regInfo{inputs: nil, outputs: gponly}
+		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
+		gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
+		flagsgp  = regInfo{inputs: flagsonly, outputs: gponly}

 		// for CMOVconst -- uses AX to hold constant temporary.
-		gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax}, clobbers: ax, outputs: []regMask{gp &^ ax}}
+		gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}

-		readflags = regInfo{inputs: nil, outputs: gponly}
-		flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
+		readflags = regInfo{inputs: flagsonly, outputs: gponly}
+		flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}

 		gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
 		gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
@@ -136,14 +144,14 @@ func init() {
 		gpstoreidx      = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
 		gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}

-		fp01    = regInfo{inputs: nil, outputs: fponly}
+		fp01    = regInfo{inputs: []regMask{}, outputs: fponly}
 		fp21    = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
 		fp21x15 = regInfo{inputs: []regMask{fp &^ x15, fp &^ x15},
 			clobbers: x15, outputs: []regMask{fp &^ x15}}
 		fpgp     = regInfo{inputs: fponly, outputs: gponly}
 		gpfp     = regInfo{inputs: gponly, outputs: fponly}
 		fp11     = regInfo{inputs: fponly, outputs: fponly}
-		fp2flags = regInfo{inputs: []regMask{fp, fp}}
+		fp2flags = regInfo{inputs: []regMask{fp, fp}, outputs: flagsonly}

 		fpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly}
 		fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly}
@@ -180,53 +188,60 @@ func init() {
 		{name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store

 		// binary ops
-		{name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true, clobberFlags: true},                // arg0 + arg1
-		{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                // arg0 + arg1
-		{name: "ADDQconst", argLength: 1, reg: gp11sp, asm: "ADDQ", aux: "Int64", typ: "UInt64", clobberFlags: true}, // arg0 + auxint
-		{name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", clobberFlags: true},                // arg0 + auxint
+		{name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true},                // arg0 + arg1
+		{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true},                // arg0 + arg1
+		{name: "ADDQconst", argLength: 1, reg: gp11sp, asm: "ADDQ", aux: "Int64", typ: "UInt64"}, // arg0 + auxint
+		{name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32"},                // arg0 + auxint

-		{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
-		{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
-		{name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
-		{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
+		{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true},                    // arg0 - arg1
+		{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true},                    // arg0 - arg1
+		{name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int64", resultInArg0: true}, // arg0 - auxint
+		{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true}, // arg0 - auxint

-		{name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
-		{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
-		{name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
-		{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
+		{name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true}, // arg0 * arg1
+		{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true}, // arg0 * arg1
+		{name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64", resultInArg0: true}, // arg0 * auxint
+		{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true}, // arg0 * auxint

-		{name: "HMULQ", argLength: 2, reg: gp21hmul, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULW", argLength: 2, reg: gp21hmul, asm: "IMULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULB", argLength: 2, reg: gp21hmul, asm: "IMULB", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULQU", argLength: 2, reg: gp21hmul, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULWU", argLength: 2, reg: gp21hmul, asm: "MULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULBU", argLength: 2, reg: gp21hmul, asm: "MULB", clobberFlags: true}, // (arg0 * arg1) >> width
+		{name: "HMULQ", argLength: 2, reg: gp11hmul, asm: "IMULQ"}, // (arg0 * arg1) >> width
+		{name: "HMULL", argLength: 2, reg: gp11hmul, asm: "IMULL"}, // (arg0 * arg1) >> width
+		{name: "HMULW", argLength: 2, reg: gp11hmul, asm: "IMULW"}, // (arg0 * arg1) >> width
+		{name: "HMULB", argLength: 2, reg: gp11hmul, asm: "IMULB"}, // (arg0 * arg1) >> width
+		{name: "HMULQU", argLength: 2, reg: gp11hmul, asm: "MULQ"}, // (arg0 * arg1) >> width
+		{name: "HMULLU", argLength: 2, reg: gp11hmul, asm: "MULL"}, // (arg0 * arg1) >> width
+		{name: "HMULWU", argLength: 2, reg: gp11hmul, asm: "MULW"}, // (arg0 * arg1) >> width
+		{name: "HMULBU", argLength: 2, reg: gp11hmul, asm: "MULB"}, // (arg0 * arg1) >> width

-		{name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits
+		{name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits

-		{name: "DIVQ", argLength: 2, reg: gp11div, typ: "(Int64,Int64)", asm: "IDIVQ", clobberFlags: true},   // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVL", argLength: 2, reg: gp11div, typ: "(Int32,Int32)", asm: "IDIVL", clobberFlags: true},   // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVW", argLength: 2, reg: gp11div, typ: "(Int16,Int16)", asm: "IDIVW", clobberFlags: true},   // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVQU", argLength: 2, reg: gp11div, typ: "(UInt64,UInt64)", asm: "DIVQ", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVLU", argLength: 2, reg: gp11div, typ: "(UInt32,UInt32)", asm: "DIVL", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVWU", argLength: 2, reg: gp11div, typ: "(UInt16,UInt16)", asm: "DIVW", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
+		{name: "DIVQ", argLength: 2, reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
+		{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
+		{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
+		{name: "DIVQU", argLength: 2, reg: gp11div, asm: "DIVQ"}, // arg0 / arg1
+		{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL"}, // arg0 / arg1
+		{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW"}, // arg0 / arg1

-		{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-		{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
-		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
+		{name: "MODQ", argLength: 2, reg: gp11mod, asm: "IDIVQ"}, // arg0 % arg1
+		{name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL"}, // arg0 % arg1
+		{name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW"}, // arg0 % arg1
+		{name: "MODQU", argLength: 2, reg: gp11mod, asm: "DIVQ"}, // arg0 % arg1
+		{name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL"}, // arg0 % arg1
+		{name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW"}, // arg0 % arg1

-		{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-		{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
-		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
+		{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true}, // arg0 & arg1
+		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true}, // arg0 & arg1
+		{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int64", resultInArg0: true}, // arg0 & auxint
+		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true}, // arg0 & auxint

-		{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-		{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
-		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
+		{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true}, // arg0 | arg1
+		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true}, // arg0 | arg1
+		{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int64", resultInArg0: true}, // arg0 | auxint
+		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true}, // arg0 | auxint
+
+		{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true}, // arg0 ^ arg1
+		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true}, // arg0 ^ arg1
+		{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int64", resultInArg0: true}, // arg0 ^ auxint
+		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true}, // arg0 ^ auxint

 		{name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"},                    // arg0 compare to arg1
 		{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"},                    // arg0 compare to arg1
@@ -249,60 +264,60 @@ func init() {
 		{name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
 		{name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0

-		{name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 64
-		{name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 32
-		{name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-63
-		{name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
+		{name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true},               // arg0 << arg1, shift amount is mod 64
+		{name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true},               // arg0 << arg1, shift amount is mod 32
+		{name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64", resultInArg0: true}, // arg0 << auxint, shift amount 0-63
+		{name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true}, // arg0 << auxint, shift amount 0-31
 		// Note: x86 is weird, the 16 and 8 byte shifts still use all 5 bits of shift amount!

-		{name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 64
-		{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-63
-		{name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // unsigned arg0 >> auxint, shift amount 0-31
+		{name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 64
+		{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 32
+		{name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 32
+		{name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 32
+		{name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int64", resultInArg0: true}, // unsigned arg0 >> auxint, shift amount 0-63
+		{name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true}, // unsigned arg0 >> auxint, shift amount 0-31
+		{name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true}, // unsigned arg0 >> auxint, shift amount 0-31
+		{name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true},  // unsigned arg0 >> auxint, shift amount 0-31

-		{name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 64
-		{name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
-		{name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // signed arg0 >> auxint, shift amount 0-31
+		{name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 64
+		{name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 32
+		{name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 32
+		{name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 32
+		{name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int64", resultInArg0: true}, // signed arg0 >> auxint, shift amount 0-63
+		{name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true}, // signed arg0 >> auxint, shift amount 0-31
+		{name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true}, // signed arg0 >> auxint, shift amount 0-31
+		{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true},  // signed arg0 >> auxint, shift amount 0-31

-		{name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-63
-		{name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
-		{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
-		{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // arg0 rotate left auxint, rotate amount 0-7
+		{name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int64", resultInArg0: true}, // arg0 rotate left auxint, rotate amount 0-63
+		{name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true}, // arg0 rotate left auxint, rotate amount 0-31
+		{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true}, // arg0 rotate left auxint, rotate amount 0-15
+		{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true},  // arg0 rotate left auxint, rotate amount 0-7

 		// unary ops
-		{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true, clobberFlags: true}, // -arg0
-		{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
+		{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true}, // -arg0
+		{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true}, // -arg0

-		{name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true, clobberFlags: true}, // ^arg0
-		{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true, clobberFlags: true}, // ^arg0
+		{name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true}, // ^arg0
+		{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0

-		{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
+		{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero
+		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero
+		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // arg0 # of low-order zeroes ; undef if zero

-		{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
+		{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // arg0 # of high-order zeroes ; undef if zero
+		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // arg0 # of high-order zeroes ; undef if zero
+		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // arg0 # of high-order zeroes ; undef if zero

 		// Note ASM for ops moves whole register
-		{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-		{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-		{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-		{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
-		{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
-		{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set

-		{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
-		{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
+		{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 swap bytes
+		{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes

 		{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)

@@ -323,20 +338,20 @@ func init() {
 		// Need different opcodes for floating point conditions because
 		// any comparison involving a NaN is always FALSE and thus
 		// the patterns for inverting conditions cannot be used.
-		{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0
-		{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0
-		{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"},                       // extract "ordered" (No Nan present) condition from arg0
-		{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"},                       // extract "unordered" (Nan present) condition from arg0
+		{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ"}, // extract == condition from arg0
+		{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE"}, // extract != condition from arg0
+		{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"},   // extract "ordered" (No Nan present) condition from arg0
+		{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"},   // extract "unordered" (Nan present) condition from arg0

 		{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"},  // extract floating > condition from arg0
 		{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0

-		{name: "MOVBQSX", argLength: 1, reg: gp11, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64
-		{name: "MOVBQZX", argLength: 1, reg: gp11, asm: "MOVBQZX"}, // zero extend arg0 from int8 to int64
-		{name: "MOVWQSX", argLength: 1, reg: gp11, asm: "MOVWQSX"}, // sign extend arg0 from int16 to int64
-		{name: "MOVWQZX", argLength: 1, reg: gp11, asm: "MOVWQZX"}, // zero extend arg0 from int16 to int64
-		{name: "MOVLQSX", argLength: 1, reg: gp11, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64
-		{name: "MOVLQZX", argLength: 1, reg: gp11, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64
+		{name: "MOVBQSX", argLength: 1, reg: gp11nf, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64
+		{name: "MOVBQZX", argLength: 1, reg: gp11nf, asm: "MOVBQZX"}, // zero extend arg0 from int8 to int64
+		{name: "MOVWQSX", argLength: 1, reg: gp11nf, asm: "MOVWQSX"}, // sign extend arg0 from int16 to int64
+		{name: "MOVWQZX", argLength: 1, reg: gp11nf, asm: "MOVWQZX"}, // zero extend arg0 from int16 to int64
+		{name: "MOVLQSX", argLength: 1, reg: gp11nf, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64
+		{name: "MOVLQZX", argLength: 1, reg: gp11nf, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64

 		{name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint
 		{name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
@@ -354,15 +369,13 @@ func init() {

 		{name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.

-		{name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
-		{name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + arg1 + auxint + aux
-		{name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + 2*arg1 + auxint + aux
-		{name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + 4*arg1 + auxint + aux
-		{name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + 8*arg1 + auxint + aux
+		{name: "LEAQ", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
+		{name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + arg1 + auxint + aux
+		{name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 2*arg1 + auxint + aux
+		{name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 4*arg1 + auxint + aux
+		{name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 8*arg1 + auxint + aux
 		// Note: LEAQ{1,2,4,8} must not have OpSB as either argument.

-		{name: "LEAL", argLength: 1, reg: gp11sb, asm: "LEAL", aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
-
 		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
 		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8"},  // load byte from arg0+auxint+aux. arg1=mem.  Zero extend.
 		{name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff"},             // ditto, sign extend to int64
@@ -423,9 +436,8 @@ func init() {
 			argLength: 3,
 			reg: regInfo{
 				inputs:   []regMask{buildReg("DI"), buildReg("X0")},
-				clobbers: buildReg("DI"),
+				clobbers: buildReg("DI FLAGS"),
 			},
-			clobberFlags: true,
 		},
 		{name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", aux: "Int128", rematerializeable: true},

@@ -443,11 +455,11 @@ func init() {
 			},
 		},

-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                             // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("DX"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                               // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                  // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                        // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
+		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"},                                // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
+		{name: "CALLclosure", argLength: 3, reg: regInfo{[]regMask{gpsp, buildReg("DX"), 0}, callerSave, nil}, aux: "Int64"}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
+		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64"},                                  // call deferproc.  arg0=mem, auxint=argsize, returns mem
+		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64"},                                     // call newproc.  arg0=mem, auxint=argsize, returns mem
+		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64"},           // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem

 		// arg0 = destination pointer
 		// arg1 = source pointer
@@ -460,9 +472,8 @@ func init() {
 			argLength: 3,
 			reg: regInfo{
 				inputs:   []regMask{buildReg("DI"), buildReg("SI")},
-				clobbers: buildReg("DI SI X0"), // uses X0 as a temporary
+				clobbers: buildReg("DI SI X0 FLAGS"), // uses X0 as a temporary
 			},
-			clobberFlags: true,
 		},

 		// arg0 = destination pointer
@@ -493,15 +504,14 @@ func init() {
 		// use of DX (the closure pointer)
 		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("DX")}}},
 		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true},
+		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}, clobbers: flags}},

 		// MOVQconvert converts between pointers and integers.
 		// We have a special op for this so as to not confuse GC
 		// (particularly stack maps).  It takes a memory arg so it
 		// gets correctly ordered with respect to GC safepoints.
 		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVQconvert", argLength: 2, reg: gp11, asm: "MOVQ"},
-		{name: "MOVLconvert", argLength: 2, reg: gp11, asm: "MOVL"}, // amd64p32 equivalent
+		{name: "MOVQconvert", argLength: 2, reg: gp11nf, asm: "MOVQ"},

 		// Constant flag values. For any comparison, there are 5 possible
 		// outcomes: the three from the signed total order (<,==,>) and the
@@ -535,14 +545,11 @@ func init() {
 	}

 	archs = append(archs, arch{
-		name:            "AMD64",
-		pkg:             "cmd/internal/obj/x86",
-		genfile:         "../../amd64/ssa.go",
-		ops:             AMD64ops,
-		blocks:          AMD64blocks,
-		regnames:        regNamesAMD64,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: int8(num["BP"]),
+		name:     "AMD64",
+		pkg:      "cmd/internal/obj/x86",
+		genfile:  "../../amd64/ssa.go",
+		ops:      AMD64ops,
+		blocks:   AMD64blocks,
+		regnames: regNamesAMD64,
 	})
 }
--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -1,455 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import "strings"
-
-// Notes:
-//  - Integer types live in the low portion of registers. Upper portions are junk.
-//  - Boolean types use the low-order byte of a register. 0=false, 1=true.
-//    Upper bytes are junk.
-//  - *const instructions may use a constant larger than the instuction can encode.
-//    In this case the assembler expands to multiple instructions and uses tmp
-//    register (R27).
-
-// Suffixes encode the bit width of various instructions.
-// D (double word) = 64 bit
-// W (word)        = 32 bit
-// H (half word)   = 16 bit
-// HU              = 16 bit unsigned
-// B (byte)        = 8 bit
-// BU              = 8 bit unsigned
-// S (single)      = 32 bit float
-// D (double)      = 64 bit float
-
-// Note: registers not used in regalloc are not included in this list,
-// so that regmask stays within int64
-// Be careful when hand coding regmasks.
-var regNamesARM64 = []string{
-	"R0",
-	"R1",
-	"R2",
-	"R3",
-	"R4",
-	"R5",
-	"R6",
-	"R7",
-	"R8",
-	"R9",
-	"R10",
-	"R11",
-	"R12",
-	"R13",
-	"R14",
-	"R15",
-	"R16",
-	"R17",
-	"R18", // platform register, not used
-	"R19",
-	"R20",
-	"R21",
-	"R22",
-	"R23",
-	"R24",
-	"R25",
-	"R26",
-	// R27 = REGTMP not used in regalloc
-	"g",   // aka R28
-	"R29", // frame pointer, not used
-	// R30 = REGLINK not used in regalloc
-	"SP", // aka R31
-
-	"F0",
-	"F1",
-	"F2",
-	"F3",
-	"F4",
-	"F5",
-	"F6",
-	"F7",
-	"F8",
-	"F9",
-	"F10",
-	"F11",
-	"F12",
-	"F13",
-	"F14",
-	"F15",
-	"F16",
-	"F17",
-	"F18",
-	"F19",
-	"F20",
-	"F21",
-	"F22",
-	"F23",
-	"F24",
-	"F25",
-	"F26",
-	"F27",
-	"F28", // 0.0
-	"F29", // 0.5
-	"F30", // 1.0
-	"F31", // 2.0
-
-	// pseudo-registers
-	"SB",
-}
-
-func init() {
-	// Make map from reg names to reg integers.
-	if len(regNamesARM64) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNamesARM64 {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	// Common individual register masks
-	var (
-		gp         = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26")
-		gpg        = gp | buildReg("g")
-		gpsp       = gp | buildReg("SP")
-		gpspg      = gpg | buildReg("SP")
-		gpspsbg    = gpspg | buildReg("SB")
-		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27")
-		callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
-	)
-	// Common regInfo
-	var (
-		gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-		gp1flags  = regInfo{inputs: []regMask{gpg}}
-		gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-		gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
-		gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
-		//gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
-		//gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		//gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
-		//gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gpload   = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-		gpstore  = regInfo{inputs: []regMask{gpspsbg, gpg}}
-		gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
-		//gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
-		//gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
-		fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
-		fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-		//fp1flags  = regInfo{inputs: []regMask{fp}}
-		fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
-		gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
-		fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-		fp2flags  = regInfo{inputs: []regMask{fp, fp}}
-		fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
-		fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
-		readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
-	)
-	ops := []opData{
-		// binary ops
-		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
-		{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int64"},   // arg0 + auxInt
-		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0 - arg1
-		{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int64"},     // arg0 - auxInt
-		{name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true},     // arg0 * arg1
-		{name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true},   // arg0 * arg1, 32-bit
-		{name: "MULH", argLength: 2, reg: gp21, asm: "SMULH", commutative: true},  // (arg0 * arg1) >> 64, signed
-		{name: "UMULH", argLength: 2, reg: gp21, asm: "UMULH", commutative: true}, // (arg0 * arg1) >> 64, unsigned
-		{name: "MULL", argLength: 2, reg: gp21, asm: "SMULL", commutative: true},  // arg0 * arg1, signed, 32-bit mult results in 64-bit
-		{name: "UMULL", argLength: 2, reg: gp21, asm: "UMULL", commutative: true}, // arg0 * arg1, unsigned, 32-bit mult results in 64-bit
-		{name: "DIV", argLength: 2, reg: gp21, asm: "SDIV"},                       // arg0 / arg1, signed
-		{name: "UDIV", argLength: 2, reg: gp21, asm: "UDIV"},                      // arg0 / arg1, unsighed
-		{name: "DIVW", argLength: 2, reg: gp21, asm: "SDIVW"},                     // arg0 / arg1, signed, 32 bit
-		{name: "UDIVW", argLength: 2, reg: gp21, asm: "UDIVW"},                    // arg0 / arg1, unsighed, 32 bit
-		{name: "MOD", argLength: 2, reg: gp21, asm: "REM"},                        // arg0 % arg1, signed
-		{name: "UMOD", argLength: 2, reg: gp21, asm: "UREM"},                      // arg0 % arg1, unsigned
-		{name: "MODW", argLength: 2, reg: gp21, asm: "REMW"},                      // arg0 % arg1, signed, 32 bit
-		{name: "UMODW", argLength: 2, reg: gp21, asm: "UREMW"},                    // arg0 % arg1, unsigned, 32 bit
-
-		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0 + arg1
-		{name: "FADDD", argLength: 2, reg: fp21, asm: "FADDD", commutative: true}, // arg0 + arg1
-		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0 - arg1
-		{name: "FSUBD", argLength: 2, reg: fp21, asm: "FSUBD"},                    // arg0 - arg1
-		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0 * arg1
-		{name: "FMULD", argLength: 2, reg: fp21, asm: "FMULD", commutative: true}, // arg0 * arg1
-		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"},                    // arg0 / arg1
-		{name: "FDIVD", argLength: 2, reg: fp21, asm: "FDIVD"},                    // arg0 / arg1
-
-		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
-		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64"}, // arg0 & auxInt
-		{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true},  // arg0 | arg1
-		{name: "ORconst", argLength: 1, reg: gp11, asm: "ORR", aux: "Int64"},  // arg0 | auxInt
-		{name: "XOR", argLength: 2, reg: gp21, asm: "EOR", commutative: true}, // arg0 ^ arg1
-		{name: "XORconst", argLength: 1, reg: gp11, asm: "EOR", aux: "Int64"}, // arg0 ^ auxInt
-		{name: "BIC", argLength: 2, reg: gp21, asm: "BIC"},                    // arg0 &^ arg1
-		{name: "BICconst", argLength: 1, reg: gp11, asm: "BIC", aux: "Int64"}, // arg0 &^ auxInt
-
-		// unary ops
-		{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"},       // ^arg0
-		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},       // -arg0
-		{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"},   // -arg0, float32
-		{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"},   // -arg0, float64
-		{name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
-
-		// shifts
-		{name: "SLL", argLength: 2, reg: gp21, asm: "LSL"},                      // arg0 << arg1, shift amount is mod 64
-		{name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"},   // arg0 << auxInt
-		{name: "SRL", argLength: 2, reg: gp21, asm: "LSR"},                      // arg0 >> arg1, unsigned, shift amount is mod 64
-		{name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"},   // arg0 >> auxInt, unsigned
-		{name: "SRA", argLength: 2, reg: gp21, asm: "ASR"},                      // arg0 >> arg1, signed, shift amount is mod 64
-		{name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"},   // arg0 >> auxInt, signed
-		{name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"},   // arg0 right rotate by auxInt bits
-		{name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
-
-		// comparisons
-		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"},                      // arg0 compare to arg1
-		{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", aux: "Int64", typ: "Flags"},   // arg0 compare to auxInt
-		{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"},                    // arg0 compare to arg1, 32 bit
-		{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", aux: "Int32", typ: "Flags"}, // arg0 compare to auxInt, 32 bit
-		{name: "CMN", argLength: 2, reg: gp2flags, asm: "CMN", typ: "Flags"},                      // arg0 compare to -arg1
-		{name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int64", typ: "Flags"},   // arg0 compare to -auxInt
-		{name: "CMNW", argLength: 2, reg: gp2flags, asm: "CMNW", typ: "Flags"},                    // arg0 compare to -arg1, 32 bit
-		{name: "CMNWconst", argLength: 1, reg: gp1flags, asm: "CMNW", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt, 32 bit
-		{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"},                  // arg0 compare to arg1, float32
-		{name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
-
-		// shifted ops
-		{name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1<<auxInt
-		{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, unsigned shift
-		{name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, signed shift
-		{name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1<<auxInt
-		{name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, unsigned shift
-		{name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, signed shift
-		{name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1<<auxInt)
-		{name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), unsigned shift
-		{name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), signed shift
-		{name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1<<auxInt
-		{name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, unsigned shift
-		{name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, signed shift
-		{name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1<<auxInt
-		{name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, unsigned shift
-		{name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, signed shift
-		{name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1<<auxInt)
-		{name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), unsigned shift
-		{name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), signed shift
-		{name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1<<auxInt
-		{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
-		{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
-
-		// moves
-		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true},      // 32 low bits of auxint
-		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
-		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float
-
-		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
-
-		{name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8"},      // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8"},   // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16"},     // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "Int32"},     // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVWU", typ: "UInt32"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVDload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVD", typ: "UInt64"},    // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32"}, // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64"}, // load from arg0 + auxInt + aux.  arg1=mem.
-
-		{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem"},   // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem"},   // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem"},   // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem"},   // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-
-		{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + auxInt + aux.  ar12=mem.
-
-		// conversions
-		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"},   // move from arg0, sign-extended from byte
-		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
-		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"},   // move from arg0, sign-extended from half
-		{name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half
-		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"},   // move from arg0, sign-extended from word
-		{name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word
-		{name: "MOVDreg", argLength: 1, reg: gp11, asm: "MOVD"},   // move from arg0
-
-		{name: "MOVDnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register
-
-		{name: "SCVTFWS", argLength: 1, reg: gpfp, asm: "SCVTFWS"},   // int32 -> float32
-		{name: "SCVTFWD", argLength: 1, reg: gpfp, asm: "SCVTFWD"},   // int32 -> float64
-		{name: "UCVTFWS", argLength: 1, reg: gpfp, asm: "UCVTFWS"},   // uint32 -> float32
-		{name: "UCVTFWD", argLength: 1, reg: gpfp, asm: "UCVTFWD"},   // uint32 -> float64
-		{name: "SCVTFS", argLength: 1, reg: gpfp, asm: "SCVTFS"},     // int64 -> float32
-		{name: "SCVTFD", argLength: 1, reg: gpfp, asm: "SCVTFD"},     // int64 -> float64
-		{name: "UCVTFS", argLength: 1, reg: gpfp, asm: "UCVTFS"},     // uint64 -> float32
-		{name: "UCVTFD", argLength: 1, reg: gpfp, asm: "UCVTFD"},     // uint64 -> float64
-		{name: "FCVTZSSW", argLength: 1, reg: fpgp, asm: "FCVTZSSW"}, // float32 -> int32
-		{name: "FCVTZSDW", argLength: 1, reg: fpgp, asm: "FCVTZSDW"}, // float64 -> int32
-		{name: "FCVTZUSW", argLength: 1, reg: fpgp, asm: "FCVTZUSW"}, // float32 -> uint32
-		{name: "FCVTZUDW", argLength: 1, reg: fpgp, asm: "FCVTZUDW"}, // float64 -> uint32
-		{name: "FCVTZSS", argLength: 1, reg: fpgp, asm: "FCVTZSS"},   // float32 -> int64
-		{name: "FCVTZSD", argLength: 1, reg: fpgp, asm: "FCVTZSD"},   // float64 -> int64
-		{name: "FCVTZUS", argLength: 1, reg: fpgp, asm: "FCVTZUS"},   // float32 -> uint64
-		{name: "FCVTZUD", argLength: 1, reg: fpgp, asm: "FCVTZUD"},   // float64 -> uint64
-		{name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD"},     // float32 -> float64
-		{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"},     // float64 -> float32
-
-		// conditional instructions
-		{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"},  // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
-		{name: "CSELULT0", argLength: 2, reg: gp1flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, 0 otherwise, arg1=flags
-
-		// function calls
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                              // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R26"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                   // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                         // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
-
-		// pseudo-ops
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}}, // panic if arg0 is nil.  arg1=mem.
-
-		{name: "Equal", argLength: 1, reg: readflags},         // bool, true flags encode x==y false otherwise.
-		{name: "NotEqual", argLength: 1, reg: readflags},      // bool, true flags encode x!=y false otherwise.
-		{name: "LessThan", argLength: 1, reg: readflags},      // bool, true flags encode signed x<y false otherwise.
-		{name: "LessEqual", argLength: 1, reg: readflags},     // bool, true flags encode signed x<=y false otherwise.
-		{name: "GreaterThan", argLength: 1, reg: readflags},   // bool, true flags encode signed x>y false otherwise.
-		{name: "GreaterEqual", argLength: 1, reg: readflags},  // bool, true flags encode signed x>=y false otherwise.
-		{name: "LessThanU", argLength: 1, reg: readflags},     // bool, true flags encode unsigned x<y false otherwise.
-		{name: "LessEqualU", argLength: 1, reg: readflags},    // bool, true flags encode unsigned x<=y false otherwise.
-		{name: "GreaterThanU", argLength: 1, reg: readflags},  // bool, true flags encode unsigned x>y false otherwise.
-		{name: "GreaterEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>=y false otherwise.
-
-		// duffzero
-		// arg0 = address of memory to zero
-		// arg1 = mem
-		// auxint = offset into duffzero code to start executing
-		// returns mem
-		// R16 aka arm64.REGRT1 changed as side effect
-		{
-			name:      "DUFFZERO",
-			aux:       "Int64",
-			argLength: 2,
-			reg: regInfo{
-				inputs:   []regMask{gp},
-				clobbers: buildReg("R16"),
-			},
-		},
-
-		// large zeroing
-		// arg0 = address of memory to zero (in R16 aka arm64.REGRT1, changed as side effect)
-		// arg1 = address of the last element to zero
-		// arg2 = mem
-		// auxint = alignment
-		// returns mem
-		//	MOVD.P	ZR, 8(R16)
-		//	CMP	Rarg1, R16
-		//	BLE	-2(PC)
-		// Note: the-end-of-the-memory may be not a valid pointer. it's a problem if it is spilled.
-		// the-end-of-the-memory - 8 is with the area to zero, ok to spill.
-		{
-			name:      "LoweredZero",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R16"), gp},
-				clobbers: buildReg("R16"),
-			},
-			clobberFlags: true,
-		},
-
-		// large move
-		// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
-		// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
-		// arg2 = address of the last element of src
-		// arg3 = mem
-		// auxint = alignment
-		// returns mem
-		//	MOVD.P	8(R16), Rtmp
-		//	MOVD.P	Rtmp, 8(R17)
-		//	CMP	Rarg2, R16
-		//	BLE	-3(PC)
-		// Note: the-end-of-src may be not a valid pointer. it's a problem if it is spilled.
-		// the-end-of-src - 8 is within the area to copy, ok to spill.
-		{
-			name:      "LoweredMove",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R17"), buildReg("R16"), gp},
-				clobbers: buildReg("R16 R17"),
-			},
-			clobberFlags: true,
-		},
-
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of R26 (arm64.REGCTXT, the closure pointer)
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R26")}}},
-
-		// MOVDconvert converts between pointers and integers.
-		// We have a special op for this so as to not confuse GC
-		// (particularly stack maps).  It takes a memory arg so it
-		// gets correctly ordered with respect to GC safepoints.
-		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVDconvert", argLength: 2, reg: gp11, asm: "MOVD"},
-
-		// Constant flag values. For any comparison, there are 5 possible
-		// outcomes: the three from the signed total order (<,==,>) and the
-		// three from the unsigned total order. The == cases overlap.
-		// Note: there's a sixth "unordered" outcome for floating-point
-		// comparisons, but we don't use such a beast yet.
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"},     // equal
-		{name: "FlagLT_ULT"}, // signed < and unsigned <
-		{name: "FlagLT_UGT"}, // signed < and unsigned >
-		{name: "FlagGT_UGT"}, // signed > and unsigned <
-		{name: "FlagGT_ULT"}, // signed > and unsigned >
-
-		// (InvertFlags (CMP a b)) == (CMP b a)
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
-	}
-
-	blocks := []blockData{
-		{name: "EQ"},
-		{name: "NE"},
-		{name: "LT"},
-		{name: "LE"},
-		{name: "GT"},
-		{name: "GE"},
-		{name: "ULT"},
-		{name: "ULE"},
-		{name: "UGT"},
-		{name: "UGE"},
-	}
-
-	archs = append(archs, arch{
-		name:            "ARM64",
-		pkg:             "cmd/internal/obj/arm64",
-		genfile:         "../../arm64/ssa.go",
-		ops:             ops,
-		blocks:          blocks,
-		regnames:        regNamesARM64,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: -1, // not used
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/ARMOps.go
+++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go
@@ -6,481 +6,32 @@

 package main

-import "strings"
-
-// Notes:
-//  - Integer types live in the low portion of registers. Upper portions are junk.
-//  - Boolean types use the low-order byte of a register. 0=false, 1=true.
-//    Upper bytes are junk.
-//  - *const instructions may use a constant larger than the instuction can encode.
-//    In this case the assembler expands to multiple instructions and uses tmp
-//    register (R11).
-
-// Suffixes encode the bit width of various instructions.
-// W (word)      = 32 bit
-// H (half word) = 16 bit
-// HU            = 16 bit unsigned
-// B (byte)      = 8 bit
-// BU            = 8 bit unsigned
-// F (float)     = 32 bit float
-// D (double)    = 64 bit float
-
-var regNamesARM = []string{
-	"R0",
-	"R1",
-	"R2",
-	"R3",
-	"R4",
-	"R5",
-	"R6",
-	"R7",
-	"R8",
-	"R9",
-	"g",   // aka R10
-	"R11", // tmp
-	"R12",
-	"SP",  // aka R13
-	"R14", // link
-	"R15", // pc
-
-	"F0",
-	"F1",
-	"F2",
-	"F3",
-	"F4",
-	"F5",
-	"F6",
-	"F7",
-	"F8",
-	"F9",
-	"F10",
-	"F11",
-	"F12",
-	"F13",
-	"F14",
-	"F15", // tmp
-
-	// pseudo-registers
-	"SB",
-}
-
 func init() {
-	// Make map from reg names to reg integers.
-	if len(regNamesARM) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNamesARM {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	// Common individual register masks
 	var (
-		gp         = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12")
-		gpg        = gp | buildReg("g")
-		gpsp       = gp | buildReg("SP")
-		gpspg      = gpg | buildReg("SP")
-		gpspsbg    = gpspg | buildReg("SB")
-		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15")
-		callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
-	)
-	// Common regInfo
-	var (
-		gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{0, gp}}
-		gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-		gp1flags  = regInfo{inputs: []regMask{gpg}}
-		gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
-		gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-		gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{0, gp}}
-		gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
-		gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
-		gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
-		gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{0, gp}}
-		gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
-		gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-		gpstore   = regInfo{inputs: []regMask{gpspsbg, gpg}}
-		gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
-		gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
-		fp01      = regInfo{inputs: nil, outputs: []regMask{fp}}
-		fp11      = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-		fp1flags  = regInfo{inputs: []regMask{fp}}
-		fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
-		gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
-		fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-		fp2flags  = regInfo{inputs: []regMask{fp, fp}}
-		fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
-		fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
-		readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
+		gp01       = regInfo{inputs: []regMask{}, outputs: []regMask{31}}
+		gp11       = regInfo{inputs: []regMask{31}, outputs: []regMask{31}}
+		gp21       = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{31}}
+		gp2flags   = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{32}}
+		gpload     = regInfo{inputs: []regMask{31}, outputs: []regMask{31}}
+		gpstore    = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{}}
+		flagsgp    = regInfo{inputs: []regMask{32}, outputs: []regMask{31}}
+		callerSave = regMask(15)
 	)
 	ops := []opData{
-		// binary ops
-		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
-		{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int32"},   // arg0 + auxInt
-		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0 - arg1
-		{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int32"},     // arg0 - auxInt
-		{name: "RSB", argLength: 2, reg: gp21, asm: "RSB"},                        // arg1 - arg0
-		{name: "RSBconst", argLength: 1, reg: gp11, asm: "RSB", aux: "Int32"},     // auxInt - arg0
-		{name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true},     // arg0 * arg1
-		{name: "HMUL", argLength: 2, reg: gp21, asm: "MULL", commutative: true},   // (arg0 * arg1) >> 32, signed
-		{name: "HMULU", argLength: 2, reg: gp21, asm: "MULLU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
-		{name: "DIV", argLength: 2, reg: gp21, asm: "DIV", clobberFlags: true},    // arg0 / arg1, signed, soft div clobbers flags
-		{name: "DIVU", argLength: 2, reg: gp21, asm: "DIVU", clobberFlags: true},  // arg0 / arg1, unsighed
-		{name: "MOD", argLength: 2, reg: gp21, asm: "MOD", clobberFlags: true},    // arg0 % arg1, signed
-		{name: "MODU", argLength: 2, reg: gp21, asm: "MODU", clobberFlags: true},  // arg0 % arg1, unsigned
+		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},  // arg0 + arg1
+		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "SymOff"}, // arg0 + auxInt + aux.(*gc.Sym)

-		{name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag
-		{name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag
-		{name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true},  // arg0 + arg1 + carry, arg2=flags
-		{name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"},  // arg0 + auxInt + carry, arg1=flags
-		{name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"},                    // arg0 - arg1, set carry flag
-		{name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag
-		{name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag
-		{name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"},                     // arg0 - arg1 - carry, arg2=flags
-		{name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"},  // arg0 - auxInt - carry, arg1=flags
-		{name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"},  // auxInt - arg0 - carry, arg1=flags
+		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", rematerializeable: true}, // 32 low bits of auxint

-		{name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1
-		{name: "MULA", argLength: 3, reg: gp31, asm: "MULA"},                      // arg0 * arg1 + arg2
+		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1

-		{name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1
-		{name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1
-		{name: "SUBF", argLength: 2, reg: fp21, asm: "SUBF"},                    // arg0 - arg1
-		{name: "SUBD", argLength: 2, reg: fp21, asm: "SUBD"},                    // arg0 - arg1
-		{name: "MULF", argLength: 2, reg: fp21, asm: "MULF", commutative: true}, // arg0 * arg1
-		{name: "MULD", argLength: 2, reg: fp21, asm: "MULD", commutative: true}, // arg0 * arg1
-		{name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"},                    // arg0 / arg1
-		{name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"},                    // arg0 / arg1
+		{name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW"},   // load from arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.

-		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
-		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
-		{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true},  // arg0 | arg1
-		{name: "ORconst", argLength: 1, reg: gp11, asm: "ORR", aux: "Int32"},  // arg0 | auxInt
-		{name: "XOR", argLength: 2, reg: gp21, asm: "EOR", commutative: true}, // arg0 ^ arg1
-		{name: "XORconst", argLength: 1, reg: gp11, asm: "EOR", aux: "Int32"}, // arg0 ^ auxInt
-		{name: "BIC", argLength: 2, reg: gp21, asm: "BIC"},                    // arg0 &^ arg1
-		{name: "BICconst", argLength: 1, reg: gp11, asm: "BIC", aux: "Int32"}, // arg0 &^ auxInt
-
-		// unary ops
-		{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
-
-		{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"},   // -arg0, float32
-		{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"},   // -arg0, float64
-		{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
-
-		// shifts
-		{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"},                    // arg0 << arg1, shift amount is mod 256
-		{name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int32"}, // arg0 << auxInt
-		{name: "SRL", argLength: 2, reg: gp21, asm: "SRL"},                    // arg0 >> arg1, unsigned, shift amount is mod 256
-		{name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int32"}, // arg0 >> auxInt, unsigned
-		{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"},                    // arg0 >> arg1, signed, shift amount is mod 256
-		{name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed
-		{name: "SRRconst", argLength: 1, reg: gp11, aux: "Int32"},             // arg0 right rotate by auxInt bits
-
-		{name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1<<auxInt
-		{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, unsigned shift
-		{name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, signed shift
-		{name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1<<auxInt
-		{name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, unsigned shift
-		{name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, signed shift
-		{name: "RSBshiftLL", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1<<auxInt - arg0
-		{name: "RSBshiftRL", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, unsigned shift
-		{name: "RSBshiftRA", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, signed shift
-		{name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1<<auxInt)
-		{name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1>>auxInt), unsigned shift
-		{name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1>>auxInt), signed shift
-		{name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"},  // arg0 | arg1<<auxInt
-		{name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"},  // arg0 | arg1>>auxInt, unsigned shift
-		{name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"},  // arg0 | arg1>>auxInt, signed shift
-		{name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1<<auxInt
-		{name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1>>auxInt, unsigned shift
-		{name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1>>auxInt, signed shift
-		{name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1<<auxInt)
-		{name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1>>auxInt), unsigned shift
-		{name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1>>auxInt), signed shift
-		{name: "MVNshiftLL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0<<auxInt)
-		{name: "MVNshiftRL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0>>auxInt), unsigned shift
-		{name: "MVNshiftRA", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0>>auxInt), signed shift
-
-		{name: "ADCshiftLL", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1<<auxInt + carry, arg2=flags
-		{name: "ADCshiftRL", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1>>auxInt + carry, unsigned shift, arg2=flags
-		{name: "ADCshiftRA", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1>>auxInt + carry, signed shift, arg2=flags
-		{name: "SBCshiftLL", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1<<auxInt - carry, arg2=flags
-		{name: "SBCshiftRL", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1>>auxInt - carry, unsigned shift, arg2=flags
-		{name: "SBCshiftRA", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1>>auxInt - carry, signed shift, arg2=flags
-		{name: "RSCshiftLL", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1<<auxInt - arg0 - carry, arg2=flags
-		{name: "RSCshiftRL", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1>>auxInt - arg0 - carry, unsigned shift, arg2=flags
-		{name: "RSCshiftRA", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1>>auxInt - arg0 - carry, signed shift, arg2=flags
-
-		{name: "ADDSshiftLL", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1<<auxInt, set carry flag
-		{name: "ADDSshiftRL", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, unsigned shift, set carry flag
-		{name: "ADDSshiftRA", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, signed shift, set carry flag
-		{name: "SUBSshiftLL", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1<<auxInt, set carry flag
-		{name: "SUBSshiftRL", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, unsigned shift, set carry flag
-		{name: "SUBSshiftRA", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, signed shift, set carry flag
-		{name: "RSBSshiftLL", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1<<auxInt - arg0, set carry flag
-		{name: "RSBSshiftRL", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, unsigned shift, set carry flag
-		{name: "RSBSshiftRA", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, signed shift, set carry flag
-
-		{name: "ADDshiftLLreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1<<arg2
-		{name: "ADDshiftRLreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1>>arg2, unsigned shift
-		{name: "ADDshiftRAreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1>>arg2, signed shift
-		{name: "SUBshiftLLreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1<<arg2
-		{name: "SUBshiftRLreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1>>arg2, unsigned shift
-		{name: "SUBshiftRAreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1>>arg2, signed shift
-		{name: "RSBshiftLLreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1<<arg2 - arg0
-		{name: "RSBshiftRLreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1>>arg2 - arg0, unsigned shift
-		{name: "RSBshiftRAreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1>>arg2 - arg0, signed shift
-		{name: "ANDshiftLLreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1<<arg2)
-		{name: "ANDshiftRLreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1>>arg2), unsigned shift
-		{name: "ANDshiftRAreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1>>arg2), signed shift
-		{name: "ORshiftLLreg", argLength: 3, reg: gp31, asm: "ORR"},  // arg0 | arg1<<arg2
-		{name: "ORshiftRLreg", argLength: 3, reg: gp31, asm: "ORR"},  // arg0 | arg1>>arg2, unsigned shift
-		{name: "ORshiftRAreg", argLength: 3, reg: gp31, asm: "ORR"},  // arg0 | arg1>>arg2, signed shift
-		{name: "XORshiftLLreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1<<arg2
-		{name: "XORshiftRLreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1>>arg2, unsigned shift
-		{name: "XORshiftRAreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1>>arg2, signed shift
-		{name: "BICshiftLLreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1<<arg2)
-		{name: "BICshiftRLreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1>>arg2), unsigned shift
-		{name: "BICshiftRAreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1>>arg2), signed shift
-		{name: "MVNshiftLLreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0<<arg1)
-		{name: "MVNshiftRLreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0>>arg1), unsigned shift
-		{name: "MVNshiftRAreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0>>arg1), signed shift
-
-		{name: "ADCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1<<arg2 + carry, arg3=flags
-		{name: "ADCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1>>arg2 + carry, unsigned shift, arg3=flags
-		{name: "ADCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1>>arg2 + carry, signed shift, arg3=flags
-		{name: "SBCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1<<arg2 - carry, arg3=flags
-		{name: "SBCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1>>arg2 - carry, unsigned shift, arg3=flags
-		{name: "SBCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1>>arg2 - carry, signed shift, arg3=flags
-		{name: "RSCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1<<arg2 - arg0 - carry, arg3=flags
-		{name: "RSCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1>>arg2 - arg0 - carry, unsigned shift, arg3=flags
-		{name: "RSCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1>>arg2 - arg0 - carry, signed shift, arg3=flags
-
-		{name: "ADDSshiftLLreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1<<arg2, set carry flag
-		{name: "ADDSshiftRLreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1>>arg2, unsigned shift, set carry flag
-		{name: "ADDSshiftRAreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1>>arg2, signed shift, set carry flag
-		{name: "SUBSshiftLLreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1<<arg2, set carry flag
-		{name: "SUBSshiftRLreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1>>arg2, unsigned shift, set carry flag
-		{name: "SUBSshiftRAreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1>>arg2, signed shift, set carry flag
-		{name: "RSBSshiftLLreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1<<arg2 - arg0, set carry flag
-		{name: "RSBSshiftRLreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1>>arg2 - arg0, unsigned shift, set carry flag
-		{name: "RSBSshiftRAreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1>>arg2 - arg0, signed shift, set carry flag
-
-		// comparisons
-		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to auxInt
-		{name: "CMN", argLength: 2, reg: gp2flags, asm: "CMN", typ: "Flags"},                    // arg0 compare to -arg1
-		{name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt
-		{name: "TST", argLength: 2, reg: gp2flags, asm: "TST", typ: "Flags", commutative: true}, // arg0 & arg1 compare to 0
-		{name: "TSTconst", argLength: 1, reg: gp1flags, asm: "TST", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0
-		{name: "TEQ", argLength: 2, reg: gp2flags, asm: "TEQ", typ: "Flags", commutative: true}, // arg0 ^ arg1 compare to 0
-		{name: "TEQconst", argLength: 1, reg: gp1flags, asm: "TEQ", aux: "Int32", typ: "Flags"}, // arg0 ^ auxInt compare to 0
-		{name: "CMPF", argLength: 2, reg: fp2flags, asm: "CMPF", typ: "Flags"},                  // arg0 compare to arg1, float32
-		{name: "CMPD", argLength: 2, reg: fp2flags, asm: "CMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
-
-		{name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1<<auxInt
-		{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
-		{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
-
-		{name: "CMPshiftLLreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1<<arg2
-		{name: "CMPshiftRLreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1>>arg2, unsigned shift
-		{name: "CMPshiftRAreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1>>arg2, signed shift
-
-		{name: "CMPF0", argLength: 1, reg: fp1flags, asm: "CMPF", typ: "Flags"}, // arg0 compare to 0, float32
-		{name: "CMPD0", argLength: 1, reg: fp1flags, asm: "CMPD", typ: "Flags"}, // arg0 compare to 0, float64
-
-		// moves
-		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", typ: "UInt32", rematerializeable: true},    // 32 low bits of auxint
-		{name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
-		{name: "MOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float
-
-		{name: "MOVWaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVW", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
-
-		{name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8"},     // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16"},    // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16"}, // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "UInt32"},   // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVFload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVF", typ: "Float32"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVD", typ: "Float64"},  // load from arg0 + auxInt + aux.  arg1=mem.
-
-		{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVFstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVF", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-
-		{name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW"},                   // load from arg0 + arg1. arg2=mem
-		{name: "MOVWloadshiftLL", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32"}, // load from arg0 + arg1<<auxInt. arg2=mem
-		{name: "MOVWloadshiftRL", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32"}, // load from arg0 + arg1>>auxInt, unsigned shift. arg2=mem
-		{name: "MOVWloadshiftRA", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32"}, // load from arg0 + arg1>>auxInt, signed shift. arg2=mem
-
-		{name: "MOVWstoreidx", argLength: 4, reg: gp2store, asm: "MOVW"},                   // store arg2 to arg0 + arg1. arg3=mem
-		{name: "MOVWstoreshiftLL", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32"}, // store arg2 to arg0 + arg1<<auxInt. arg3=mem
-		{name: "MOVWstoreshiftRL", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32"}, // store arg2 to arg0 + arg1>>auxInt, unsigned shift. arg3=mem
-		{name: "MOVWstoreshiftRA", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32"}, // store arg2 to arg0 + arg1>>auxInt, signed shift. arg3=mem
-
-		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVBS"},  // move from arg0, sign-extended from byte
-		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
-		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVHS"},  // move from arg0, sign-extended from half
-		{name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half
-		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"},   // move from arg0
-
-		{name: "MOVWnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register
-
-		{name: "MOVWF", argLength: 1, reg: gpfp, asm: "MOVWF"},  // int32 -> float32
-		{name: "MOVWD", argLength: 1, reg: gpfp, asm: "MOVWD"},  // int32 -> float64
-		{name: "MOVWUF", argLength: 1, reg: gpfp, asm: "MOVWF"}, // uint32 -> float32, set U bit in the instruction
-		{name: "MOVWUD", argLength: 1, reg: gpfp, asm: "MOVWD"}, // uint32 -> float64, set U bit in the instruction
-		{name: "MOVFW", argLength: 1, reg: fpgp, asm: "MOVFW"},  // float32 -> int32
-		{name: "MOVDW", argLength: 1, reg: fpgp, asm: "MOVDW"},  // float64 -> int32
-		{name: "MOVFWU", argLength: 1, reg: fpgp, asm: "MOVFW"}, // float32 -> uint32, set U bit in the instruction
-		{name: "MOVDWU", argLength: 1, reg: fpgp, asm: "MOVDW"}, // float64 -> uint32, set U bit in the instruction
-		{name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"},  // float32 -> float64
-		{name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"},  // float64 -> float32
-
-		// conditional instructions, for lowering shifts
-		{name: "CMOVWHSconst", argLength: 2, reg: gp1flags1, asm: "MOVW", aux: "Int32", resultInArg0: true}, // replace arg0 w/ const if flags indicates HS, arg1=flags
-		{name: "CMOVWLSconst", argLength: 2, reg: gp1flags1, asm: "MOVW", aux: "Int32", resultInArg0: true}, // replace arg0 w/ const if flags indicates LS, arg1=flags
-		{name: "SRAcond", argLength: 3, reg: gp2flags1, asm: "SRA"},                                         // arg0 >> 31 if flags indicates HS, arg0 >> arg1 otherwise, signed shift, arg2=flags
-
-		// function calls
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                             // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R7"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                               // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                  // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                        // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
+		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"}, // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem

 		// pseudo-ops
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}}, // panic if arg0 is nil.  arg1=mem.
-
-		{name: "Equal", argLength: 1, reg: readflags},         // bool, true flags encode x==y false otherwise.
-		{name: "NotEqual", argLength: 1, reg: readflags},      // bool, true flags encode x!=y false otherwise.
-		{name: "LessThan", argLength: 1, reg: readflags},      // bool, true flags encode signed x<y false otherwise.
-		{name: "LessEqual", argLength: 1, reg: readflags},     // bool, true flags encode signed x<=y false otherwise.
-		{name: "GreaterThan", argLength: 1, reg: readflags},   // bool, true flags encode signed x>y false otherwise.
-		{name: "GreaterEqual", argLength: 1, reg: readflags},  // bool, true flags encode signed x>=y false otherwise.
-		{name: "LessThanU", argLength: 1, reg: readflags},     // bool, true flags encode unsigned x<y false otherwise.
-		{name: "LessEqualU", argLength: 1, reg: readflags},    // bool, true flags encode unsigned x<=y false otherwise.
-		{name: "GreaterThanU", argLength: 1, reg: readflags},  // bool, true flags encode unsigned x>y false otherwise.
-		{name: "GreaterEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>=y false otherwise.
-
-		// duffzero (must be 4-byte aligned)
-		// arg0 = address of memory to zero (in R1, changed as side effect)
-		// arg1 = value to store (always zero)
-		// arg2 = mem
-		// auxint = offset into duffzero code to start executing
-		// returns mem
-		{
-			name:      "DUFFZERO",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R1"), buildReg("R0")},
-				clobbers: buildReg("R1"),
-			},
-		},
-
-		// duffcopy (must be 4-byte aligned)
-		// arg0 = address of dst memory (in R2, changed as side effect)
-		// arg1 = address of src memory (in R1, changed as side effect)
-		// arg2 = mem
-		// auxint = offset into duffcopy code to start executing
-		// returns mem
-		{
-			name:      "DUFFCOPY",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R2"), buildReg("R1")},
-				clobbers: buildReg("R0 R1 R2"),
-			},
-		},
-
-		// large or unaligned zeroing
-		// arg0 = address of memory to zero (in R1, changed as side effect)
-		// arg1 = address of the last element to zero
-		// arg2 = value to store (always zero)
-		// arg3 = mem
-		// returns mem
-		//	MOVW.P	Rarg2, 4(R1)
-		//	CMP	R1, Rarg1
-		//	BLE	-2(PC)
-		{
-			name:      "LoweredZero",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R1"), gp, gp},
-				clobbers: buildReg("R1"),
-			},
-			clobberFlags: true,
-		},
-
-		// large or unaligned move
-		// arg0 = address of dst memory (in R2, changed as side effect)
-		// arg1 = address of src memory (in R1, changed as side effect)
-		// arg2 = address of the last element of src
-		// arg3 = mem
-		// returns mem
-		//	MOVW.P	4(R1), Rtmp
-		//	MOVW.P	Rtmp, 4(R2)
-		//	CMP	R1, Rarg2
-		//	BLE	-3(PC)
-		{
-			name:      "LoweredMove",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R2"), buildReg("R1"), gp},
-				clobbers: buildReg("R1 R2"),
-			},
-			clobberFlags: true,
-		},
-
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of R7 (arm.REGCTXT, the closure pointer)
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R7")}}},
-
-		// MOVWconvert converts between pointers and integers.
-		// We have a special op for this so as to not confuse GC
-		// (particularly stack maps).  It takes a memory arg so it
-		// gets correctly ordered with respect to GC safepoints.
-		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVWconvert", argLength: 2, reg: gp11, asm: "MOVW"},
-
-		// Constant flag values. For any comparison, there are 5 possible
-		// outcomes: the three from the signed total order (<,==,>) and the
-		// three from the unsigned total order. The == cases overlap.
-		// Note: there's a sixth "unordered" outcome for floating-point
-		// comparisons, but we don't use such a beast yet.
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"},     // equal
-		{name: "FlagLT_ULT"}, // signed < and unsigned <
-		{name: "FlagLT_UGT"}, // signed < and unsigned >
-		{name: "FlagGT_UGT"}, // signed > and unsigned <
-		{name: "FlagGT_ULT"}, // signed > and unsigned >
-
-		// (InvertFlags (CMP a b)) == (CMP b a)
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
+		{name: "LessThan", argLength: 1, reg: flagsgp}, // bool, 1 flags encode x<y 0 otherwise.
 	}

 	blocks := []blockData{
@@ -496,15 +47,22 @@ func init() {
 		{name: "UGE"},
 	}

+	regNames := []string{
+		"R0",
+		"R1",
+		"R2",
+		"R3",
+		"SP",
+		"FLAGS",
+		"SB",
+	}
+
 	archs = append(archs, arch{
-		name:            "ARM",
-		pkg:             "cmd/internal/obj/arm",
-		genfile:         "../../arm/ssa.go",
-		ops:             ops,
-		blocks:          blocks,
-		regnames:        regNamesARM,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: -1, // not used
+		name:     "ARM",
+		pkg:      "cmd/internal/obj/arm",
+		genfile:  "../../arm/ssa.go",
+		ops:      ops,
+		blocks:   blocks,
+		regnames: regNames,
 	})
 }
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -1,573 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Lowering arithmetic
-(Add64  x y) -> (ADD  x y)
-(AddPtr x y) -> (ADD  x y)
-(Add32  x y) -> (ADD x y)
-(Add16  x y) -> (ADD x y)
-(Add8   x y) -> (ADD x y)
-(Add64F x y) -> (FADD x y)
-(Add32F x y) -> (FADDS x y)
-
-(Sub64  x y) -> (SUB  x y)
-(SubPtr x y) -> (SUB  x y)
-(Sub32  x y) -> (SUB x y)
-(Sub16  x y) -> (SUB x y)
-(Sub8   x y) -> (SUB x y)
-(Sub32F x y) -> (FSUBS x y)
-(Sub64F x y) -> (FSUB x y)
-
-(Mod16 x y) -> (Mod32 (SignExt16to32 x) (SignExt16to32 y))
-(Mod16u x y) -> (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) -> (Mod32 (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) -> (Mod32u (ZeroExt8to32 x) (ZeroExt8to32 y))
-(Mod64 x y) -> (SUB x (MULLD y (DIVD x y)))
-(Mod64u x y) -> (SUB x (MULLD y (DIVDU x y)))
-(Mod32 x y) -> (SUB x (MULLW y (DIVW x y)))
-(Mod32u x y) -> (SUB x (MULLW y (DIVWU x y)))
-
-(Avg64u <t> x y) -> (ADD (ADD <t> (SRD <t> x (MOVDconst <t> [1])) (SRD <t> y (MOVDconst <t> [1]))) (ANDconst <t> (AND <t> x y) [1]))
-
-(Mul64  x y) -> (MULLD  x y)
-(Mul32  x y) -> (MULLW  x y)
-(Mul16  x y) -> (MULLW x y)
-(Mul8   x y) -> (MULLW x y)
-
-(Div64  x y) -> (DIVD  x y)
-(Div64u x y) -> (DIVDU x y)
-(Div32  x y) -> (DIVW  x y)
-(Div32u x y) -> (DIVWU x y)
-(Div16  x y) -> (DIVW  (SignExt16to32 x) (SignExt16to32 y))
-(Div16u x y) -> (DIVWU (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Div8   x y) -> (DIVW  (SignExt8to32 x) (SignExt8to32 y))
-(Div8u  x y) -> (DIVWU (ZeroExt8to32 x) (ZeroExt8to32 y))
-
-(Hmul64  x y) -> (MULHD  x y)
-(Hmul64u  x y) -> (MULHDU x y)
-(Hmul32  x y) -> (MULHW  x y)
-(Hmul32u  x y) -> (MULHWU x y)
-(Hmul16 x y) -> (SRAWconst (MULLW <config.fe.TypeInt32()> (SignExt16to32 x) (SignExt16to32 y)) [16])
-(Hmul16u x y) -> (SRWconst (MULLW <config.fe.TypeUInt32()> (ZeroExt16to32 x) (ZeroExt16to32 y)) [16])
-(Hmul8 x y) -> (SRAWconst (MULLW <config.fe.TypeInt16()> (SignExt8to32 x) (SignExt8to32 y)) [8])
-(Hmul8u x y) -> (SRWconst (MULLW <config.fe.TypeUInt16()> (ZeroExt8to32 x) (ZeroExt8to32 y)) [8])
-
-(Mul32F x y) -> (FMULS x y)
-(Mul64F x y) -> (FMUL x y)
-
-(Div32F x y) -> (FDIVS x y)
-(Div64F x y) -> (FDIV x y)
-
-// Lowering float <-> int
-(Cvt32to32F x) -> (FRSP (FCFID (Xi2f64 (SignExt32to64 x))))
-(Cvt32to64F x) -> (FCFID (Xi2f64 (SignExt32to64 x)))
-(Cvt64to32F x) -> (FRSP (FCFID (Xi2f64 x)))
-(Cvt64to64F x) -> (FCFID (Xi2f64 x))
-
-(Cvt32Fto32 x) -> (Xf2i64 (FCTIWZ x))
-(Cvt32Fto64 x) -> (Xf2i64 (FCTIDZ x))
-(Cvt64Fto32 x) -> (Xf2i64 (FCTIWZ x))
-(Cvt64Fto64 x) -> (Xf2i64 (FCTIDZ x))
-
-(Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
-(Cvt64Fto32F x) -> (FRSP x)
-
-(Sqrt x) -> (FSQRT x)
-
-(Rsh64x64 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
-(Rsh64Ux64 x y) -> (SRD  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
-(Lsh64x64 x y)  -> (SLD  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
-
-(Rsh32x64 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
-(Rsh32Ux64 x y) -> (SRW  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
-(Lsh32x64 x y)  -> (SLW  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
-
-(Rsh16x64 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
-(Rsh16Ux64 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
-(Lsh16x64 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
-
-(Rsh8x64 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
-(Rsh8Ux64 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
-(Lsh8x64 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
-
-
-(Rsh64x32 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
-(Rsh64Ux32 x y) -> (SRD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
-(Lsh64x32 x y)  -> (SLD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
-
-(Rsh32x32 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
-(Rsh32Ux32 x y) -> (SRW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
-(Lsh32x32 x y)  -> (SLW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
-
-(Rsh16x32 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
-(Rsh16Ux32 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
-(Lsh16x32 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
-
-(Rsh8x32 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
-(Rsh8Ux32 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
-(Lsh8x32 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
-
-
-(Rsh64x16 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
-(Rsh64Ux16 x y) -> (SRD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
-(Lsh64x16 x y)  -> (SLD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
-
-(Rsh32x16 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
-(Rsh32Ux16 x y) -> (SRW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
-(Lsh32x16 x y)  -> (SLW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
-
-(Rsh16x16 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
-(Rsh16Ux16 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
-(Lsh16x16 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
-
-(Rsh8x16 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
-(Rsh8Ux16 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
-(Lsh8x16 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
-
-
-(Rsh64x8 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
-(Rsh64Ux8 x y) -> (SRD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
-(Lsh64x8 x y)  -> (SLD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
-
-(Rsh32x8 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
-(Rsh32Ux8 x y) -> (SRW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
-(Lsh32x8 x y)  -> (SLW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
-
-(Rsh16x8 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
-(Rsh16Ux8 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
-(Lsh16x8 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
-
-(Rsh8x8 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
-(Rsh8Ux8 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
-(Lsh8x8 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
-
-// Potentially useful optimizing rewrites.
-// (ADDconstForCarry [k] c), k < 0 && (c < 0 || k+c >= 0) -> CarrySet
-// (ADDconstForCarry [k] c), K < 0 && (c >= 0 && k+c < 0) -> CarryClear
-// (MaskIfNotCarry CarrySet) -> 0
-// (MaskIfNotCarry CarrySet) -> -1
-
-// Lowering constants
-(Const8   [val]) -> (MOVWconst [val])
-(Const16  [val]) -> (MOVWconst [val])
-(Const32  [val]) -> (MOVWconst [val])
-(Const64  [val]) -> (MOVDconst [val])
-(Const32F [val]) -> (FMOVSconst [val])
-(Const64F [val]) -> (FMOVDconst [val])
-(ConstNil) -> (MOVDconst [0])
-(ConstBool [b]) -> (MOVWconst [b])
-
-(Addr {sym} base) -> (MOVDaddr {sym} base)
-// (Addr {sym} base) -> (ADDconst {sym} base)
-(OffPtr [off] ptr) -> (ADD (MOVDconst <config.Frontend().TypeInt64()> [off]) ptr)
-
-(And64 x y) -> (AND x y)
-(And32 x y) -> (AND x y)
-(And16 x y) -> (AND x y)
-(And8  x y) -> (AND x y)
-
-(Or64 x y) -> (OR x y)
-(Or32 x y) -> (OR x y)
-(Or16 x y) -> (OR x y)
-(Or8  x y) -> (OR x y)
-
-(Xor64 x y) -> (XOR x y)
-(Xor32 x y) -> (XOR x y)
-(Xor16 x y) -> (XOR x y)
-(Xor8  x y) -> (XOR x y)
-
-(Neg64F x) -> (FNEG x)
-(Neg32F x) -> (FNEG x)
-(Neg64  x) -> (NEG x)
-(Neg32  x) -> (NEG x)
-(Neg16  x) -> (NEG x)
-(Neg8   x) -> (NEG x)
-
-(Com64 x) -> (XORconst [-1] x)
-(Com32 x) -> (XORconst [-1] x)
-(Com16 x) -> (XORconst [-1] x)
-(Com8  x) -> (XORconst [-1] x)
-
-// Lowering boolean ops
-(AndB x y) -> (AND x y)
-(OrB x y) -> (OR x y)
-(Not x) -> (XORconst [1] x)
-
-// Lowering comparisons
-(EqB x y)  -> (ANDconst [1] (EQV x y))
-(Eq8 x y)  -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Eq16 x y) -> (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Eq32 x y) -> (Equal (CMPW x y))
-(Eq64 x y) -> (Equal (CMP x y))
-(Eq32F x y) -> (Equal (FCMPU x y))
-(Eq64F x y) -> (Equal (FCMPU x y))
-(EqPtr x y) -> (Equal (CMP x y))
-
-(NeqB x y)  -> (XOR x y)
-(Neq8 x y)  -> (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Neq16 x y) -> (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Neq32 x y) -> (NotEqual (CMPW x y))
-(Neq64 x y) -> (NotEqual (CMP x y))
-(Neq32F x y) -> (NotEqual (FCMPU x y))
-(Neq64F x y) -> (NotEqual (FCMPU x y))
-(NeqPtr x y) -> (NotEqual (CMP x y))
-
-(Less8 x y)  -> (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Less16 x y) -> (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Less32 x y) -> (LessThan (CMPW x y))
-(Less64 x y) -> (LessThan (CMP x y))
-(Less32F x y) -> (FLessThan (FCMPU x y))
-(Less64F x y) -> (FLessThan (FCMPU x y))
-
-(Less8U x y)  -> (LessThan (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Less16U x y) -> (LessThan (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Less32U x y) -> (LessThan (CMPWU x y))
-(Less64U x y) -> (LessThan (CMPU x y))
-
-(Leq8 x y)  -> (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Leq16 x y) -> (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Leq32 x y) -> (LessEqual (CMPW x y))
-(Leq64 x y) -> (LessEqual (CMP x y))
-(Leq32F x y) -> (FLessEqual (FCMPU x y))
-(Leq64F x y) -> (FLessEqual (FCMPU x y))
-
-(Leq8U x y)  -> (LessEqual (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Leq16U x y) -> (LessEqual (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Leq32U x y) -> (LessEqual (CMPWU x y))
-(Leq64U x y) -> (LessEqual (CMPU x y))
-
-(Greater8 x y)  -> (GreaterThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Greater16 x y) -> (GreaterThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Greater32 x y) -> (GreaterThan (CMPW x y))
-(Greater64 x y) -> (GreaterThan (CMP x y))
-(Greater32F x y) -> (FGreaterThan (FCMPU x y))
-(Greater64F x y) -> (FGreaterThan (FCMPU x y))
-
-(Greater8U x y)  -> (GreaterThan (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Greater16U x y) -> (GreaterThan (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Greater32U x y) -> (GreaterThan (CMPWU x y))
-(Greater64U x y) -> (GreaterThan (CMPU x y))
-
-(Geq8 x y)  -> (GreaterEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Geq16 x y) -> (GreaterEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Geq32 x y) -> (GreaterEqual (CMPW x y))
-(Geq64 x y) -> (GreaterEqual (CMP x y))
-(Geq32F x y) -> (FGreaterEqual (FCMPU x y))
-(Geq64F x y) -> (FGreaterEqual (FCMPU x y))
-
-(Geq8U x y)  -> (GreaterEqual (CMPU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Geq16U x y) -> (GreaterEqual (CMPU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Geq32U x y) -> (GreaterEqual (CMPU x y))
-(Geq64U x y) -> (GreaterEqual (CMPU x y))
-
-// Absorb pseudo-ops into blocks.
-(If (Equal cc) yes no) -> (EQ cc yes no)
-(If (NotEqual cc) yes no) -> (NE cc yes no)
-(If (LessThan cc) yes no) -> (LT cc yes no)
-(If (LessEqual cc) yes no) -> (LE cc yes no)
-(If (GreaterThan cc) yes no) -> (GT cc yes no)
-(If (GreaterEqual cc) yes no) -> (GE cc yes no)
-(If (FLessThan cc) yes no) -> (FLT cc yes no)
-(If (FLessEqual cc) yes no) -> (FLE cc yes no)
-(If (FGreaterThan cc) yes no) -> (FGT cc yes no)
-(If (FGreaterEqual cc) yes no) -> (FGE cc yes no)
-
-(If cond yes no) -> (NE (CMPWconst [0] cond) yes no)
-
-// Absorb boolean tests into block
-(NE (CMPWconst [0] (Equal cc)) yes no) -> (EQ cc yes no)
-(NE (CMPWconst [0] (NotEqual cc)) yes no) -> (NE cc yes no)
-(NE (CMPWconst [0] (LessThan cc)) yes no) -> (LT cc yes no)
-(NE (CMPWconst [0] (LessEqual cc)) yes no) -> (LE cc yes no)
-(NE (CMPWconst [0] (GreaterThan cc)) yes no) -> (GT cc yes no)
-(NE (CMPWconst [0] (GreaterEqual cc)) yes no) -> (GE cc yes no)
-// (NE (CMPWconst [0] (FLessThan cc)) yes no) -> (FLT cc yes no)
-// (NE (CMPWconst [0] (FLessEqual cc)) yes no) -> (FLE cc yes no)
-// (NE (CMPWconst [0] (FGreaterThan cc)) yes no) -> (FGT cc yes no)
-// (NE (CMPWconst [0] (FGreaterEqual cc)) yes no) -> (FGE cc yes no)
-
-// absorb flag constants into branches
-(EQ (FlagEQ) yes no) -> (First nil yes no)
-(EQ (FlagLT) yes no) -> (First nil no yes)
-(EQ (FlagGT) yes no) -> (First nil no yes)
-
-(NE (FlagEQ) yes no) -> (First nil no yes)
-(NE (FlagLT) yes no) -> (First nil yes no)
-(NE (FlagGT) yes no) -> (First nil yes no)
-
-(LT (FlagEQ) yes no) -> (First nil no yes)
-(LT (FlagLT) yes no) -> (First nil yes no)
-(LT (FlagGT) yes no) -> (First nil no yes)
-
-(LE (FlagEQ) yes no) -> (First nil yes no)
-(LE (FlagLT) yes no) -> (First nil yes no)
-(LE (FlagGT) yes no) -> (First nil no yes)
-
-(GT (FlagEQ) yes no) -> (First nil no yes)
-(GT (FlagLT) yes no) -> (First nil no yes)
-(GT (FlagGT) yes no) -> (First nil yes no)
-
-(GE (FlagEQ) yes no) -> (First nil yes no)
-(GE (FlagLT) yes no) -> (First nil no yes)
-(GE (FlagGT) yes no) -> (First nil yes no)
-
-// absorb InvertFlags into branches
-(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
-(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
-(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
-(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
-(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
-(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
-
-// (FLT (InvertFlags cmp) yes no) -> (FGT cmp yes no)
-// (FGT (InvertFlags cmp) yes no) -> (FLT cmp yes no)
-// (FLE (InvertFlags cmp) yes no) -> (FGE cmp yes no)
-// (FGE (InvertFlags cmp) yes no) -> (FLE cmp yes no)
-
-// constant comparisons
-(CMPWconst (MOVWconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
-(CMPWconst (MOVWconst [x]) [y]) && int32(x)<int32(y)  -> (FlagLT)
-(CMPWconst (MOVWconst [x]) [y]) && int32(x)>int32(y)  -> (FlagGT)
-
-(CMPconst (MOVDconst [x]) [y]) && int64(x)==int64(y) -> (FlagEQ)
-(CMPconst (MOVDconst [x]) [y]) && int64(x)<int64(y)  -> (FlagLT)
-(CMPconst (MOVDconst [x]) [y]) && int64(x)>int64(y)  -> (FlagGT)
-
-(CMPWUconst (MOVWconst [x]) [y]) && int32(x)==int32(y)  -> (FlagEQ)
-(CMPWUconst (MOVWconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT)
-(CMPWUconst (MOVWconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT)
-
-(CMPUconst (MOVDconst [x]) [y]) && int64(x)==int64(y)  -> (FlagEQ)
-(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT)
-(CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) -> (FlagGT)
-
-// other known comparisons
-//(CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT)
-//(CMPconst (MOVHUreg _) [c]) && 0xffff < c -> (FlagLT)
-//(CMPconst (ANDconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT)
-//(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint32(32-c)) <= uint32(n) -> (FlagLT)
-
-// absorb flag constants into boolean values
-(Equal (FlagEQ)) -> (MOVWconst [1])
-(Equal (FlagLT)) -> (MOVWconst [0])
-(Equal (FlagGT)) -> (MOVWconst [0])
-
-(NotEqual (FlagEQ)) -> (MOVWconst [0])
-(NotEqual (FlagLT)) -> (MOVWconst [1])
-(NotEqual (FlagGT)) -> (MOVWconst [1])
-
-(LessThan (FlagEQ)) -> (MOVWconst [0])
-(LessThan (FlagLT)) -> (MOVWconst [1])
-(LessThan (FlagGT)) -> (MOVWconst [0])
-
-(LessEqual (FlagEQ)) -> (MOVWconst [1])
-(LessEqual (FlagLT)) -> (MOVWconst [1])
-(LessEqual (FlagGT)) -> (MOVWconst [0])
-
-(GreaterThan (FlagEQ)) -> (MOVWconst [0])
-(GreaterThan (FlagLT)) -> (MOVWconst [0])
-(GreaterThan (FlagGT)) -> (MOVWconst [1])
-
-(GreaterEqual (FlagEQ)) -> (MOVWconst [1])
-(GreaterEqual (FlagLT)) -> (MOVWconst [0])
-(GreaterEqual (FlagGT)) -> (MOVWconst [1])
-
-// absorb InvertFlags into boolean values
-(Equal (InvertFlags x)) -> (Equal x)
-(NotEqual (InvertFlags x)) -> (NotEqual x)
-(LessThan (InvertFlags x)) -> (GreaterThan x)
-(GreaterThan (InvertFlags x)) -> (LessThan x)
-(LessEqual (InvertFlags x)) -> (GreaterEqual x)
-(GreaterEqual (InvertFlags x)) -> (LessEqual x)
-(FLessThan (InvertFlags x)) -> (FGreaterThan x)
-(FGreaterThan (InvertFlags x)) -> (FLessThan x)
-(FLessEqual (InvertFlags x)) -> (FGreaterEqual x)
-(FGreaterEqual (InvertFlags x)) -> (FLessEqual x)
-
-
-// Lowering loads
-(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
-(Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
-(Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
-(Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
-(Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
-(Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && isSigned(t))) -> (MOVBload ptr mem)
-(Load <t> ptr mem) && is8BitInt(t) && !isSigned(t) -> (MOVBZload ptr mem)
-
-(Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
-(Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
-
-(Store [8] ptr val mem) && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
-(Store [8] ptr val mem) && is32BitFloat(val.Type) -> (FMOVDstore ptr val mem) // glitch from (Cvt32Fto64F x) -> x -- type is wrong
-(Store [4] ptr val mem) && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)
-(Store [8] ptr val mem) && (is64BitInt(val.Type) || isPtr(val.Type)) -> (MOVDstore ptr val mem)
-(Store [4] ptr val mem) && is32BitInt(val.Type) -> (MOVWstore ptr val mem)
-(Store [2] ptr val mem) -> (MOVHstore ptr val mem)
-(Store [1] ptr val mem) -> (MOVBstore ptr val mem)
-
-(Zero [s] _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstorezero destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 2 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstorezero destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 2 ->
-	(MOVBstorezero [1] destptr
-		(MOVBstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstorezero destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstorezero [2] destptr
-		(MOVHstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 ->
-	(MOVBstorezero [3] destptr
-		(MOVBstorezero [2] destptr
-			(MOVBstorezero [1] destptr
-				(MOVBstorezero [0] destptr mem))))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstorezero [4] destptr
-		(MOVWstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstorezero [6] destptr
-		(MOVHstorezero [4] destptr
-			(MOVHstorezero [2] destptr
-				(MOVHstorezero [0] destptr mem))))
-
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 3 ->
-	(MOVBstorezero [2] destptr
-		(MOVBstorezero [1] destptr
-			(MOVBstorezero [0] destptr mem)))
-
-// Zero small numbers of words directly.
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 16 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [8] destptr
-                (MOVDstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 24 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [16] destptr
-		(MOVDstorezero [8] destptr
-			(MOVDstorezero [0] destptr mem)))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 32 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [24] destptr
-		(MOVDstorezero [16] destptr
-			(MOVDstorezero [8] destptr
-				(MOVDstorezero [0] destptr mem))))
-
-// Large zeroing uses a loop
-(Zero [s] ptr mem)
-	&& (SizeAndAlign(s).Size() > 512 || config.noDuffDevice) || SizeAndAlign(s).Align()%8 != 0 ->
-	(LoweredZero [SizeAndAlign(s).Align()]
-		ptr
-		(ADDconst <ptr.Type> ptr [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)])
-		mem)
-
-// moves
-(Move [s] _ _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore dst (MOVBZload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstore dst (MOVHZload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 ->
-	(MOVBstore [1] dst (MOVBZload [1] src mem)
-		(MOVBstore dst (MOVBZload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstore dst (MOVWload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstore [2] dst (MOVHZload [2] src mem)
-		(MOVHstore dst (MOVHZload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 ->
-	(MOVBstore [3] dst (MOVBZload [3] src mem)
-		(MOVBstore [2] dst (MOVBZload [2] src mem)
-			(MOVBstore [1] dst (MOVBZload [1] src mem)
-				(MOVBstore dst (MOVBZload src mem) mem))))
-
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstore dst (MOVDload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstore [4] dst (MOVWZload [4] src mem)
-		(MOVWstore dst (MOVWZload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%2 == 0->
-	(MOVHstore [6] dst (MOVHZload [6] src mem)
-		(MOVHstore [4] dst (MOVHZload [4] src mem)
-			(MOVHstore [2] dst (MOVHZload [2] src mem)
-				(MOVHstore dst (MOVHZload src mem) mem))))
-
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 3 ->
-	(MOVBstore [2] dst (MOVBZload [2] src mem)
-		(MOVBstore [1] dst (MOVBZload [1] src mem)
-			(MOVBstore dst (MOVBZload src mem) mem)))
-
-// Large move uses a loop
-(Move [s] dst src mem)
-	&& (SizeAndAlign(s).Size() > 512 || config.noDuffDevice) || SizeAndAlign(s).Align()%8 != 0 ->
-	(LoweredMove [SizeAndAlign(s).Align()]
-		dst
-		src
-		(ADDconst <src.Type> src [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)])
-		mem)
-
-// Calls
-// Lowering calls
-(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
-(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
-(DeferCall [argwid] mem) -> (CALLdefer [argwid] mem)
-(GoCall [argwid] mem) -> (CALLgo [argwid] mem)
-(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
-
-// Miscellaneous
-(Convert <t> x mem) -> (MOVDconvert <t> x mem)
-(GetClosurePtr) -> (LoweredGetClosurePtr)
-(IsNonNil ptr) -> (NotEqual (CMPconst [0] ptr))
-(IsInBounds idx len) -> (LessThan (CMPU idx len))
-(IsSliceInBounds idx len) -> (LessEqual (CMPU idx len))
-(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
-
-// Optimizations
-
-(ADD (MOVDconst [c]) x) && int64(int32(c)) == c -> (ADDconst [c] x)
-(ADD x (MOVDconst [c])) && int64(int32(c)) == c -> (ADDconst [c] x)
-
-// Fold offsets for stores.
-(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} x val mem)
-(MOVWstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} x val mem)
-(MOVHstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVHstore [off1+off2] {sym} x val mem)
-(MOVBstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} x val mem)
-
-// Store of zero -> storezero
-(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVDstorezero [off] {sym} ptr mem)
-(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVWstorezero [off] {sym} ptr mem)
-(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVHstorezero [off] {sym} ptr mem)
-(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVBstorezero [off] {sym} ptr mem)
-
-// Fold offsets for storezero
-(MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVDstorezero [off1+off2] {sym} x mem)
-(MOVWstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVWstorezero [off1+off2] {sym} x mem)
-(MOVHstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVHstorezero [off1+off2] {sym} x mem)
-(MOVBstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVBstorezero [off1+off2] {sym} x mem)
-
-// Lowering extension
-// Note: we always extend to 64 bits even though some ops don't need that many result bits.
-(SignExt8to16  x) -> (MOVBreg x)
-(SignExt8to32  x) -> (MOVBreg x)
-(SignExt8to64  x) -> (MOVBreg x)
-(SignExt16to32 x) -> (MOVHreg x)
-(SignExt16to64 x) -> (MOVHreg x)
-(SignExt32to64 x) -> (MOVWreg x)
-
-(ZeroExt8to16  x) -> (MOVBZreg x)
-(ZeroExt8to32  x) -> (MOVBZreg x)
-(ZeroExt8to64  x) -> (MOVBZreg x)
-(ZeroExt16to32 x) -> (MOVHZreg x)
-(ZeroExt16to64 x) -> (MOVHZreg x)
-(ZeroExt32to64 x) -> (MOVWZreg x)
-
-(Trunc16to8  x) -> (MOVBreg x)
-(Trunc32to8  x) -> (MOVBreg x)
-(Trunc32to16 x) -> (MOVHreg x)
-(Trunc64to8  x) -> (MOVBreg x)
-(Trunc64to16 x) -> (MOVHreg x)
-(Trunc64to32 x) -> (MOVWreg x)
-
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -1,395 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import "strings"
-
-// Notes:
-//  - Less-than-64-bit integer types live in the low portion of registers.
-//    For now, the upper portion is junk; sign/zero-extension might be optimized in the future, but not yet.
-//  - Boolean types are zero or 1; stored in a byte, but loaded with AMOVBZ so the upper bytes of a register are zero.
-//  - *const instructions may use a constant larger than the instuction can encode.
-//    In this case the assembler expands to multiple instructions and uses tmp
-//    register (R31).
-
-var regNamesPPC64 = []string{
-	// "R0", // REGZERO
-	"SP", // REGSP
-	"SB", // REGSB
-	"R3",
-	"R4",
-	"R5",
-	"R6",
-	"R7",
-	"R8",
-	"R9",
-	"R10",
-	"R11", // REGCTXT for closures
-	"R12",
-	"R13", // REGTLS
-	"R14",
-	"R15",
-	"R16",
-	"R17",
-	"R18",
-	"R19",
-	"R20",
-	"R21",
-	"R22",
-	"R23",
-	"R24",
-	"R25",
-	"R26",
-	"R27",
-	"R28",
-	"R29",
-	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
-	"R31", // REGTMP
-
-	"F0",
-	"F1",
-	"F2",
-	"F3",
-	"F4",
-	"F5",
-	"F6",
-	"F7",
-	"F8",
-	"F9",
-	"F10",
-	"F11",
-	"F12",
-	"F13",
-	"F14",
-	"F15",
-	"F16",
-	"F17",
-	"F18",
-	"F19",
-	"F20",
-	"F21",
-	"F22",
-	"F23",
-	"F24",
-	"F25",
-	"F26",
-	// "F27", // reserved for "floating conversion constant"
-	// "F28", // 0.0
-	// "F29", // 0.5
-	// "F30", // 1.0
-	// "F31", // 2.0
-
-	// "CR0",
-	// "CR1",
-	// "CR2",
-	// "CR3",
-	// "CR4",
-	// "CR5",
-	// "CR6",
-	// "CR7",
-
-	// "CR",
-	// "XER",
-	// "LR",
-	// "CTR",
-}
-
-func init() {
-	// Make map from reg names to reg integers.
-	if len(regNamesPPC64) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNamesPPC64 {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	var (
-		gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
-		fp = buildReg("F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26")
-		sp = buildReg("SP")
-		sb = buildReg("SB")
-		// gr  = buildReg("g")
-		// cr  = buildReg("CR")
-		// ctr = buildReg("CTR")
-		// lr  = buildReg("LR")
-		tmp  = buildReg("R31")
-		ctxt = buildReg("R11")
-		// tls = buildReg("R13")
-		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
-		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
-		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
-		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
-		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
-		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
-		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
-		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
-		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
-		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
-		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
-		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
-		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
-		callerSave  = regMask(gp | fp)
-	)
-	ops := []opData{
-		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
-		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "SymOff"},    // arg0 + auxInt + aux.(*gc.Sym)
-		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},   // arg0+arg1
-		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
-		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0-arg1
-		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                      // arg0-arg1
-		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0-arg1
-
-		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
-		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
-
-		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
-		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
-		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
-		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
-
-		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
-		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
-
-		{name: "SRAD", argLength: 2, reg: gp21, asm: "SRAD"}, // arg0 >>a arg1, 64 bits (all sign if arg1 & 64 != 0)
-		{name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // arg0 >>a arg1, 32 bits (all sign if arg1 & 32 != 0)
-		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},   // arg0 >> arg1, 64 bits  (0 if arg1 & 64 != 0)
-		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},   // arg0 >> arg1, 32 bits  (0 if arg1 & 32 != 0)
-		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},   // arg0 << arg1, 64 bits  (0 if arg1 & 64 != 0)
-		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},   // arg0 << arg1, 32 bits  (0 if arg1 & 32 != 0)
-
-		{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
-		{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"},                                                                   // carry - 1 (if carry then 0 else -1)
-
-		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // arg0 >>a aux, 64 bits
-		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // arg0 >>a aux, 32 bits
-		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},   // arg0 >> aux, 64 bits
-		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},   // arg0 >> aux, 32 bits
-		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},   // arg0 << aux, 64 bits
-		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},   // arg0 << aux, 32 bits
-
-		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
-		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
-
-		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
-		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
-		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
-		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
-
-		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
-
-		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
-		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
-		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
-		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
-		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
-
-		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
-		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
-		// data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianess issues).
-		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
-		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
-
-		{name: "Xf2i64", argLength: 1, reg: fpgp, typ: "Int64"},   // move 64 bits of F register into G register
-		{name: "Xi2f64", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits of G register into F register
-
-		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},               // arg0&arg1
-		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                // arg0&^arg1
-		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                 // arg0|arg1
-		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                  // arg0|^arg1
-		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
-		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
-		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                  // -arg0 (integer)
-		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                // -arg0 (floating point)
-		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                              // sqrt(arg0) (floating point)
-		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                            // sqrt(arg0) (floating point, single precision)
-
-		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                     // arg0|aux
-		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                   // arg0^aux
-		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true}, // arg0&aux // and-immediate sets CC on PPC, always.
-
-		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},                      // sign extend int8 to int64
-		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"},                    // zero extend uint8 to uint64
-		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},                      // sign extend int16 to int64
-		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"},                    // zero extend uint16 to uint64
-		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},                      // sign extend int32 to int64
-		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"},                    // zero extend uint32 to uint64
-		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", typ: "Int8"},     // sign extend int8 to int64
-		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8"},  // zero extend uint8 to uint64
-		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16"},    // sign extend int16 to int64
-		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16"}, // zero extend uint16 to uint64
-		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32"},    // sign extend int32 to int64
-		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32"}, // zero extend uint32 to uint64
-		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64"},
-
-		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", typ: "Float64"},
-		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", typ: "Float32"},
-		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem"},
-		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem"},
-		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem"},
-		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem"},
-		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem"},
-		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem"},
-
-		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem"}, // store zero byte to arg0+aux.  arg1=mem
-		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem"}, // store zero 2 bytes to ...
-		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem"}, // store zero 4 bytes to ...
-		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem"}, // store zero 8 bytes to ...
-
-		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
-
-		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", rematerializeable: true},     //
-		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", rematerializeable: true},     // 32 low bits of auxint
-		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true}, //
-		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true}, //
-		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
-
-		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
-		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
-		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
-		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
-		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
-		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
-		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
-		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
-
-		// pseudo-ops
-		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
-		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
-		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
-		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
-		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
-		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
-		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
-		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
-		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
-		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
-
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of the closure pointer.
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}},
-
-		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true},
-
-		// Convert pointer to integer, takes a memory operand for ordering.
-		{name: "MOVDconvert", argLength: 2, reg: gp11, asm: "MOVD"},
-
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                      // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gp | sp, ctxt, 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                        // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                           // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                 // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
-
-		// large or unaligned zeroing
-		// arg0 = address of memory to zero (in R3, changed as side effect)
-		// arg1 = address of the last element to zero
-		// arg2 = mem
-		// returns mem
-		//  ADD -8,R3,R3 // intermediate value not valid GC ptr, cannot expose to opt+GC
-		//	MOVDU	R0, 8(R3)
-		//	CMP	R3, Rarg1
-		//	BLE	-2(PC)
-		{
-			name:      "LoweredZero",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R3"), gp},
-				clobbers: buildReg("R3"),
-			},
-			clobberFlags: true,
-			typ:          "Mem",
-		},
-
-		// large or unaligned move
-		// arg0 = address of dst memory (in R3, changed as side effect)
-		// arg1 = address of src memory (in R4, changed as side effect)
-		// arg2 = address of the last element of src
-		// arg3 = mem
-		// returns mem
-		//  ADD -8,R3,R3 // intermediate value not valid GC ptr, cannot expose to opt+GC
-		//  ADD -8,R4,R4 // intermediate value not valid GC ptr, cannot expose to opt+GC
-		//	MOVDU	8(R4), Rtmp
-		//	MOVDU	Rtmp, 8(R3)
-		//	CMP	R4, Rarg2
-		//	BLT	-3(PC)
-		{
-			name:      "LoweredMove",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R3"), buildReg("R4"), gp},
-				clobbers: buildReg("R3 R4"),
-			},
-			clobberFlags: true,
-			typ:          "Mem",
-		},
-
-		// (InvertFlags (CMP a b)) == (CMP b a)
-		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
-		// then we do (LessThan (InvertFlags (CMP b a))) instead.
-		// Rewrites will convert this to (GreaterThan (CMP b a)).
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
-
-		// Constant flag values. For any comparison, there are 3 possible
-		// outcomes: either the three from the signed total order (<,==,>)
-		// or the three from the unsigned total order, depending on which
-		// comparison operation was used (CMP or CMPU -- PPC is different from
-		// the other architectures, which have a single comparison producing
-		// both signed and unsigned comparison results.)
-
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"}, // equal
-		{name: "FlagLT"}, // signed < or unsigned <
-		{name: "FlagGT"}, // signed > or unsigned >
-
-	}
-
-	blocks := []blockData{
-		{name: "EQ"},
-		{name: "NE"},
-		{name: "LT"},
-		{name: "LE"},
-		{name: "GT"},
-		{name: "GE"},
-		{name: "FLT"},
-		{name: "FLE"},
-		{name: "FGT"},
-		{name: "FGE"},
-	}
-
-	archs = append(archs, arch{
-		name:            "PPC64",
-		pkg:             "cmd/internal/obj/ppc64",
-		genfile:         "../../ppc64/ssa.go",
-		ops:             ops,
-		blocks:          blocks,
-		regnames:        regNamesPPC64,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: int8(num["SP"]),
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -1,407 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file contains rules to decompose [u]int64 types on 32-bit
-// architectures. These rules work together with the decomposeBuiltIn
-// pass which handles phis of these types.
-
-(Int64Hi (Int64Make hi _)) -> hi
-(Int64Lo (Int64Make _ lo)) -> lo
-
-// Assuming little endian (we don't support big endian 32-bit architecture yet)
-(Load <t> ptr mem) && is64BitInt(t) && t.IsSigned() ->
-	(Int64Make
-		(Load <config.fe.TypeInt32()> (OffPtr <config.fe.TypeInt32().PtrTo()> [4] ptr) mem)
-		(Load <config.fe.TypeUInt32()> ptr mem))
-(Load <t> ptr mem) && is64BitInt(t) && !t.IsSigned() ->
-	(Int64Make
-		(Load <config.fe.TypeUInt32()> (OffPtr <config.fe.TypeUInt32().PtrTo()> [4] ptr) mem)
-		(Load <config.fe.TypeUInt32()> ptr mem))
-
-(Store [8] dst (Int64Make hi lo) mem) ->
-	(Store [4]
-		(OffPtr <hi.Type.PtrTo()> [4] dst)
-		hi
-		(Store [4] dst lo mem))
-
-(Arg {n} [off]) && is64BitInt(v.Type) && v.Type.IsSigned() ->
-  (Int64Make
-    (Arg <config.fe.TypeInt32()> {n} [off+4])
-    (Arg <config.fe.TypeUInt32()> {n} [off]))
-(Arg {n} [off]) && is64BitInt(v.Type) && !v.Type.IsSigned() ->
-  (Int64Make
-    (Arg <config.fe.TypeUInt32()> {n} [off+4])
-    (Arg <config.fe.TypeUInt32()> {n} [off]))
-
-(Add64 x y) ->
-	(Int64Make
-		(Add32withcarry <config.fe.TypeInt32()>
-			(Int64Hi x)
-			(Int64Hi y)
-			(Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
-		(Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
-
-(Sub64 x y) ->
-	(Int64Make
-		(Sub32withcarry <config.fe.TypeInt32()>
-			(Int64Hi x)
-			(Int64Hi y)
-			(Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
-		(Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
-
-(Mul64 x y) ->
-	(Int64Make
-		(Add32 <config.fe.TypeUInt32()>
-			(Mul32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Hi y))
-			(Add32 <config.fe.TypeUInt32()>
-				(Mul32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Lo y))
-				(Select0 <config.fe.TypeUInt32()> (Mul32uhilo (Int64Lo x) (Int64Lo y)))))
-		(Select1 <config.fe.TypeUInt32()> (Mul32uhilo (Int64Lo x) (Int64Lo y))))
-
-(And64 x y) ->
-	(Int64Make
-		(And32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Hi y))
-		(And32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Lo y)))
-
-(Or64 x y) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Hi y))
-		(Or32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Lo y)))
-
-(Xor64 x y) ->
-	(Int64Make
-		(Xor32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Hi y))
-		(Xor32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Lo y)))
-
-(Neg64 <t> x) -> (Sub64 (Const64 <t> [0]) x)
-
-(Com64 x) ->
-	(Int64Make
-		(Com32 <config.fe.TypeUInt32()> (Int64Hi x))
-		(Com32 <config.fe.TypeUInt32()> (Int64Lo x)))
-
-(SignExt32to64 x) -> (Int64Make (Signmask x) x)
-(SignExt16to64 x) -> (SignExt32to64 (SignExt16to32 x))
-(SignExt8to64 x) -> (SignExt32to64 (SignExt8to32 x))
-
-(ZeroExt32to64 x) -> (Int64Make (Const32 <config.fe.TypeUInt32()> [0]) x)
-(ZeroExt16to64 x) -> (ZeroExt32to64 (ZeroExt16to32 x))
-(ZeroExt8to64 x) -> (ZeroExt32to64 (ZeroExt8to32 x))
-
-(Trunc64to32 (Int64Make _ lo)) -> lo
-(Trunc64to16 (Int64Make _ lo)) -> (Trunc32to16 lo)
-(Trunc64to8 (Int64Make _ lo)) -> (Trunc32to8 lo)
-
-(Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Signmask x)
-(Rsh32Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Lsh16x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Rsh16x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Signmask (SignExt16to32 x))
-(Rsh16Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Lsh8x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Rsh8x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Signmask (SignExt8to32 x))
-(Rsh8Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-
-(Lsh32x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh32x32 x lo)
-(Rsh32x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh32x32 x lo)
-(Rsh32Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh32Ux32 x lo)
-(Lsh16x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh16x32 x lo)
-(Rsh16x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh16x32 x lo)
-(Rsh16Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh16Ux32 x lo)
-(Lsh8x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh8x32 x lo)
-(Rsh8x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh8x32 x lo)
-(Rsh8Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh8Ux32 x lo)
-
-(Lsh64x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const64 [0])
-(Rsh64x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Int64Make (Signmask (Int64Hi x)) (Signmask (Int64Hi x)))
-(Rsh64Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const64 [0])
-
-(Lsh64x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh64x32 x lo)
-(Rsh64x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh64x32 x lo)
-(Rsh64Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh64Ux32 x lo)
-
-// turn x64 non-constant shifts to x32 shifts
-// if high 32-bit of the shift is nonzero, make a huge shift
-(Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh64x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh64x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh64Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh32x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh32x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh32Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh16x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh16x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh16Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh8x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh8x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh8Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-
-// 64x left shift
-// result.hi = hi<<s | lo>>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result 0
-// result.lo = lo<<s
-(Lsh64x32 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Lsh32x32 <config.fe.TypeUInt32()> hi s)
-				(Rsh32Ux32 <config.fe.TypeUInt32()>
-					lo
-					(Sub32 <config.fe.TypeUInt32()> (Const32 <config.fe.TypeUInt32()> [32]) s)))
-			(Lsh32x32 <config.fe.TypeUInt32()>
-				lo
-				(Sub32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [32]))))
-		(Lsh32x32 <config.fe.TypeUInt32()> lo s))
-(Lsh64x16 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Lsh32x16 <config.fe.TypeUInt32()> hi s)
-				(Rsh32Ux16 <config.fe.TypeUInt32()>
-					lo
-					(Sub16 <config.fe.TypeUInt16()> (Const16 <config.fe.TypeUInt16()> [32]) s)))
-			(Lsh32x16 <config.fe.TypeUInt32()>
-				lo
-				(Sub16 <config.fe.TypeUInt16()> s (Const16 <config.fe.TypeUInt16()> [32]))))
-		(Lsh32x16 <config.fe.TypeUInt32()> lo s))
-(Lsh64x8 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Lsh32x8 <config.fe.TypeUInt32()> hi s)
-				(Rsh32Ux8 <config.fe.TypeUInt32()>
-					lo
-					(Sub8 <config.fe.TypeUInt8()> (Const8 <config.fe.TypeUInt8()> [32]) s)))
-			(Lsh32x8 <config.fe.TypeUInt32()>
-				lo
-				(Sub8 <config.fe.TypeUInt8()> s (Const8 <config.fe.TypeUInt8()> [32]))))
-		(Lsh32x8 <config.fe.TypeUInt32()> lo s))
-
-// 64x unsigned right shift
-// result.hi = hi>>s
-// result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result 0
-(Rsh64Ux32 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32Ux32 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux32 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x32 <config.fe.TypeUInt32()>
-					hi
-					(Sub32 <config.fe.TypeUInt32()> (Const32 <config.fe.TypeUInt32()> [32]) s)))
-			(Rsh32Ux32 <config.fe.TypeUInt32()>
-				hi
-				(Sub32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [32])))))
-(Rsh64Ux16 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32Ux16 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux16 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x16 <config.fe.TypeUInt32()>
-					hi
-					(Sub16 <config.fe.TypeUInt16()> (Const16 <config.fe.TypeUInt16()> [32]) s)))
-			(Rsh32Ux16 <config.fe.TypeUInt32()>
-				hi
-				(Sub16 <config.fe.TypeUInt16()> s (Const16 <config.fe.TypeUInt16()> [32])))))
-(Rsh64Ux8 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32Ux8 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux8 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x8 <config.fe.TypeUInt32()>
-					hi
-					(Sub8 <config.fe.TypeUInt8()> (Const8 <config.fe.TypeUInt8()> [32]) s)))
-			(Rsh32Ux8 <config.fe.TypeUInt32()>
-				hi
-				(Sub8 <config.fe.TypeUInt8()> s (Const8 <config.fe.TypeUInt8()> [32])))))
-
-// 64x signed right shift
-// result.hi = hi>>s
-// result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result 0/-1
-(Rsh64x32 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32x32 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux32 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x32 <config.fe.TypeUInt32()>
-					hi
-					(Sub32 <config.fe.TypeUInt32()> (Const32 <config.fe.TypeUInt32()> [32]) s)))
-			(And32 <config.fe.TypeUInt32()>
-				(Rsh32x32 <config.fe.TypeUInt32()>
-					hi
-					(Sub32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [32])))
-				(Zeromask
-					(Rsh32Ux32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [5]))))))
-(Rsh64x16 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32x16 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux16 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x16 <config.fe.TypeUInt32()>
-					hi
-					(Sub16 <config.fe.TypeUInt16()> (Const16 <config.fe.TypeUInt16()> [32]) s)))
-			(And32 <config.fe.TypeUInt32()>
-				(Rsh32x16 <config.fe.TypeUInt32()>
-					hi
-					(Sub16 <config.fe.TypeUInt16()> s (Const16 <config.fe.TypeUInt16()> [32])))
-				(Zeromask
-					(ZeroExt16to32
-						(Rsh16Ux32 <config.fe.TypeUInt16()> s (Const32 <config.fe.TypeUInt32()> [5])))))))
-(Rsh64x8 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32x8 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux8 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x8 <config.fe.TypeUInt32()>
-					hi
-					(Sub8 <config.fe.TypeUInt8()> (Const8 <config.fe.TypeUInt8()> [32]) s)))
-			(And32 <config.fe.TypeUInt32()>
-				(Rsh32x8 <config.fe.TypeUInt32()>
-					hi
-					(Sub8 <config.fe.TypeUInt8()> s (Const8 <config.fe.TypeUInt8()> [32])))
-				(Zeromask
-					(ZeroExt8to32
-						(Rsh8Ux32 <config.fe.TypeUInt8()> s (Const32 <config.fe.TypeUInt32()> [5])))))))
-
-// 64xConst32 shifts
-// we probably do not need them -- lateopt may take care of them just fine
-//(Lsh64x32 _ (Const32 [c])) && uint32(c) >= 64 -> (Const64 [0])
-//(Rsh64x32 x (Const32 [c])) && uint32(c) >= 64 -> (Int64Make (Signmask (Int64Hi x)) (Signmask (Int64Hi x)))
-//(Rsh64Ux32 _ (Const32 [c])) && uint32(c) >= 64 -> (Const64 [0])
-//
-//(Lsh64x32 x (Const32 [c])) && c < 64 && c > 32 ->
-//	(Int64Make
-//		(Lsh32x32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c-32]))
-//		(Const32 <config.fe.TypeUInt32()> [0]))
-//(Rsh64x32 x (Const32 [c])) && c < 64 && c > 32 ->
-//	(Int64Make
-//		(Signmask (Int64Hi x))
-//		(Rsh32x32 <config.fe.TypeInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c-32])))
-//(Rsh64Ux32 x (Const32 [c])) && c < 64 && c > 32 ->
-//	(Int64Make
-//		(Const32 <config.fe.TypeUInt32()> [0])
-//		(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c-32])))
-//
-//(Lsh64x32 x (Const32 [32])) -> (Int64Make (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [0]))
-//(Rsh64x32 x (Const32 [32])) -> (Int64Make (Signmask (Int64Hi x)) (Int64Hi x))
-//(Rsh64Ux32 x (Const32 [32])) -> (Int64Make (Const32 <config.fe.TypeUInt32()> [0]) (Int64Hi x))
-//
-//(Lsh64x32 x (Const32 [c])) && c < 32 && c > 0 ->
-//	(Int64Make
-//		(Or32 <config.fe.TypeUInt32()>
-//			(Lsh32x32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c]))
-//			(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [32-c])))
-//		(Lsh32x32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c])))
-//(Rsh64x32 x (Const32 [c])) && c < 32 && c > 0 ->
-//	(Int64Make
-//		(Rsh32x32 <config.fe.TypeInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c]))
-//		(Or32 <config.fe.TypeUInt32()>
-//			(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c]))
-//			(Lsh32x32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [32-c]))))
-//(Rsh64Ux32 x (Const32 [c])) && c < 32 && c > 0 ->
-//	(Int64Make
-//		(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c]))
-//		(Or32 <config.fe.TypeUInt32()>
-//			(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c]))
-//			(Lsh32x32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [32-c]))))
-//
-//(Lsh64x32 x (Const32 [0])) -> x
-//(Rsh64x32 x (Const32 [0])) -> x
-//(Rsh64Ux32 x (Const32 [0])) -> x
-
-(Lrot64 (Int64Make hi lo) [c]) && c <= 32 ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Lsh32x32 <config.fe.TypeUInt32()> hi (Const32 <config.fe.TypeUInt32()> [c]))
-			(Rsh32Ux32 <config.fe.TypeUInt32()> lo (Const32 <config.fe.TypeUInt32()> [32-c])))
-		(Or32 <config.fe.TypeUInt32()>
-			(Lsh32x32 <config.fe.TypeUInt32()> lo (Const32 <config.fe.TypeUInt32()> [c]))
-			(Rsh32Ux32 <config.fe.TypeUInt32()> hi (Const32 <config.fe.TypeUInt32()> [32-c]))))
-(Lrot64 (Int64Make hi lo) [c]) && c > 32 -> (Lrot64 (Int64Make lo hi) [c-32])
-
-(Const64 <t> [c]) && t.IsSigned() ->
-	(Int64Make (Const32 <config.fe.TypeInt32()> [c>>32]) (Const32 <config.fe.TypeUInt32()> [int64(int32(c))]))
-(Const64 <t> [c]) && !t.IsSigned() ->
-	(Int64Make (Const32 <config.fe.TypeUInt32()> [c>>32]) (Const32 <config.fe.TypeUInt32()> [int64(int32(c))]))
-
-(Eq64 x y) ->
-	(AndB
-		(Eq32 (Int64Hi x) (Int64Hi y))
-		(Eq32 (Int64Lo x) (Int64Lo y)))
-
-(Neq64 x y) ->
-	(OrB
-		(Neq32 (Int64Hi x) (Int64Hi y))
-		(Neq32 (Int64Lo x) (Int64Lo y)))
-
-(Less64U x y) ->
-	(OrB
-		(Less32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Less32U (Int64Lo x) (Int64Lo y))))
-
-(Leq64U x y) ->
-	(OrB
-		(Less32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Leq32U (Int64Lo x) (Int64Lo y))))
-
-(Greater64U x y) ->
-	(OrB
-		(Greater32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Greater32U (Int64Lo x) (Int64Lo y))))
-
-(Geq64U x y) ->
-	(OrB
-		(Greater32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Geq32U (Int64Lo x) (Int64Lo y))))
-
-(Less64 x y) ->
-	(OrB
-		(Less32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Less32U (Int64Lo x) (Int64Lo y))))
-
-(Leq64 x y) ->
-	(OrB
-		(Less32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Leq32U (Int64Lo x) (Int64Lo y))))
-
-(Greater64 x y) ->
-	(OrB
-		(Greater32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Greater32U (Int64Lo x) (Int64Lo y))))
-
-(Geq64 x y) ->
-	(OrB
-		(Greater32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Geq32U (Int64Lo x) (Int64Lo y))))
--- a/src/cmd/compile/internal/ssa/gen/dec64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/dec64Ops.go
@@ -1,20 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-var dec64Ops = []opData{}
-
-var dec64Blocks = []blockData{}
-
-func init() {
-	archs = append(archs, arch{
-		name:    "dec64",
-		ops:     dec64Ops,
-		blocks:  dec64Blocks,
-		generic: true,
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -67,12 +67,6 @@
        (Const32F [f2i(float64(i2f32(c) * i2f32(d)))])
 (Mul64F (Const64F [c]) (Const64F [d])) -> (Const64F [f2i(i2f(c) * i2f(d))])

-// Convert x * -1 to -x. The front-end catches some but not all of these.
-(Mul8  (Const8  [-1]) x) -> (Neg8  x)
-(Mul16 (Const16 [-1]) x) -> (Neg16 x)
-(Mul32 (Const32 [-1]) x) -> (Neg32 x)
-(Mul64 (Const64 [-1]) x) -> (Neg64 x)
-
 (Mod8  (Const8  [c]) (Const8  [d])) && d != 0 -> (Const8  [int64(int8(c % d))])
 (Mod16 (Const16 [c]) (Const16 [d])) && d != 0 -> (Const16 [int64(int16(c % d))])
 (Mod32 (Const32 [c]) (Const32 [d])) && d != 0 -> (Const32 [int64(int32(c % d))])
@@ -631,10 +625,8 @@
        (Store [t.FieldType(0).Size()] dst f0 mem))))

 // un-SSAable values use mem->mem copies
-(Store [size] dst (Load <t> src mem) mem) && !config.fe.CanSSA(t) ->
-	(Move [MakeSizeAndAlign(size, t.Alignment()).Int64()] dst src mem)
-(Store [size] dst (Load <t> src mem) (VarDef {x} mem)) && !config.fe.CanSSA(t) ->
-	(Move [MakeSizeAndAlign(size, t.Alignment()).Int64()] dst src (VarDef {x} mem))
+(Store [size] dst (Load <t> src mem) mem) && !config.fe.CanSSA(t) -> (Move [size] dst src mem)
+(Store [size] dst (Load <t> src mem) (VarDef {x} mem)) && !config.fe.CanSSA(t) -> (Move [size] dst src (VarDef {x} mem))

 // string ops
 // Decomposing StringMake and lowering of StringPtr and StringLen
@@ -840,23 +832,3 @@
  -> (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
 (Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && umagic64ok(c)
  -> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
-
-// floating point optimizations
-(Add32F x (Const32F [0])) -> x
-(Add32F (Const32F [0]) x) -> x
-(Add64F x (Const64F [0])) -> x
-(Add64F (Const64F [0]) x) -> x
-(Sub32F x (Const32F [0])) -> x
-(Sub64F x (Const64F [0])) -> x
-(Mul32F x (Const32F [f2i(1)])) -> x
-(Mul32F (Const32F [f2i(1)]) x) -> x
-(Mul64F x (Const64F [f2i(1)])) -> x
-(Mul64F (Const64F [f2i(1)]) x) -> x
-(Mul32F x (Const32F [f2i(-1)])) -> (Neg32F x)
-(Mul32F (Const32F [f2i(-1)]) x) -> (Neg32F x)
-(Mul64F x (Const64F [f2i(-1)])) -> (Neg64F x)
-(Mul64F (Const64F [f2i(-1)]) x) -> (Neg64F x)
-(Div32F x (Const32F [f2i(1)])) -> x
-(Div64F x (Const64F [f2i(1)])) -> x
-(Div32F x (Const32F [f2i(-1)])) -> (Neg32F x)
-(Div64F x (Const64F [f2i(-1)])) -> (Neg32F x)
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -173,76 +173,76 @@ var genericOps = []opData{
 	{name: "Lrot64", argLength: 1, aux: "Int64"},

 	// 2-input comparisons
-	{name: "Eq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 == arg1
-	{name: "Eq16", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Eq32", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Eq64", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "EqPtr", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "EqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "EqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "Eq32F", argLength: 2, typ: "Bool"},
-	{name: "Eq64F", argLength: 2, typ: "Bool"},
+	{name: "Eq8", argLength: 2, commutative: true}, // arg0 == arg1
+	{name: "Eq16", argLength: 2, commutative: true},
+	{name: "Eq32", argLength: 2, commutative: true},
+	{name: "Eq64", argLength: 2, commutative: true},
+	{name: "EqPtr", argLength: 2, commutative: true},
+	{name: "EqInter", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "EqSlice", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "Eq32F", argLength: 2},
+	{name: "Eq64F", argLength: 2},

-	{name: "Neq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 != arg1
-	{name: "Neq16", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Neq32", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Neq64", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "NeqPtr", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "NeqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "NeqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "Neq32F", argLength: 2, typ: "Bool"},
+	{name: "Neq8", argLength: 2, commutative: true}, // arg0 != arg1
+	{name: "Neq16", argLength: 2, commutative: true},
+	{name: "Neq32", argLength: 2, commutative: true},
+	{name: "Neq64", argLength: 2, commutative: true},
+	{name: "NeqPtr", argLength: 2, commutative: true},
+	{name: "NeqInter", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "NeqSlice", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "Neq32F", argLength: 2},
 	{name: "Neq64F", argLength: 2},

-	{name: "Less8", argLength: 2, typ: "Bool"},  // arg0 < arg1, signed
-	{name: "Less8U", argLength: 2, typ: "Bool"}, // arg0 < arg1, unsigned
-	{name: "Less16", argLength: 2, typ: "Bool"},
-	{name: "Less16U", argLength: 2, typ: "Bool"},
-	{name: "Less32", argLength: 2, typ: "Bool"},
-	{name: "Less32U", argLength: 2, typ: "Bool"},
-	{name: "Less64", argLength: 2, typ: "Bool"},
-	{name: "Less64U", argLength: 2, typ: "Bool"},
-	{name: "Less32F", argLength: 2, typ: "Bool"},
-	{name: "Less64F", argLength: 2, typ: "Bool"},
+	{name: "Less8", argLength: 2},  // arg0 < arg1, signed
+	{name: "Less8U", argLength: 2}, // arg0 < arg1, unsigned
+	{name: "Less16", argLength: 2},
+	{name: "Less16U", argLength: 2},
+	{name: "Less32", argLength: 2},
+	{name: "Less32U", argLength: 2},
+	{name: "Less64", argLength: 2},
+	{name: "Less64U", argLength: 2},
+	{name: "Less32F", argLength: 2},
+	{name: "Less64F", argLength: 2},

-	{name: "Leq8", argLength: 2, typ: "Bool"},  // arg0 <= arg1, signed
-	{name: "Leq8U", argLength: 2, typ: "Bool"}, // arg0 <= arg1, unsigned
-	{name: "Leq16", argLength: 2, typ: "Bool"},
-	{name: "Leq16U", argLength: 2, typ: "Bool"},
-	{name: "Leq32", argLength: 2, typ: "Bool"},
-	{name: "Leq32U", argLength: 2, typ: "Bool"},
-	{name: "Leq64", argLength: 2, typ: "Bool"},
-	{name: "Leq64U", argLength: 2, typ: "Bool"},
-	{name: "Leq32F", argLength: 2, typ: "Bool"},
-	{name: "Leq64F", argLength: 2, typ: "Bool"},
+	{name: "Leq8", argLength: 2},  // arg0 <= arg1, signed
+	{name: "Leq8U", argLength: 2}, // arg0 <= arg1, unsigned
+	{name: "Leq16", argLength: 2},
+	{name: "Leq16U", argLength: 2},
+	{name: "Leq32", argLength: 2},
+	{name: "Leq32U", argLength: 2},
+	{name: "Leq64", argLength: 2},
+	{name: "Leq64U", argLength: 2},
+	{name: "Leq32F", argLength: 2},
+	{name: "Leq64F", argLength: 2},

-	{name: "Greater8", argLength: 2, typ: "Bool"},  // arg0 > arg1, signed
-	{name: "Greater8U", argLength: 2, typ: "Bool"}, // arg0 > arg1, unsigned
-	{name: "Greater16", argLength: 2, typ: "Bool"},
-	{name: "Greater16U", argLength: 2, typ: "Bool"},
-	{name: "Greater32", argLength: 2, typ: "Bool"},
-	{name: "Greater32U", argLength: 2, typ: "Bool"},
-	{name: "Greater64", argLength: 2, typ: "Bool"},
-	{name: "Greater64U", argLength: 2, typ: "Bool"},
-	{name: "Greater32F", argLength: 2, typ: "Bool"},
-	{name: "Greater64F", argLength: 2, typ: "Bool"},
+	{name: "Greater8", argLength: 2},  // arg0 > arg1, signed
+	{name: "Greater8U", argLength: 2}, // arg0 > arg1, unsigned
+	{name: "Greater16", argLength: 2},
+	{name: "Greater16U", argLength: 2},
+	{name: "Greater32", argLength: 2},
+	{name: "Greater32U", argLength: 2},
+	{name: "Greater64", argLength: 2},
+	{name: "Greater64U", argLength: 2},
+	{name: "Greater32F", argLength: 2},
+	{name: "Greater64F", argLength: 2},

-	{name: "Geq8", argLength: 2, typ: "Bool"},  // arg0 <= arg1, signed
-	{name: "Geq8U", argLength: 2, typ: "Bool"}, // arg0 <= arg1, unsigned
-	{name: "Geq16", argLength: 2, typ: "Bool"},
-	{name: "Geq16U", argLength: 2, typ: "Bool"},
-	{name: "Geq32", argLength: 2, typ: "Bool"},
-	{name: "Geq32U", argLength: 2, typ: "Bool"},
-	{name: "Geq64", argLength: 2, typ: "Bool"},
-	{name: "Geq64U", argLength: 2, typ: "Bool"},
-	{name: "Geq32F", argLength: 2, typ: "Bool"},
-	{name: "Geq64F", argLength: 2, typ: "Bool"},
+	{name: "Geq8", argLength: 2},  // arg0 <= arg1, signed
+	{name: "Geq8U", argLength: 2}, // arg0 <= arg1, unsigned
+	{name: "Geq16", argLength: 2},
+	{name: "Geq16U", argLength: 2},
+	{name: "Geq32", argLength: 2},
+	{name: "Geq32U", argLength: 2},
+	{name: "Geq64", argLength: 2},
+	{name: "Geq64U", argLength: 2},
+	{name: "Geq32F", argLength: 2},
+	{name: "Geq64F", argLength: 2},

 	// boolean ops
-	{name: "AndB", argLength: 2, typ: "Bool"}, // arg0 && arg1 (not shortcircuited)
-	{name: "OrB", argLength: 2, typ: "Bool"},  // arg0 || arg1 (not shortcircuited)
-	{name: "EqB", argLength: 2, typ: "Bool"},  // arg0 == arg1
-	{name: "NeqB", argLength: 2, typ: "Bool"}, // arg0 != arg1
-	{name: "Not", argLength: 1, typ: "Bool"},  // !arg0, boolean
+	{name: "AndB", argLength: 2}, // arg0 && arg1 (not shortcircuited)
+	{name: "OrB", argLength: 2},  // arg0 || arg1 (not shortcircuited)
+	{name: "EqB", argLength: 2},  // arg0 == arg1
+	{name: "NeqB", argLength: 2}, // arg0 != arg1
+	{name: "Not", argLength: 1},  // !arg0, boolean

 	// 1-input ops
 	{name: "Neg8", argLength: 1}, // -arg0
@@ -312,8 +312,8 @@ var genericOps = []opData{
 	// Memory operations
 	{name: "Load", argLength: 2},                            // Load from arg0.  arg1=memory
 	{name: "Store", argLength: 3, typ: "Mem", aux: "Int64"}, // Store arg1 to arg0.  arg2=memory, auxint=size.  Returns memory.
-	{name: "Move", argLength: 3, typ: "Mem", aux: "Int64"},  // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size.  Returns memory.
-	{name: "Zero", argLength: 2, typ: "Mem", aux: "Int64"},  // arg0=destptr, arg1=mem, auxint=size. Returns memory.
+	{name: "Move", argLength: 3, aux: "Int64"},              // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size.  Returns memory.
+	{name: "Zero", argLength: 2, aux: "Int64"},              // arg0=destptr, arg1=mem, auxint=size. Returns memory.

 	// Function calls. Arguments to the call have already been written to the stack.
 	// Return values appear on the stack. The method receiver, if any, is treated
@@ -326,17 +326,17 @@ var genericOps = []opData{

 	// Conversions: signed extensions, zero (unsigned) extensions, truncations
 	{name: "SignExt8to16", argLength: 1, typ: "Int16"},
-	{name: "SignExt8to32", argLength: 1, typ: "Int32"},
-	{name: "SignExt8to64", argLength: 1, typ: "Int64"},
-	{name: "SignExt16to32", argLength: 1, typ: "Int32"},
-	{name: "SignExt16to64", argLength: 1, typ: "Int64"},
-	{name: "SignExt32to64", argLength: 1, typ: "Int64"},
+	{name: "SignExt8to32", argLength: 1},
+	{name: "SignExt8to64", argLength: 1},
+	{name: "SignExt16to32", argLength: 1},
+	{name: "SignExt16to64", argLength: 1},
+	{name: "SignExt32to64", argLength: 1},
 	{name: "ZeroExt8to16", argLength: 1, typ: "UInt16"},
-	{name: "ZeroExt8to32", argLength: 1, typ: "UInt32"},
-	{name: "ZeroExt8to64", argLength: 1, typ: "UInt64"},
-	{name: "ZeroExt16to32", argLength: 1, typ: "UInt32"},
-	{name: "ZeroExt16to64", argLength: 1, typ: "UInt64"},
-	{name: "ZeroExt32to64", argLength: 1, typ: "UInt64"},
+	{name: "ZeroExt8to32", argLength: 1},
+	{name: "ZeroExt8to64", argLength: 1},
+	{name: "ZeroExt16to32", argLength: 1},
+	{name: "ZeroExt16to64", argLength: 1},
+	{name: "ZeroExt32to64", argLength: 1},
 	{name: "Trunc16to8", argLength: 1},
 	{name: "Trunc32to8", argLength: 1},
 	{name: "Trunc32to16", argLength: 1},
@@ -416,31 +416,6 @@ var genericOps = []opData{
 	{name: "VarKill", argLength: 1, aux: "Sym"},            // aux is a *gc.Node of a variable that is known to be dead.  arg0=mem, returns mem
 	{name: "VarLive", argLength: 1, aux: "Sym"},            // aux is a *gc.Node of a variable that must be kept live.  arg0=mem, returns mem
 	{name: "KeepAlive", argLength: 2, typ: "Mem"},          // arg[0] is a value that must be kept alive until this mark.  arg[1]=mem, returns mem
-
-	// Ops for breaking 64-bit operations on 32-bit architectures
-	{name: "Int64Make", argLength: 2, typ: "UInt64"}, // arg0=hi, arg1=lo
-	{name: "Int64Hi", argLength: 1, typ: "UInt32"},   // high 32-bit of arg0
-	{name: "Int64Lo", argLength: 1, typ: "UInt32"},   // low 32-bit of arg0
-
-	{name: "Add32carry", argLength: 2, commutative: true, typ: "(Flags,UInt32)"}, // arg0 + arg1, returns (carry, value)
-	{name: "Add32withcarry", argLength: 3, commutative: true},                    // arg0 + arg1 + arg2, arg2=carry (0 or 1)
-
-	{name: "Sub32carry", argLength: 2, typ: "(Flags,UInt32)"}, // arg0 - arg1, returns (carry, value)
-	{name: "Sub32withcarry", argLength: 3},                    // arg0 - arg1 - arg2, arg2=carry (0 or 1)
-
-	{name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
-
-	{name: "Signmask", argLength: 1, typ: "Int32"},  // 0 if arg0 >= 0, -1 if arg0 < 0
-	{name: "Zeromask", argLength: 1, typ: "UInt32"}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0
-
-	{name: "Cvt32Uto32F", argLength: 1}, // uint32 -> float32, only used on 32-bit arch
-	{name: "Cvt32Uto64F", argLength: 1}, // uint32 -> float64, only used on 32-bit arch
-	{name: "Cvt32Fto32U", argLength: 1}, // float32 -> uint32, only used on 32-bit arch
-	{name: "Cvt64Fto32U", argLength: 1}, // float64 -> uint32, only used on 32-bit arch
-
-	// pseudo-ops for breaking Tuple
-	{name: "Select0", argLength: 1}, // the first component of a tuple
-	{name: "Select1", argLength: 1}, // the second component of a tuple
 }

 //     kind           control    successors       implicit exit
--- a/src/cmd/compile/internal/ssa/gen/main.go
+++ b/src/cmd/compile/internal/ssa/gen/main.go
@@ -21,16 +21,13 @@ import (
 )

 type arch struct {
-	name            string
-	pkg             string // obj package to import for this arch.
-	genfile         string // source file containing opcode code generation.
-	ops             []opData
-	blocks          []blockData
-	regnames        []string
-	gpregmask       regMask
-	fpregmask       regMask
-	framepointerreg int8
-	generic         bool
+	name     string
+	pkg      string // obj package to import for this arch.
+	genfile  string // source file containing opcode code generation.
+	ops      []opData
+	blocks   []blockData
+	regnames []string
+	generic  bool
 }

 type opData struct {
@@ -41,9 +38,8 @@ type opData struct {
 	aux               string
 	rematerializeable bool
 	argLength         int32 // number of arguments, if -1, then this operation has a variable number of arguments
-	commutative       bool  // this operation is commutative on its first 2 arguments (e.g. addition)
-	resultInArg0      bool  // last output of v and v.Args[0] must be allocated to the same register
-	clobberFlags      bool  // this op clobbers flags register
+	commutative       bool  // this operation is commutative (e.g. addition)
+	resultInArg0      bool  // v and v.Args[0] must be allocated to the same register
 }

 type blockData struct {
@@ -77,7 +73,6 @@ var archs []arch

 func main() {
 	flag.Parse()
-	sort.Sort(ArchsByName(archs))
 	genOp()
 	genLower()
 }
@@ -160,16 +155,13 @@ func genOp() {
 			}
 			if v.resultInArg0 {
 				fmt.Fprintln(w, "resultInArg0: true,")
-				if v.reg.inputs[0] != v.reg.outputs[len(v.reg.outputs)-1] {
-					log.Fatalf("input[0] and last output register must be equal for %s", v.name)
+				if v.reg.inputs[0] != v.reg.outputs[0] {
+					log.Fatalf("input[0] and output registers must be equal for %s", v.name)
 				}
-				if v.commutative && v.reg.inputs[1] != v.reg.outputs[len(v.reg.outputs)-1] {
-					log.Fatalf("input[1] and last output register must be equal for %s", v.name)
+				if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
+					log.Fatalf("input[1] and output registers must be equal for %s", v.name)
 				}
 			}
-			if v.clobberFlags {
-				fmt.Fprintln(w, "clobberFlags: true,")
-			}
 			if a.name == "generic" {
 				fmt.Fprintln(w, "generic:true,")
 				fmt.Fprintln(w, "},") // close op
@@ -199,22 +191,14 @@ func genOp() {
 				}
 				fmt.Fprintln(w, "},")
 			}
-
 			if v.reg.clobbers > 0 {
 				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
 			}
-
 			// reg outputs
-			s = s[:0]
-			for i, r := range v.reg.outputs {
-				s = append(s, intPair{countRegs(r), i})
-			}
-			if len(s) > 0 {
-				sort.Sort(byKey(s))
-				fmt.Fprintln(w, "outputs: []outputInfo{")
-				for _, p := range s {
-					r := v.reg.outputs[p.val]
-					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
+			if len(v.reg.outputs) > 0 {
+				fmt.Fprintln(w, "outputs: []regMask{")
+				for _, r := range v.reg.outputs {
+					fmt.Fprintf(w, "%d,%s\n", r, a.regMaskComment(r))
 				}
 				fmt.Fprintln(w, "},")
 			}
@@ -239,9 +223,6 @@ func genOp() {
 			fmt.Fprintf(w, "  {%d, \"%s\"},\n", i, r)
 		}
 		fmt.Fprintln(w, "}")
-		fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask)
-		fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
-		fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
 	}

 	// gofmt result
@@ -317,9 +298,3 @@ type byKey []intPair
 func (a byKey) Len() int           { return len(a) }
 func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
 func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }
-
-type ArchsByName []arch
-
-func (x ArchsByName) Len() int           { return len(x) }
-func (x ArchsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
-func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name }
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -117,17 +117,15 @@ func genRules(arch arch) {
 		if unbalanced(rule) {
 			continue
 		}
-
+		op := strings.Split(rule, " ")[0][1:]
+		if op[len(op)-1] == ')' {
+			op = op[:len(op)-1] // rule has only opcode, e.g. (ConstNil) -> ...
+		}
 		loc := fmt.Sprintf("%s.rules:%d", arch.name, ruleLineno)
-		r := Rule{rule: rule, loc: loc}
-		if rawop := strings.Split(rule, " ")[0][1:]; isBlock(rawop, arch) {
-			blockrules[rawop] = append(blockrules[rawop], r)
+		if isBlock(op, arch) {
+			blockrules[op] = append(blockrules[op], Rule{rule: rule, loc: loc})
 		} else {
-			// Do fancier value op matching.
-			match, _, _ := r.parse()
-			op, oparch, _, _, _, _ := parseValue(match, arch, loc)
-			opname := fmt.Sprintf("Op%s%s", oparch, op.name)
-			oprules[opname] = append(oprules[opname], r)
+			oprules[op] = append(oprules[op], Rule{rule: rule, loc: loc})
 		}
 		rule = ""
 		ruleLineno = 0
@@ -159,8 +157,8 @@ func genRules(arch arch) {
 	fmt.Fprintf(w, "func rewriteValue%s(v *Value, config *Config) bool {\n", arch.name)
 	fmt.Fprintf(w, "switch v.Op {\n")
 	for _, op := range ops {
-		fmt.Fprintf(w, "case %s:\n", op)
-		fmt.Fprintf(w, "return rewriteValue%s_%s(v, config)\n", arch.name, op)
+		fmt.Fprintf(w, "case %s:\n", opName(op, arch))
+		fmt.Fprintf(w, "return rewriteValue%s_%s(v, config)\n", arch.name, opName(op, arch))
 	}
 	fmt.Fprintf(w, "}\n")
 	fmt.Fprintf(w, "return false\n")
@@ -169,7 +167,7 @@ func genRules(arch arch) {
 	// Generate a routine per op. Note that we don't make one giant routine
 	// because it is too big for some compilers.
 	for _, op := range ops {
-		fmt.Fprintf(w, "func rewriteValue%s_%s(v *Value, config *Config) bool {\n", arch.name, op)
+		fmt.Fprintf(w, "func rewriteValue%s_%s(v *Value, config *Config) bool {\n", arch.name, opName(op, arch))
 		fmt.Fprintln(w, "b := v.Block")
 		fmt.Fprintln(w, "_ = b")
 		var canFail bool
@@ -336,108 +334,141 @@ func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, t
 	}
 	canFail := false

-	op, oparch, typ, auxint, aux, args := parseValue(match, arch, loc)
+	// split body up into regions. Split by spaces/tabs, except those
+	// contained in () or {}.
+	s := split(match[1 : len(match)-1]) // remove parens, then split
+
+	// Find op record
+	var op opData
+	for _, x := range genericOps {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	for _, x := range arch.ops {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	if op.name == "" {
+		log.Fatalf("%s: unknown op %s", loc, s[0])
+	}

 	// check op
 	if !top {
-		fmt.Fprintf(w, "if %s.Op != Op%s%s {\nbreak\n}\n", v, oparch, op.name)
+		fmt.Fprintf(w, "if %s.Op != %s {\nbreak\n}\n", v, opName(s[0], arch))
 		canFail = true
 	}

-	if typ != "" {
-		if !isVariable(typ) {
-			// code. We must match the results of this code.
-			fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, typ)
-			canFail = true
-		} else {
-			// variable
-			if _, ok := m[typ]; ok {
-				// must match previous variable
-				fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, typ)
+	// check type/aux/args
+	argnum := 0
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction
+			t := a[1 : len(a)-1] // remove <>
+			if !isVariable(t) {
+				// code. We must match the results of this code.
+				fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, t)
 				canFail = true
 			} else {
-				m[typ] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.Type\n", typ, v)
+				// variable
+				if _, ok := m[t]; ok {
+					// must match previous variable
+					fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, t)
+					canFail = true
+				} else {
+					m[t] = struct{}{}
+					fmt.Fprintf(w, "%s := %s.Type\n", t, v)
+				}
 			}
-		}
-	}
-
-	if auxint != "" {
-		if !isVariable(auxint) {
-			// code
-			fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, auxint)
-			canFail = true
-		} else {
-			// variable
-			if _, ok := m[auxint]; ok {
-				fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, auxint)
+		} else if a[0] == '[' {
+			// auxint restriction
+			switch op.aux {
+			case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove []
+			if !isVariable(x) {
+				// code
+				fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, x)
 				canFail = true
 			} else {
-				m[auxint] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.AuxInt\n", auxint, v)
+				// variable
+				if _, ok := m[x]; ok {
+					fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, x)
+					canFail = true
+				} else {
+					m[x] = struct{}{}
+					fmt.Fprintf(w, "%s := %s.AuxInt\n", x, v)
+				}
 			}
-		}
-	}
-
-	if aux != "" {
-
-		if !isVariable(aux) {
-			// code
-			fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, aux)
-			canFail = true
-		} else {
-			// variable
-			if _, ok := m[aux]; ok {
-				fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, aux)
+		} else if a[0] == '{' {
+			// aux restriction
+			switch op.aux {
+			case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove {}
+			if !isVariable(x) {
+				// code
+				fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, x)
 				canFail = true
 			} else {
-				m[aux] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.Aux\n", aux, v)
+				// variable
+				if _, ok := m[x]; ok {
+					fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, x)
+					canFail = true
+				} else {
+					m[x] = struct{}{}
+					fmt.Fprintf(w, "%s := %s.Aux\n", x, v)
+				}
 			}
-		}
-	}
-
-	for i, arg := range args {
-		if arg == "_" {
-			continue
-		}
-		if !strings.Contains(arg, "(") {
+		} else if a == "_" {
+			argnum++
+		} else if !strings.Contains(a, "(") {
 			// leaf variable
-			if _, ok := m[arg]; ok {
+			if _, ok := m[a]; ok {
 				// variable already has a definition. Check whether
 				// the old definition and the new definition match.
 				// For example, (add x x).  Equality is just pointer equality
 				// on Values (so cse is important to do before lowering).
-				fmt.Fprintf(w, "if %s != %s.Args[%d] {\nbreak\n}\n", arg, v, i)
+				fmt.Fprintf(w, "if %s != %s.Args[%d] {\nbreak\n}\n", a, v, argnum)
 				canFail = true
 			} else {
 				// remember that this variable references the given value
-				m[arg] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.Args[%d]\n", arg, v, i)
+				m[a] = struct{}{}
+				fmt.Fprintf(w, "%s := %s.Args[%d]\n", a, v, argnum)
 			}
-			continue
-		}
-		// compound sexpr
-		var argname string
-		colon := strings.Index(arg, ":")
-		openparen := strings.Index(arg, "(")
-		if colon >= 0 && openparen >= 0 && colon < openparen {
-			// rule-specified name
-			argname = arg[:colon]
-			arg = arg[colon+1:]
+			argnum++
 		} else {
-			// autogenerated name
-			argname = fmt.Sprintf("%s_%d", v, i)
-		}
-		fmt.Fprintf(w, "%s := %s.Args[%d]\n", argname, v, i)
-		if genMatch0(w, arch, arg, argname, m, false, loc) {
-			canFail = true
+			// compound sexpr
+			var argname string
+			colon := strings.Index(a, ":")
+			openparen := strings.Index(a, "(")
+			if colon >= 0 && openparen >= 0 && colon < openparen {
+				// rule-specified name
+				argname = a[:colon]
+				a = a[colon+1:]
+			} else {
+				// autogenerated name
+				argname = fmt.Sprintf("%s_%d", v, argnum)
+			}
+			fmt.Fprintf(w, "%s := %s.Args[%d]\n", argname, v, argnum)
+			if genMatch0(w, arch, a, argname, m, false, loc) {
+				canFail = true
+			}
+			argnum++
 		}
 	}
-
 	if op.argLength == -1 {
-		fmt.Fprintf(w, "if len(%s.Args) != %d {\nbreak\n}\n", v, len(args))
+		fmt.Fprintf(w, "if len(%s.Args) != %d {\nbreak\n}\n", v, argnum)
 		canFail = true
+	} else if int(op.argLength) != argnum {
+		log.Fatalf("%s: op %s should have %d args, has %d", loc, op.name, op.argLength, argnum)
 	}
 	return canFail
 }
@@ -469,44 +500,105 @@ func genResult0(w io.Writer, arch arch, result string, alloc *int, top, move boo
 		return result
 	}

-	op, oparch, typ, auxint, aux, args := parseValue(result, arch, loc)
+	s := split(result[1 : len(result)-1]) // remove parens, then split

-	// Find the type of the variable.
-	typeOverride := typ != ""
-	if typ == "" && op.typ != "" {
-		typ = typeName(op.typ)
+	// Find op record
+	var op opData
+	for _, x := range genericOps {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	for _, x := range arch.ops {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	if op.name == "" {
+		log.Fatalf("%s: unknown op %s", loc, s[0])
 	}

+	// Find the type of the variable.
+	var opType string
+	var typeOverride bool
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction
+			opType = a[1 : len(a)-1] // remove <>
+			typeOverride = true
+			break
+		}
+	}
+	if opType == "" {
+		// find default type, if any
+		for _, op := range arch.ops {
+			if op.name == s[0] && op.typ != "" {
+				opType = typeName(op.typ)
+				break
+			}
+		}
+	}
+	if opType == "" {
+		for _, op := range genericOps {
+			if op.name == s[0] && op.typ != "" {
+				opType = typeName(op.typ)
+				break
+			}
+		}
+	}
 	var v string
 	if top && !move {
 		v = "v"
-		fmt.Fprintf(w, "v.reset(Op%s%s)\n", oparch, op.name)
+		fmt.Fprintf(w, "v.reset(%s)\n", opName(s[0], arch))
 		if typeOverride {
-			fmt.Fprintf(w, "v.Type = %s\n", typ)
+			fmt.Fprintf(w, "v.Type = %s\n", opType)
 		}
 	} else {
-		if typ == "" {
-			log.Fatalf("sub-expression %s (op=Op%s%s) must have a type", result, oparch, op.name)
+		if opType == "" {
+			log.Fatalf("sub-expression %s (op=%s) must have a type", result, s[0])
 		}
 		v = fmt.Sprintf("v%d", *alloc)
 		*alloc++
-		fmt.Fprintf(w, "%s := b.NewValue0(v.Line, Op%s%s, %s)\n", v, oparch, op.name, typ)
+		fmt.Fprintf(w, "%s := b.NewValue0(v.Line, %s, %s)\n", v, opName(s[0], arch), opType)
 		if move && top {
 			// Rewrite original into a copy
 			fmt.Fprintf(w, "v.reset(OpCopy)\n")
 			fmt.Fprintf(w, "v.AddArg(%s)\n", v)
 		}
 	}
-
-	if auxint != "" {
-		fmt.Fprintf(w, "%s.AuxInt = %s\n", v, auxint)
+	argnum := 0
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction, handled above
+		} else if a[0] == '[' {
+			// auxint restriction
+			switch op.aux {
+			case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove []
+			fmt.Fprintf(w, "%s.AuxInt = %s\n", v, x)
+		} else if a[0] == '{' {
+			// aux restriction
+			switch op.aux {
+			case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove {}
+			fmt.Fprintf(w, "%s.Aux = %s\n", v, x)
+		} else {
+			// regular argument (sexpr or variable)
+			x := genResult0(w, arch, a, alloc, false, move, loc)
+			fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x)
+			argnum++
+		}
 	}
-	if aux != "" {
-		fmt.Fprintf(w, "%s.Aux = %s\n", v, aux)
-	}
-	for _, arg := range args {
-		x := genResult0(w, arch, arg, alloc, false, move, loc)
-		fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x)
+	if op.argLength != -1 && int(op.argLength) != argnum {
+		log.Fatalf("%s: op %s should have %d args, has %d", loc, op.name, op.argLength, argnum)
 	}

 	return v
@@ -574,102 +666,16 @@ func isBlock(name string, arch arch) bool {
 	return false
 }

-// parseValue parses a parenthesized value from a rule.
-// The value can be from the match or the result side.
-// It returns the op and unparsed strings for typ, auxint, and aux restrictions and for all args.
-// oparch is the architecture that op is located in, or "" for generic.
-func parseValue(val string, arch arch, loc string) (op opData, oparch string, typ string, auxint string, aux string, args []string) {
-	val = val[1 : len(val)-1] // remove ()
-
-	// Split val up into regions.
-	// Split by spaces/tabs, except those contained in (), {}, [], or <>.
-	s := split(val)
-
-	// Extract restrictions and args.
-	for _, a := range s[1:] {
-		switch a[0] {
-		case '<':
-			typ = a[1 : len(a)-1] // remove <>
-		case '[':
-			auxint = a[1 : len(a)-1] // remove []
-		case '{':
-			aux = a[1 : len(a)-1] // remove {}
-		default:
-			args = append(args, a)
+// opName converts from an op name specified in a rule file to an Op enum.
+// if the name matches a generic op, returns "Op" plus the specified name.
+// Otherwise, returns "Op" plus arch name plus op name.
+func opName(name string, arch arch) string {
+	for _, op := range genericOps {
+		if op.name == name {
+			return "Op" + name
 		}
 	}
-
-	// Resolve the op.
-
-	// match reports whether x is a good op to select.
-	// If strict is true, rule generation might succeed.
-	// If strict is false, rule generation has failed,
-	// but we're trying to generate a useful error.
-	// Doing strict=true then strict=false allows
-	// precise op matching while retaining good error messages.
-	match := func(x opData, strict bool, archname string) bool {
-		if x.name != s[0] {
-			return false
-		}
-		if x.argLength != -1 && int(x.argLength) != len(args) {
-			if strict {
-				return false
-			} else {
-				log.Printf("%s: op %s (%s) should have %d args, has %d", loc, s[0], archname, op.argLength, len(args))
-			}
-		}
-		return true
-	}
-
-	for _, x := range genericOps {
-		if match(x, true, "generic") {
-			op = x
-			break
-		}
-	}
-	if arch.name != "generic" {
-		for _, x := range arch.ops {
-			if match(x, true, arch.name) {
-				if op.name != "" {
-					log.Fatalf("%s: matches for op %s found in both generic and %s", loc, op.name, arch.name)
-				}
-				op = x
-				oparch = arch.name
-				break
-			}
-		}
-	}
-
-	if op.name == "" {
-		// Failed to find the op.
-		// Run through everything again with strict=false
-		// to generate useful diagnosic messages before failing.
-		for _, x := range genericOps {
-			match(x, false, "generic")
-		}
-		for _, x := range arch.ops {
-			match(x, false, arch.name)
-		}
-		log.Fatalf("%s: unknown op %s", loc, s)
-	}
-
-	// Sanity check aux, auxint.
-	if auxint != "" {
-		switch op.aux {
-		case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32":
-		default:
-			log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux)
-		}
-	}
-	if aux != "" {
-		switch op.aux {
-		case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32":
-		default:
-			log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux)
-		}
-	}
-
-	return
+	return "Op" + arch.name + name
 }

 func blockName(name string, arch arch) string {
@@ -683,13 +689,6 @@ func blockName(name string, arch arch) string {

 // typeName returns the string to use to generate a type.
 func typeName(typ string) string {
-	if typ[0] == '(' {
-		ts := strings.Split(typ[1:len(typ)-1], ",")
-		if len(ts) != 2 {
-			panic("Tuple expect 2 arguments")
-		}
-		return "MakeTuple(" + typeName(ts[0]) + ", " + typeName(ts[1]) + ")"
-	}
 	switch typ {
 	case "Flags", "Mem", "Void", "Int128":
 		return "Type" + typ
--- a/src/cmd/compile/internal/ssa/html.go
+++ b/src/cmd/compile/internal/ssa/html.go
@@ -359,7 +359,7 @@ func (v *Value) LongHTML() string {
 	}
 	r := v.Block.Func.RegAlloc
 	if int(v.ID) < len(r) && r[v.ID] != nil {
-		s += " : " + html.EscapeString(r[v.ID].Name())
+		s += " : " + r[v.ID].Name()
 	}
 	s += "</span>"
 	return s
--- a/src/cmd/compile/internal/ssa/location.go
+++ b/src/cmd/compile/internal/ssa/location.go
@@ -36,16 +36,3 @@ func (s LocalSlot) Name() string {
 	}
 	return fmt.Sprintf("%s+%d[%s]", s.N, s.Off, s.Type)
 }
-
-type LocPair [2]Location
-
-func (t LocPair) Name() string {
-	n0, n1 := "nil", "nil"
-	if t[0] != nil {
-		n0 = t[0].Name()
-	}
-	if t[1] != nil {
-		n1 = t[1].Name()
-	}
-	return fmt.Sprintf("<%s,%s>", n0, n1)
-}
--- a/src/cmd/compile/internal/ssa/lower.go
+++ b/src/cmd/compile/internal/ssa/lower.go
@@ -21,15 +21,10 @@ func checkLower(f *Func) {
 				continue // lowered
 			}
 			switch v.Op {
-			case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive, OpKeepAlive, OpSelect0, OpSelect1:
+			case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive, OpKeepAlive:
 				continue // ok not to lower
-			case OpGetG:
-				if f.Config.hasGReg {
-					// has hardware g register, regalloc takes care of it
-					continue // ok not to lower
-				}
 			}
-			s := "not lowered: " + v.String() + ", " + v.Op.String() + " " + v.Type.SimpleString()
+			s := "not lowered: " + v.Op.String() + " " + v.Type.SimpleString()
 			for _, a := range v.Args {
 				s += " " + a.Type.SimpleString()
 			}
--- a/src/cmd/compile/internal/ssa/op.go
+++ b/src/cmd/compile/internal/ssa/op.go
@@ -26,8 +26,7 @@ type opInfo struct {
 	generic           bool // this is a generic (arch-independent) opcode
 	rematerializeable bool // this op is rematerializeable
 	commutative       bool // this operation is commutative (e.g. addition)
-	resultInArg0      bool // last output of v and v.Args[0] must be allocated to the same register
-	clobberFlags      bool // this op clobbers flags register
+	resultInArg0      bool // v and v.Args[0] must be allocated to the same register
 }

 type inputInfo struct {
@@ -35,15 +34,10 @@ type inputInfo struct {
 	regs regMask // allowed input registers
 }

-type outputInfo struct {
-	idx  int     // index in output tuple
-	regs regMask // allowed output registers
-}
-
 type regInfo struct {
 	inputs   []inputInfo // ordered in register allocation order
 	clobbers regMask
-	outputs  []outputInfo // ordered in register allocation order
+	outputs  []regMask // NOTE: values can only have 1 output for now.
 }

 type auxType int8
@@ -130,31 +124,3 @@ func (x ValAndOff) add(off int64) int64 {
 	}
 	return makeValAndOff(x.Val(), x.Off()+off)
 }
-
-// SizeAndAlign holds both the size and the alignment of a type,
-// used in Zero and Move ops.
-// The high 8 bits hold the alignment.
-// The low 56 bits hold the size.
-type SizeAndAlign int64
-
-func (x SizeAndAlign) Size() int64 {
-	return int64(x) & (1<<56 - 1)
-}
-func (x SizeAndAlign) Align() int64 {
-	return int64(uint64(x) >> 56)
-}
-func (x SizeAndAlign) Int64() int64 {
-	return int64(x)
-}
-func (x SizeAndAlign) String() string {
-	return fmt.Sprintf("size=%d,align=%d", x.Size(), x.Align())
-}
-func MakeSizeAndAlign(size, align int64) SizeAndAlign {
-	if size&^(1<<56-1) != 0 {
-		panic("size too big in SizeAndAlign")
-	}
-	if align >= 1<<8 {
-		panic("alignment too big in SizeAndAlign")
-	}
-	return SizeAndAlign(size | align<<56)
-}
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
--- a/src/cmd/compile/internal/ssa/opt.go
+++ b/src/cmd/compile/internal/ssa/opt.go
@@ -11,7 +11,4 @@ func opt(f *Func) {

 func dec(f *Func) {
 	applyRewrite(f, rewriteBlockdec, rewriteValuedec)
-	if f.Config.IntSize == 4 && f.Config.arch != "amd64p32" {
-		applyRewrite(f, rewriteBlockdec64, rewriteValuedec64)
-	}
 }
--- a/src/cmd/compile/internal/ssa/phiopt.go
+++ b/src/cmd/compile/internal/ssa/phiopt.go
@@ -57,16 +57,7 @@ func phiopt(f *Func) {
 		}

 		for _, v := range b.Values {
-			if v.Op != OpPhi {
-				continue
-			}
-
-			// Look for conversions from bool to 0/1.
-			if v.Type.IsInteger() {
-				phioptint(v, b0, reverse)
-			}
-
-			if !v.Type.IsBoolean() {
+			if v.Op != OpPhi || !v.Type.IsBoolean() {
 				continue
 			}

@@ -119,55 +110,5 @@ func phiopt(f *Func) {
 			}
 		}
 	}
-}
-
-func phioptint(v *Value, b0 *Block, reverse int) {
-	a0 := v.Args[0]
-	a1 := v.Args[1]
-	if a0.Op != a1.Op {
-		return
-	}
-
-	switch a0.Op {
-	case OpConst8, OpConst16, OpConst32, OpConst64:
-	default:
-		return
-	}
-
-	negate := false
-	switch {
-	case a0.AuxInt == 0 && a1.AuxInt == 1:
-		negate = true
-	case a0.AuxInt == 1 && a1.AuxInt == 0:
-	default:
-		return
-	}
-
-	if reverse == 1 {
-		negate = !negate
-	}
-
-	switch v.Type.Size() {
-	case 1:
-		v.reset(OpCopy)
-	case 2:
-		v.reset(OpZeroExt8to16)
-	case 4:
-		v.reset(OpZeroExt8to32)
-	case 8:
-		v.reset(OpZeroExt8to64)
-	default:
-		v.Fatalf("bad int size %d", v.Type.Size())
-	}
-
-	a := b0.Control
-	if negate {
-		a = v.Block.NewValue1(v.Line, OpNot, a.Type, a)
-	}
-	v.AddArg(a)
-
-	f := b0.Func
-	if f.pass.debug > 0 {
-		f.Config.Warnl(v.Block.Line, "converted OpPhi bool -> int%d", v.Type.Size()*8)
-	}
+
 }
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -206,7 +206,6 @@ type regAllocState struct {
 	numRegs     register
 	SPReg       register
 	SBReg       register
-	GReg        register
 	allocatable regMask

 	// for each block, its primary predecessor.
@@ -333,14 +332,14 @@ func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
 	s.f.setHome(c, &s.registers[r])
 }

-// allocReg chooses a register from the set of registers in mask.
+// allocReg chooses a register for v from the set of registers in mask.
 // If there is no unused register, a Value will be kicked out of
 // a register to make room.
-func (s *regAllocState) allocReg(mask regMask, v *Value) register {
+func (s *regAllocState) allocReg(v *Value, mask regMask) register {
 	mask &= s.allocatable
 	mask &^= s.nospill
 	if mask == 0 {
-		s.f.Fatalf("no register available for %s", v)
+		s.f.Fatalf("no register available")
 	}

 	// Pick an unused register if one is available.
@@ -401,7 +400,7 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line
 	}

 	// Allocate a register.
-	r := s.allocReg(mask, v)
+	r := s.allocReg(v, mask)

 	// Allocate v to the new register.
 	var c *Value
@@ -439,76 +438,28 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line
 func (s *regAllocState) init(f *Func) {
 	s.f = f
 	s.registers = f.Config.registers
-	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
-		s.f.Fatalf("bad number of registers: %d", nr)
-	} else {
-		s.numRegs = register(nr)
+	s.numRegs = register(len(s.registers))
+	if s.numRegs > noRegister || s.numRegs > register(unsafe.Sizeof(regMask(0))*8) {
+		panic("too many registers")
 	}
-	// Locate SP, SB, and g registers.
-	s.SPReg = noRegister
-	s.SBReg = noRegister
-	s.GReg = noRegister
 	for r := register(0); r < s.numRegs; r++ {
-		switch s.registers[r].Name() {
-		case "SP":
+		if s.registers[r].Name() == "SP" {
 			s.SPReg = r
-		case "SB":
-			s.SBReg = r
-		case "g":
-			s.GReg = r
 		}
-	}
-	// Make sure we found all required registers.
-	switch noRegister {
-	case s.SPReg:
-		s.f.Fatalf("no SP register found")
-	case s.SBReg:
-		s.f.Fatalf("no SB register found")
-	case s.GReg:
-		if f.Config.hasGReg {
-			s.f.Fatalf("no g register found")
+		if s.registers[r].Name() == "SB" {
+			s.SBReg = r
 		}
 	}

 	// Figure out which registers we're allowed to use.
-	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask
+	s.allocatable = regMask(1)<<s.numRegs - 1
 	s.allocatable &^= 1 << s.SPReg
 	s.allocatable &^= 1 << s.SBReg
-	if s.f.Config.hasGReg {
-		s.allocatable &^= 1 << s.GReg
-	}
-	if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
-		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
+	if s.f.Config.ctxt.Framepointer_enabled {
+		s.allocatable &^= 1 << 5 // BP
 	}
 	if s.f.Config.ctxt.Flag_dynlink {
-		switch s.f.Config.arch {
-		case "amd64":
-			s.allocatable &^= 1 << 15 // R15
-		case "arm":
-			s.allocatable &^= 1 << 9 // R9
-		case "arm64":
-			// nothing to do?
-		case "386":
-			// nothing to do.
-			// Note that for Flag_shared (position independent code)
-			// we do need to be careful, but that carefulness is hidden
-			// in the rewrite rules so we always have a free register
-			// available for global load/stores. See gen/386.rules (search for Flag_shared).
-		default:
-			s.f.Config.fe.Unimplementedf(0, "arch %s not implemented", s.f.Config.arch)
-		}
-	}
-	if s.f.Config.nacl {
-		switch s.f.Config.arch {
-		case "arm":
-			s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm
-		case "amd64p32":
-			s.allocatable &^= 1 << 5  // BP - reserved for nacl
-			s.allocatable &^= 1 << 15 // R15 - reserved for nacl
-		}
-	}
-	if s.f.Config.use387 {
-		s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go)
+		s.allocatable &^= 1 << 15 // R15
 	}

 	s.regs = make([]regState, s.numRegs)
@@ -516,13 +467,11 @@ func (s *regAllocState) init(f *Func) {
 	s.orig = make([]*Value, f.NumValues())
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
-			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
+			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() {
 				s.values[v.ID].needReg = true
 				s.values[v.ID].rematerializeable = v.rematerializeable()
 				s.orig[v.ID] = v
 			}
-			// Note: needReg is false for values returning Tuple types.
-			// Instead, we mark the corresponding Selects as needReg.
 		}
 	}
 	s.computeLive()
@@ -615,9 +564,9 @@ func (s *regAllocState) setState(regs []endReg) {
 func (s *regAllocState) compatRegs(t Type) regMask {
 	var m regMask
 	if t.IsFloat() || t == TypeInt128 {
-		m = s.f.Config.fpRegMask
+		m = 0xffff << 16 // X0-X15
 	} else {
-		m = s.f.Config.gpRegMask
+		m = 0xffff << 0 // AX-R15
 	}
 	return m & s.allocatable
 }
@@ -837,9 +786,6 @@ func (s *regAllocState) regalloc(f *Func) {
 				if phiRegs[i] != noRegister {
 					continue
 				}
-				if s.f.Config.use387 && v.Type.IsFloat() {
-					continue // 387 can't handle floats in registers between blocks
-				}
 				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
 				if m != 0 {
 					r := pickReg(m)
@@ -969,7 +915,6 @@ func (s *regAllocState) regalloc(f *Func) {
 			if s.f.pass.debug > regDebug {
 				fmt.Printf("  processing %s\n", v.LongString())
 			}
-			regspec := opcodeTable[v.Op].reg
 			if v.Op == OpPhi {
 				f.Fatalf("phi %s not at start of block", v)
 			}
@@ -985,28 +930,6 @@ func (s *regAllocState) regalloc(f *Func) {
 				s.advanceUses(v)
 				continue
 			}
-			if v.Op == OpSelect0 || v.Op == OpSelect1 {
-				if s.values[v.ID].needReg {
-					var i = 0
-					if v.Op == OpSelect1 {
-						i = 1
-					}
-					s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).Num), v, v)
-				}
-				b.Values = append(b.Values, v)
-				s.advanceUses(v)
-				goto issueSpill
-			}
-			if v.Op == OpGetG && s.f.Config.hasGReg {
-				// use hardware g register
-				if s.regs[s.GReg].v != nil {
-					s.freeReg(s.GReg) // kick out the old value
-				}
-				s.assignReg(s.GReg, v, v)
-				b.Values = append(b.Values, v)
-				s.advanceUses(v)
-				goto issueSpill
-			}
 			if v.Op == OpArg {
 				// Args are "pre-spilled" values. We don't allocate
 				// any register here. We just set up the spill pointer to
@@ -1034,6 +957,7 @@ func (s *regAllocState) regalloc(f *Func) {
 				b.Values = append(b.Values, v)
 				continue
 			}
+			regspec := opcodeTable[v.Op].reg
 			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
 				// No register allocation required (or none specified yet)
 				s.freeRegs(regspec.clobbers)
@@ -1078,6 +1002,10 @@ func (s *regAllocState) regalloc(f *Func) {
 			args = append(args[:0], v.Args...)
 			for _, i := range regspec.inputs {
 				mask := i.regs
+				if mask == flagRegMask {
+					// TODO: remove flag input from regspec.inputs.
+					continue
+				}
 				if mask&s.values[args[i.idx].ID].regs == 0 {
 					// Need a new register for the input.
 					mask &= s.allocatable
@@ -1187,73 +1115,49 @@ func (s *regAllocState) regalloc(f *Func) {
 			// Dump any registers which will be clobbered
 			s.freeRegs(regspec.clobbers)

-			// Pick registers for outputs.
-			{
-				outRegs := [2]register{noRegister, noRegister}
-				var used regMask
-				for _, out := range regspec.outputs {
-					mask := out.regs & s.allocatable &^ used
-					if mask == 0 {
-						continue
-					}
-					if opcodeTable[v.Op].resultInArg0 && out.idx == len(regspec.outputs)-1 {
-						if !opcodeTable[v.Op].commutative {
-							// Output must use the same register as input 0.
-							r := register(s.f.getHome(args[0].ID).(*Register).Num)
-							mask = regMask(1) << r
-						} else {
-							// Output must use the same register as input 0 or 1.
-							r0 := register(s.f.getHome(args[0].ID).(*Register).Num)
-							r1 := register(s.f.getHome(args[1].ID).(*Register).Num)
-							// Check r0 and r1 for desired output register.
-							found := false
-							for _, r := range dinfo[idx].out {
-								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
-									mask = regMask(1) << r
-									found = true
-									if r == r1 {
-										args[0], args[1] = args[1], args[0]
-									}
-									break
+			// Pick register for output.
+			if s.values[v.ID].needReg {
+				mask := regspec.outputs[0] & s.allocatable
+				if opcodeTable[v.Op].resultInArg0 {
+					if !opcodeTable[v.Op].commutative {
+						// Output must use the same register as input 0.
+						r := register(s.f.getHome(args[0].ID).(*Register).Num)
+						mask = regMask(1) << r
+					} else {
+						// Output must use the same register as input 0 or 1.
+						r0 := register(s.f.getHome(args[0].ID).(*Register).Num)
+						r1 := register(s.f.getHome(args[1].ID).(*Register).Num)
+						// Check r0 and r1 for desired output register.
+						found := false
+						for _, r := range dinfo[idx].out {
+							if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
+								mask = regMask(1) << r
+								found = true
+								if r == r1 {
+									args[0], args[1] = args[1], args[0]
 								}
-							}
-							if !found {
-								// Neither are desired, pick r0.
-								mask = regMask(1) << r0
+								break
 							}
 						}
-					}
-					for _, r := range dinfo[idx].out {
-						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
-							// Desired register is allowed and unused.
-							mask = regMask(1) << r
-							break
+						if !found {
+							// Neither are desired, pick r0.
+							mask = regMask(1) << r0
 						}
 					}
-					// Avoid registers we're saving for other values.
-					if mask&^desired.avoid != 0 {
-						mask &^= desired.avoid
-					}
-					r := s.allocReg(mask, v)
-					outRegs[out.idx] = r
-					used |= regMask(1) << r
 				}
-				// Record register choices
-				if v.Type.IsTuple() {
-					var outLocs LocPair
-					if r := outRegs[0]; r != noRegister {
-						outLocs[0] = &s.registers[r]
-					}
-					if r := outRegs[1]; r != noRegister {
-						outLocs[1] = &s.registers[r]
-					}
-					s.f.setHome(v, outLocs)
-					// Note that subsequent SelectX instructions will do the assignReg calls.
-				} else {
-					if r := outRegs[0]; r != noRegister {
-						s.assignReg(r, v, v)
+				for _, r := range dinfo[idx].out {
+					if r != noRegister && (mask&^s.used)>>r&1 != 0 {
+						// Desired register is allowed and unused.
+						mask = regMask(1) << r
+						break
 					}
 				}
+				// Avoid registers we're saving for other values.
+				if mask&^desired.avoid != 0 {
+					mask &^= desired.avoid
+				}
+				r := s.allocReg(v, mask)
+				s.assignReg(r, v, v)
 			}

 			// Issue the Value itself.
@@ -1272,7 +1176,6 @@ func (s *regAllocState) regalloc(f *Func) {
 			//     f()
 			// }
 			// It would be good to have both spill and restore inside the IF.
-		issueSpill:
 			if s.values[v.ID].needReg {
 				spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
 				s.setOrig(spill, v)
@@ -1291,10 +1194,9 @@ func (s *regAllocState) regalloc(f *Func) {
 			if s.f.pass.debug > regDebug {
 				fmt.Printf("  processing control %s\n", v.LongString())
 			}
-			// We assume that a control input can be passed in any
-			// type-compatible register. If this turns out not to be true,
-			// we'll need to introduce a regspec for a block's control value.
-			s.allocValToReg(v, s.compatRegs(v.Type), false, b.Line)
+			// TODO: regspec for block control values, instead of using
+			// register set from the control op's output.
+			s.allocValToReg(v, opcodeTable[v.Op].reg.outputs[0], false, b.Line)
 			// Remove this use from the uses list.
 			vi := &s.values[v.ID]
 			u := vi.uses
@@ -1306,11 +1208,6 @@ func (s *regAllocState) regalloc(f *Func) {
 			s.freeUseRecords = u
 		}

-		// Spill any values that can't live across basic block boundaries.
-		if s.f.Config.use387 {
-			s.freeRegs(s.f.Config.fpRegMask)
-		}
-
 		// If we are approaching a merge point and we are the primary
 		// predecessor of it, find live values that we use soon after
 		// the merge point and promote them to registers now.
@@ -1334,9 +1231,6 @@ func (s *regAllocState) regalloc(f *Func) {
 					continue
 				}
 				v := s.orig[vid]
-				if s.f.Config.use387 && v.Type.IsFloat() {
-					continue // 387 can't handle floats in registers between blocks
-				}
 				m := s.compatRegs(v.Type) &^ s.used
 				if m&^desired.avoid != 0 {
 					m &^= desired.avoid
@@ -1875,9 +1769,6 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value) bool {
 			(*splice).Uses--
 			*splice = occupant.c
 			occupant.c.Uses++
-			if occupant.c.Op == OpStoreReg {
-				e.s.lateSpillUse(vid)
-			}
 		}
 		// Note: if splice==nil then c will appear dead. This is
 		// non-SSA formed code, so be careful after this pass not to run
@@ -2119,8 +2010,6 @@ func (e *edgeState) findRegFor(typ Type) Location {
 	return nil
 }

-// rematerializeable reports whether the register allocator should recompute
-// a value instead of spilling/restoring it.
 func (v *Value) rematerializeable() bool {
 	if !opcodeTable[v.Op].rematerializeable {
 		return false
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -205,11 +205,6 @@ func is32Bit(n int64) bool {
 	return n == int64(int32(n))
 }

-// is16Bit reports whether n can be represented as a signed 16 bit integer.
-func is16Bit(n int64) bool {
-	return n == int64(int16(n))
-}
-
 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 func b2i(b bool) int64 {
 	if b {
@@ -259,19 +254,6 @@ func isSamePtr(p1, p2 *Value) bool {
 	return false
 }

-// moveSize returns the number of bytes an aligned MOV instruction moves
-func moveSize(align int64, c *Config) int64 {
-	switch {
-	case align%8 == 0 && c.IntSize == 8:
-		return 8
-	case align%4 == 0:
-		return 4
-	case align%2 == 0:
-		return 2
-	}
-	return 1
-}
-
 // mergePoint finds a block among a's blocks which dominates b and is itself
 // dominated by all of a's blocks. Returns nil if it can't find one.
 // Might return nil even if one does exist.
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
--- a/Show More
+++ b/Show More