[release-branch.go1.7] go1.7.2

Change-Id: I546e8b1aa4facdbf13bb80d386bf4839a3aff9d1 Reviewed-on: https://go-review.googlesource.com/31314 Run-TryBot: Chris Broadfoot <cbro@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Chris Broadfoot <cbro@golang.org>
[release-branch.go1.7] doc: document go1.7.2
2026-02-02 00:52:04 +03:00 · 2016-10-17 21:08:12 +00:00 · 2016-10-17 20:41:45 +00:00 · 2016-10-17 20:25:13 +00:00 · 2016-10-17 20:25:07 +00:00 · 2016-10-17 20:25:00 +00:00
139 changed files with 7833 additions and 87840 deletions
--- a/1
+++ b/1
@@ -0,0 +1 @@
+go1.7.2
--- a/doc/contrib.html
+++ b/doc/contrib.html
@@ -34,6 +34,7 @@ We encourage all Go users to subscribe to
 <p>A <a href="/doc/devel/release.html">summary</a> of the changes between Go releases. Notes for the major releases:</p>

 <ul>
+	<li><a href="/doc/go1.7">Go 1.7</a> <small>(August 2016)</small></li>
 	<li><a href="/doc/go1.6">Go 1.6</a> <small>(February 2016)</small></li>
 	<li><a href="/doc/go1.5">Go 1.5</a> <small>(August 2015)</small></li>
 	<li><a href="/doc/go1.4">Go 1.4</a> <small>(December 2014)</small></li>
--- a/doc/devel/release.html
+++ b/doc/devel/release.html
@@ -30,6 +30,33 @@ to fix critical security problems in both Go 1.4 and Go 1.5 as they arise.
 See the <a href="/security">security policy</a> for more details.
 </p>

+<h2 id="go1.7">go1.7 (released 2016/08/15)</h2>
+
+<p>
+Go 1.7 is a major release of Go.
+Read the <a href="/doc/go1.7">Go 1.7 Release Notes</a> for more information.
+</p>
+
+<h3 id="go1.7.minor">Minor revisions</h3>
+
+<p>
+go1.7.1 (released 2016/09/07) includes fixes to the compiler, runtime,
+documentation, and the <code>compress/flate</code>, <code>hash/crc32</code>,
+<code>io</code>, <code>net</code>, <code>net/http</code>,
+<code>path/filepath</code>, <code>reflect</code>, and <code>syscall</code>
+packages.
+See the <a href="https://github.com/golang/go/issues?q=milestone%3AGo1.7.1">Go
+1.7.1 milestone</a> on our issue tracker for details.
+</p>
+
+<p>
+go1.7.2 (released 2016/10/17) includes fixes to the compiler, runtime,
+and the <code>crypto/cipher</code>, <code>crypto/tls</code>,
+<code>net/http</code>, and <code>strings</code> packages.
+See the <a href="https://github.com/golang/go/issues?q=milestone%3AGo1.7.2">Go
+1.7.2 milestone</a> on our issue tracker for details.
+</p>
+
 <h2 id="go1.6">go1.6 (released 2016/02/17)</h2>

 <p>
--- a/doc/go1.7.html
+++ b/doc/go1.7.html
@@ -1,5 +1,5 @@
 <!--{
-	"Title": "Go 1.7 Release Notes DRAFT",
+	"Title": "Go 1.7 Release Notes",
 	"Path":  "/doc/go1.7",
 	"Template": true
 }-->
@@ -25,15 +25,6 @@ Do not send CLs removing the interior tags from such phrases.
 ul li { margin: 0.5em 0; }
 </style>

-<p>
-<!-- TODO: REMOVE THIS COMMENT -->
-<!-- TODO: Also remove "DRAFT" in the "Title" at the top of this file. -->
-<i>NOTE: This is a DRAFT of the Go 1.7 release notes, prepared for the Go 1.7 beta.
-Go 1.7 has NOT yet been released.
-By our regular schedule, it is expected some time in August 2016.
-</i>
-</p>
-
 <h2 id="introduction">Introduction to Go 1.7</h2>

 <p>
@@ -919,7 +910,7 @@ For example, the address on which a request received is
 <p>
 The server's <a href="/pkg/net/http/#Server.Serve"><code>Serve</code></a> method
 now only enables HTTP/2 support if the <code>Server.TLSConfig</code> field is <code>nil</code>
-or includes <code>"h2"</code> in its <code>TLSConfig.NextProto</code>.
+or includes <code>"h2"</code> in its <code>TLSConfig.NextProtos</code>.
 </p>

 <p>
--- a/doc/gopher/favicon.svg
+++ b/doc/gopher/favicon.svg
@@ -0,0 +1,238 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="32"
+   height="32"
+   viewBox="0 0 32 32.000001"
+   id="svg4416"
+   version="1.1"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="favicon.svg"
+   inkscape:export-filename="../../favicon.png"
+   inkscape:export-xdpi="90"
+   inkscape:export-ydpi="90">
+  <defs
+     id="defs4418" />
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="15.839192"
+     inkscape:cx="17.966652"
+     inkscape:cy="9.2991824"
+     inkscape:document-units="px"
+     inkscape:current-layer="layer1"
+     showgrid="true"
+     units="px"
+     inkscape:snap-bbox="true"
+     inkscape:snap-bbox-edge-midpoints="false"
+     inkscape:bbox-nodes="true"
+     showguides="false"
+     inkscape:window-width="1920"
+     inkscape:window-height="1018"
+     inkscape:window-x="1912"
+     inkscape:window-y="-8"
+     inkscape:window-maximized="1"
+     inkscape:object-nodes="true"
+     inkscape:snap-smooth-nodes="true"
+     inkscape:snap-global="false">
+    <inkscape:grid
+       type="xygrid"
+       id="grid5148" />
+  </sodipodi:namedview>
+  <metadata
+     id="metadata4421">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="icon"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(0,-1020.3622)">
+    <ellipse
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#384e54;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="ellipse4216"
+       cx="-907.35657"
+       cy="479.90009"
+       rx="3.5793996"
+       ry="3.8207953"
+       transform="matrix(-0.49169095,-0.87076978,-0.87076978,0.49169095,0,0)"
+       inkscape:transform-center-x="0.67794294"
+       inkscape:transform-center-y="-2.3634048" />
+    <ellipse
+       inkscape:transform-center-y="-2.3633882"
+       inkscape:transform-center-x="-0.67793718"
+       transform="matrix(0.49169095,-0.87076978,0.87076978,0.49169095,0,0)"
+       ry="3.8207953"
+       rx="3.5793996"
+       cy="507.8461"
+       cx="-891.57654"
+       id="ellipse4463"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#384e54;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" />
+    <path
+       inkscape:connector-curvature="0"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#384e54;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 16.091693,1021.3642 c -1.105749,0.01 -2.210341,0.049 -3.31609,0.09 C 6.8422558,1021.6738 2,1026.3942 2,1032.3622 c 0,2.9786 0,13 0,20 l 28,0 c 0,-8 0,-16 0,-20 0,-5.9683 -4.667345,-10.4912 -10.59023,-10.908 -1.10575,-0.078 -2.212328,-0.099 -3.318077,-0.09 z"
+       id="path4465"
+       sodipodi:nodetypes="ccsccscc" />
+    <path
+       inkscape:transform-center-y="-1.3604657"
+       inkscape:transform-center-x="-0.98424303"
+       sodipodi:nodetypes="sssssss"
+       inkscape:connector-curvature="0"
+       id="path4469"
+       d="m 4.6078867,1025.0462 c 0.459564,0.2595 1.818262,1.2013 1.980983,1.648 0.183401,0.5035 0.159385,1.0657 -0.114614,1.551 -0.346627,0.6138 -1.005341,0.9487 -1.696421,0.9365 -0.339886,-0.01 -1.720283,-0.6372 -2.042561,-0.8192 -0.97754,-0.5519 -1.350795,-1.7418 -0.833686,-2.6576 0.517109,-0.9158 1.728749,-1.2107 2.706299,-0.6587 z"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#76e1fe;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" />
+    <rect
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:0.32850246;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="rect4473"
+       width="3.0866659"
+       height="3.5313663"
+       x="14.406213"
+       y="1035.6842"
+       ry="0.62426329" />
+    <path
+       inkscape:connector-curvature="0"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#76e1fe;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 16,1023.3622 c -9,0 -12,3.7153 -12,9 l 0,20 24,0 c -0.04889,-7.3562 0,-18 0,-20 0,-5.2848 -3,-9 -12,-9 z"
+       id="path4471"
+       sodipodi:nodetypes="zsccsz" />
+    <path
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#76e1fe;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 27.074073,1025.0462 c -0.45957,0.2595 -1.818257,1.2013 -1.980979,1.648 -0.183401,0.5035 -0.159384,1.0657 0.114614,1.551 0.346627,0.6138 1.005335,0.9487 1.696415,0.9365 0.33988,-0.01 1.72029,-0.6372 2.04256,-0.8192 0.97754,-0.5519 1.35079,-1.7418 0.83369,-2.6576 -0.51711,-0.9158 -1.72876,-1.2107 -2.7063,-0.6587 z"
+       id="path4481"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="sssssss"
+       inkscape:transform-center-x="0.98424094"
+       inkscape:transform-center-y="-1.3604657" />
+    <circle
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="circle4477"
+       cx="21.175734"
+       cy="1030.3542"
+       r="4.6537542"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <circle
+       r="4.8316345"
+       cy="1030.3542"
+       cx="10.339486"
+       id="circle4483"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <rect
+       inkscape:export-ydpi="90"
+       inkscape:export-xdpi="90"
+       inkscape:export-filename=".\rect4485.png"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:0.32941176;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="rect4246"
+       width="3.6673687"
+       height="4.1063409"
+       x="14.115863"
+       y="1035.9174"
+       ry="0.72590536" />
+    <rect
+       ry="0.72590536"
+       y="1035.2253"
+       x="14.115863"
+       height="4.1063409"
+       width="3.6673687"
+       id="rect4485"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#fffcfb;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <path
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:0.32941176;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       d="m 19.999735,1036.5289 c 0,0.838 -0.871228,1.2682 -2.144766,1.1659 -0.02366,0 -0.04795,-0.6004 -0.254147,-0.5832 -0.503669,0.042 -1.095902,-0.02 -1.685964,-0.02 -0.612939,0 -1.206342,0.1826 -1.68549,0.017 -0.110233,-0.038 -0.178298,0.5838 -0.261532,0.5816 -1.243685,-0.033 -2.078803,-0.3383 -2.078803,-1.1618 0,-1.2118 1.815635,-2.1941 4.055351,-2.1941 2.239704,0 4.055351,0.9823 4.055351,2.1941 z"
+       id="path4487"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="sssssssss"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <path
+       sodipodi:nodetypes="sssssssss"
+       inkscape:connector-curvature="0"
+       id="path4489"
+       d="m 19.977414,1035.7004 c 0,0.5685 -0.433659,0.8554 -1.138091,1.0001 -0.291933,0.06 -0.630371,0.096 -1.003719,0.1166 -0.56405,0.032 -1.207782,0.031 -1.89122,0.031 -0.672834,0 -1.307182,0 -1.864904,-0.029 -0.306268,-0.017 -0.589429,-0.043 -0.843164,-0.084 -0.813833,-0.1318 -1.324962,-0.417 -1.324962,-1.0344 0,-1.1601 1.805642,-2.1006 4.03303,-2.1006 2.227377,0 4.03303,0.9405 4.03303,2.1006 z"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#c38c74;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <ellipse
+       cy="1033.8501"
+       cx="15.944382"
+       id="ellipse4491"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#23201f;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       rx="2.0801733"
+       ry="1.343747"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <circle
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#171311;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       id="circle4493"
+       cx="12.414201"
+       cy="1030.3542"
+       r="1.9630634"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <circle
+       r="1.9630634"
+       cy="1030.3542"
+       cx="23.110121"
+       id="circle4495"
+       style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#171311;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:10;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
+       inkscape:export-filename=".\rect4485.png"
+       inkscape:export-xdpi="90"
+       inkscape:export-ydpi="90" />
+    <path
+       sodipodi:nodetypes="cc"
+       inkscape:connector-curvature="0"
+       id="path4497"
+       d="m 5.0055377,1027.2727 c -1.170435,-1.0835 -2.026973,-0.7721 -2.044172,-0.7463"
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+    <path
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 4.3852457,1026.9152 c -1.158557,0.036 -1.346704,0.6303 -1.33881,0.6523"
+       id="path4499"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 26.630533,1027.1724 c 1.17043,-1.0835 2.02697,-0.7721 2.04417,-0.7463"
+       id="path4501"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cc" />
+    <path
+       sodipodi:nodetypes="cc"
+       inkscape:connector-curvature="0"
+       id="path4503"
+       d="m 27.321773,1026.673 c 1.15856,0.036 1.3467,0.6302 1.3388,0.6522"
+       style="display:inline;fill:none;fill-rule:evenodd;stroke:#384e54;stroke-width:0.39730874;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+  </g>
+</svg>
--- a/doc/install-source.html
+++ b/doc/install-source.html
@@ -203,7 +203,7 @@ To build without <code>cgo</code>, set the environment variable
 Change to the directory that will be its parent
 and make sure the <code>go</code> directory does not exist.
 Then clone the repository and check out the latest release tag
-(<code class="versionTag">go1.6</code>, for example):</p>
+(<code class="versionTag">go1.7.2</code>, for example):</p>

 <pre>
 $ git clone https://go.googlesource.com/go
@@ -391,7 +391,7 @@ New releases are announced on the
 <a href="//groups.google.com/group/golang-announce">golang-announce</a>
 mailing list.
 Each announcement mentions the latest release tag, for instance,
-<code class="versionTag">go1.6</code>.
+<code class="versionTag">go1.7.2</code>.
 </p>

 <p>
--- a/doc/install.html
+++ b/doc/install.html
@@ -17,7 +17,7 @@
 <p>
 <a href="https://golang.org/dl/" target="_blank">Official binary
 distributions</a> are available for the FreeBSD (release 8-STABLE and above),
-Linux, Mac OS X (10.7 and above), and Windows operating systems and
+Linux, Mac OS X (10.8 and above), and Windows operating systems and
 the 32-bit (<code>386</code>) and 64-bit (<code>amd64</code>) x86 processor
 architectures.
 </p>
@@ -49,7 +49,7 @@ If your OS or architecture is not on the list, you may be able to
 <tr><td colspan="3"><hr></td></tr>
 <tr><td>FreeBSD 8-STABLE or later</td> <td>amd64</td> <td>Debian GNU/kFreeBSD not supported</td></tr>
 <tr><td>Linux 2.6.23 or later with glibc</td> <td>amd64, 386, arm</td> <td>CentOS/RHEL 5.x not supported</td></tr>
-<tr><td>Mac OS X 10.7 or later</td> <td>amd64</td> <td>use the clang or gcc<sup>&#8224;</sup> that comes with Xcode<sup>&#8225;</sup></td></tr>
+<tr><td>Mac OS X 10.7 or later</td> <td>amd64</td> <td>use the clang or gcc<sup>&#8224;</sup> that comes with Xcode<sup>&#8225;</sup> for <code>cgo</code> support</td></tr>
 <tr><td>Windows XP or later</td> <td>amd64, 386</td> <td>use MinGW gcc<sup>&#8224;</sup>. No need for cygwin or msys.</td></tr>
 </table>

--- a/favicon.ico
+++ b/favicon.ico
--- a/src/cmd/asm/internal/asm/operand_test.go
+++ b/src/cmd/asm/internal/asm/operand_test.go
@@ -17,7 +17,6 @@ import (

 func setArch(goarch string) (*arch.Arch, *obj.Link) {
 	os.Setenv("GOOS", "linux") // obj can handle this OS for all architectures.
-	os.Setenv("GOARCH", goarch)
 	architecture := arch.Set(goarch)
 	if architecture == nil {
 		panic("asm: unrecognized architecture " + goarch)
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -239,87 +239,89 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		}
 		opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))

-	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
-		// Arg[0] (the dividend) is in AX.
-		// Arg[1] (the divisor) can be in any other register.
-		// Result[0] (the quotient) is in AX.
-		// Result[1] (the remainder) is in DX.
-		r := gc.SSARegNum(v.Args[1])
+	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
+		ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU,
+		ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW,
+		ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU:

-		// Zero extend dividend.
-		c := gc.Prog(x86.AXORL)
-		c.From.Type = obj.TYPE_REG
-		c.From.Reg = x86.REG_DX
-		c.To.Type = obj.TYPE_REG
-		c.To.Reg = x86.REG_DX
+		// Arg[0] is already in AX as it's the only register we allow
+		// and AX is the only output
+		x := gc.SSARegNum(v.Args[1])

-		// Issue divide.
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = r
+		// CPU faults upon signed overflow, which occurs when the most
+		// negative int is divided by -1.
+		var j *obj.Prog
+		if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
+			v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ ||
+			v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW {

-	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
-		// Arg[0] (the dividend) is in AX.
-		// Arg[1] (the divisor) can be in any other register.
-		// Result[0] (the quotient) is in AX.
-		// Result[1] (the remainder) is in DX.
-		r := gc.SSARegNum(v.Args[1])
+			var c *obj.Prog
+			switch v.Op {
+			case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ:
+				c = gc.Prog(x86.ACMPQ)
+				j = gc.Prog(x86.AJEQ)
+				// go ahead and sign extend to save doing it later
+				gc.Prog(x86.ACQO)

-		// CPU faults upon signed overflow, which occurs when the most
-		// negative int is divided by -1. Handle divide by -1 as a special case.
-		var c *obj.Prog
-		switch v.Op {
-		case ssa.OpAMD64DIVQ:
-			c = gc.Prog(x86.ACMPQ)
-		case ssa.OpAMD64DIVL:
-			c = gc.Prog(x86.ACMPL)
-		case ssa.OpAMD64DIVW:
-			c = gc.Prog(x86.ACMPW)
-		}
-		c.From.Type = obj.TYPE_REG
-		c.From.Reg = r
-		c.To.Type = obj.TYPE_CONST
-		c.To.Offset = -1
-		j1 := gc.Prog(x86.AJEQ)
-		j1.To.Type = obj.TYPE_BRANCH
+			case ssa.OpAMD64DIVL, ssa.OpAMD64MODL:
+				c = gc.Prog(x86.ACMPL)
+				j = gc.Prog(x86.AJEQ)
+				gc.Prog(x86.ACDQ)
+
+			case ssa.OpAMD64DIVW, ssa.OpAMD64MODW:
+				c = gc.Prog(x86.ACMPW)
+				j = gc.Prog(x86.AJEQ)
+				gc.Prog(x86.ACWD)
+			}
+			c.From.Type = obj.TYPE_REG
+			c.From.Reg = x
+			c.To.Type = obj.TYPE_CONST
+			c.To.Offset = -1
+
+			j.To.Type = obj.TYPE_BRANCH

-		// Sign extend dividend.
-		switch v.Op {
-		case ssa.OpAMD64DIVQ:
-			gc.Prog(x86.ACQO)
-		case ssa.OpAMD64DIVL:
-			gc.Prog(x86.ACDQ)
-		case ssa.OpAMD64DIVW:
-			gc.Prog(x86.ACWD)
 		}

-		// Issue divide.
+		// for unsigned ints, we zero extend by setting DX = 0
+		// signed ints were sign extended above
+		if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU ||
+			v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU ||
+			v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU {
+			c := gc.Prog(x86.AXORQ)
+			c.From.Type = obj.TYPE_REG
+			c.From.Reg = x86.REG_DX
+			c.To.Type = obj.TYPE_REG
+			c.To.Reg = x86.REG_DX
+		}
+
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
-		p.From.Reg = r
+		p.From.Reg = x

-		// Skip over -1 fixup code.
-		j2 := gc.Prog(obj.AJMP)
-		j2.To.Type = obj.TYPE_BRANCH
+		// signed division, rest of the check for -1 case
+		if j != nil {
+			j2 := gc.Prog(obj.AJMP)
+			j2.To.Type = obj.TYPE_BRANCH

-		// Issue -1 fixup code.
-		// n / -1 = -n
-		n1 := gc.Prog(x86.ANEGQ)
-		n1.To.Type = obj.TYPE_REG
-		n1.To.Reg = x86.REG_AX
+			var n *obj.Prog
+			if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
+				v.Op == ssa.OpAMD64DIVW {
+				// n / -1 = -n
+				n = gc.Prog(x86.ANEGQ)
+				n.To.Type = obj.TYPE_REG
+				n.To.Reg = x86.REG_AX
+			} else {
+				// n % -1 == 0
+				n = gc.Prog(x86.AXORQ)
+				n.From.Type = obj.TYPE_REG
+				n.From.Reg = x86.REG_DX
+				n.To.Type = obj.TYPE_REG
+				n.To.Reg = x86.REG_DX
+			}

-		// n % -1 == 0
-		n2 := gc.Prog(x86.AXORL)
-		n2.From.Type = obj.TYPE_REG
-		n2.From.Reg = x86.REG_DX
-		n2.To.Type = obj.TYPE_REG
-		n2.To.Reg = x86.REG_DX
-
-		// TODO(khr): issue only the -1 fixup code we need.
-		// For instance, if only the quotient is used, no point in zeroing the remainder.
-
-		j1.To.Val = n1
-		j2.To.Val = s.Pc()
+			j.To.Val = n
+			j2.To.Val = s.Pc()
+		}

 	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
 		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
@@ -498,8 +500,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		gc.AddAux(&p.From, v)
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
-		p := gc.Prog(v.Op.Asm())
+	case ssa.OpAMD64LEAQ:
+		p := gc.Prog(x86.ALEAQ)
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = gc.SSARegNum(v.Args[0])
 		gc.AddAux(&p.From, v)
@@ -703,7 +705,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
 		p.To.Offset = v.AuxInt

-	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
+	case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
 		if v.Type.IsMemory() {
 			return
 		}
@@ -752,14 +754,27 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 			p.To.Name = obj.NAME_AUTO
 		}
 	case ssa.OpPhi:
-		gc.CheckLoweredPhi(v)
+		// just check to make sure regalloc and stackalloc did it right
+		if v.Type.IsMemory() {
+			return
+		}
+		f := v.Block.Func
+		loc := f.RegAlloc[v.ID]
+		for _, a := range v.Args {
+			if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead?
+				v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
+			}
+		}
 	case ssa.OpInitMem:
 		// memory arg needs no code
 	case ssa.OpArg:
 		// input args need no code
 	case ssa.OpAMD64LoweredGetClosurePtr:
-		// Closure pointer is DX.
-		gc.CheckLoweredGetClosurePtr(v)
+		// Output is hardwired to DX only,
+		// and DX contains the closure pointer on
+		// closure entry, and this "instruction"
+		// is scheduled to the very beginning
+		// of the entry block.
 	case ssa.OpAMD64LoweredGetG:
 		r := gc.SSARegNum(v)
 		// See the comments in cmd/internal/obj/x86/obj6.go
@@ -856,8 +871,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Reg = gc.SSARegNum(v)
 	case ssa.OpSP, ssa.OpSB:
 		// nothing to do
-	case ssa.OpSelect0, ssa.OpSelect1:
-		// nothing to do
 	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
 		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
 		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
--- a/src/cmd/compile/internal/arm/prog.go
+++ b/src/cmd/compile/internal/arm/prog.go
@@ -79,8 +79,6 @@ var progtable = [arm.ALAST & obj.AMask]obj.ProgInfo{
 	arm.AMULF & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
 	arm.ASUBD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
 	arm.ASUBF & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
-	arm.ANEGD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
-	arm.ANEGF & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
 	arm.ASQRTD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},

 	// Conversions.
--- a/src/cmd/compile/internal/arm/ssa.go
+++ b/src/cmd/compile/internal/arm/ssa.go
--- a/src/cmd/compile/internal/arm64/galign.go
+++ b/src/cmd/compile/internal/arm64/galign.go
@@ -6,7 +6,6 @@ package arm64

 import (
 	"cmd/compile/internal/gc"
-	"cmd/compile/internal/ssa"
 	"cmd/internal/obj/arm64"
 )

@@ -62,11 +61,6 @@ func Main() {
 	gc.Thearch.Doregbits = doregbits
 	gc.Thearch.Regnames = regnames

-	gc.Thearch.SSARegToReg = ssaRegToReg
-	gc.Thearch.SSAMarkMoves = func(s *gc.SSAGenState, b *ssa.Block) {}
-	gc.Thearch.SSAGenValue = ssaGenValue
-	gc.Thearch.SSAGenBlock = ssaGenBlock
-
 	gc.Main()
 	gc.Exit(0)
 }
--- a/src/cmd/compile/internal/arm64/prog.go
+++ b/src/cmd/compile/internal/arm64/prog.go
@@ -44,37 +44,24 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
 	// Integer
 	arm64.AADD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ASUB & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ANEG & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite}, // why RegRead? revisit once the old backend gone
+	arm64.ANEG & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AAND & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AORR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AEOR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ABIC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AMVN & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite},
 	arm64.AMUL & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AMULW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ASMULL & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AUMULL & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ASMULH & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUMULH & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
+	arm64.ASMULH & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
+	arm64.AUMULH & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ASDIV & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AUDIV & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ASDIVW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUDIVW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AREM & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUREM & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AREMW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.AUREMW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ALSL & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ALSR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AASR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.ACMP & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead},
-	arm64.ACMPW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead},
 	arm64.AADC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
 	arm64.AROR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	arm64.ARORW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AADDS & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
-	arm64.ACSET & obj.AMask:  {Flags: gc.SizeQ | gc.RightWrite},
-	arm64.ACSEL & obj.AMask:  {Flags: gc.SizeQ | gc.RegRead | gc.RightWrite},

 	// Floating point.
 	arm64.AFADDD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -1,865 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package arm64
-
-import (
-	"math"
-
-	"cmd/compile/internal/gc"
-	"cmd/compile/internal/ssa"
-	"cmd/internal/obj"
-	"cmd/internal/obj/arm64"
-)
-
-var ssaRegToReg = []int16{
-	arm64.REG_R0,
-	arm64.REG_R1,
-	arm64.REG_R2,
-	arm64.REG_R3,
-	arm64.REG_R4,
-	arm64.REG_R5,
-	arm64.REG_R6,
-	arm64.REG_R7,
-	arm64.REG_R8,
-	arm64.REG_R9,
-	arm64.REG_R10,
-	arm64.REG_R11,
-	arm64.REG_R12,
-	arm64.REG_R13,
-	arm64.REG_R14,
-	arm64.REG_R15,
-	arm64.REG_R16,
-	arm64.REG_R17,
-	arm64.REG_R18, // platform register, not used
-	arm64.REG_R19,
-	arm64.REG_R20,
-	arm64.REG_R21,
-	arm64.REG_R22,
-	arm64.REG_R23,
-	arm64.REG_R24,
-	arm64.REG_R25,
-	arm64.REG_R26,
-	// R27 = REGTMP not used in regalloc
-	arm64.REGG,    // R28
-	arm64.REG_R29, // frame pointer, not used
-	// R30 = REGLINK not used in regalloc
-	arm64.REGSP, // R31
-
-	arm64.REG_F0,
-	arm64.REG_F1,
-	arm64.REG_F2,
-	arm64.REG_F3,
-	arm64.REG_F4,
-	arm64.REG_F5,
-	arm64.REG_F6,
-	arm64.REG_F7,
-	arm64.REG_F8,
-	arm64.REG_F9,
-	arm64.REG_F10,
-	arm64.REG_F11,
-	arm64.REG_F12,
-	arm64.REG_F13,
-	arm64.REG_F14,
-	arm64.REG_F15,
-	arm64.REG_F16,
-	arm64.REG_F17,
-	arm64.REG_F18,
-	arm64.REG_F19,
-	arm64.REG_F20,
-	arm64.REG_F21,
-	arm64.REG_F22,
-	arm64.REG_F23,
-	arm64.REG_F24,
-	arm64.REG_F25,
-	arm64.REG_F26,
-	arm64.REG_F27,
-	arm64.REG_F28,
-	arm64.REG_F29,
-	arm64.REG_F30,
-	arm64.REG_F31,
-
-	arm64.REG_NZCV, // flag
-	0,              // SB isn't a real register.  We fill an Addr.Reg field with 0 in this case.
-}
-
-// Smallest possible faulting page at address zero,
-// see ../../../../runtime/mheap.go:/minPhysPageSize
-const minZeroPage = 4096
-
-// loadByType returns the load instruction of the given type.
-func loadByType(t ssa.Type) obj.As {
-	if t.IsFloat() {
-		switch t.Size() {
-		case 4:
-			return arm64.AFMOVS
-		case 8:
-			return arm64.AFMOVD
-		}
-	} else {
-		switch t.Size() {
-		case 1:
-			if t.IsSigned() {
-				return arm64.AMOVB
-			} else {
-				return arm64.AMOVBU
-			}
-		case 2:
-			if t.IsSigned() {
-				return arm64.AMOVH
-			} else {
-				return arm64.AMOVHU
-			}
-		case 4:
-			if t.IsSigned() {
-				return arm64.AMOVW
-			} else {
-				return arm64.AMOVWU
-			}
-		case 8:
-			return arm64.AMOVD
-		}
-	}
-	panic("bad load type")
-}
-
-// storeByType returns the store instruction of the given type.
-func storeByType(t ssa.Type) obj.As {
-	if t.IsFloat() {
-		switch t.Size() {
-		case 4:
-			return arm64.AFMOVS
-		case 8:
-			return arm64.AFMOVD
-		}
-	} else {
-		switch t.Size() {
-		case 1:
-			return arm64.AMOVB
-		case 2:
-			return arm64.AMOVH
-		case 4:
-			return arm64.AMOVW
-		case 8:
-			return arm64.AMOVD
-		}
-	}
-	panic("bad store type")
-}
-
-// makeshift encodes a register shifted by a constant, used as an Offset in Prog
-func makeshift(reg int16, typ int64, s int64) int64 {
-	return int64(reg&31)<<16 | typ | (s&63)<<10
-}
-
-// genshift generates a Prog for r = r0 op (r1 shifted by s)
-func genshift(as obj.As, r0, r1, r int16, typ int64, s int64) *obj.Prog {
-	p := gc.Prog(as)
-	p.From.Type = obj.TYPE_SHIFT
-	p.From.Offset = makeshift(r1, typ, s)
-	p.Reg = r0
-	if r != 0 {
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = r
-	}
-	return p
-}
-
-func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
-	s.SetLineno(v.Line)
-	switch v.Op {
-	case ssa.OpInitMem:
-		// memory arg needs no code
-	case ssa.OpArg:
-		// input args need no code
-	case ssa.OpSP, ssa.OpSB, ssa.OpGetG:
-		// nothing to do
-	case ssa.OpCopy, ssa.OpARM64MOVDconvert, ssa.OpARM64MOVDreg:
-		if v.Type.IsMemory() {
-			return
-		}
-		x := gc.SSARegNum(v.Args[0])
-		y := gc.SSARegNum(v)
-		if x == y {
-			return
-		}
-		as := arm64.AMOVD
-		if v.Type.IsFloat() {
-			switch v.Type.Size() {
-			case 4:
-				as = arm64.AFMOVS
-			case 8:
-				as = arm64.AFMOVD
-			default:
-				panic("bad float size")
-			}
-		}
-		p := gc.Prog(as)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = y
-	case ssa.OpARM64MOVDnop:
-		if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) {
-			v.Fatalf("input[0] and output not in same register %s", v.LongString())
-		}
-		// nothing to do
-	case ssa.OpLoadReg:
-		if v.Type.IsFlags() {
-			v.Unimplementedf("load flags not implemented: %v", v.LongString())
-			return
-		}
-		p := gc.Prog(loadByType(v.Type))
-		n, off := gc.AutoVar(v.Args[0])
-		p.From.Type = obj.TYPE_MEM
-		p.From.Node = n
-		p.From.Sym = gc.Linksym(n.Sym)
-		p.From.Offset = off
-		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
-			p.From.Name = obj.NAME_PARAM
-			p.From.Offset += n.Xoffset
-		} else {
-			p.From.Name = obj.NAME_AUTO
-		}
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpPhi:
-		gc.CheckLoweredPhi(v)
-	case ssa.OpStoreReg:
-		if v.Type.IsFlags() {
-			v.Unimplementedf("store flags not implemented: %v", v.LongString())
-			return
-		}
-		p := gc.Prog(storeByType(v.Type))
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		n, off := gc.AutoVar(v)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Node = n
-		p.To.Sym = gc.Linksym(n.Sym)
-		p.To.Offset = off
-		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
-			p.To.Name = obj.NAME_PARAM
-			p.To.Offset += n.Xoffset
-		} else {
-			p.To.Name = obj.NAME_AUTO
-		}
-	case ssa.OpARM64ADD,
-		ssa.OpARM64SUB,
-		ssa.OpARM64AND,
-		ssa.OpARM64OR,
-		ssa.OpARM64XOR,
-		ssa.OpARM64BIC,
-		ssa.OpARM64MUL,
-		ssa.OpARM64MULW,
-		ssa.OpARM64MULH,
-		ssa.OpARM64UMULH,
-		ssa.OpARM64MULL,
-		ssa.OpARM64UMULL,
-		ssa.OpARM64DIV,
-		ssa.OpARM64UDIV,
-		ssa.OpARM64DIVW,
-		ssa.OpARM64UDIVW,
-		ssa.OpARM64MOD,
-		ssa.OpARM64UMOD,
-		ssa.OpARM64MODW,
-		ssa.OpARM64UMODW,
-		ssa.OpARM64SLL,
-		ssa.OpARM64SRL,
-		ssa.OpARM64SRA,
-		ssa.OpARM64FADDS,
-		ssa.OpARM64FADDD,
-		ssa.OpARM64FSUBS,
-		ssa.OpARM64FSUBD,
-		ssa.OpARM64FMULS,
-		ssa.OpARM64FMULD,
-		ssa.OpARM64FDIVS,
-		ssa.OpARM64FDIVD:
-		r := gc.SSARegNum(v)
-		r1 := gc.SSARegNum(v.Args[0])
-		r2 := gc.SSARegNum(v.Args[1])
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = r2
-		p.Reg = r1
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = r
-	case ssa.OpARM64ADDconst,
-		ssa.OpARM64SUBconst,
-		ssa.OpARM64ANDconst,
-		ssa.OpARM64ORconst,
-		ssa.OpARM64XORconst,
-		ssa.OpARM64BICconst,
-		ssa.OpARM64SLLconst,
-		ssa.OpARM64SRLconst,
-		ssa.OpARM64SRAconst,
-		ssa.OpARM64RORconst,
-		ssa.OpARM64RORWconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = v.AuxInt
-		p.Reg = gc.SSARegNum(v.Args[0])
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64ADDshiftLL,
-		ssa.OpARM64SUBshiftLL,
-		ssa.OpARM64ANDshiftLL,
-		ssa.OpARM64ORshiftLL,
-		ssa.OpARM64XORshiftLL,
-		ssa.OpARM64BICshiftLL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LL, v.AuxInt)
-	case ssa.OpARM64ADDshiftRL,
-		ssa.OpARM64SUBshiftRL,
-		ssa.OpARM64ANDshiftRL,
-		ssa.OpARM64ORshiftRL,
-		ssa.OpARM64XORshiftRL,
-		ssa.OpARM64BICshiftRL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LR, v.AuxInt)
-	case ssa.OpARM64ADDshiftRA,
-		ssa.OpARM64SUBshiftRA,
-		ssa.OpARM64ANDshiftRA,
-		ssa.OpARM64ORshiftRA,
-		ssa.OpARM64XORshiftRA,
-		ssa.OpARM64BICshiftRA:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_AR, v.AuxInt)
-	case ssa.OpARM64MOVDconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = v.AuxInt
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64FMOVSconst,
-		ssa.OpARM64FMOVDconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_FCONST
-		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64CMP,
-		ssa.OpARM64CMPW,
-		ssa.OpARM64CMN,
-		ssa.OpARM64CMNW,
-		ssa.OpARM64FCMPS,
-		ssa.OpARM64FCMPD:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[1])
-		p.Reg = gc.SSARegNum(v.Args[0])
-	case ssa.OpARM64CMPconst,
-		ssa.OpARM64CMPWconst,
-		ssa.OpARM64CMNconst,
-		ssa.OpARM64CMNWconst:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = v.AuxInt
-		p.Reg = gc.SSARegNum(v.Args[0])
-	case ssa.OpARM64CMPshiftLL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LL, v.AuxInt)
-	case ssa.OpARM64CMPshiftRL:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LR, v.AuxInt)
-	case ssa.OpARM64CMPshiftRA:
-		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_AR, v.AuxInt)
-	case ssa.OpARM64MOVDaddr:
-		p := gc.Prog(arm64.AMOVD)
-		p.From.Type = obj.TYPE_ADDR
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-
-		var wantreg string
-		// MOVD $sym+off(base), R
-		// the assembler expands it as the following:
-		// - base is SP: add constant offset to SP (R13)
-		//               when constant is large, tmp register (R11) may be used
-		// - base is SB: load external address from constant pool (use relocation)
-		switch v.Aux.(type) {
-		default:
-			v.Fatalf("aux is of unknown type %T", v.Aux)
-		case *ssa.ExternSymbol:
-			wantreg = "SB"
-			gc.AddAux(&p.From, v)
-		case *ssa.ArgSymbol, *ssa.AutoSymbol:
-			wantreg = "SP"
-			gc.AddAux(&p.From, v)
-		case nil:
-			// No sym, just MOVD $off(SP), R
-			wantreg = "SP"
-			p.From.Reg = arm64.REGSP
-			p.From.Offset = v.AuxInt
-		}
-		if reg := gc.SSAReg(v.Args[0]); reg.Name() != wantreg {
-			v.Fatalf("bad reg %s for symbol type %T, want %s", reg.Name(), v.Aux, wantreg)
-		}
-	case ssa.OpARM64MOVBload,
-		ssa.OpARM64MOVBUload,
-		ssa.OpARM64MOVHload,
-		ssa.OpARM64MOVHUload,
-		ssa.OpARM64MOVWload,
-		ssa.OpARM64MOVWUload,
-		ssa.OpARM64MOVDload,
-		ssa.OpARM64FMOVSload,
-		ssa.OpARM64FMOVDload:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.From, v)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64MOVBstore,
-		ssa.OpARM64MOVHstore,
-		ssa.OpARM64MOVWstore,
-		ssa.OpARM64MOVDstore,
-		ssa.OpARM64FMOVSstore,
-		ssa.OpARM64FMOVDstore:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[1])
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.To, v)
-	case ssa.OpARM64MOVBstorezero,
-		ssa.OpARM64MOVHstorezero,
-		ssa.OpARM64MOVWstorezero,
-		ssa.OpARM64MOVDstorezero:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = arm64.REGZERO
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.To, v)
-	case ssa.OpARM64MOVBreg,
-		ssa.OpARM64MOVBUreg,
-		ssa.OpARM64MOVHreg,
-		ssa.OpARM64MOVHUreg,
-		ssa.OpARM64MOVWreg,
-		ssa.OpARM64MOVWUreg:
-		a := v.Args[0]
-		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
-			a = a.Args[0]
-		}
-		if a.Op == ssa.OpLoadReg {
-			t := a.Type
-			switch {
-			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
-				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
-				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
-				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
-				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
-				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
-				// arg is a proper-typed load, already zero/sign-extended, don't extend again
-				if gc.SSARegNum(v) == gc.SSARegNum(v.Args[0]) {
-					return
-				}
-				p := gc.Prog(arm64.AMOVD)
-				p.From.Type = obj.TYPE_REG
-				p.From.Reg = gc.SSARegNum(v.Args[0])
-				p.To.Type = obj.TYPE_REG
-				p.To.Reg = gc.SSARegNum(v)
-				return
-			default:
-			}
-		}
-		fallthrough
-	case ssa.OpARM64MVN,
-		ssa.OpARM64NEG,
-		ssa.OpARM64FNEGS,
-		ssa.OpARM64FNEGD,
-		ssa.OpARM64FSQRTD,
-		ssa.OpARM64FCVTZSSW,
-		ssa.OpARM64FCVTZSDW,
-		ssa.OpARM64FCVTZUSW,
-		ssa.OpARM64FCVTZUDW,
-		ssa.OpARM64FCVTZSS,
-		ssa.OpARM64FCVTZSD,
-		ssa.OpARM64FCVTZUS,
-		ssa.OpARM64FCVTZUD,
-		ssa.OpARM64SCVTFWS,
-		ssa.OpARM64SCVTFWD,
-		ssa.OpARM64SCVTFS,
-		ssa.OpARM64SCVTFD,
-		ssa.OpARM64UCVTFWS,
-		ssa.OpARM64UCVTFWD,
-		ssa.OpARM64UCVTFS,
-		ssa.OpARM64UCVTFD,
-		ssa.OpARM64FCVTSD,
-		ssa.OpARM64FCVTDS:
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64CSELULT,
-		ssa.OpARM64CSELULT0:
-		r1 := int16(arm64.REGZERO)
-		if v.Op == ssa.OpARM64CSELULT {
-			r1 = gc.SSARegNum(v.Args[1])
-		}
-		p := gc.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
-		p.From.Reg = arm64.COND_LO
-		p.Reg = gc.SSARegNum(v.Args[0])
-		p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1}
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64DUFFZERO:
-		// runtime.duffzero expects start address - 8 in R16
-		p := gc.Prog(arm64.ASUB)
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = 8
-		p.Reg = gc.SSARegNum(v.Args[0])
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = arm64.REG_R16
-		p = gc.Prog(obj.ADUFFZERO)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
-		p.To.Offset = v.AuxInt
-	case ssa.OpARM64LoweredZero:
-		// MOVD.P	ZR, 8(R16)
-		// CMP	Rarg1, R16
-		// BLE	-2(PC)
-		// arg1 is the address of the last element to zero
-		// auxint is alignment
-		var sz int64
-		var mov obj.As
-		switch {
-		case v.AuxInt%8 == 0:
-			sz = 8
-			mov = arm64.AMOVD
-		case v.AuxInt%4 == 0:
-			sz = 4
-			mov = arm64.AMOVW
-		case v.AuxInt%2 == 0:
-			sz = 2
-			mov = arm64.AMOVH
-		default:
-			sz = 1
-			mov = arm64.AMOVB
-		}
-		p := gc.Prog(mov)
-		p.Scond = arm64.C_XPOST
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = arm64.REGZERO
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = arm64.REG_R16
-		p.To.Offset = sz
-		p2 := gc.Prog(arm64.ACMP)
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = gc.SSARegNum(v.Args[1])
-		p2.Reg = arm64.REG_R16
-		p3 := gc.Prog(arm64.ABLE)
-		p3.To.Type = obj.TYPE_BRANCH
-		gc.Patch(p3, p)
-	case ssa.OpARM64LoweredMove:
-		// MOVD.P	8(R16), Rtmp
-		// MOVD.P	Rtmp, 8(R17)
-		// CMP	Rarg2, R16
-		// BLE	-3(PC)
-		// arg2 is the address of the last element of src
-		// auxint is alignment
-		var sz int64
-		var mov obj.As
-		switch {
-		case v.AuxInt%8 == 0:
-			sz = 8
-			mov = arm64.AMOVD
-		case v.AuxInt%4 == 0:
-			sz = 4
-			mov = arm64.AMOVW
-		case v.AuxInt%2 == 0:
-			sz = 2
-			mov = arm64.AMOVH
-		default:
-			sz = 1
-			mov = arm64.AMOVB
-		}
-		p := gc.Prog(mov)
-		p.Scond = arm64.C_XPOST
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = arm64.REG_R16
-		p.From.Offset = sz
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = arm64.REGTMP
-		p2 := gc.Prog(mov)
-		p2.Scond = arm64.C_XPOST
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = arm64.REGTMP
-		p2.To.Type = obj.TYPE_MEM
-		p2.To.Reg = arm64.REG_R17
-		p2.To.Offset = sz
-		p3 := gc.Prog(arm64.ACMP)
-		p3.From.Type = obj.TYPE_REG
-		p3.From.Reg = gc.SSARegNum(v.Args[2])
-		p3.Reg = arm64.REG_R16
-		p4 := gc.Prog(arm64.ABLE)
-		p4.To.Type = obj.TYPE_BRANCH
-		gc.Patch(p4, p)
-	case ssa.OpARM64CALLstatic:
-		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
-			// Deferred calls will appear to be returning to
-			// the CALL deferreturn(SB) that we are about to emit.
-			// However, the stack trace code will show the line
-			// of the instruction byte before the return PC.
-			// To avoid that being an unrelated instruction,
-			// insert an actual hardware NOP that will have the right line number.
-			// This is different from obj.ANOP, which is a virtual no-op
-			// that doesn't make it into the instruction stream.
-			ginsnop()
-		}
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLclosure:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Offset = 0
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLdefer:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLgo:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64CALLinter:
-		p := gc.Prog(obj.ACALL)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Offset = 0
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		if gc.Maxarg < v.AuxInt {
-			gc.Maxarg = v.AuxInt
-		}
-	case ssa.OpARM64LoweredNilCheck:
-		// Optimization - if the subsequent block has a load or store
-		// at the same address, we don't need to issue this instruction.
-		mem := v.Args[1]
-		for _, w := range v.Block.Succs[0].Block().Values {
-			if w.Op == ssa.OpPhi {
-				if w.Type.IsMemory() {
-					mem = w
-				}
-				continue
-			}
-			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
-				// w doesn't use a store - can't be a memory op.
-				continue
-			}
-			if w.Args[len(w.Args)-1] != mem {
-				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
-			}
-			switch w.Op {
-			case ssa.OpARM64MOVBload, ssa.OpARM64MOVBUload, ssa.OpARM64MOVHload, ssa.OpARM64MOVHUload,
-				ssa.OpARM64MOVWload, ssa.OpARM64MOVWUload, ssa.OpARM64MOVDload,
-				ssa.OpARM64FMOVSload, ssa.OpARM64FMOVDload,
-				ssa.OpARM64MOVBstore, ssa.OpARM64MOVHstore, ssa.OpARM64MOVWstore, ssa.OpARM64MOVDstore,
-				ssa.OpARM64FMOVSstore, ssa.OpARM64FMOVDstore:
-				// arg0 is ptr, auxint is offset
-				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
-					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
-						gc.Warnl(v.Line, "removed nil check")
-					}
-					return
-				}
-			case ssa.OpARM64DUFFZERO, ssa.OpARM64LoweredZero:
-				// arg0 is ptr
-				if w.Args[0] == v.Args[0] {
-					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
-						gc.Warnl(v.Line, "removed nil check")
-					}
-					return
-				}
-			case ssa.OpARM64LoweredMove:
-				// arg0 is dst ptr, arg1 is src ptr
-				if w.Args[0] == v.Args[0] || w.Args[1] == v.Args[0] {
-					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
-						gc.Warnl(v.Line, "removed nil check")
-					}
-					return
-				}
-			default:
-			}
-			if w.Type.IsMemory() {
-				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
-					// these ops are OK
-					mem = w
-					continue
-				}
-				// We can't delay the nil check past the next store.
-				break
-			}
-		}
-		// Issue a load which will fault if arg is nil.
-		p := gc.Prog(arm64.AMOVB)
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.From, v)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = arm64.REGTMP
-		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
-			gc.Warnl(v.Line, "generated nil check")
-		}
-	case ssa.OpVarDef:
-		gc.Gvardef(v.Aux.(*gc.Node))
-	case ssa.OpVarKill:
-		gc.Gvarkill(v.Aux.(*gc.Node))
-	case ssa.OpVarLive:
-		gc.Gvarlive(v.Aux.(*gc.Node))
-	case ssa.OpKeepAlive:
-		if !v.Args[0].Type.IsPtrShaped() {
-			v.Fatalf("keeping non-pointer alive %v", v.Args[0])
-		}
-		n, off := gc.AutoVar(v.Args[0])
-		if n == nil {
-			v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0])
-		}
-		if off != 0 {
-			v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off)
-		}
-		gc.Gvarlive(n)
-	case ssa.OpARM64Equal,
-		ssa.OpARM64NotEqual,
-		ssa.OpARM64LessThan,
-		ssa.OpARM64LessEqual,
-		ssa.OpARM64GreaterThan,
-		ssa.OpARM64GreaterEqual,
-		ssa.OpARM64LessThanU,
-		ssa.OpARM64LessEqualU,
-		ssa.OpARM64GreaterThanU,
-		ssa.OpARM64GreaterEqualU:
-		// generate boolean values using CSET
-		p := gc.Prog(arm64.ACSET)
-		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
-		p.From.Reg = condBits[v.Op]
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpSelect0, ssa.OpSelect1:
-		// nothing to do
-	case ssa.OpARM64LoweredGetClosurePtr:
-		// Closure pointer is R26 (arm64.REGCTXT).
-		gc.CheckLoweredGetClosurePtr(v)
-	case ssa.OpARM64FlagEQ,
-		ssa.OpARM64FlagLT_ULT,
-		ssa.OpARM64FlagLT_UGT,
-		ssa.OpARM64FlagGT_ULT,
-		ssa.OpARM64FlagGT_UGT:
-		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
-	case ssa.OpARM64InvertFlags:
-		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
-	default:
-		v.Unimplementedf("genValue not implemented: %s", v.LongString())
-	}
-}
-
-var condBits = map[ssa.Op]int16{
-	ssa.OpARM64Equal:         arm64.COND_EQ,
-	ssa.OpARM64NotEqual:      arm64.COND_NE,
-	ssa.OpARM64LessThan:      arm64.COND_LT,
-	ssa.OpARM64LessThanU:     arm64.COND_LO,
-	ssa.OpARM64LessEqual:     arm64.COND_LE,
-	ssa.OpARM64LessEqualU:    arm64.COND_LS,
-	ssa.OpARM64GreaterThan:   arm64.COND_GT,
-	ssa.OpARM64GreaterThanU:  arm64.COND_HI,
-	ssa.OpARM64GreaterEqual:  arm64.COND_GE,
-	ssa.OpARM64GreaterEqualU: arm64.COND_HS,
-}
-
-var blockJump = map[ssa.BlockKind]struct {
-	asm, invasm obj.As
-}{
-	ssa.BlockARM64EQ:  {arm64.ABEQ, arm64.ABNE},
-	ssa.BlockARM64NE:  {arm64.ABNE, arm64.ABEQ},
-	ssa.BlockARM64LT:  {arm64.ABLT, arm64.ABGE},
-	ssa.BlockARM64GE:  {arm64.ABGE, arm64.ABLT},
-	ssa.BlockARM64LE:  {arm64.ABLE, arm64.ABGT},
-	ssa.BlockARM64GT:  {arm64.ABGT, arm64.ABLE},
-	ssa.BlockARM64ULT: {arm64.ABLO, arm64.ABHS},
-	ssa.BlockARM64UGE: {arm64.ABHS, arm64.ABLO},
-	ssa.BlockARM64UGT: {arm64.ABHI, arm64.ABLS},
-	ssa.BlockARM64ULE: {arm64.ABLS, arm64.ABHI},
-}
-
-func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
-	s.SetLineno(b.Line)
-
-	switch b.Kind {
-	case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
-		if b.Succs[0].Block() != next {
-			p := gc.Prog(obj.AJMP)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-		}
-
-	case ssa.BlockDefer:
-		// defer returns in R0:
-		// 0 if we should continue executing
-		// 1 if we should jump to deferreturn call
-		p := gc.Prog(arm64.ACMP)
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = 0
-		p.Reg = arm64.REG_R0
-		p = gc.Prog(arm64.ABNE)
-		p.To.Type = obj.TYPE_BRANCH
-		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
-		if b.Succs[0].Block() != next {
-			p := gc.Prog(obj.AJMP)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-		}
-
-	case ssa.BlockExit:
-		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
-
-	case ssa.BlockRet:
-		gc.Prog(obj.ARET)
-
-	case ssa.BlockRetJmp:
-		p := gc.Prog(obj.ARET)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))
-
-	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
-		ssa.BlockARM64LT, ssa.BlockARM64GE,
-		ssa.BlockARM64LE, ssa.BlockARM64GT,
-		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
-		ssa.BlockARM64ULE, ssa.BlockARM64UGE:
-		jmp := blockJump[b.Kind]
-		var p *obj.Prog
-		switch next {
-		case b.Succs[0].Block():
-			p = gc.Prog(jmp.invasm)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
-		case b.Succs[1].Block():
-			p = gc.Prog(jmp.asm)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-		default:
-			p = gc.Prog(jmp.asm)
-			p.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
-			q := gc.Prog(obj.AJMP)
-			q.To.Type = obj.TYPE_BRANCH
-			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
-		}
-
-	default:
-		b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
-	}
-}
--- a/src/cmd/compile/internal/gc/builtin.go
+++ b/src/cmd/compile/internal/gc/builtin.go
@@ -95,15 +95,14 @@ const runtimeimport = "" +
 	"4div\x00\x03\n\x00\n\x00\x01\n\x00\t\x11uint64div\x00\x03\x14\x00\x14\x00\x01\x14\x00\t\x0fint64" +
 	"mod\x00\x03\n\x00\n\x00\x01\n\x00\t\x11uint64mod\x00\x03\x14\x00\x14\x00\x01\x14\x00\t\x1bfloat6" +
 	"4toint64\x00\x01\x1a\x00\x01\n\x00\t\x1dfloat64touint64\x00\x01\x1a\x00\x01\x14\x00\t" +
-	"\x1dfloat64touint32\x00\x01\x1a\x00\x01\x12\x00\t\x1bint64tofloat64\x00" +
-	"\x01\n\x00\x01\x1a\x00\t\x1duint64tofloat64\x00\x01\x14\x00\x01\x1a\x00\t\x1duint32to" +
-	"float64\x00\x01\x12\x00\x01\x1a\x00\t\x19complex128div\x00\x04\x1e\vnum·2\x00" +
-	"\x00\x1e\vden·3\x00\x00\x02\x1e\vquo·1\x00\x00\t\x19racefuncenter\x00\x01\x16" +
-	"d\x00\t\x17racefuncexit\x00\x00\x00\t\x0fraceread\x00\x01\x16d\x00\t\x11race" +
-	"write\x00\x01\x16d\x00\t\x19racereadrange\x00\x04\x16\raddr·1\x00d\x16\r" +
-	"size·2\x00d\x00\t\x1bracewriterange\x00\x04\x16\x98\x03\x00d\x16\x9a\x03\x00d\x00\t" +
-	"\x0fmsanread\x00\x04\x16\x98\x03\x00d\x16\x9a\x03\x00d\x00\t\x11msanwrite\x00\x04\x16\x98\x03\x00d" +
-	"\x16\x9a\x03\x00d\x00\v\xf8\x01\x02\v\x00\x01\x00\n$$\n"
+	"\x1bint64tofloat64\x00\x01\n\x00\x01\x1a\x00\t\x1duint64tofloat64\x00" +
+	"\x01\x14\x00\x01\x1a\x00\t\x19complex128div\x00\x04\x1e\vnum·2\x00\x00\x1e\vden·" +
+	"3\x00\x00\x02\x1e\vquo·1\x00\x00\t\x19racefuncenter\x00\x01\x16d\x00\t\x17race" +
+	"funcexit\x00\x00\x00\t\x0fraceread\x00\x01\x16d\x00\t\x11racewrite\x00\x01\x16" +
+	"d\x00\t\x19racereadrange\x00\x04\x16\raddr·1\x00d\x16\rsize·2\x00" +
+	"d\x00\t\x1bracewriterange\x00\x04\x16\x94\x03\x00d\x16\x96\x03\x00d\x00\t\x0fmsanrea" +
+	"d\x00\x04\x16\x94\x03\x00d\x16\x96\x03\x00d\x00\t\x11msanwrite\x00\x04\x16\x94\x03\x00d\x16\x96\x03\x00d\x00\v\xf4" +
+	"\x01\x02\v\x00\x01\x00\n$$\n"

 const unsafeimport = "" +
 	"cn\x00\x03v1\x01\vunsafe\x00\x05\r\rPointer\x00\x16\x00\t\x0fOffsetof\x00\x01" +
--- a/src/cmd/compile/internal/gc/builtin/runtime.go
+++ b/src/cmd/compile/internal/gc/builtin/runtime.go
@@ -150,10 +150,8 @@ func int64mod(int64, int64) int64
 func uint64mod(uint64, uint64) uint64
 func float64toint64(float64) int64
 func float64touint64(float64) uint64
-func float64touint32(float64) uint32
 func int64tofloat64(int64) float64
 func uint64tofloat64(uint64) float64
-func uint32tofloat64(uint32) float64

 func complex128div(num complex128, den complex128) (quo complex128)

--- a/src/cmd/compile/internal/gc/esc.go
+++ b/src/cmd/compile/internal/gc/esc.go
@@ -472,9 +472,29 @@ func escAnalyze(all []*Node, recursive bool) {

 	// visit the upstream of each dst, mark address nodes with
 	// addrescapes, mark parameters unsafe
+	escapes := make([]uint16, len(e.dsts))
+	for i, n := range e.dsts {
+		escapes[i] = n.Esc
+	}
 	for _, n := range e.dsts {
 		escflood(e, n)
 	}
+	for {
+		done := true
+		for i, n := range e.dsts {
+			if n.Esc != escapes[i] {
+				done = false
+				if Debug['m'] > 2 {
+					Warnl(n.Lineno, "Reflooding %v %S", e.curfnSym(n), n)
+				}
+				escapes[i] = n.Esc
+				escflood(e, n)
+			}
+		}
+		if done {
+			break
+		}
+	}

 	// for all top level functions, tag the typenodes corresponding to the param nodes
 	for _, n := range all {
@@ -1796,6 +1816,7 @@ func escwalkBody(e *EscState, level Level, dst *Node, src *Node, step *EscStep,
 	}

 	leaks = level.int() <= 0 && level.guaranteedDereference() <= 0 && dstE.Escloopdepth < modSrcLoopdepth
+	leaks = leaks || level.int() <= 0 && dst.Esc&EscMask == EscHeap

 	osrcesc = src.Esc
 	switch src.Op {
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -26,9 +26,6 @@ func initssa() *ssa.Config {
 	ssaExp.mustImplement = true
 	if ssaConfig == nil {
 		ssaConfig = ssa.NewConfig(Thearch.LinkArch.Name, &ssaExp, Ctxt, Debug['N'] == 0)
-		if Thearch.LinkArch.Name == "386" {
-			ssaConfig.Set387(Thearch.Use387)
-		}
 	}
 	return ssaConfig
 }
@@ -40,8 +37,8 @@ func shouldssa(fn *Node) bool {
 		if os.Getenv("SSATEST") == "" {
 			return false
 		}
-	case "amd64", "amd64p32", "arm", "386", "arm64":
 		// Generally available.
+	case "amd64":
 	}
 	if !ssaEnabled {
 		return false
@@ -1149,7 +1146,6 @@ var opToSSA = map[opAndType]ssa.Op{
 	opAndType{OEQ, TFUNC}:      ssa.OpEqPtr,
 	opAndType{OEQ, TMAP}:       ssa.OpEqPtr,
 	opAndType{OEQ, TCHAN}:      ssa.OpEqPtr,
-	opAndType{OEQ, TPTR32}:     ssa.OpEqPtr,
 	opAndType{OEQ, TPTR64}:     ssa.OpEqPtr,
 	opAndType{OEQ, TUINTPTR}:   ssa.OpEqPtr,
 	opAndType{OEQ, TUNSAFEPTR}: ssa.OpEqPtr,
@@ -1170,7 +1166,6 @@ var opToSSA = map[opAndType]ssa.Op{
 	opAndType{ONE, TFUNC}:      ssa.OpNeqPtr,
 	opAndType{ONE, TMAP}:       ssa.OpNeqPtr,
 	opAndType{ONE, TCHAN}:      ssa.OpNeqPtr,
-	opAndType{ONE, TPTR32}:     ssa.OpNeqPtr,
 	opAndType{ONE, TPTR64}:     ssa.OpNeqPtr,
 	opAndType{ONE, TUINTPTR}:   ssa.OpNeqPtr,
 	opAndType{ONE, TUNSAFEPTR}: ssa.OpNeqPtr,
@@ -1335,15 +1330,6 @@ var fpConvOpToSSA = map[twoTypes]twoOpsAndType{
 	twoTypes{TFLOAT32, TFLOAT64}: twoOpsAndType{ssa.OpCvt32Fto64F, ssa.OpCopy, TFLOAT64},
 }

-// this map is used only for 32-bit arch, and only includes the difference
-// on 32-bit arch, don't use int64<->float conversion for uint32
-var fpConvOpToSSA32 = map[twoTypes]twoOpsAndType{
-	twoTypes{TUINT32, TFLOAT32}: twoOpsAndType{ssa.OpCopy, ssa.OpCvt32Uto32F, TUINT32},
-	twoTypes{TUINT32, TFLOAT64}: twoOpsAndType{ssa.OpCopy, ssa.OpCvt32Uto64F, TUINT32},
-	twoTypes{TFLOAT32, TUINT32}: twoOpsAndType{ssa.OpCvt32Fto32U, ssa.OpCopy, TUINT32},
-	twoTypes{TFLOAT64, TUINT32}: twoOpsAndType{ssa.OpCvt64Fto32U, ssa.OpCopy, TUINT32},
-}
-
 var shiftOpToSSA = map[opAndTwoTypes]ssa.Op{
 	opAndTwoTypes{OLSH, TINT8, TUINT8}:   ssa.OpLsh8x8,
 	opAndTwoTypes{OLSH, TUINT8, TUINT8}:  ssa.OpLsh8x8,
@@ -1660,11 +1646,6 @@ func (s *state) expr(n *Node) *ssa.Value {

 		if ft.IsFloat() || tt.IsFloat() {
 			conv, ok := fpConvOpToSSA[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]
-			if s.config.IntSize == 4 && Thearch.LinkArch.Name != "amd64p32" {
-				if conv1, ok1 := fpConvOpToSSA32[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]; ok1 {
-					conv = conv1
-				}
-			}
 			if !ok {
 				s.Fatalf("weird float conversion %s -> %s", ft, tt)
 			}
@@ -1973,7 +1954,7 @@ func (s *state) expr(n *Node) *ssa.Value {
 		case n.Left.Type.IsString():
 			a := s.expr(n.Left)
 			i := s.expr(n.Right)
-			i = s.extendIndex(i, Panicindex)
+			i = s.extendIndex(i)
 			if !n.Bounded {
 				len := s.newValue1(ssa.OpStringLen, Types[TINT], a)
 				s.boundsCheck(i, len)
@@ -2062,13 +2043,13 @@ func (s *state) expr(n *Node) *ssa.Value {
 		var i, j, k *ssa.Value
 		low, high, max := n.SliceBounds()
 		if low != nil {
-			i = s.extendIndex(s.expr(low), panicslice)
+			i = s.extendIndex(s.expr(low))
 		}
 		if high != nil {
-			j = s.extendIndex(s.expr(high), panicslice)
+			j = s.extendIndex(s.expr(high))
 		}
 		if max != nil {
-			k = s.extendIndex(s.expr(max), panicslice)
+			k = s.extendIndex(s.expr(max))
 		}
 		p, l, c := s.slice(n.Left.Type, v, i, j, k)
 		return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c)
@@ -2078,10 +2059,10 @@ func (s *state) expr(n *Node) *ssa.Value {
 		var i, j *ssa.Value
 		low, high, _ := n.SliceBounds()
 		if low != nil {
-			i = s.extendIndex(s.expr(low), panicslice)
+			i = s.extendIndex(s.expr(low))
 		}
 		if high != nil {
-			j = s.extendIndex(s.expr(high), panicslice)
+			j = s.extendIndex(s.expr(high))
 		}
 		p, l, _ := s.slice(n.Left.Type, v, i, j, nil)
 		return s.newValue2(ssa.OpStringMake, n.Type, p, l)
@@ -2265,7 +2246,7 @@ func (s *state) append(n *Node, inplace bool) *ssa.Value {
 			if haspointers(et) {
 				s.insertWBmove(et, addr, arg.v, n.Lineno, arg.isVolatile)
 			} else {
-				s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(et), addr, arg.v, s.mem())
+				s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, et.Size(), addr, arg.v, s.mem())
 			}
 		}
 	}
@@ -2398,14 +2379,14 @@ func (s *state) assign(left *Node, right *ssa.Value, wb, deref bool, line int32,
 	if deref {
 		// Treat as a mem->mem move.
 		if right == nil {
-			s.vars[&memVar] = s.newValue2I(ssa.OpZero, ssa.TypeMem, SizeAlignAuxInt(t), addr, s.mem())
+			s.vars[&memVar] = s.newValue2I(ssa.OpZero, ssa.TypeMem, t.Size(), addr, s.mem())
 			return
 		}
 		if wb {
 			s.insertWBmove(t, addr, right, line, rightIsVolatile)
 			return
 		}
-		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(t), addr, right, s.mem())
+		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, t.Size(), addr, right, s.mem())
 		return
 	}
 	// Treat as a store.
@@ -2604,7 +2585,7 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
 			s.nilCheck(itab)
 		}
 		itabidx := fn.Xoffset + 3*int64(Widthptr) + 8 // offset of fun field in runtime.itab
-		itab = s.newValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), itabidx, itab)
+		itab = s.newValue1I(ssa.OpOffPtr, Types[TUINTPTR], itabidx, itab)
 		if k == callNormal {
 			codeptr = s.newValue2(ssa.OpLoad, Types[TUINTPTR], itab, s.mem())
 		} else {
@@ -2627,18 +2608,16 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
 		if k != callNormal {
 			argStart += int64(2 * Widthptr)
 		}
-		addr := s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), argStart, s.sp)
+		addr := s.entryNewValue1I(ssa.OpOffPtr, Types[TUINTPTR], argStart, s.sp)
 		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, int64(Widthptr), addr, rcvr, s.mem())
 	}

 	// Defer/go args
 	if k != callNormal {
 		// Write argsize and closure (args to Newproc/Deferproc).
-		argStart := Ctxt.FixedFrameSize()
 		argsize := s.constInt32(Types[TUINT32], int32(stksize))
-		addr := s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINT32]), argStart, s.sp)
-		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, 4, addr, argsize, s.mem())
-		addr = s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), argStart+int64(Widthptr), s.sp)
+		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, 4, s.sp, argsize, s.mem())
+		addr := s.entryNewValue1I(ssa.OpOffPtr, Ptrto(Types[TUINTPTR]), int64(Widthptr), s.sp)
 		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, int64(Widthptr), addr, closure, s.mem())
 		stksize += 2 * int64(Widthptr)
 	}
@@ -2783,7 +2762,7 @@ func (s *state) addr(n *Node, bounded bool) (*ssa.Value, bool) {
 		if n.Left.Type.IsSlice() {
 			a := s.expr(n.Left)
 			i := s.expr(n.Right)
-			i = s.extendIndex(i, Panicindex)
+			i = s.extendIndex(i)
 			len := s.newValue1(ssa.OpSliceLen, Types[TINT], a)
 			if !n.Bounded {
 				s.boundsCheck(i, len)
@@ -2793,7 +2772,7 @@ func (s *state) addr(n *Node, bounded bool) (*ssa.Value, bool) {
 		} else { // array
 			a, isVolatile := s.addr(n.Left, bounded)
 			i := s.expr(n.Right)
-			i = s.extendIndex(i, Panicindex)
+			i = s.extendIndex(i)
 			len := s.constInt(Types[TINT], n.Left.Type.NumElem())
 			if !n.Bounded {
 				s.boundsCheck(i, len)
@@ -2934,11 +2913,12 @@ func (s *state) nilCheck(ptr *ssa.Value) {

 // boundsCheck generates bounds checking code. Checks if 0 <= idx < len, branches to exit if not.
 // Starts a new block on return.
-// idx is already converted to full int width.
 func (s *state) boundsCheck(idx, len *ssa.Value) {
 	if Debug['B'] != 0 {
 		return
 	}
+	// TODO: convert index to full width?
+	// TODO: if index is 64-bit and we're compiling to 32-bit, check that high 32 bits are zero.

 	// bounds check
 	cmp := s.newValue2(ssa.OpIsInBounds, Types[TBOOL], idx, len)
@@ -2947,18 +2927,19 @@ func (s *state) boundsCheck(idx, len *ssa.Value) {

 // sliceBoundsCheck generates slice bounds checking code. Checks if 0 <= idx <= len, branches to exit if not.
 // Starts a new block on return.
-// idx and len are already converted to full int width.
 func (s *state) sliceBoundsCheck(idx, len *ssa.Value) {
 	if Debug['B'] != 0 {
 		return
 	}
+	// TODO: convert index to full width?
+	// TODO: if index is 64-bit and we're compiling to 32-bit, check that high 32 bits are zero.

 	// bounds check
 	cmp := s.newValue2(ssa.OpIsSliceInBounds, Types[TBOOL], idx, len)
 	s.check(cmp, panicslice)
 }

-// If cmp (a bool) is false, panic using the given function.
+// If cmp (a bool) is false, panic using the given function.
 func (s *state) check(cmp *ssa.Value, fn *Node) {
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
@@ -2988,23 +2969,19 @@ func (s *state) check(cmp *ssa.Value, fn *Node) {
 // is started to load the return values.
 func (s *state) rtcall(fn *Node, returns bool, results []*Type, args ...*ssa.Value) []*ssa.Value {
 	// Write args to the stack
-	off := Ctxt.FixedFrameSize()
+	var off int64 // TODO: arch-dependent starting offset?
 	for _, arg := range args {
 		t := arg.Type
 		off = Rnd(off, t.Alignment())
 		ptr := s.sp
 		if off != 0 {
-			ptr = s.newValue1I(ssa.OpOffPtr, t.PtrTo(), off, s.sp)
+			ptr = s.newValue1I(ssa.OpOffPtr, Types[TUINTPTR], off, s.sp)
 		}
 		size := t.Size()
 		s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, size, ptr, arg, s.mem())
 		off += size
 	}
 	off = Rnd(off, int64(Widthptr))
-	if Thearch.LinkArch.Name == "amd64p32" {
-		// amd64p32 wants 8-byte alignment of the start of the return values.
-		off = Rnd(off, 8)
-	}

 	// Issue call
 	call := s.newValue1A(ssa.OpStaticCall, ssa.TypeMem, fn.Sym, s.mem())
@@ -3015,7 +2992,7 @@ func (s *state) rtcall(fn *Node, returns bool, results []*Type, args ...*ssa.Val
 	if !returns {
 		b.Kind = ssa.BlockExit
 		b.SetControl(call)
-		call.AuxInt = off - Ctxt.FixedFrameSize()
+		call.AuxInt = off
 		if len(results) > 0 {
 			Fatalf("panic call can't have results")
 		}
@@ -3038,7 +3015,7 @@ func (s *state) rtcall(fn *Node, returns bool, results []*Type, args ...*ssa.Val
 		off = Rnd(off, t.Alignment())
 		ptr := s.sp
 		if off != 0 {
-			ptr = s.newValue1I(ssa.OpOffPtr, Ptrto(t), off, s.sp)
+			ptr = s.newValue1I(ssa.OpOffPtr, Types[TUINTPTR], off, s.sp)
 		}
 		res[i] = s.newValue2(ssa.OpLoad, t, ptr, s.mem())
 		off += t.Size()
@@ -3072,9 +3049,10 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, line int32, rightI

 	aux := &ssa.ExternSymbol{Typ: Types[TBOOL], Sym: syslook("writeBarrier").Sym}
 	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TUINT32]), aux, s.sb)
-	// Load word, test word, avoiding partial register write from load byte.
+	// TODO: select the .enabled field. It is currently first, so not needed for now.
+	// Load word, test byte, avoiding partial register write from load byte.
 	flag := s.newValue2(ssa.OpLoad, Types[TUINT32], flagaddr, s.mem())
-	flag = s.newValue2(ssa.OpNeq32, Types[TBOOL], flag, s.constInt32(Types[TUINT32], 0))
+	flag = s.newValue1(ssa.OpTrunc64to8, Types[TBOOL], flag)
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
 	b.Likely = ssa.BranchUnlikely
@@ -3095,7 +3073,7 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, line int32, rightI
 		tmp := temp(t)
 		s.vars[&memVar] = s.newValue1A(ssa.OpVarDef, ssa.TypeMem, tmp, s.mem())
 		tmpaddr, _ := s.addr(tmp, true)
-		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(t), tmpaddr, right, s.mem())
+		s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, t.Size(), tmpaddr, right, s.mem())
 		// Issue typedmemmove call.
 		taddr := s.newValue1A(ssa.OpAddr, Types[TUINTPTR], &ssa.ExternSymbol{Typ: Types[TUINTPTR], Sym: typenamesym(t)}, s.sb)
 		s.rtcall(typedmemmove, true, nil, taddr, left, tmpaddr)
@@ -3105,7 +3083,7 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, line int32, rightI
 	s.endBlock().AddEdgeTo(bEnd)

 	s.startBlock(bElse)
-	s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, SizeAlignAuxInt(t), left, right, s.mem())
+	s.vars[&memVar] = s.newValue3I(ssa.OpMove, ssa.TypeMem, t.Size(), left, right, s.mem())
 	s.endBlock().AddEdgeTo(bEnd)

 	s.startBlock(bEnd)
@@ -3139,9 +3117,10 @@ func (s *state) insertWBstore(t *Type, left, right *ssa.Value, line int32, skip

 	aux := &ssa.ExternSymbol{Typ: Types[TBOOL], Sym: syslook("writeBarrier").Sym}
 	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TUINT32]), aux, s.sb)
-	// Load word, test word, avoiding partial register write from load byte.
+	// TODO: select the .enabled field. It is currently first, so not needed for now.
+	// Load word, test byte, avoiding partial register write from load byte.
 	flag := s.newValue2(ssa.OpLoad, Types[TUINT32], flagaddr, s.mem())
-	flag = s.newValue2(ssa.OpNeq32, Types[TBOOL], flag, s.constInt32(Types[TUINT32], 0))
+	flag = s.newValue1(ssa.OpTrunc64to8, Types[TBOOL], flag)
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
 	b.Likely = ssa.BranchUnlikely
@@ -3951,11 +3930,6 @@ type SSAGenState struct {

 	// bstart remembers where each block starts (indexed by block ID)
 	bstart []*obj.Prog
-
-	// 387 port: maps from SSE registers (REG_X?) to 387 registers (REG_F?)
-	SSEto387 map[int16]int16
-	// Some architectures require a 64-bit temporary for FP-related register shuffling. Examples include x86-387, PPC, and Sparc V8.
-	ScratchFpMem *Node
 }

 // Pc returns the current Prog.
@@ -3992,13 +3966,6 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
 		blockProgs[Pc] = f.Blocks[0]
 	}

-	if Thearch.Use387 {
-		s.SSEto387 = map[int16]int16{}
-	}
-	if f.Config.NeedsFpScratch {
-		s.ScratchFpMem = temp(Types[TUINT64])
-	}
-
 	// Emit basic blocks
 	for i, b := range f.Blocks {
 		s.bstart[b.ID] = Pc
@@ -4161,25 +4128,12 @@ func SSAGenFPJump(s *SSAGenState, b, next *ssa.Block, jumps *[2][2]FloatingEQNEJ
 	}
 }

-func AuxOffset(v *ssa.Value) (offset int64) {
-	if v.Aux == nil {
-		return 0
-	}
-	switch sym := v.Aux.(type) {
-
-	case *ssa.AutoSymbol:
-		n := sym.Node.(*Node)
-		return n.Xoffset
-	}
-	return 0
-}
-
 // AddAux adds the offset in the aux fields (AuxInt and Aux) of v to a.
 func AddAux(a *obj.Addr, v *ssa.Value) {
 	AddAux2(a, v, v.AuxInt)
 }
 func AddAux2(a *obj.Addr, v *ssa.Value, offset int64) {
-	if a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR {
+	if a.Type != obj.TYPE_MEM {
 		v.Fatalf("bad AddAux addr %v", a)
 	}
 	// add integer offset
@@ -4217,27 +4171,17 @@ func AddAux2(a *obj.Addr, v *ssa.Value, offset int64) {
 	}
 }

-// SizeAlignAuxInt returns an AuxInt encoding the size and alignment of type t.
-func SizeAlignAuxInt(t *Type) int64 {
-	return ssa.MakeSizeAndAlign(t.Size(), t.Alignment()).Int64()
-}
-
 // extendIndex extends v to a full int width.
-// panic using the given function if v does not fit in an int (only on 32-bit archs).
-func (s *state) extendIndex(v *ssa.Value, panicfn *Node) *ssa.Value {
+func (s *state) extendIndex(v *ssa.Value) *ssa.Value {
 	size := v.Type.Size()
 	if size == s.config.IntSize {
 		return v
 	}
 	if size > s.config.IntSize {
-		// truncate 64-bit indexes on 32-bit pointer archs. Test the
-		// high word and branch to out-of-bounds failure if it is not 0.
-		if Debug['B'] == 0 {
-			hi := s.newValue1(ssa.OpInt64Hi, Types[TUINT32], v)
-			cmp := s.newValue2(ssa.OpEq32, Types[TBOOL], hi, s.constInt32(Types[TUINT32], 0))
-			s.check(cmp, panicfn)
-		}
-		return s.newValue1(ssa.OpTrunc64to32, Types[TINT], v)
+		// TODO: truncate 64-bit indexes on 32-bit pointer archs. We'd need to test
+		// the high word and branch to out-of-bounds failure if it is not 0.
+		s.Unimplementedf("64->32 index truncation not implemented")
+		return v
 	}

 	// Extend value to the required size
@@ -4276,74 +4220,17 @@ func (s *state) extendIndex(v *ssa.Value, panicfn *Node) *ssa.Value {
 	return s.newValue1(op, Types[TINT], v)
 }

-// SSAReg returns the register to which v has been allocated.
-func SSAReg(v *ssa.Value) *ssa.Register {
+// SSARegNum returns the register (in cmd/internal/obj numbering) to
+// which v has been allocated. Panics if v is not assigned to a
+// register.
+// TODO: Make this panic again once it stops happening routinely.
+func SSARegNum(v *ssa.Value) int16 {
 	reg := v.Block.Func.RegAlloc[v.ID]
 	if reg == nil {
-		v.Fatalf("nil register for value: %s\n%s\n", v.LongString(), v.Block.Func)
-	}
-	return reg.(*ssa.Register)
-}
-
-// SSAReg0 returns the register to which the first output of v has been allocated.
-func SSAReg0(v *ssa.Value) *ssa.Register {
-	reg := v.Block.Func.RegAlloc[v.ID].(ssa.LocPair)[0]
-	if reg == nil {
-		v.Fatalf("nil first register for value: %s\n%s\n", v.LongString(), v.Block.Func)
-	}
-	return reg.(*ssa.Register)
-}
-
-// SSAReg1 returns the register to which the second output of v has been allocated.
-func SSAReg1(v *ssa.Value) *ssa.Register {
-	reg := v.Block.Func.RegAlloc[v.ID].(ssa.LocPair)[1]
-	if reg == nil {
-		v.Fatalf("nil second register for value: %s\n%s\n", v.LongString(), v.Block.Func)
-	}
-	return reg.(*ssa.Register)
-}
-
-// SSARegNum returns the register number (in cmd/internal/obj numbering) to which v has been allocated.
-func SSARegNum(v *ssa.Value) int16 {
-	return Thearch.SSARegToReg[SSAReg(v).Num]
-}
-
-// SSARegNum0 returns the register number (in cmd/internal/obj numbering) to which the first output of v has been allocated.
-func SSARegNum0(v *ssa.Value) int16 {
-	return Thearch.SSARegToReg[SSAReg0(v).Num]
-}
-
-// SSARegNum1 returns the register number (in cmd/internal/obj numbering) to which the second output of v has been allocated.
-func SSARegNum1(v *ssa.Value) int16 {
-	return Thearch.SSARegToReg[SSAReg1(v).Num]
-}
-
-// CheckLoweredPhi checks that regalloc and stackalloc correctly handled phi values.
-// Called during ssaGenValue.
-func CheckLoweredPhi(v *ssa.Value) {
-	if v.Op != ssa.OpPhi {
-		v.Fatalf("CheckLoweredPhi called with non-phi value: %v", v.LongString())
-	}
-	if v.Type.IsMemory() {
-		return
-	}
-	f := v.Block.Func
-	loc := f.RegAlloc[v.ID]
-	for _, a := range v.Args {
-		if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead?
-			v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
-		}
-	}
-}
-
-// CheckLoweredGetClosurePtr checks that v is the first instruction in the function's entry block.
-// The output of LoweredGetClosurePtr is generally hardwired to the correct register.
-// That register contains the closure pointer on closure entry.
-func CheckLoweredGetClosurePtr(v *ssa.Value) {
-	entry := v.Block.Func.Entry
-	if entry != v.Block || entry.Values[0] != v {
-		Fatalf("in %s, badly placed LoweredGetClosurePtr: %v %v", v.Block.Func.Name, v.Block, v)
+		v.Unimplementedf("nil regnum for value: %s\n%s\n", v.LongString(), v.Block.Func)
+		return 0
 	}
+	return Thearch.SSARegToReg[reg.(*ssa.Register).Num]
 }

 // AutoVar returns a *Node and int64 representing the auto variable and offset within it
@@ -4485,25 +4372,6 @@ func (e *ssaExport) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSl
 	return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + s}
 }

-func (e *ssaExport) SplitInt64(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
-	n := name.N.(*Node)
-	var t *Type
-	if name.Type.IsSigned() {
-		t = Types[TINT32]
-	} else {
-		t = Types[TUINT32]
-	}
-	if n.Class == PAUTO && !n.Addrtaken {
-		// Split this int64 up into two separate variables.
-		h := e.namedAuto(n.Sym.Name+".hi", t)
-		l := e.namedAuto(n.Sym.Name+".lo", Types[TUINT32])
-		return ssa.LocalSlot{N: h, Type: t, Off: 0}, ssa.LocalSlot{N: l, Type: Types[TUINT32], Off: 0}
-	}
-	// Return the two parts of the larger variable.
-	// Assuming little endian (we don't support big endian 32-bit architecture yet)
-	return ssa.LocalSlot{N: n, Type: t, Off: name.Off + 4}, ssa.LocalSlot{N: n, Type: Types[TUINT32], Off: name.Off}
-}
-
 func (e *ssaExport) SplitStruct(name ssa.LocalSlot, i int) ssa.LocalSlot {
 	n := name.N.(*Node)
 	st := name.Type
--- a/src/cmd/compile/internal/gc/testdata/arith_ssa.go
+++ b/src/cmd/compile/internal/gc/testdata/arith_ssa.go
@@ -553,445 +553,6 @@ func testOrPhi() {
 	}
 }

-//go:noinline
-func addshiftLL_ssa(a, b uint32) uint32 {
-	return a + b<<3
-}
-
-//go:noinline
-func subshiftLL_ssa(a, b uint32) uint32 {
-	return a - b<<3
-}
-
-//go:noinline
-func rsbshiftLL_ssa(a, b uint32) uint32 {
-	return a<<3 - b
-}
-
-//go:noinline
-func andshiftLL_ssa(a, b uint32) uint32 {
-	return a & (b << 3)
-}
-
-//go:noinline
-func orshiftLL_ssa(a, b uint32) uint32 {
-	return a | b<<3
-}
-
-//go:noinline
-func xorshiftLL_ssa(a, b uint32) uint32 {
-	return a ^ b<<3
-}
-
-//go:noinline
-func bicshiftLL_ssa(a, b uint32) uint32 {
-	return a &^ (b << 3)
-}
-
-//go:noinline
-func notshiftLL_ssa(a uint32) uint32 {
-	return ^(a << 3)
-}
-
-//go:noinline
-func addshiftRL_ssa(a, b uint32) uint32 {
-	return a + b>>3
-}
-
-//go:noinline
-func subshiftRL_ssa(a, b uint32) uint32 {
-	return a - b>>3
-}
-
-//go:noinline
-func rsbshiftRL_ssa(a, b uint32) uint32 {
-	return a>>3 - b
-}
-
-//go:noinline
-func andshiftRL_ssa(a, b uint32) uint32 {
-	return a & (b >> 3)
-}
-
-//go:noinline
-func orshiftRL_ssa(a, b uint32) uint32 {
-	return a | b>>3
-}
-
-//go:noinline
-func xorshiftRL_ssa(a, b uint32) uint32 {
-	return a ^ b>>3
-}
-
-//go:noinline
-func bicshiftRL_ssa(a, b uint32) uint32 {
-	return a &^ (b >> 3)
-}
-
-//go:noinline
-func notshiftRL_ssa(a uint32) uint32 {
-	return ^(a >> 3)
-}
-
-//go:noinline
-func addshiftRA_ssa(a, b int32) int32 {
-	return a + b>>3
-}
-
-//go:noinline
-func subshiftRA_ssa(a, b int32) int32 {
-	return a - b>>3
-}
-
-//go:noinline
-func rsbshiftRA_ssa(a, b int32) int32 {
-	return a>>3 - b
-}
-
-//go:noinline
-func andshiftRA_ssa(a, b int32) int32 {
-	return a & (b >> 3)
-}
-
-//go:noinline
-func orshiftRA_ssa(a, b int32) int32 {
-	return a | b>>3
-}
-
-//go:noinline
-func xorshiftRA_ssa(a, b int32) int32 {
-	return a ^ b>>3
-}
-
-//go:noinline
-func bicshiftRA_ssa(a, b int32) int32 {
-	return a &^ (b >> 3)
-}
-
-//go:noinline
-func notshiftRA_ssa(a int32) int32 {
-	return ^(a >> 3)
-}
-
-//go:noinline
-func addshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a + b<<s
-}
-
-//go:noinline
-func subshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a - b<<s
-}
-
-//go:noinline
-func rsbshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a<<s - b
-}
-
-//go:noinline
-func andshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a & (b << s)
-}
-
-//go:noinline
-func orshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a | b<<s
-}
-
-//go:noinline
-func xorshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a ^ b<<s
-}
-
-//go:noinline
-func bicshiftLLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a &^ (b << s)
-}
-
-//go:noinline
-func notshiftLLreg_ssa(a uint32, s uint8) uint32 {
-	return ^(a << s)
-}
-
-//go:noinline
-func addshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a + b>>s
-}
-
-//go:noinline
-func subshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a - b>>s
-}
-
-//go:noinline
-func rsbshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a>>s - b
-}
-
-//go:noinline
-func andshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a & (b >> s)
-}
-
-//go:noinline
-func orshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a | b>>s
-}
-
-//go:noinline
-func xorshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a ^ b>>s
-}
-
-//go:noinline
-func bicshiftRLreg_ssa(a, b uint32, s uint8) uint32 {
-	return a &^ (b >> s)
-}
-
-//go:noinline
-func notshiftRLreg_ssa(a uint32, s uint8) uint32 {
-	return ^(a >> s)
-}
-
-//go:noinline
-func addshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a + b>>s
-}
-
-//go:noinline
-func subshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a - b>>s
-}
-
-//go:noinline
-func rsbshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a>>s - b
-}
-
-//go:noinline
-func andshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a & (b >> s)
-}
-
-//go:noinline
-func orshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a | b>>s
-}
-
-//go:noinline
-func xorshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a ^ b>>s
-}
-
-//go:noinline
-func bicshiftRAreg_ssa(a, b int32, s uint8) int32 {
-	return a &^ (b >> s)
-}
-
-//go:noinline
-func notshiftRAreg_ssa(a int32, s uint8) int32 {
-	return ^(a >> s)
-}
-
-// test ARM shifted ops
-func testShiftedOps() {
-	a, b := uint32(10), uint32(42)
-	if want, got := a+b<<3, addshiftLL_ssa(a, b); got != want {
-		println("addshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b<<3, subshiftLL_ssa(a, b); got != want {
-		println("subshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a<<3-b, rsbshiftLL_ssa(a, b); got != want {
-		println("rsbshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b<<3), andshiftLL_ssa(a, b); got != want {
-		println("andshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b<<3, orshiftLL_ssa(a, b); got != want {
-		println("orshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b<<3, xorshiftLL_ssa(a, b); got != want {
-		println("xorshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b<<3), bicshiftLL_ssa(a, b); got != want {
-		println("bicshiftLL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a << 3), notshiftLL_ssa(a); got != want {
-		println("notshiftLL_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a+b>>3, addshiftRL_ssa(a, b); got != want {
-		println("addshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b>>3, subshiftRL_ssa(a, b); got != want {
-		println("subshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a>>3-b, rsbshiftRL_ssa(a, b); got != want {
-		println("rsbshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b>>3), andshiftRL_ssa(a, b); got != want {
-		println("andshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b>>3, orshiftRL_ssa(a, b); got != want {
-		println("orshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b>>3, xorshiftRL_ssa(a, b); got != want {
-		println("xorshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b>>3), bicshiftRL_ssa(a, b); got != want {
-		println("bicshiftRL_ssa(10, 42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a >> 3), notshiftRL_ssa(a); got != want {
-		println("notshiftRL_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	c, d := int32(10), int32(-42)
-	if want, got := c+d>>3, addshiftRA_ssa(c, d); got != want {
-		println("addshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c-d>>3, subshiftRA_ssa(c, d); got != want {
-		println("subshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c>>3-d, rsbshiftRA_ssa(c, d); got != want {
-		println("rsbshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&(d>>3), andshiftRA_ssa(c, d); got != want {
-		println("andshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c|d>>3, orshiftRA_ssa(c, d); got != want {
-		println("orshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c^d>>3, xorshiftRA_ssa(c, d); got != want {
-		println("xorshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&^(d>>3), bicshiftRA_ssa(c, d); got != want {
-		println("bicshiftRA_ssa(10, -42) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(d >> 3), notshiftRA_ssa(d); got != want {
-		println("notshiftRA_ssa(-42) =", got, " want ", want)
-		failed = true
-	}
-	s := uint8(3)
-	if want, got := a+b<<s, addshiftLLreg_ssa(a, b, s); got != want {
-		println("addshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b<<s, subshiftLLreg_ssa(a, b, s); got != want {
-		println("subshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a<<s-b, rsbshiftLLreg_ssa(a, b, s); got != want {
-		println("rsbshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b<<s), andshiftLLreg_ssa(a, b, s); got != want {
-		println("andshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b<<s, orshiftLLreg_ssa(a, b, s); got != want {
-		println("orshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b<<s, xorshiftLLreg_ssa(a, b, s); got != want {
-		println("xorshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b<<s), bicshiftLLreg_ssa(a, b, s); got != want {
-		println("bicshiftLLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a << s), notshiftLLreg_ssa(a, s); got != want {
-		println("notshiftLLreg_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a+b>>s, addshiftRLreg_ssa(a, b, s); got != want {
-		println("addshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a-b>>s, subshiftRLreg_ssa(a, b, s); got != want {
-		println("subshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a>>s-b, rsbshiftRLreg_ssa(a, b, s); got != want {
-		println("rsbshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&(b>>s), andshiftRLreg_ssa(a, b, s); got != want {
-		println("andshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a|b>>s, orshiftRLreg_ssa(a, b, s); got != want {
-		println("orshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a^b>>s, xorshiftRLreg_ssa(a, b, s); got != want {
-		println("xorshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := a&^(b>>s), bicshiftRLreg_ssa(a, b, s); got != want {
-		println("bicshiftRLreg_ssa(10, 42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(a >> s), notshiftRLreg_ssa(a, s); got != want {
-		println("notshiftRLreg_ssa(10) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c+d>>s, addshiftRAreg_ssa(c, d, s); got != want {
-		println("addshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c-d>>s, subshiftRAreg_ssa(c, d, s); got != want {
-		println("subshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c>>s-d, rsbshiftRAreg_ssa(c, d, s); got != want {
-		println("rsbshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&(d>>s), andshiftRAreg_ssa(c, d, s); got != want {
-		println("andshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c|d>>s, orshiftRAreg_ssa(c, d, s); got != want {
-		println("orshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c^d>>s, xorshiftRAreg_ssa(c, d, s); got != want {
-		println("xorshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := c&^(d>>s), bicshiftRAreg_ssa(c, d, s); got != want {
-		println("bicshiftRAreg_ssa(10, -42, 3) =", got, " want ", want)
-		failed = true
-	}
-	if want, got := ^(d >> s), notshiftRAreg_ssa(d, s); got != want {
-		println("notshiftRAreg_ssa(-42, 3) =", got, " want ", want)
-		failed = true
-	}
-}
-
 var failed = false

 func main() {
@@ -1012,7 +573,6 @@ func main() {
 	testLoadCombine()
 	testLoadSymCombine()
 	testShiftRemoval()
-	testShiftedOps()

 	if failed {
 		panic("failed")
--- a/src/cmd/compile/internal/gc/testdata/string_ssa.go
+++ b/src/cmd/compile/internal/gc/testdata/string_ssa.go
@@ -110,67 +110,6 @@ func testSmallIndexType() {
 	}
 }

-//go:noinline
-func testInt64Index_ssa(s string, i int64) byte {
-	return s[i]
-}
-
-//go:noinline
-func testInt64Slice_ssa(s string, i, j int64) string {
-	return s[i:j]
-}
-
-func testInt64Index() {
-	tests := []struct {
-		i int64
-		j int64
-		b byte
-		s string
-	}{
-		{0, 5, 'B', "Below"},
-		{5, 10, 'E', "Exact"},
-		{10, 15, 'A', "Above"},
-	}
-
-	str := "BelowExactAbove"
-	for i, t := range tests {
-		if got := testInt64Index_ssa(str, t.i); got != t.b {
-			println("#", i, "got ", got, ", wanted", t.b)
-			failed = true
-		}
-		if got := testInt64Slice_ssa(str, t.i, t.j); got != t.s {
-			println("#", i, "got ", got, ", wanted", t.s)
-			failed = true
-		}
-	}
-}
-
-func testInt64IndexPanic() {
-	defer func() {
-		if r := recover(); r != nil {
-			println("paniced as expected")
-		}
-	}()
-
-	str := "foobar"
-	println("got ", testInt64Index_ssa(str, 1<<32+1))
-	println("expected to panic, but didn't")
-	failed = true
-}
-
-func testInt64SlicePanic() {
-	defer func() {
-		if r := recover(); r != nil {
-			println("paniced as expected")
-		}
-	}()
-
-	str := "foobar"
-	println("got ", testInt64Slice_ssa(str, 1<<32, 1<<32+1))
-	println("expected to panic, but didn't")
-	failed = true
-}
-
 //go:noinline
 func testStringElem_ssa(s string, i int) byte {
 	return s[i]
@@ -214,9 +153,6 @@ func main() {
 	testSmallIndexType()
 	testStringElem()
 	testStringElemConst()
-	testInt64Index()
-	testInt64IndexPanic()
-	testInt64SlicePanic()

 	if failed {
 		panic("failed")
--- a/src/cmd/compile/internal/gc/type.go
+++ b/src/cmd/compile/internal/gc/type.go
@@ -1207,7 +1207,6 @@ func (t *Type) ChanDir() ChanDir {
 func (t *Type) IsMemory() bool { return false }
 func (t *Type) IsFlags() bool  { return false }
 func (t *Type) IsVoid() bool   { return false }
-func (t *Type) IsTuple() bool  { return false }

 // IsUntyped reports whether t is an untyped type.
 func (t *Type) IsUntyped() bool {
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -1094,45 +1094,12 @@ opswitch:

 			if n.Type.IsFloat() {
 				if n.Left.Type.Etype == TINT64 {
-					n = conv(mkcall("int64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TINT64])), n.Type)
+					n = mkcall("int64tofloat64", n.Type, init, conv(n.Left, Types[TINT64]))
 					break
 				}

 				if n.Left.Type.Etype == TUINT64 {
-					n = conv(mkcall("uint64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TUINT64])), n.Type)
-					break
-				}
-			}
-		}
-
-		if Thearch.LinkArch.Family == sys.I386 {
-			if n.Left.Type.IsFloat() {
-				if n.Type.Etype == TINT64 {
-					n = mkcall("float64toint64", n.Type, init, conv(n.Left, Types[TFLOAT64]))
-					break
-				}
-
-				if n.Type.Etype == TUINT64 {
-					n = mkcall("float64touint64", n.Type, init, conv(n.Left, Types[TFLOAT64]))
-					break
-				}
-				if n.Type.Etype == TUINT32 || n.Type.Etype == TUINTPTR {
-					n = mkcall("float64touint32", n.Type, init, conv(n.Left, Types[TFLOAT64]))
-					break
-				}
-			}
-			if n.Type.IsFloat() {
-				if n.Left.Type.Etype == TINT64 {
-					n = conv(mkcall("int64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TINT64])), n.Type)
-					break
-				}
-
-				if n.Left.Type.Etype == TUINT64 {
-					n = conv(mkcall("uint64tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TUINT64])), n.Type)
-					break
-				}
-				if n.Left.Type.Etype == TUINT32 || n.Left.Type.Etype == TUINTPTR {
-					n = conv(mkcall("uint32tofloat64", Types[TFLOAT64], init, conv(n.Left, Types[TUINT32])), n.Type)
+					n = mkcall("uint64tofloat64", n.Type, init, conv(n.Left, Types[TUINT64]))
 					break
 				}
 			}
@@ -3336,7 +3303,6 @@ func samecheap(a *Node, b *Node) bool {
 // The result of walkrotate MUST be assigned back to n, e.g.
 // 	n.Left = walkrotate(n.Left)
 func walkrotate(n *Node) *Node {
-	//TODO: enable LROT on ARM64 once the old backend is gone
 	if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM64, sys.PPC64) {
 		return n
 	}
@@ -3530,6 +3496,16 @@ func walkdiv(n *Node, init *Nodes) *Node {
 			goto ret
 		}

+		// TODO(zhongwei) Tests show that the "quick division" method for TUINT8, TINT8, TUINT16 and TINT16
+		// is slower on the current arm64 backend than the hardware div instruction, due to unnecessary
+		// data movement between registers. It could be enabled once the generated code is good enough.
+		if Thearch.LinkArch.Family == sys.ARM64 {
+			switch Simtype[nl.Type.Etype] {
+			case TUINT8, TINT8, TUINT16, TINT16:
+				return n
+			}
+		}
+
 		switch Simtype[nl.Type.Etype] {
 		default:
 			return n
--- a/src/cmd/compile/internal/ppc64/galign.go
+++ b/src/cmd/compile/internal/ppc64/galign.go
@@ -66,11 +66,6 @@ func Main() {
 	gc.Thearch.Doregbits = doregbits
 	gc.Thearch.Regnames = regnames

-	gc.Thearch.SSARegToReg = ssaRegToReg
-	gc.Thearch.SSAMarkMoves = ssaMarkMoves
-	gc.Thearch.SSAGenValue = ssaGenValue
-	gc.Thearch.SSAGenBlock = ssaGenBlock
-
 	initvariants()
 	initproginfo()

--- a/src/cmd/compile/internal/ppc64/prog.go
+++ b/src/cmd/compile/internal/ppc64/prog.go
@@ -42,34 +42,22 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{

 	// Integer
 	ppc64.AADD & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AADDC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASUB & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AADDME & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ANEG & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AAND & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AANDN & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AOR & obj.AMask:     {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AORN & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AXOR & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AEQV & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULLD & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULLW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULHD & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AMULHDU & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ADIVD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ADIVDU & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ADIVW & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ADIVWU & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASLD & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASRD & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ASRAD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ASLW & obj.AMask:    {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ASRW & obj.AMask:    {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.ASRAW & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.ACMP & obj.AMask:    {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead},
 	ppc64.ACMPU & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead},
-	ppc64.ACMPW & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RightRead},
-	ppc64.ACMPWU & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RightRead},
 	ppc64.ATD & obj.AMask:     {Flags: gc.SizeQ | gc.RightRead},

 	// Floating point.
@@ -82,13 +70,11 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{
 	ppc64.AFDIV & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFDIVS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCTIDZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
-	ppc64.AFCTIWZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCFID & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCFIDU & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	ppc64.AFCMPU & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
 	ppc64.AFRSP & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv},
 	ppc64.AFSQRT & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},
-	ppc64.AFNEG & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},

 	// Moves
 	ppc64.AMOVB & obj.AMask:  {Flags: gc.SizeB | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
@@ -105,8 +91,6 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{
 	ppc64.AMOVD & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move},
 	ppc64.AMOVDU & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move | gc.PostInc},
 	ppc64.AFMOVS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
-	ppc64.AFMOVSX & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
-	ppc64.AFMOVSZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
 	ppc64.AFMOVD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Move},

 	// Jumps
--- a/src/cmd/compile/internal/ppc64/ssa.go
+++ b/src/cmd/compile/internal/ppc64/ssa.go
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -20,18 +20,11 @@ type Config struct {
 	lowerBlock      func(*Block) bool          // lowering function
 	lowerValue      func(*Value, *Config) bool // lowering function
 	registers       []Register                 // machine registers
-	gpRegMask       regMask                    // general purpose integer register mask
-	fpRegMask       regMask                    // floating point register mask
-	FPReg           int8                       // register number of frame pointer, -1 if not used
-	hasGReg         bool                       // has hardware g register
 	fe              Frontend                   // callbacks into compiler frontend
 	HTML            *HTMLWriter                // html writer, for debugging
 	ctxt            *obj.Link                  // Generic arch information
 	optimize        bool                       // Do optimization
 	noDuffDevice    bool                       // Don't use Duff's device
-	nacl            bool                       // GOOS=nacl
-	use387          bool                       // GO386=387
-	NeedsFpScratch  bool                       // No direct move between GP and FP register sets
 	sparsePhiCutoff uint64                     // Sparse phi location algorithm used above this #blocks*#variables score
 	curFunc         *Func

@@ -113,7 +106,6 @@ type Frontend interface {
 	SplitSlice(LocalSlot) (LocalSlot, LocalSlot, LocalSlot)
 	SplitComplex(LocalSlot) (LocalSlot, LocalSlot)
 	SplitStruct(LocalSlot, int) LocalSlot
-	SplitInt64(LocalSlot) (LocalSlot, LocalSlot) // returns (hi, lo)

 	// Line returns a string describing the given line number.
 	Line(int32) string
@@ -136,87 +128,29 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
 		c.lowerBlock = rewriteBlockAMD64
 		c.lowerValue = rewriteValueAMD64
 		c.registers = registersAMD64[:]
-		c.gpRegMask = gpRegMaskAMD64
-		c.fpRegMask = fpRegMaskAMD64
-		c.FPReg = framepointerRegAMD64
-		c.hasGReg = false
-	case "amd64p32":
-		c.IntSize = 4
-		c.PtrSize = 4
-		c.lowerBlock = rewriteBlockAMD64
-		c.lowerValue = rewriteValueAMD64
-		c.registers = registersAMD64[:]
-		c.gpRegMask = gpRegMaskAMD64
-		c.fpRegMask = fpRegMaskAMD64
-		c.FPReg = framepointerRegAMD64
-		c.hasGReg = false
-		c.noDuffDevice = true
 	case "386":
 		c.IntSize = 4
 		c.PtrSize = 4
-		c.lowerBlock = rewriteBlock386
-		c.lowerValue = rewriteValue386
-		c.registers = registers386[:]
-		c.gpRegMask = gpRegMask386
-		c.fpRegMask = fpRegMask386
-		c.FPReg = framepointerReg386
-		c.hasGReg = false
+		c.lowerBlock = rewriteBlockAMD64
+		c.lowerValue = rewriteValueAMD64 // TODO(khr): full 32-bit support
 	case "arm":
 		c.IntSize = 4
 		c.PtrSize = 4
 		c.lowerBlock = rewriteBlockARM
 		c.lowerValue = rewriteValueARM
 		c.registers = registersARM[:]
-		c.gpRegMask = gpRegMaskARM
-		c.fpRegMask = fpRegMaskARM
-		c.FPReg = framepointerRegARM
-		c.hasGReg = true
-	case "arm64":
-		c.IntSize = 8
-		c.PtrSize = 8
-		c.lowerBlock = rewriteBlockARM64
-		c.lowerValue = rewriteValueARM64
-		c.registers = registersARM64[:]
-		c.gpRegMask = gpRegMaskARM64
-		c.fpRegMask = fpRegMaskARM64
-		c.FPReg = framepointerRegARM64
-		c.hasGReg = true
-	case "ppc64le":
-		c.IntSize = 8
-		c.PtrSize = 8
-		c.lowerBlock = rewriteBlockPPC64
-		c.lowerValue = rewriteValuePPC64
-		c.registers = registersPPC64[:]
-		c.gpRegMask = gpRegMaskPPC64
-		c.fpRegMask = fpRegMaskPPC64
-		c.FPReg = framepointerRegPPC64
-		c.noDuffDevice = true // TODO: Resolve PPC64 DuffDevice (has zero, but not copy)
-		c.NeedsFpScratch = true
-		c.hasGReg = true
 	default:
 		fe.Unimplementedf(0, "arch %s not implemented", arch)
 	}
 	c.ctxt = ctxt
 	c.optimize = optimize
-	c.nacl = obj.Getgoos() == "nacl"

-	// Don't use Duff's device on Plan 9 AMD64, because floating
+	// Don't use Duff's device on Plan 9, because floating
 	// point operations are not allowed in note handler.
-	if obj.Getgoos() == "plan9" && arch == "amd64" {
+	if obj.Getgoos() == "plan9" {
 		c.noDuffDevice = true
 	}

-	if c.nacl {
-		c.noDuffDevice = true // Don't use Duff's device on NaCl
-
-		// ARM assembler rewrites DIV/MOD to runtime calls, which
-		// clobber R12 on nacl
-		opcodeTable[OpARMDIV].reg.clobbers |= 1 << 12  // R12
-		opcodeTable[OpARMDIVU].reg.clobbers |= 1 << 12 // R12
-		opcodeTable[OpARMMOD].reg.clobbers |= 1 << 12  // R12
-		opcodeTable[OpARMMODU].reg.clobbers |= 1 << 12 // R12
-	}
-
 	// Assign IDs to preallocated values/blocks.
 	for i := range c.values {
 		c.values[i].ID = ID(i)
@@ -246,11 +180,6 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
 	return c
 }

-func (c *Config) Set387(b bool) {
-	c.NeedsFpScratch = b
-	c.use387 = b
-}
-
 func (c *Config) Frontend() Frontend      { return c.fe }
 func (c *Config) SparsePhiCutoff() uint64 { return c.sparsePhiCutoff }

--- a/src/cmd/compile/internal/ssa/cse.go
+++ b/src/cmd/compile/internal/ssa/cse.go
@@ -163,29 +163,6 @@ func cse(f *Func) {
 		}
 	}

-	// if we rewrite a tuple generator to a new one in a different block,
-	// copy its selectors to the new generator's block, so tuple generator
-	// and selectors stay together.
-	for _, b := range f.Blocks {
-		for _, v := range b.Values {
-			if rewrite[v.ID] != nil {
-				continue
-			}
-			if v.Op != OpSelect0 && v.Op != OpSelect1 {
-				continue
-			}
-			if !v.Args[0].Type.IsTuple() {
-				f.Fatalf("arg of tuple selector %s is not a tuple: %s", v.String(), v.Args[0].LongString())
-			}
-			t := rewrite[v.Args[0].ID]
-			if t != nil && t.Block != b {
-				// v.Args[0] is tuple generator, CSE'd into a different block as t, v is left behind
-				c := v.copyInto(t.Block)
-				rewrite[v.ID] = c
-			}
-		}
-	}
-
 	rewrites := int64(0)

 	// Apply substitutions
--- a/src/cmd/compile/internal/ssa/deadstore.go
+++ b/src/cmd/compile/internal/ssa/deadstore.go
@@ -14,8 +14,7 @@ func dse(f *Func) {
 	defer f.retSparseSet(loadUse)
 	storeUse := f.newSparseSet(f.NumValues())
 	defer f.retSparseSet(storeUse)
-	shadowed := f.newSparseSet(f.NumValues())
-	defer f.retSparseSet(shadowed)
+	shadowed := newSparseMap(f.NumValues()) // TODO: cache
 	for _, b := range f.Blocks {
 		// Find all the stores in this block. Categorize their uses:
 		//  loadUse contains stores which are used by a subsequent load.
@@ -81,17 +80,18 @@ func dse(f *Func) {
 			shadowed.clear()
 		}
 		if v.Op == OpStore || v.Op == OpZero {
-			if shadowed.contains(v.Args[0].ID) {
+			sz := v.AuxInt
+			if shadowedSize := int64(shadowed.get(v.Args[0].ID)); shadowedSize != -1 && shadowedSize >= sz {
 				// Modify store into a copy
 				if v.Op == OpStore {
 					// store addr value mem
 					v.SetArgs1(v.Args[2])
 				} else {
 					// zero addr mem
-					sz := v.Args[0].Type.ElemType().Size()
-					if SizeAndAlign(v.AuxInt).Size() != sz {
+					typesz := v.Args[0].Type.ElemType().Size()
+					if sz != typesz {
 						f.Fatalf("mismatched zero/store sizes: %d and %d [%s]",
-							v.AuxInt, sz, v.LongString())
+							sz, typesz, v.LongString())
 					}
 					v.SetArgs1(v.Args[1])
 				}
@@ -99,7 +99,10 @@ func dse(f *Func) {
 				v.AuxInt = 0
 				v.Op = OpCopy
 			} else {
-				shadowed.add(v.Args[0].ID)
+				if sz > 0x7fffffff { // work around sparseMap's int32 value type
+					sz = 0x7fffffff
+				}
+				shadowed.set(v.Args[0].ID, int32(sz))
 			}
 		}
 		// walk to previous store
--- a/src/cmd/compile/internal/ssa/deadstore_test.go
+++ b/src/cmd/compile/internal/ssa/deadstore_test.go
@@ -8,7 +8,7 @@ import "testing"

 func TestDeadStore(t *testing.T) {
 	c := testConfig(t)
-	elemType := &TypeImpl{Size_: 8, Name: "testtype"}
+	elemType := &TypeImpl{Size_: 1, Name: "testtype"}
 	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr", Elem_: elemType} // dummy for testing
 	fun := Fun(c, "entry",
 		Bloc("entry",
@@ -18,7 +18,7 @@ func TestDeadStore(t *testing.T) {
 			Valu("addr1", OpAddr, ptrType, 0, nil, "sb"),
 			Valu("addr2", OpAddr, ptrType, 0, nil, "sb"),
 			Valu("addr3", OpAddr, ptrType, 0, nil, "sb"),
-			Valu("zero1", OpZero, TypeMem, 8, nil, "addr3", "start"),
+			Valu("zero1", OpZero, TypeMem, 1, nil, "addr3", "start"),
 			Valu("store1", OpStore, TypeMem, 1, nil, "addr1", "v", "zero1"),
 			Valu("store2", OpStore, TypeMem, 1, nil, "addr2", "v", "store1"),
 			Valu("store3", OpStore, TypeMem, 1, nil, "addr1", "v", "store2"),
@@ -95,3 +95,32 @@ func TestDeadStoreTypes(t *testing.T) {
 		t.Errorf("store %s incorrectly removed", v)
 	}
 }
+
+func TestDeadStoreUnsafe(t *testing.T) {
+	// Make sure a narrow store can't shadow a wider one to the same address.
+	// The test above covers stores of two different types, but unsafe pointer
+	// casting can change the size of a store while its type stays unchanged.
+	c := testConfig(t)
+	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
+	fun := Fun(c, "entry",
+		Bloc("entry",
+			Valu("start", OpInitMem, TypeMem, 0, nil),
+			Valu("sb", OpSB, TypeInvalid, 0, nil),
+			Valu("v", OpConstBool, TypeBool, 1, nil),
+			Valu("addr1", OpAddr, ptrType, 0, nil, "sb"),
+			Valu("store1", OpStore, TypeMem, 8, nil, "addr1", "v", "start"),  // store 8 bytes
+			Valu("store2", OpStore, TypeMem, 1, nil, "addr1", "v", "store1"), // store 1 byte
+			Goto("exit")),
+		Bloc("exit",
+			Exit("store2")))
+
+	CheckFunc(fun.f)
+	cse(fun.f)
+	dse(fun.f)
+	CheckFunc(fun.f)
+
+	v := fun.values["store1"] // the wider (8-byte) store; store2 only overwrites 1 byte of it
+	if v.Op == OpCopy { // dse rewrites a store it eliminates into OpCopy
+		t.Errorf("store %s incorrectly removed", v)
+	}
+}
--- a/src/cmd/compile/internal/ssa/decompose.go
+++ b/src/cmd/compile/internal/ssa/decompose.go
@@ -25,22 +25,6 @@ func decomposeBuiltIn(f *Func) {
 	for _, name := range f.Names {
 		t := name.Type
 		switch {
-		case t.IsInteger() && t.Size() == 8 && f.Config.IntSize == 4:
-			var elemType Type
-			if t.IsSigned() {
-				elemType = f.Config.fe.TypeInt32()
-			} else {
-				elemType = f.Config.fe.TypeUInt32()
-			}
-			hiName, loName := f.Config.fe.SplitInt64(name)
-			newNames = append(newNames, hiName, loName)
-			for _, v := range f.NamedValues[name] {
-				hi := v.Block.NewValue1(v.Line, OpInt64Hi, elemType, v)
-				lo := v.Block.NewValue1(v.Line, OpInt64Lo, f.Config.fe.TypeUInt32(), v)
-				f.NamedValues[hiName] = append(f.NamedValues[hiName], hi)
-				f.NamedValues[loName] = append(f.NamedValues[loName], lo)
-			}
-			delete(f.NamedValues, name)
 		case t.IsComplex():
 			var elemType Type
 			if t.Size() == 16 {
@@ -94,8 +78,6 @@ func decomposeBuiltIn(f *Func) {
 				f.NamedValues[dataName] = append(f.NamedValues[dataName], data)
 			}
 			delete(f.NamedValues, name)
-		case t.IsFloat():
-			// floats are never decomposed, even ones bigger than IntSize
 		case t.Size() > f.Config.IntSize:
 			f.Unimplementedf("undecomposed named type %s %s", name, t)
 		default:
@@ -106,13 +88,8 @@ func decomposeBuiltIn(f *Func) {
 }

 func decomposeBuiltInPhi(v *Value) {
+	// TODO: decompose 64-bit ops on 32-bit archs?
 	switch {
-	case v.Type.IsInteger() && v.Type.Size() == 8 && v.Block.Func.Config.IntSize == 4:
-		if v.Block.Func.Config.arch == "amd64p32" {
-			// Even though ints are 32 bits, we have 64-bit ops.
-			break
-		}
-		decomposeInt64Phi(v)
 	case v.Type.IsComplex():
 		decomposeComplexPhi(v)
 	case v.Type.IsString():
@@ -121,8 +98,6 @@ func decomposeBuiltInPhi(v *Value) {
 		decomposeSlicePhi(v)
 	case v.Type.IsInterface():
 		decomposeInterfacePhi(v)
-	case v.Type.IsFloat():
-		// floats are never decomposed, even ones bigger than IntSize
 	case v.Type.Size() > v.Block.Func.Config.IntSize:
 		v.Unimplementedf("undecomposed type %s", v.Type)
 	}
@@ -163,26 +138,6 @@ func decomposeSlicePhi(v *Value) {
 	v.AddArg(cap)
 }

-func decomposeInt64Phi(v *Value) {
-	fe := v.Block.Func.Config.fe
-	var partType Type
-	if v.Type.IsSigned() {
-		partType = fe.TypeInt32()
-	} else {
-		partType = fe.TypeUInt32()
-	}
-
-	hi := v.Block.NewValue0(v.Line, OpPhi, partType)
-	lo := v.Block.NewValue0(v.Line, OpPhi, fe.TypeUInt32())
-	for _, a := range v.Args {
-		hi.AddArg(a.Block.NewValue1(v.Line, OpInt64Hi, partType, a))
-		lo.AddArg(a.Block.NewValue1(v.Line, OpInt64Lo, fe.TypeUInt32(), a))
-	}
-	v.reset(OpInt64Make)
-	v.AddArg(hi)
-	v.AddArg(lo)
-}
-
 func decomposeComplexPhi(v *Value) {
 	fe := v.Block.Func.Config.fe
 	var partType Type
--- a/src/cmd/compile/internal/ssa/export_test.go
+++ b/src/cmd/compile/internal/ssa/export_test.go
@@ -49,12 +49,6 @@ func (d DummyFrontend) SplitComplex(s LocalSlot) (LocalSlot, LocalSlot) {
 	}
 	return LocalSlot{s.N, d.TypeFloat32(), s.Off}, LocalSlot{s.N, d.TypeFloat32(), s.Off + 4}
 }
-func (d DummyFrontend) SplitInt64(s LocalSlot) (LocalSlot, LocalSlot) {
-	if s.Type.IsSigned() {
-		return LocalSlot{s.N, d.TypeInt32(), s.Off + 4}, LocalSlot{s.N, d.TypeUInt32(), s.Off}
-	}
-	return LocalSlot{s.N, d.TypeUInt32(), s.Off + 4}, LocalSlot{s.N, d.TypeUInt32(), s.Off}
-}
 func (d DummyFrontend) SplitStruct(s LocalSlot, i int) LocalSlot {
 	return LocalSlot{s.N, s.Type.FieldType(i), s.Off + s.Type.FieldOff(i)}
 }
--- a/src/cmd/compile/internal/ssa/flagalloc.go
+++ b/src/cmd/compile/internal/ssa/flagalloc.go
@@ -4,6 +4,8 @@

 package ssa

+const flagRegMask = regMask(1) << 33 // bit tested against an op's reg.clobbers to see whether it clobbers flags. TODO: arch-specific
+
 // flagalloc allocates the flag register among all the flag-generating
 // instructions. Flag values are recomputed if they need to be
 // spilled/restored.
@@ -31,7 +33,7 @@ func flagalloc(f *Func) {
 				if v == flag {
 					flag = nil
 				}
-				if opcodeTable[v.Op].clobberFlags {
+				if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
 					flag = nil
 				}
 				for _, a := range v.Args {
@@ -95,7 +97,7 @@ func flagalloc(f *Func) {
 					continue
 				}
 				// Recalculate a
-				c := copyFlags(a, b)
+				c := a.copyInto(b)
 				// Update v.
 				v.SetArg(i, c)
 				// Remember the most-recently computed flag value.
@@ -103,7 +105,7 @@ func flagalloc(f *Func) {
 			}
 			// Issue v.
 			b.Values = append(b.Values, v)
-			if opcodeTable[v.Op].clobberFlags {
+			if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
 				flag = nil
 			}
 			if v.Type.IsFlags() {
@@ -119,7 +121,7 @@ func flagalloc(f *Func) {
 		if v := end[b.ID]; v != nil && v != flag {
 			// Need to reissue flag generator for use by
 			// subsequent blocks.
-			copyFlags(v, b)
+			_ = v.copyInto(b)
 			// Note: this flag generator is not properly linked up
 			// with the flag users. This breaks the SSA representation.
 			// We could fix up the users with another pass, but for now
@@ -133,19 +135,3 @@ func flagalloc(f *Func) {
 		b.FlagsLiveAtEnd = end[b.ID] != nil
 	}
 }
-
-// copyFlags copies v (flag generator) into b, returns the copy.
-// If v's arg is also flags, copy recursively.
-func copyFlags(v *Value, b *Block) *Value {
-	flagsArgs := make(map[int]*Value)
-	for i, a := range v.Args {
-		if a.Type.IsFlags() || a.Type.IsTuple() {
-			flagsArgs[i] = copyFlags(a, b)
-		}
-	}
-	c := v.copyInto(b)
-	for i, a := range flagsArgs {
-		c.SetArg(i, a)
-	}
-	return c
-}
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
--- a/src/cmd/compile/internal/ssa/gen/386Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/386Ops.go
@@ -1,508 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import "strings"
-
-// Notes:
-//  - Integer types live in the low portion of registers. Upper portions are junk.
-//  - Boolean types use the low-order byte of a register. 0=false, 1=true.
-//    Upper bytes are junk.
-//  - Floating-point types live in the low natural slot of an sse2 register.
-//    Unused portions are junk.
-//  - We do not use AH,BH,CH,DH registers.
-//  - When doing sub-register operations, we try to write the whole
-//    destination register to avoid a partial-register write.
-//  - Unused portions of AuxInt (or the Val portion of ValAndOff) are
-//    filled by sign-extending the used portion.  Users of AuxInt which interpret
-//    AuxInt as unsigned (e.g. shifts) must be careful.
-
-// Suffixes encode the bit width of various instructions.
-// L (long word) = 32 bit
-// W (word)      = 16 bit
-// B (byte)      = 8 bit
-
-// copied from ../../x86/reg.go
-var regNames386 = []string{
-	"AX",
-	"CX",
-	"DX",
-	"BX",
-	"SP",
-	"BP",
-	"SI",
-	"DI",
-	"X0",
-	"X1",
-	"X2",
-	"X3",
-	"X4",
-	"X5",
-	"X6",
-	"X7",
-
-	// pseudo-registers
-	"SB",
-}
-
-// Notes on 387 support.
-//  - The 387 has a weird stack-register setup for floating-point registers.
-//    We use these registers when SSE registers are not available (when GO386=387).
-//  - We use the same register names (X0-X7) but they refer to the 387
-//    floating-point registers. That way, most of the SSA backend is unchanged.
-//  - The instruction generation pass maintains an SSE->387 register mapping.
-//    This mapping is updated whenever the FP stack is pushed or popped so that
-//    we can always find a given SSE register even when the TOS pointer has changed.
-//  - To facilitate the mapping from SSE to 387, we enforce that
-//    every basic block starts and ends with an empty floating-point stack.
-
-func init() {
-	// Make map from reg names to reg integers.
-	if len(regNames386) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNames386 {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	// Common individual register masks
-	var (
-		ax         = buildReg("AX")
-		cx         = buildReg("CX")
-		dx         = buildReg("DX")
-		gp         = buildReg("AX CX DX BX BP SI DI")
-		fp         = buildReg("X0 X1 X2 X3 X4 X5 X6 X7")
-		x7         = buildReg("X7")
-		gpsp       = gp | buildReg("SP")
-		gpspsb     = gpsp | buildReg("SB")
-		callerSave = gp | fp
-	)
-	// Common slices of register masks
-	var (
-		gponly = []regMask{gp}
-		fponly = []regMask{fp}
-	)
-
-	// Common regInfo
-	var (
-		gp01      = regInfo{inputs: nil, outputs: gponly}
-		gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly}
-		gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
-		gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
-		gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-		gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
-		gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{0, gp}}
-		gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
-		gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-		gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
-		gp21sb    = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
-		gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
-		gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx}
-		gp21hmul  = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
-		gp11mod   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax}
-		gp21mul   = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}
-
-		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
-		gp1flags = regInfo{inputs: []regMask{gpsp}}
-		flagsgp  = regInfo{inputs: nil, outputs: gponly}
-
-		readflags = regInfo{inputs: nil, outputs: gponly}
-		flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
-
-		gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
-		gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
-
-		gpstore         = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
-		gpstoreconst    = regInfo{inputs: []regMask{gpspsb, 0}}
-		gpstoreidx      = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
-		gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
-
-		fp01   = regInfo{inputs: nil, outputs: fponly}
-		fp21   = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
-		fp21x7 = regInfo{inputs: []regMask{fp &^ x7, fp &^ x7},
-			clobbers: x7, outputs: []regMask{fp &^ x7}}
-		fpgp     = regInfo{inputs: fponly, outputs: gponly}
-		gpfp     = regInfo{inputs: gponly, outputs: fponly}
-		fp11     = regInfo{inputs: fponly, outputs: fponly}
-		fp2flags = regInfo{inputs: []regMask{fp, fp}}
-
-		fpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly}
-		fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly}
-
-		fpstore    = regInfo{inputs: []regMask{gpspsb, fp, 0}}
-		fpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, fp, 0}}
-	)
-
-	var _386ops = []opData{
-		// fp ops
-		{name: "ADDSS", argLength: 2, reg: fp21, asm: "ADDSS", commutative: true, resultInArg0: true}, // fp32 add
-		{name: "ADDSD", argLength: 2, reg: fp21, asm: "ADDSD", commutative: true, resultInArg0: true}, // fp64 add
-		{name: "SUBSS", argLength: 2, reg: fp21x7, asm: "SUBSS", resultInArg0: true},                  // fp32 sub
-		{name: "SUBSD", argLength: 2, reg: fp21x7, asm: "SUBSD", resultInArg0: true},                  // fp64 sub
-		{name: "MULSS", argLength: 2, reg: fp21, asm: "MULSS", commutative: true, resultInArg0: true}, // fp32 mul
-		{name: "MULSD", argLength: 2, reg: fp21, asm: "MULSD", commutative: true, resultInArg0: true}, // fp64 mul
-		{name: "DIVSS", argLength: 2, reg: fp21x7, asm: "DIVSS", resultInArg0: true},                  // fp32 div
-		{name: "DIVSD", argLength: 2, reg: fp21x7, asm: "DIVSD", resultInArg0: true},                  // fp64 div
-
-		{name: "MOVSSload", argLength: 2, reg: fpload, asm: "MOVSS", aux: "SymOff"},            // fp32 load
-		{name: "MOVSDload", argLength: 2, reg: fpload, asm: "MOVSD", aux: "SymOff"},            // fp64 load
-		{name: "MOVSSconst", reg: fp01, asm: "MOVSS", aux: "Float32", rematerializeable: true}, // fp32 constant
-		{name: "MOVSDconst", reg: fp01, asm: "MOVSD", aux: "Float64", rematerializeable: true}, // fp64 constant
-		{name: "MOVSSloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSS", aux: "SymOff"},     // fp32 load indexed by i
-		{name: "MOVSSloadidx4", argLength: 3, reg: fploadidx, asm: "MOVSS", aux: "SymOff"},     // fp32 load indexed by 4*i
-		{name: "MOVSDloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSD", aux: "SymOff"},     // fp64 load indexed by i
-		{name: "MOVSDloadidx8", argLength: 3, reg: fploadidx, asm: "MOVSD", aux: "SymOff"},     // fp64 load indexed by 8*i
-
-		{name: "MOVSSstore", argLength: 3, reg: fpstore, asm: "MOVSS", aux: "SymOff"},        // fp32 store
-		{name: "MOVSDstore", argLength: 3, reg: fpstore, asm: "MOVSD", aux: "SymOff"},        // fp64 store
-		{name: "MOVSSstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSS", aux: "SymOff"}, // fp32 indexed by i store
-		{name: "MOVSSstoreidx4", argLength: 4, reg: fpstoreidx, asm: "MOVSS", aux: "SymOff"}, // fp32 indexed by 4i store
-		{name: "MOVSDstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by i store
-		{name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store
-
-		// binary ops
-		{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                // arg0 + arg1
-		{name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint
-
-		{name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true},                // arg0 + arg1, generates <carry,result> pair
-		{name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true},                // arg0 + auxint, generates <carry,result> pair
-		{name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags
-		{name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags
-
-		{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
-		{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
-
-		{name: "SUBLcarry", argLength: 2, reg: gp21carry, asm: "SUBL", resultInArg0: true},                                   // arg0-arg1, generates <borrow,result> pair
-		{name: "SUBLconstcarry", argLength: 1, reg: gp11carry, asm: "SUBL", aux: "Int32", resultInArg0: true},                // arg0-auxint, generates <borrow,result> pair
-		{name: "SBBL", argLength: 3, reg: gp2carry1, asm: "SBBL", resultInArg0: true, clobberFlags: true},                    // arg0-arg1-borrow(arg2), where arg2 is flags
-		{name: "SBBLconst", argLength: 2, reg: gp1carry1, asm: "SBBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0-auxint-borrow(arg1), where arg1 is flags
-
-		{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
-		{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
-
-		{name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULW", argLength: 2, reg: gp21hmul, asm: "IMULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULB", argLength: 2, reg: gp21hmul, asm: "IMULB", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULWU", argLength: 2, reg: gp21hmul, asm: "MULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULBU", argLength: 2, reg: gp21hmul, asm: "MULB", clobberFlags: true}, // (arg0 * arg1) >> width
-
-		{name: "MULLQU", argLength: 2, reg: gp21mul, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1]
-
-		{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL", clobberFlags: true}, // arg0 / arg1
-		{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW", clobberFlags: true}, // arg0 / arg1
-		{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL", clobberFlags: true}, // arg0 / arg1
-		{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW", clobberFlags: true}, // arg0 / arg1
-
-		{name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL", clobberFlags: true}, // arg0 % arg1
-		{name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW", clobberFlags: true}, // arg0 % arg1
-		{name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL", clobberFlags: true}, // arg0 % arg1
-		{name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW", clobberFlags: true}, // arg0 % arg1
-
-		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
-
-		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
-
-		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
-
-		{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPB", argLength: 2, reg: gp2flags, asm: "CMPB", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPLconst", argLength: 1, reg: gp1flags, asm: "CMPL", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint
-		{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int16"}, // arg0 compare to auxint
-		{name: "CMPBconst", argLength: 1, reg: gp1flags, asm: "CMPB", typ: "Flags", aux: "Int8"},  // arg0 compare to auxint
-
-		{name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, // arg0 compare to arg1, f32
-		{name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // arg0 compare to arg1, f64
-
-		{name: "TESTL", argLength: 2, reg: gp2flags, asm: "TESTL", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-		{name: "TESTW", argLength: 2, reg: gp2flags, asm: "TESTW", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-		{name: "TESTB", argLength: 2, reg: gp2flags, asm: "TESTB", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-		{name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0
-		{name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
-		{name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0
-
-		{name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 32
-		{name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
-		// Note: x86 is weird, the 16 and 8 bit shifts still use all 5 bits of shift amount!
-
-		{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // unsigned arg0 >> auxint, shift amount 0-31
-
-		{name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // signed arg0 >> auxint, shift amount 0-31
-
-		{name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
-		{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
-		{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // arg0 rotate left auxint, rotate amount 0-7
-
-		// unary ops
-		{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
-
-		{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true, clobberFlags: true}, // ^arg0
-
-		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-
-		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // index of the highest-order set bit in arg0 ; undef if zero
-		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // index of the highest-order set bit in arg0 ; undef if zero
-
-		{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
-
-		{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
-
-		{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear.
-		// Note: SBBW and SBBB are subsumed by SBBL
-
-		{name: "SETEQ", argLength: 1, reg: readflags, asm: "SETEQ"}, // extract == condition from arg0
-		{name: "SETNE", argLength: 1, reg: readflags, asm: "SETNE"}, // extract != condition from arg0
-		{name: "SETL", argLength: 1, reg: readflags, asm: "SETLT"},  // extract signed < condition from arg0
-		{name: "SETLE", argLength: 1, reg: readflags, asm: "SETLE"}, // extract signed <= condition from arg0
-		{name: "SETG", argLength: 1, reg: readflags, asm: "SETGT"},  // extract signed > condition from arg0
-		{name: "SETGE", argLength: 1, reg: readflags, asm: "SETGE"}, // extract signed >= condition from arg0
-		{name: "SETB", argLength: 1, reg: readflags, asm: "SETCS"},  // extract unsigned < condition from arg0
-		{name: "SETBE", argLength: 1, reg: readflags, asm: "SETLS"}, // extract unsigned <= condition from arg0
-		{name: "SETA", argLength: 1, reg: readflags, asm: "SETHI"},  // extract unsigned > condition from arg0
-		{name: "SETAE", argLength: 1, reg: readflags, asm: "SETCC"}, // extract unsigned >= condition from arg0
-		// Need different opcodes for floating point conditions because
-		// any comparison involving a NaN is always FALSE and thus
-		// the patterns for inverting conditions cannot be used.
-		{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0
-		{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0
-		{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"},                       // extract "ordered" (No Nan present) condition from arg0
-		{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"},                       // extract "unordered" (Nan present) condition from arg0
-
-		{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"},  // extract floating > condition from arg0
-		{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0
-
-		{name: "MOVBLSX", argLength: 1, reg: gp11, asm: "MOVBLSX"}, // sign extend arg0 from int8 to int32
-		{name: "MOVBLZX", argLength: 1, reg: gp11, asm: "MOVBLZX"}, // zero extend arg0 from int8 to int32
-		{name: "MOVWLSX", argLength: 1, reg: gp11, asm: "MOVWLSX"}, // sign extend arg0 from int16 to int32
-		{name: "MOVWLZX", argLength: 1, reg: gp11, asm: "MOVWLZX"}, // zero extend arg0 from int16 to int32
-
-		{name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint
-
-		{name: "CVTTSD2SL", argLength: 1, reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32
-		{name: "CVTTSS2SL", argLength: 1, reg: fpgp, asm: "CVTTSS2SL"}, // convert float32 to int32
-		{name: "CVTSL2SS", argLength: 1, reg: gpfp, asm: "CVTSL2SS"},   // convert int32 to float32
-		{name: "CVTSL2SD", argLength: 1, reg: gpfp, asm: "CVTSL2SD"},   // convert int32 to float64
-		{name: "CVTSD2SS", argLength: 1, reg: fp11, asm: "CVTSD2SS"},   // convert float64 to float32
-		{name: "CVTSS2SD", argLength: 1, reg: fp11, asm: "CVTSS2SD"},   // convert float32 to float64
-
-		{name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
-
-		{name: "LEAL", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
-		{name: "LEAL1", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + arg1 + auxint + aux
-		{name: "LEAL2", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 2*arg1 + auxint + aux
-		{name: "LEAL4", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 4*arg1 + auxint + aux
-		{name: "LEAL8", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 8*arg1 + auxint + aux
-		// Note: LEAL{1,2,4,8} must not have OpSB as either argument.
-
-		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
-		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8"},  // load byte from arg0+auxint+aux. arg1=mem.  Zero extend.
-		{name: "MOVBLSXload", argLength: 2, reg: gpload, asm: "MOVBLSX", aux: "SymOff"},             // ditto, sign extend to int32
-		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16"}, // load 2 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
-		{name: "MOVWLSXload", argLength: 2, reg: gpload, asm: "MOVWLSX", aux: "SymOff"},             // ditto, sign extend to int32
-		{name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32"},    // load 4 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
-		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem"},     // store byte in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem"},     // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem"},     // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
-
-		// indexed loads/stores
-		{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
-		{name: "MOVWloadidx1", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-		{name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
-		{name: "MOVLloadidx1", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-		{name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff"},    // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
-		// TODO: sign-extending indexed loads
-		{name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-		{name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-		{name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
-		{name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-		{name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
-		// TODO: add size-mismatched indexed loads and stores, like MOVBstoreidx4.
-
-		// For storeconst ops, the AuxInt field encodes both
-		// the value to store and an address offset of the store.
-		// Cast AuxInt to a ValAndOff to extract Val and Off fields.
-		{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux.  arg1=mem
-		{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ...
-		{name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ...
-
-		{name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVB", aux: "SymValAndOff", typ: "Mem"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+1*arg1+ValAndOff(AuxInt).Off()+aux.  arg2=mem
-		{name: "MOVWstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ... arg1 ...
-		{name: "MOVWstoreconstidx2", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ... 2*arg1 ...
-		{name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... arg1 ...
-		{name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... 4*arg1 ...
-
-		// arg0 = pointer to start of memory to zero
-		// arg1 = value to store (will always be zero)
-		// arg2 = mem
-		// auxint = offset into duffzero code to start executing
-		// returns mem
-		{
-			name:      "DUFFZERO",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("AX")},
-				clobbers: buildReg("DI CX"),
-				// Note: CX is only clobbered when dynamic linking.
-			},
-		},
-
-		// arg0 = address of memory to zero
-		// arg1 = # of 4-byte words to zero
-		// arg2 = value to store (will always be zero)
-		// arg3 = mem
-		// returns mem
-		{
-			name:      "REPSTOSL",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("CX"), buildReg("AX")},
-				clobbers: buildReg("DI CX"),
-			},
-		},
-
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                             // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("DX"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                               // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                  // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                        // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
-
-		// arg0 = destination pointer
-		// arg1 = source pointer
-		// arg2 = mem
-		// auxint = offset from duffcopy symbol to call
-		// returns memory
-		{
-			name:      "DUFFCOPY",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("SI")},
-				clobbers: buildReg("DI SI CX"), // uses CX as a temporary
-			},
-			clobberFlags: true,
-		},
-
-		// arg0 = destination pointer
-		// arg1 = source pointer
-		// arg2 = # of 4-byte words to copy
-		// arg3 = mem
-		// returns memory
-		{
-			name:      "REPMOVSL",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("SI"), buildReg("CX")},
-				clobbers: buildReg("DI SI CX"),
-			},
-		},
-
-		// (InvertFlags (CMPL a b)) == (CMPL b a)
-		// So if we want (SETL (CMPL a b)) but we can't do that because a is a constant,
-		// then we do (SETL (InvertFlags (CMPL b a))) instead.
-		// Rewrites will convert this to (SETG (CMPL b a)).
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
-
-		// Pseudo-ops
-		{name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of DX (the closure pointer)
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("DX")}}},
-		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true},
-
-		// MOVLconvert converts between pointers and integers.
-		// We have a special op for this so as to not confuse GC
-		// (particularly stack maps).  It takes a memory arg so it
-		// gets correctly ordered with respect to GC safepoints.
-		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVLconvert", argLength: 2, reg: gp11, asm: "MOVL"},
-
-		// Constant flag values. For any comparison, there are 5 possible
-		// outcomes: the three from the signed total order (<,==,>) and the
-		// three from the unsigned total order. The == cases overlap.
-		// Note: there's a sixth "unordered" outcome for floating-point
-		// comparisons, but we don't use such a beast yet.
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"},     // equal
-		{name: "FlagLT_ULT"}, // signed < and unsigned <
-		{name: "FlagLT_UGT"}, // signed < and unsigned >
-		{name: "FlagGT_UGT"}, // signed > and unsigned >
-		{name: "FlagGT_ULT"}, // signed > and unsigned <
-
-		// Special op for -x on 387
-		{name: "FCHS", argLength: 1, reg: fp11},
-
-		// Special ops for PIC floating-point constants.
-		// MOVSXconst1 loads the address of the constant-pool entry into a register.
-		// MOVSXconst2 loads the constant from that address.
-		// MOVSXconst1 returns a pointer, but we type it as uint32 because it can never point to the Go heap.
-		{name: "MOVSSconst1", reg: gp01, typ: "UInt32", aux: "Float32"},
-		{name: "MOVSDconst1", reg: gp01, typ: "UInt32", aux: "Float64"},
-		{name: "MOVSSconst2", argLength: 1, reg: gpfp, asm: "MOVSS"},
-		{name: "MOVSDconst2", argLength: 1, reg: gpfp, asm: "MOVSD"},
-	}
-
-	var _386blocks = []blockData{
-		{name: "EQ"},
-		{name: "NE"},
-		{name: "LT"},
-		{name: "LE"},
-		{name: "GT"},
-		{name: "GE"},
-		{name: "ULT"},
-		{name: "ULE"},
-		{name: "UGT"},
-		{name: "UGE"},
-		{name: "EQF"},
-		{name: "NEF"},
-		{name: "ORD"}, // FP, ordered comparison (parity zero)
-		{name: "NAN"}, // FP, unordered comparison (parity one)
-	}
-
-	archs = append(archs, arch{
-		name:            "386",
-		pkg:             "cmd/internal/obj/x86",
-		genfile:         "../../x86/ssa.go",
-		ops:             _386ops,
-		blocks:          _386blocks,
-		regnames:        regNames386,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: int8(num["BP"]),
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -4,8 +4,7 @@

 // Lowering arithmetic
 (Add64  x y) -> (ADDQ  x y)
-(AddPtr x y) && config.PtrSize == 8 -> (ADDQ x y)
-(AddPtr x y) && config.PtrSize == 4 -> (ADDL x y)
+(AddPtr x y) -> (ADDQ  x y)
 (Add32  x y) -> (ADDL  x y)
 (Add16  x y) -> (ADDL  x y)
 (Add8   x y) -> (ADDL  x y)
@@ -13,8 +12,7 @@
 (Add64F x y) -> (ADDSD x y)

 (Sub64  x y) -> (SUBQ  x y)
-(SubPtr x y) && config.PtrSize == 8 -> (SUBQ x y)
-(SubPtr x y) && config.PtrSize == 4 -> (SUBL x y)
+(SubPtr x y) -> (SUBQ  x y)
 (Sub32  x y) -> (SUBL  x y)
 (Sub16  x y) -> (SUBL  x y)
 (Sub8   x y) -> (SUBL  x y)
@@ -31,14 +29,14 @@
 (Div32F x y) -> (DIVSS x y)
 (Div64F x y) -> (DIVSD x y)

-(Div64  x y) -> (Select0 (DIVQ  x y))
-(Div64u x y) -> (Select0 (DIVQU x y))
-(Div32  x y) -> (Select0 (DIVL  x y))
-(Div32u x y) -> (Select0 (DIVLU x y))
-(Div16  x y) -> (Select0 (DIVW  x y))
-(Div16u x y) -> (Select0 (DIVWU x y))
-(Div8   x y) -> (Select0 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
-(Div8u  x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
+(Div64  x y) -> (DIVQ  x y)
+(Div64u x y) -> (DIVQU x y)
+(Div32  x y) -> (DIVL  x y)
+(Div32u x y) -> (DIVLU x y)
+(Div16  x y) -> (DIVW  x y)
+(Div16u x y) -> (DIVWU x y)
+(Div8   x y) -> (DIVW  (SignExt8to16 x) (SignExt8to16 y))
+(Div8u  x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))

 (Hmul64  x y) -> (HMULQ  x y)
 (Hmul64u x y) -> (HMULQU x y)
@@ -51,14 +49,14 @@

 (Avg64u x y) -> (AVGQU x y)

-(Mod64  x y) -> (Select1 (DIVQ  x y))
-(Mod64u x y) -> (Select1 (DIVQU x y))
-(Mod32  x y) -> (Select1 (DIVL  x y))
-(Mod32u x y) -> (Select1 (DIVLU x y))
-(Mod16  x y) -> (Select1 (DIVW  x y))
-(Mod16u x y) -> (Select1 (DIVWU x y))
-(Mod8   x y) -> (Select1 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
-(Mod8u  x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
+(Mod64  x y) -> (MODQ  x y)
+(Mod64u x y) -> (MODQU x y)
+(Mod32  x y) -> (MODL  x y)
+(Mod32u x y) -> (MODLU x y)
+(Mod16  x y) -> (MODW  x y)
+(Mod16u x y) -> (MODWU x y)
+(Mod8   x y) -> (MODW  (SignExt8to16 x) (SignExt8to16 y))
+(Mod8u  x y) -> (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))

 (And64 x y) -> (ANDQ x y)
 (And32 x y) -> (ANDL x y)
@@ -93,9 +91,8 @@
 (Not x) -> (XORLconst [1] x)

 // Lowering pointer arithmetic
-(OffPtr [off] ptr) && config.PtrSize == 8 && is32Bit(off) -> (ADDQconst [off] ptr)
-(OffPtr [off] ptr) && config.PtrSize == 8 -> (ADDQ (MOVQconst [off]) ptr)
-(OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr)
+(OffPtr [off] ptr) && is32Bit(off) -> (ADDQconst [off] ptr)
+(OffPtr [off] ptr) -> (ADDQ (MOVQconst [off]) ptr)

 // Lowering other arithmetic
 // TODO: CMPQconst 0 below is redundant because BSF sets Z but how to remove?
@@ -273,8 +270,7 @@
 (Eq16  x y) -> (SETEQ (CMPW x y))
 (Eq8   x y) -> (SETEQ (CMPB x y))
 (EqB   x y) -> (SETEQ (CMPB x y))
-(EqPtr x y) && config.PtrSize == 8 -> (SETEQ (CMPQ x y))
-(EqPtr x y) && config.PtrSize == 4 -> (SETEQ (CMPL x y))
+(EqPtr x y) -> (SETEQ (CMPQ x y))
 (Eq64F x y) -> (SETEQF (UCOMISD x y))
 (Eq32F x y) -> (SETEQF (UCOMISS x y))

@@ -283,16 +279,13 @@
 (Neq16  x y) -> (SETNE (CMPW x y))
 (Neq8   x y) -> (SETNE (CMPB x y))
 (NeqB   x y) -> (SETNE (CMPB x y))
-(NeqPtr x y) && config.PtrSize == 8 -> (SETNE (CMPQ x y))
-(NeqPtr x y) && config.PtrSize == 4 -> (SETNE (CMPL x y))
+(NeqPtr x y) -> (SETNE (CMPQ x y))
 (Neq64F x y) -> (SETNEF (UCOMISD x y))
 (Neq32F x y) -> (SETNEF (UCOMISS x y))

-(Int64Hi x) -> (SHRQconst [32] x) // needed for amd64p32
-
 // Lowering loads
-(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t) && config.PtrSize == 8) -> (MOVQload ptr mem)
-(Load <t> ptr mem) && (is32BitInt(t) || isPtr(t) && config.PtrSize == 4) -> (MOVLload ptr mem)
+(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem)
+(Load <t> ptr mem) && is32BitInt(t) -> (MOVLload ptr mem)
 (Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
 (Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
 (Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
@@ -309,47 +302,39 @@
 (Store [1] ptr val mem) -> (MOVBstore ptr val mem)

 // Lowering moves
-(Move [s] _ _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore dst (MOVBload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 -> (MOVWstore dst (MOVWload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 -> (MOVLstore dst (MOVLload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 -> (MOVQstore dst (MOVQload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 16 -> (MOVOstore dst (MOVOload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 3 ->
+(Move [0] _ _ mem) -> mem
+(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
+(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
+(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
+(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
+(Move [16] dst src mem) -> (MOVOstore dst (MOVOload src mem) mem)
+(Move [3] dst src mem) ->
 	(MOVBstore [2] dst (MOVBload [2] src mem)
 		(MOVWstore dst (MOVWload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 5 ->
+(Move [5] dst src mem) ->
 	(MOVBstore [4] dst (MOVBload [4] src mem)
 		(MOVLstore dst (MOVLload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 6 ->
+(Move [6] dst src mem) ->
 	(MOVWstore [4] dst (MOVWload [4] src mem)
 		(MOVLstore dst (MOVLload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 7 ->
+(Move [7] dst src mem) ->
 	(MOVLstore [3] dst (MOVLload [3] src mem)
 		(MOVLstore dst (MOVLload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() < 16 ->
-	(MOVQstore [SizeAndAlign(s).Size()-8] dst (MOVQload [SizeAndAlign(s).Size()-8] src mem)
+(Move [size] dst src mem) && size > 8 && size < 16 ->
+	(MOVQstore [size-8] dst (MOVQload [size-8] src mem)
 		(MOVQstore dst (MOVQload src mem) mem))

 // Adjust moves to be a multiple of 16 bytes.
-(Move [s] dst src mem)
-	&& SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 <= 8 ->
-	(Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16]
-		(OffPtr <dst.Type> dst [SizeAndAlign(s).Size()%16])
-		(OffPtr <src.Type> src [SizeAndAlign(s).Size()%16])
+(Move [size] dst src mem) && size > 16 && size%16 != 0 && size%16 <= 8 ->
+	(Move [size-size%16] (ADDQconst <dst.Type> dst [size%16]) (ADDQconst <src.Type> src [size%16])
 		(MOVQstore dst (MOVQload src mem) mem))
-(Move [s] dst src mem)
-	&& SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 > 8 ->
-	(Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16]
-		(OffPtr <dst.Type> dst [SizeAndAlign(s).Size()%16])
-		(OffPtr <src.Type> src [SizeAndAlign(s).Size()%16])
+(Move [size] dst src mem) && size > 16 && size%16 != 0 && size%16 > 8 ->
+	(Move [size-size%16] (ADDQconst <dst.Type> dst [size%16]) (ADDQconst <src.Type> src [size%16])
 		(MOVOstore dst (MOVOload src mem) mem))

 // Medium copying uses a duff device.
-(Move [s] dst src mem)
-	&& SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0
-	&& !config.noDuffDevice ->
-	(DUFFCOPY [14*(64-SizeAndAlign(s).Size()/16)] dst src mem)
+(Move [size] dst src mem) && size >= 32 && size <= 16*64 && size%16 == 0 && !config.noDuffDevice ->
+	(DUFFCOPY [14*(64-size/16)] dst src mem)
 // 14 and 64 are magic constants.  14 is the number of bytes to encode:
 //	MOVUPS	(SI), X0
 //	ADDQ	$16, SI
@@ -358,62 +343,57 @@
 // and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.

 // Large copying uses REP MOVSQ.
-(Move [s] dst src mem) && (SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0 ->
-	(REPMOVSQ dst src (MOVQconst [SizeAndAlign(s).Size()/8]) mem)
+(Move [size] dst src mem) && (size > 16*64 || config.noDuffDevice) && size%8 == 0 ->
+	(REPMOVSQ dst src (MOVQconst [size/8]) mem)

 // Lowering Zero instructions
-(Zero [s] _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstoreconst [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 2 -> (MOVWstoreconst [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 -> (MOVLstoreconst [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 -> (MOVQstoreconst [0] destptr mem)
+(Zero [0] _ mem) -> mem
+(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
+(Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
+(Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
+(Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)

-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 3 ->
+(Zero [3] destptr mem) ->
 	(MOVBstoreconst [makeValAndOff(0,2)] destptr
 		(MOVWstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 5 ->
+(Zero [5] destptr mem) ->
 	(MOVBstoreconst [makeValAndOff(0,4)] destptr
 		(MOVLstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 6 ->
+(Zero [6] destptr mem) ->
 	(MOVWstoreconst [makeValAndOff(0,4)] destptr
 		(MOVLstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 7 ->
+(Zero [7] destptr mem) ->
 	(MOVLstoreconst [makeValAndOff(0,3)] destptr
 		(MOVLstoreconst [0] destptr mem))

 // Strip off any fractional word zeroing.
-(Zero [s] destptr mem) && SizeAndAlign(s).Size()%8 != 0 && SizeAndAlign(s).Size() > 8 ->
-	(Zero [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8] (OffPtr <destptr.Type> destptr [SizeAndAlign(s).Size()%8])
+(Zero [size] destptr mem) && size%8 != 0 && size > 8 ->
+	(Zero [size-size%8] (ADDQconst destptr [size%8])
 		(MOVQstoreconst [0] destptr mem))

 // Zero small numbers of words directly.
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 16 ->
+(Zero [16] destptr mem) ->
 	(MOVQstoreconst [makeValAndOff(0,8)] destptr
 		(MOVQstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 24 ->
+(Zero [24] destptr mem) ->
 	(MOVQstoreconst [makeValAndOff(0,16)] destptr
 		(MOVQstoreconst [makeValAndOff(0,8)] destptr
 			(MOVQstoreconst [0] destptr mem)))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 32 ->
+(Zero [32] destptr mem) ->
 	(MOVQstoreconst [makeValAndOff(0,24)] destptr
 		(MOVQstoreconst [makeValAndOff(0,16)] destptr
 			(MOVQstoreconst [makeValAndOff(0,8)] destptr
 				(MOVQstoreconst [0] destptr mem))))

 // Medium zeroing uses a duff device.
-(Zero [s] destptr mem)
-	&& SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size()%16 != 0
-	&& !config.noDuffDevice ->
-	(Zero [SizeAndAlign(s).Size()-8] (OffPtr <destptr.Type> [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
-(Zero [s] destptr mem)
-	&& SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice ->
-	(DUFFZERO [SizeAndAlign(s).Size()] destptr (MOVOconst [0]) mem)
+(Zero [size] destptr mem) && size <= 1024 && size%8 == 0 && size%16 != 0 && !config.noDuffDevice ->
+	(Zero [size-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
+(Zero [size] destptr mem) && size <= 1024 && size%16 == 0 && !config.noDuffDevice ->
+	(DUFFZERO [size] destptr (MOVOconst [0]) mem)

 // Large zeroing uses REP STOSQ.
-(Zero [s] destptr mem)
-	&& (SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32))
-	&& SizeAndAlign(s).Size()%8 == 0 ->
-	(REPSTOSQ destptr (MOVQconst [SizeAndAlign(s).Size()/8]) (MOVQconst [0]) mem)
+(Zero [size] destptr mem) && (size > 1024 || (config.noDuffDevice && size > 32)) && size%8 == 0 ->
+	(REPSTOSQ destptr (MOVQconst [size/8]) (MOVQconst [0]) mem)

 // Lowering constants
 (Const8   [val]) -> (MOVLconst [val])
@@ -422,8 +402,7 @@
 (Const64  [val]) -> (MOVQconst [val])
 (Const32F [val]) -> (MOVSSconst [val])
 (Const64F [val]) -> (MOVSDconst [val])
-(ConstNil) && config.PtrSize == 8 -> (MOVQconst [0])
-(ConstNil) && config.PtrSize == 4 -> (MOVLconst [0])
+(ConstNil) -> (MOVQconst [0])
 (ConstBool [b]) -> (MOVLconst [b])

 // Lowering calls
@@ -434,17 +413,15 @@
 (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)

 // Miscellaneous
-(Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem)
-(Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem)
-(IsNonNil p) && config.PtrSize == 8 -> (SETNE (TESTQ p p))
-(IsNonNil p) && config.PtrSize == 4 -> (SETNE (TESTL p p))
+(Convert <t> x mem) -> (MOVQconvert <t> x mem)
+(IsNonNil p) -> (SETNE (TESTQ p p))
 (IsInBounds idx len) -> (SETB (CMPQ idx len))
 (IsSliceInBounds idx len) -> (SETBE (CMPQ idx len))
 (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
 (GetG mem) -> (LoweredGetG mem)
 (GetClosurePtr) -> (LoweredGetClosurePtr)
-(Addr {sym} base) && config.PtrSize == 8 -> (LEAQ {sym} base)
-(Addr {sym} base) && config.PtrSize == 4 -> (LEAL {sym} base)
+(Addr {sym} base) -> (LEAQ {sym} base)
+(ITab (Load ptr mem)) -> (MOVQload ptr mem)

 // block rewrites
 (If (SETL  cmp) yes no) -> (LT  cmp yes no)
@@ -518,12 +495,6 @@
 (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
 (ANDQconst [c] (ANDQconst [d] x)) -> (ANDQconst [c & d] x)

-(XORLconst [c] (XORLconst [d] x)) -> (XORLconst [c ^ d] x)
-(XORQconst [c] (XORQconst [d] x)) -> (XORQconst [c ^ d] x)
-
-(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
-(MULQconst [c] (MULQconst [d] x)) -> (MULQconst [c * d] x)
-
 (ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
 (ORQ (MOVQconst [c]) x) && is32Bit(c) -> (ORQconst [c] x)
 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
@@ -573,16 +544,6 @@
 (SHRL x (ANDLconst [31] y)) -> (SHRL x y)
 (SHRQ x (ANDQconst [63] y)) -> (SHRQ x y)

-(ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
-(ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
-(ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
-(ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
-
-(ROLQconst [0] x) -> x
-(ROLLconst [0] x) -> x
-(ROLWconst [0] x) -> x
-(ROLBconst [0] x) -> x
-
 // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
 // because the x86 instructions are defined to use all 5 bits of the shift even
 // for the small shifts. I don't think we'll ever generate a weird shift (e.g.
@@ -1603,53 +1564,3 @@
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
-
-// amd64p32 rules
-// same as the rules above, but with 32 instead of 64 bit pointer arithmetic.
-// LEAQ,ADDQ -> LEAL,ADDL
-(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
-(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
-
-(MOVQload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
-	(MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-
-(MOVQstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
-	(MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-
-(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-
-(MOVQload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQload  [off1+off2] {sym} ptr mem)
-(MOVLload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVLload  [off1+off2] {sym} ptr mem)
-(MOVWload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVWload  [off1+off2] {sym} ptr mem)
-(MOVBload  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload  [off1+off2] {sym} ptr mem)
-(MOVQstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVQstore  [off1+off2] {sym} ptr val mem)
-(MOVLstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore  [off1+off2] {sym} ptr val mem)
-(MOVWstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore  [off1+off2] {sym} ptr val mem)
-(MOVBstore  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore  [off1+off2] {sym} ptr val mem)
-(MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -64,6 +64,7 @@ var regNamesAMD64 = []string{

 	// pseudo-registers
 	"SB",
+	"FLAGS",
 }

 func init() {
@@ -97,36 +98,43 @@ func init() {
 		fp         = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15")
 		gpsp       = gp | buildReg("SP")
 		gpspsb     = gpsp | buildReg("SB")
-		callerSave = gp | fp
+		flags      = buildReg("FLAGS")
+		callerSave = gp | fp | flags
 	)
 	// Common slices of register masks
 	var (
-		gponly = []regMask{gp}
-		fponly = []regMask{fp}
+		gponly    = []regMask{gp}
+		fponly    = []regMask{fp}
+		flagsonly = []regMask{flags}
 	)

 	// Common regInfo
 	var (
-		gp01      = regInfo{inputs: nil, outputs: gponly}
-		gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly}
-		gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
+		gp01      = regInfo{inputs: []regMask{}, outputs: gponly}
+		gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly, clobbers: flags}
+		gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags}
+		gp11nf    = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered
 		gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
-		gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-		gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
+		gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly, clobbers: flags}
+		gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly, clobbers: flags}
 		gp21sb    = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
-		gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
-		gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax, dx}}
-		gp21hmul  = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
+		gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}, clobbers: flags}
+		gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
+			clobbers: dx | flags}
+		gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
+			clobbers: ax | flags}
+		gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
+			clobbers: ax | flags}

-		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
-		gp1flags = regInfo{inputs: []regMask{gpsp}}
-		flagsgp  = regInfo{inputs: nil, outputs: gponly}
+		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
+		gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
+		flagsgp  = regInfo{inputs: flagsonly, outputs: gponly}

 		// for CMOVconst -- uses AX to hold constant temporary.
-		gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax}, clobbers: ax, outputs: []regMask{gp &^ ax}}
+		gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}

-		readflags = regInfo{inputs: nil, outputs: gponly}
-		flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
+		readflags = regInfo{inputs: flagsonly, outputs: gponly}
+		flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}

 		gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
 		gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
@@ -136,14 +144,14 @@ func init() {
 		gpstoreidx      = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
 		gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}

-		fp01    = regInfo{inputs: nil, outputs: fponly}
+		fp01    = regInfo{inputs: []regMask{}, outputs: fponly}
 		fp21    = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
 		fp21x15 = regInfo{inputs: []regMask{fp &^ x15, fp &^ x15},
 			clobbers: x15, outputs: []regMask{fp &^ x15}}
 		fpgp     = regInfo{inputs: fponly, outputs: gponly}
 		gpfp     = regInfo{inputs: gponly, outputs: fponly}
 		fp11     = regInfo{inputs: fponly, outputs: fponly}
-		fp2flags = regInfo{inputs: []regMask{fp, fp}}
+		fp2flags = regInfo{inputs: []regMask{fp, fp}, outputs: flagsonly}

 		fpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly}
 		fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly}
@@ -180,53 +188,60 @@ func init() {
 		{name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store

 		// binary ops
-		{name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true, clobberFlags: true},                // arg0 + arg1
-		{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                // arg0 + arg1
-		{name: "ADDQconst", argLength: 1, reg: gp11sp, asm: "ADDQ", aux: "Int64", typ: "UInt64", clobberFlags: true}, // arg0 + auxint
-		{name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", clobberFlags: true},                // arg0 + auxint
+		{name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true},                // arg0 + arg1
+		{name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true},                // arg0 + arg1
+		{name: "ADDQconst", argLength: 1, reg: gp11sp, asm: "ADDQ", aux: "Int64", typ: "UInt64"}, // arg0 + auxint
+		{name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32"},                // arg0 + auxint

-		{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
-		{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
-		{name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
-		{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
+		{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true},                    // arg0 - arg1
+		{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true},                    // arg0 - arg1
+		{name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int64", resultInArg0: true}, // arg0 - auxint
+		{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true}, // arg0 - auxint

-		{name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
-		{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
-		{name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
-		{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
+		{name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true}, // arg0 * arg1
+		{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true}, // arg0 * arg1
+		{name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64", resultInArg0: true}, // arg0 * auxint
+		{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true}, // arg0 * auxint

-		{name: "HMULQ", argLength: 2, reg: gp21hmul, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULW", argLength: 2, reg: gp21hmul, asm: "IMULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULB", argLength: 2, reg: gp21hmul, asm: "IMULB", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULQU", argLength: 2, reg: gp21hmul, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULWU", argLength: 2, reg: gp21hmul, asm: "MULW", clobberFlags: true}, // (arg0 * arg1) >> width
-		{name: "HMULBU", argLength: 2, reg: gp21hmul, asm: "MULB", clobberFlags: true}, // (arg0 * arg1) >> width
+		{name: "HMULQ", argLength: 2, reg: gp11hmul, asm: "IMULQ"}, // (arg0 * arg1) >> width
+		{name: "HMULL", argLength: 2, reg: gp11hmul, asm: "IMULL"}, // (arg0 * arg1) >> width
+		{name: "HMULW", argLength: 2, reg: gp11hmul, asm: "IMULW"}, // (arg0 * arg1) >> width
+		{name: "HMULB", argLength: 2, reg: gp11hmul, asm: "IMULB"}, // (arg0 * arg1) >> width
+		{name: "HMULQU", argLength: 2, reg: gp11hmul, asm: "MULQ"}, // (arg0 * arg1) >> width
+		{name: "HMULLU", argLength: 2, reg: gp11hmul, asm: "MULL"}, // (arg0 * arg1) >> width
+		{name: "HMULWU", argLength: 2, reg: gp11hmul, asm: "MULW"}, // (arg0 * arg1) >> width
+		{name: "HMULBU", argLength: 2, reg: gp11hmul, asm: "MULB"}, // (arg0 * arg1) >> width

-		{name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits
+		{name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits

-		{name: "DIVQ", argLength: 2, reg: gp11div, typ: "(Int64,Int64)", asm: "IDIVQ", clobberFlags: true},   // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVL", argLength: 2, reg: gp11div, typ: "(Int32,Int32)", asm: "IDIVL", clobberFlags: true},   // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVW", argLength: 2, reg: gp11div, typ: "(Int16,Int16)", asm: "IDIVW", clobberFlags: true},   // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVQU", argLength: 2, reg: gp11div, typ: "(UInt64,UInt64)", asm: "DIVQ", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVLU", argLength: 2, reg: gp11div, typ: "(UInt32,UInt32)", asm: "DIVL", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
-		{name: "DIVWU", argLength: 2, reg: gp11div, typ: "(UInt16,UInt16)", asm: "DIVW", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
+		{name: "DIVQ", argLength: 2, reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
+		{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
+		{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
+		{name: "DIVQU", argLength: 2, reg: gp11div, asm: "DIVQ"}, // arg0 / arg1
+		{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL"}, // arg0 / arg1
+		{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW"}, // arg0 / arg1

-		{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-		{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
-		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
+		{name: "MODQ", argLength: 2, reg: gp11mod, asm: "IDIVQ"}, // arg0 % arg1
+		{name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL"}, // arg0 % arg1
+		{name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW"}, // arg0 % arg1
+		{name: "MODQU", argLength: 2, reg: gp11mod, asm: "DIVQ"}, // arg0 % arg1
+		{name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL"}, // arg0 % arg1
+		{name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW"}, // arg0 % arg1

-		{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-		{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
-		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
+		{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true}, // arg0 & arg1
+		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true}, // arg0 & arg1
+		{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int64", resultInArg0: true}, // arg0 & auxint
+		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true}, // arg0 & auxint

-		{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-		{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
-		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
+		{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true}, // arg0 | arg1
+		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true}, // arg0 | arg1
+		{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int64", resultInArg0: true}, // arg0 | auxint
+		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true}, // arg0 | auxint
+
+		{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true}, // arg0 ^ arg1
+		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true}, // arg0 ^ arg1
+		{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int64", resultInArg0: true}, // arg0 ^ auxint
+		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true}, // arg0 ^ auxint

 		{name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"},                    // arg0 compare to arg1
 		{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"},                    // arg0 compare to arg1
@@ -249,60 +264,60 @@ func init() {
 		{name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
 		{name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0

-		{name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 64
-		{name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 32
-		{name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-63
-		{name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
+		{name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true},               // arg0 << arg1, shift amount is mod 64
+		{name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true},               // arg0 << arg1, shift amount is mod 32
+		{name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64", resultInArg0: true}, // arg0 << auxint, shift amount 0-63
+		{name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true}, // arg0 << auxint, shift amount 0-31
 		// Note: x86 is weird, the 16 and 8 bit shifts still use all 5 bits of shift amount!

-		{name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 64
-		{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-		{name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-63
-		{name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-		{name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // unsigned arg0 >> auxint, shift amount 0-31
+		{name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 64
+		{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 32
+		{name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 32
+		{name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true},               // unsigned arg0 >> arg1, shift amount is mod 32
+		{name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int64", resultInArg0: true}, // unsigned arg0 >> auxint, shift amount 0-63
+		{name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true}, // unsigned arg0 >> auxint, shift amount 0-31
+		{name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true}, // unsigned arg0 >> auxint, shift amount 0-31
+		{name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true},  // unsigned arg0 >> auxint, shift amount 0-31

-		{name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 64
-		{name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-		{name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
-		{name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-		{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // signed arg0 >> auxint, shift amount 0-31
+		{name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 64
+		{name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 32
+		{name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 32
+		{name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true},               // signed arg0 >> arg1, shift amount is mod 32
+		{name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int64", resultInArg0: true}, // signed arg0 >> auxint, shift amount 0-63
+		{name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true}, // signed arg0 >> auxint, shift amount 0-31
+		{name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true}, // signed arg0 >> auxint, shift amount 0-31
+		{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true},  // signed arg0 >> auxint, shift amount 0-31

-		{name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-63
-		{name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
-		{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
-		{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // arg0 rotate left auxint, rotate amount 0-7
+		{name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int64", resultInArg0: true}, // arg0 rotate left auxint, rotate amount 0-63
+		{name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true}, // arg0 rotate left auxint, rotate amount 0-31
+		{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true}, // arg0 rotate left auxint, rotate amount 0-15
+		{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true},  // arg0 rotate left auxint, rotate amount 0-7

 		// unary ops
-		{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true, clobberFlags: true}, // -arg0
-		{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
+		{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true}, // -arg0
+		{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true}, // -arg0

-		{name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true, clobberFlags: true}, // ^arg0
-		{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true, clobberFlags: true}, // ^arg0
+		{name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true}, // ^arg0
+		{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0

-		{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
+		{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero
+		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero
+		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // arg0 # of low-order zeroes ; undef if zero

-		{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
+		{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // arg0 # of high-order zeroes ; undef if zero
+		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // arg0 # of high-order zeroes ; undef if zero
+		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // arg0 # of high-order zeroes ; undef if zero

 		// Note ASM for ops moves whole register
-		{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-		{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-		{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-		{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
-		{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
-		{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set

-		{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
-		{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
+		{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 swap bytes
+		{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes

 		{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)

@@ -323,20 +338,20 @@ func init() {
 		// Need different opcodes for floating point conditions because
 		// any comparison involving a NaN is always FALSE and thus
 		// the patterns for inverting conditions cannot be used.
-		{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0
-		{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0
-		{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"},                       // extract "ordered" (No Nan present) condition from arg0
-		{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"},                       // extract "unordered" (Nan present) condition from arg0
+		{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ"}, // extract == condition from arg0
+		{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE"}, // extract != condition from arg0
+		{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"},   // extract "ordered" (No Nan present) condition from arg0
+		{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"},   // extract "unordered" (Nan present) condition from arg0

 		{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"},  // extract floating > condition from arg0
 		{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0

-		{name: "MOVBQSX", argLength: 1, reg: gp11, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64
-		{name: "MOVBQZX", argLength: 1, reg: gp11, asm: "MOVBQZX"}, // zero extend arg0 from int8 to int64
-		{name: "MOVWQSX", argLength: 1, reg: gp11, asm: "MOVWQSX"}, // sign extend arg0 from int16 to int64
-		{name: "MOVWQZX", argLength: 1, reg: gp11, asm: "MOVWQZX"}, // zero extend arg0 from int16 to int64
-		{name: "MOVLQSX", argLength: 1, reg: gp11, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64
-		{name: "MOVLQZX", argLength: 1, reg: gp11, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64
+		{name: "MOVBQSX", argLength: 1, reg: gp11nf, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64
+		{name: "MOVBQZX", argLength: 1, reg: gp11nf, asm: "MOVBQZX"}, // zero extend arg0 from int8 to int64
+		{name: "MOVWQSX", argLength: 1, reg: gp11nf, asm: "MOVWQSX"}, // sign extend arg0 from int16 to int64
+		{name: "MOVWQZX", argLength: 1, reg: gp11nf, asm: "MOVWQZX"}, // zero extend arg0 from int16 to int64
+		{name: "MOVLQSX", argLength: 1, reg: gp11nf, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64
+		{name: "MOVLQZX", argLength: 1, reg: gp11nf, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64

 		{name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint
 		{name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
@@ -354,15 +369,13 @@ func init() {

 		{name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.

-		{name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
-		{name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + arg1 + auxint + aux
-		{name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + 2*arg1 + auxint + aux
-		{name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + 4*arg1 + auxint + aux
-		{name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff"},                                      // arg0 + 8*arg1 + auxint + aux
+		{name: "LEAQ", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
+		{name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + arg1 + auxint + aux
+		{name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 2*arg1 + auxint + aux
+		{name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 4*arg1 + auxint + aux
+		{name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff"},                         // arg0 + 8*arg1 + auxint + aux
 		// Note: LEAQ{1,2,4,8} must not have OpSB as either argument.

-		{name: "LEAL", argLength: 1, reg: gp11sb, asm: "LEAL", aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
-
 		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
 		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8"},  // load byte from arg0+auxint+aux. arg1=mem.  Zero extend.
 		{name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff"},             // ditto, sign extend to int64
@@ -423,9 +436,8 @@ func init() {
 			argLength: 3,
 			reg: regInfo{
 				inputs:   []regMask{buildReg("DI"), buildReg("X0")},
-				clobbers: buildReg("DI"),
+				clobbers: buildReg("DI FLAGS"),
 			},
-			clobberFlags: true,
 		},
 		{name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", aux: "Int128", rematerializeable: true},

@@ -443,11 +455,11 @@ func init() {
 			},
 		},

-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                             // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("DX"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                               // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                  // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                        // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
+		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"},                                // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
+		{name: "CALLclosure", argLength: 3, reg: regInfo{[]regMask{gpsp, buildReg("DX"), 0}, callerSave, nil}, aux: "Int64"}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
+		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64"},                                  // call deferproc.  arg0=mem, auxint=argsize, returns mem
+		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64"},                                     // call newproc.  arg0=mem, auxint=argsize, returns mem
+		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64"},           // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem

 		// arg0 = destination pointer
 		// arg1 = source pointer
@@ -460,9 +472,8 @@ func init() {
 			argLength: 3,
 			reg: regInfo{
 				inputs:   []regMask{buildReg("DI"), buildReg("SI")},
-				clobbers: buildReg("DI SI X0"), // uses X0 as a temporary
+				clobbers: buildReg("DI SI X0 FLAGS"), // uses X0 as a temporary
 			},
-			clobberFlags: true,
 		},

 		// arg0 = destination pointer
@@ -493,15 +504,14 @@ func init() {
 		// use of DX (the closure pointer)
 		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("DX")}}},
 		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true},
+		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}, clobbers: flags}},

 		// MOVQconvert converts between pointers and integers.
 		// We have a special op for this so as to not confuse GC
 		// (particularly stack maps).  It takes a memory arg so it
 		// gets correctly ordered with respect to GC safepoints.
 		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVQconvert", argLength: 2, reg: gp11, asm: "MOVQ"},
-		{name: "MOVLconvert", argLength: 2, reg: gp11, asm: "MOVL"}, // amd64p32 equivalent
+		{name: "MOVQconvert", argLength: 2, reg: gp11nf, asm: "MOVQ"},

 		// Constant flag values. For any comparison, there are 5 possible
 		// outcomes: the three from the signed total order (<,==,>) and the
@@ -535,14 +545,11 @@ func init() {
 	}

 	archs = append(archs, arch{
-		name:            "AMD64",
-		pkg:             "cmd/internal/obj/x86",
-		genfile:         "../../amd64/ssa.go",
-		ops:             AMD64ops,
-		blocks:          AMD64blocks,
-		regnames:        regNamesAMD64,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: int8(num["BP"]),
+		name:     "AMD64",
+		pkg:      "cmd/internal/obj/x86",
+		genfile:  "../../amd64/ssa.go",
+		ops:      AMD64ops,
+		blocks:   AMD64blocks,
+		regnames: regNamesAMD64,
 	})
 }
--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -1,455 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import "strings"
-
-// Notes:
-//  - Integer types live in the low portion of registers. Upper portions are junk.
-//  - Boolean types use the low-order byte of a register. 0=false, 1=true.
-//    Upper bytes are junk.
-//  - *const instructions may use a constant larger than the instruction can encode.
-//    In this case the assembler expands to multiple instructions and uses tmp
-//    register (R27).
-
-// Suffixes encode the bit width of various instructions.
-// D (double word) = 64 bit
-// W (word)        = 32 bit
-// H (half word)   = 16 bit
-// HU              = 16 bit unsigned
-// B (byte)        = 8 bit
-// BU              = 8 bit unsigned
-// S (single)      = 32 bit float
-// D (double)      = 64 bit float
-
-// Note: registers not used in regalloc are not included in this list,
-// so that regmask stays within int64
-// Be careful when hand coding regmasks.
-var regNamesARM64 = []string{
-	"R0",
-	"R1",
-	"R2",
-	"R3",
-	"R4",
-	"R5",
-	"R6",
-	"R7",
-	"R8",
-	"R9",
-	"R10",
-	"R11",
-	"R12",
-	"R13",
-	"R14",
-	"R15",
-	"R16",
-	"R17",
-	"R18", // platform register, not used
-	"R19",
-	"R20",
-	"R21",
-	"R22",
-	"R23",
-	"R24",
-	"R25",
-	"R26",
-	// R27 = REGTMP not used in regalloc
-	"g",   // aka R28
-	"R29", // frame pointer, not used
-	// R30 = REGLINK not used in regalloc
-	"SP", // aka R31
-
-	"F0",
-	"F1",
-	"F2",
-	"F3",
-	"F4",
-	"F5",
-	"F6",
-	"F7",
-	"F8",
-	"F9",
-	"F10",
-	"F11",
-	"F12",
-	"F13",
-	"F14",
-	"F15",
-	"F16",
-	"F17",
-	"F18",
-	"F19",
-	"F20",
-	"F21",
-	"F22",
-	"F23",
-	"F24",
-	"F25",
-	"F26",
-	"F27",
-	"F28", // 0.0
-	"F29", // 0.5
-	"F30", // 1.0
-	"F31", // 2.0
-
-	// pseudo-registers
-	"SB",
-}
-
-func init() {
-	// Make map from reg names to reg integers.
-	if len(regNamesARM64) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNamesARM64 {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	// Common individual register masks
-	var (
-		gp         = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26")
-		gpg        = gp | buildReg("g")
-		gpsp       = gp | buildReg("SP")
-		gpspg      = gpg | buildReg("SP")
-		gpspsbg    = gpspg | buildReg("SB")
-		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27")
-		callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
-	)
-	// Common regInfo
-	var (
-		gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-		gp1flags  = regInfo{inputs: []regMask{gpg}}
-		gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-		gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
-		gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
-		//gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
-		//gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		//gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
-		//gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gpload   = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-		gpstore  = regInfo{inputs: []regMask{gpspsbg, gpg}}
-		gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
-		//gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
-		//gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
-		fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
-		fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-		//fp1flags  = regInfo{inputs: []regMask{fp}}
-		fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
-		gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
-		fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-		fp2flags  = regInfo{inputs: []regMask{fp, fp}}
-		fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
-		fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
-		readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
-	)
-	ops := []opData{
-		// binary ops
-		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
-		{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int64"},   // arg0 + auxInt
-		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0 - arg1
-		{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int64"},     // arg0 - auxInt
-		{name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true},     // arg0 * arg1
-		{name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true},   // arg0 * arg1, 32-bit
-		{name: "MULH", argLength: 2, reg: gp21, asm: "SMULH", commutative: true},  // (arg0 * arg1) >> 64, signed
-		{name: "UMULH", argLength: 2, reg: gp21, asm: "UMULH", commutative: true}, // (arg0 * arg1) >> 64, unsigned
-		{name: "MULL", argLength: 2, reg: gp21, asm: "SMULL", commutative: true},  // arg0 * arg1, signed, 32-bit mult results in 64-bit
-		{name: "UMULL", argLength: 2, reg: gp21, asm: "UMULL", commutative: true}, // arg0 * arg1, unsigned, 32-bit mult results in 64-bit
-		{name: "DIV", argLength: 2, reg: gp21, asm: "SDIV"},                       // arg0 / arg1, signed
-		{name: "UDIV", argLength: 2, reg: gp21, asm: "UDIV"},                      // arg0 / arg1, unsigned
-		{name: "DIVW", argLength: 2, reg: gp21, asm: "SDIVW"},                     // arg0 / arg1, signed, 32 bit
-		{name: "UDIVW", argLength: 2, reg: gp21, asm: "UDIVW"},                    // arg0 / arg1, unsigned, 32 bit
-		{name: "MOD", argLength: 2, reg: gp21, asm: "REM"},                        // arg0 % arg1, signed
-		{name: "UMOD", argLength: 2, reg: gp21, asm: "UREM"},                      // arg0 % arg1, unsigned
-		{name: "MODW", argLength: 2, reg: gp21, asm: "REMW"},                      // arg0 % arg1, signed, 32 bit
-		{name: "UMODW", argLength: 2, reg: gp21, asm: "UREMW"},                    // arg0 % arg1, unsigned, 32 bit
-
-		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0 + arg1
-		{name: "FADDD", argLength: 2, reg: fp21, asm: "FADDD", commutative: true}, // arg0 + arg1
-		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0 - arg1
-		{name: "FSUBD", argLength: 2, reg: fp21, asm: "FSUBD"},                    // arg0 - arg1
-		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0 * arg1
-		{name: "FMULD", argLength: 2, reg: fp21, asm: "FMULD", commutative: true}, // arg0 * arg1
-		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"},                    // arg0 / arg1
-		{name: "FDIVD", argLength: 2, reg: fp21, asm: "FDIVD"},                    // arg0 / arg1
-
-		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
-		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64"}, // arg0 & auxInt
-		{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true},  // arg0 | arg1
-		{name: "ORconst", argLength: 1, reg: gp11, asm: "ORR", aux: "Int64"},  // arg0 | auxInt
-		{name: "XOR", argLength: 2, reg: gp21, asm: "EOR", commutative: true}, // arg0 ^ arg1
-		{name: "XORconst", argLength: 1, reg: gp11, asm: "EOR", aux: "Int64"}, // arg0 ^ auxInt
-		{name: "BIC", argLength: 2, reg: gp21, asm: "BIC"},                    // arg0 &^ arg1
-		{name: "BICconst", argLength: 1, reg: gp11, asm: "BIC", aux: "Int64"}, // arg0 &^ auxInt
-
-		// unary ops
-		{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"},       // ^arg0
-		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},       // -arg0
-		{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"},   // -arg0, float32
-		{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"},   // -arg0, float64
-		{name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
-
-		// shifts
-		{name: "SLL", argLength: 2, reg: gp21, asm: "LSL"},                      // arg0 << arg1, shift amount is mod 64
-		{name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"},   // arg0 << auxInt
-		{name: "SRL", argLength: 2, reg: gp21, asm: "LSR"},                      // arg0 >> arg1, unsigned, shift amount is mod 64
-		{name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"},   // arg0 >> auxInt, unsigned
-		{name: "SRA", argLength: 2, reg: gp21, asm: "ASR"},                      // arg0 >> arg1, signed, shift amount is mod 64
-		{name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"},   // arg0 >> auxInt, signed
-		{name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"},   // arg0 right rotate by auxInt bits
-		{name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
-
-		// comparisons
-		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"},                      // arg0 compare to arg1
-		{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", aux: "Int64", typ: "Flags"},   // arg0 compare to auxInt
-		{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"},                    // arg0 compare to arg1, 32 bit
-		{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", aux: "Int32", typ: "Flags"}, // arg0 compare to auxInt, 32 bit
-		{name: "CMN", argLength: 2, reg: gp2flags, asm: "CMN", typ: "Flags"},                      // arg0 compare to -arg1
-		{name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int64", typ: "Flags"},   // arg0 compare to -auxInt
-		{name: "CMNW", argLength: 2, reg: gp2flags, asm: "CMNW", typ: "Flags"},                    // arg0 compare to -arg1, 32 bit
-		{name: "CMNWconst", argLength: 1, reg: gp1flags, asm: "CMNW", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt, 32 bit
-		{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"},                  // arg0 compare to arg1, float32
-		{name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
-
-		// shifted ops
-		{name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1<<auxInt
-		{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, unsigned shift
-		{name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, signed shift
-		{name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1<<auxInt
-		{name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, unsigned shift
-		{name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, signed shift
-		{name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1<<auxInt)
-		{name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), unsigned shift
-		{name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), signed shift
-		{name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1<<auxInt
-		{name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, unsigned shift
-		{name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, signed shift
-		{name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1<<auxInt
-		{name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, unsigned shift
-		{name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, signed shift
-		{name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1<<auxInt)
-		{name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), unsigned shift
-		{name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), signed shift
-		{name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1<<auxInt
-		{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
-		{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
-
-		// moves
-		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true},      // 64 bits from auxint
-		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
-		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float
-
-		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
-
-		{name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8"},      // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8"},   // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16"},     // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "Int32"},     // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVWU", typ: "UInt32"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVDload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVD", typ: "UInt64"},    // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32"}, // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64"}, // load from arg0 + auxInt + aux.  arg1=mem.
-
-		{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem"},   // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem"},   // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem"},   // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem"},   // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-
-		{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
-
-		// conversions
-		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"},   // move from arg0, sign-extended from byte
-		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
-		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"},   // move from arg0, sign-extended from half
-		{name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half
-		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"},   // move from arg0, sign-extended from word
-		{name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word
-		{name: "MOVDreg", argLength: 1, reg: gp11, asm: "MOVD"},   // move from arg0
-
-		{name: "MOVDnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register
-
-		{name: "SCVTFWS", argLength: 1, reg: gpfp, asm: "SCVTFWS"},   // int32 -> float32
-		{name: "SCVTFWD", argLength: 1, reg: gpfp, asm: "SCVTFWD"},   // int32 -> float64
-		{name: "UCVTFWS", argLength: 1, reg: gpfp, asm: "UCVTFWS"},   // uint32 -> float32
-		{name: "UCVTFWD", argLength: 1, reg: gpfp, asm: "UCVTFWD"},   // uint32 -> float64
-		{name: "SCVTFS", argLength: 1, reg: gpfp, asm: "SCVTFS"},     // int64 -> float32
-		{name: "SCVTFD", argLength: 1, reg: gpfp, asm: "SCVTFD"},     // int64 -> float64
-		{name: "UCVTFS", argLength: 1, reg: gpfp, asm: "UCVTFS"},     // uint64 -> float32
-		{name: "UCVTFD", argLength: 1, reg: gpfp, asm: "UCVTFD"},     // uint64 -> float64
-		{name: "FCVTZSSW", argLength: 1, reg: fpgp, asm: "FCVTZSSW"}, // float32 -> int32
-		{name: "FCVTZSDW", argLength: 1, reg: fpgp, asm: "FCVTZSDW"}, // float64 -> int32
-		{name: "FCVTZUSW", argLength: 1, reg: fpgp, asm: "FCVTZUSW"}, // float32 -> uint32
-		{name: "FCVTZUDW", argLength: 1, reg: fpgp, asm: "FCVTZUDW"}, // float64 -> uint32
-		{name: "FCVTZSS", argLength: 1, reg: fpgp, asm: "FCVTZSS"},   // float32 -> int64
-		{name: "FCVTZSD", argLength: 1, reg: fpgp, asm: "FCVTZSD"},   // float64 -> int64
-		{name: "FCVTZUS", argLength: 1, reg: fpgp, asm: "FCVTZUS"},   // float32 -> uint64
-		{name: "FCVTZUD", argLength: 1, reg: fpgp, asm: "FCVTZUD"},   // float64 -> uint64
-		{name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD"},     // float32 -> float64
-		{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"},     // float64 -> float32
-
-		// conditional instructions
-		{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"},  // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
-		{name: "CSELULT0", argLength: 2, reg: gp1flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, 0 otherwise, arg1=flags
-
-		// function calls
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                              // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R26"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                   // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                         // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
-
-		// pseudo-ops
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}}, // panic if arg0 is nil.  arg1=mem.
-
-		{name: "Equal", argLength: 1, reg: readflags},         // bool, true flags encode x==y false otherwise.
-		{name: "NotEqual", argLength: 1, reg: readflags},      // bool, true flags encode x!=y false otherwise.
-		{name: "LessThan", argLength: 1, reg: readflags},      // bool, true flags encode signed x<y false otherwise.
-		{name: "LessEqual", argLength: 1, reg: readflags},     // bool, true flags encode signed x<=y false otherwise.
-		{name: "GreaterThan", argLength: 1, reg: readflags},   // bool, true flags encode signed x>y false otherwise.
-		{name: "GreaterEqual", argLength: 1, reg: readflags},  // bool, true flags encode signed x>=y false otherwise.
-		{name: "LessThanU", argLength: 1, reg: readflags},     // bool, true flags encode unsigned x<y false otherwise.
-		{name: "LessEqualU", argLength: 1, reg: readflags},    // bool, true flags encode unsigned x<=y false otherwise.
-		{name: "GreaterThanU", argLength: 1, reg: readflags},  // bool, true flags encode unsigned x>y false otherwise.
-		{name: "GreaterEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>=y false otherwise.
-
-		// duffzero
-		// arg0 = address of memory to zero
-		// arg1 = mem
-		// auxint = offset into duffzero code to start executing
-		// returns mem
-		// R16 aka arm64.REGRT1 changed as side effect
-		{
-			name:      "DUFFZERO",
-			aux:       "Int64",
-			argLength: 2,
-			reg: regInfo{
-				inputs:   []regMask{gp},
-				clobbers: buildReg("R16"),
-			},
-		},
-
-		// large zeroing
-		// arg0 = address of memory to zero (in R16 aka arm64.REGRT1, changed as side effect)
-		// arg1 = address of the last element to zero
-		// arg2 = mem
-		// auxint = alignment
-		// returns mem
-		//	MOVD.P	ZR, 8(R16)
-		//	CMP	Rarg1, R16
-		//	BLE	-2(PC)
-		// Note: the-end-of-the-memory may be not a valid pointer. it's a problem if it is spilled.
-		// the-end-of-the-memory - 8 is within the area to zero, ok to spill.
-		{
-			name:      "LoweredZero",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R16"), gp},
-				clobbers: buildReg("R16"),
-			},
-			clobberFlags: true,
-		},
-
-		// large move
-		// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
-		// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
-		// arg2 = address of the last element of src
-		// arg3 = mem
-		// auxint = alignment
-		// returns mem
-		//	MOVD.P	8(R16), Rtmp
-		//	MOVD.P	Rtmp, 8(R17)
-		//	CMP	Rarg2, R16
-		//	BLE	-3(PC)
-		// Note: the-end-of-src may be not a valid pointer. it's a problem if it is spilled.
-		// the-end-of-src - 8 is within the area to copy, ok to spill.
-		{
-			name:      "LoweredMove",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R17"), buildReg("R16"), gp},
-				clobbers: buildReg("R16 R17"),
-			},
-			clobberFlags: true,
-		},
-
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of R26 (arm64.REGCTXT, the closure pointer)
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R26")}}},
-
-		// MOVDconvert converts between pointers and integers.
-		// We have a special op for this so as to not confuse GC
-		// (particularly stack maps).  It takes a memory arg so it
-		// gets correctly ordered with respect to GC safepoints.
-		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVDconvert", argLength: 2, reg: gp11, asm: "MOVD"},
-
-		// Constant flag values. For any comparison, there are 5 possible
-		// outcomes: the three from the signed total order (<,==,>) and the
-		// three from the unsigned total order. The == cases overlap.
-		// Note: there's a sixth "unordered" outcome for floating-point
-		// comparisons, but we don't use such a beast yet.
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"},     // equal
-		{name: "FlagLT_ULT"}, // signed < and unsigned <
-		{name: "FlagLT_UGT"}, // signed < and unsigned >
-		{name: "FlagGT_UGT"}, // signed > and unsigned >
-		{name: "FlagGT_ULT"}, // signed > and unsigned <
-
-		// (InvertFlags (CMP a b)) == (CMP b a)
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
-	}
-
-	blocks := []blockData{
-		{name: "EQ"},
-		{name: "NE"},
-		{name: "LT"},
-		{name: "LE"},
-		{name: "GT"},
-		{name: "GE"},
-		{name: "ULT"},
-		{name: "ULE"},
-		{name: "UGT"},
-		{name: "UGE"},
-	}
-
-	archs = append(archs, arch{
-		name:            "ARM64",
-		pkg:             "cmd/internal/obj/arm64",
-		genfile:         "../../arm64/ssa.go",
-		ops:             ops,
-		blocks:          blocks,
-		regnames:        regNamesARM64,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: -1, // not used
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/ARMOps.go
+++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go
@@ -6,481 +6,32 @@

 package main

-import "strings"
-
-// Notes:
-//  - Integer types live in the low portion of registers. Upper portions are junk.
-//  - Boolean types use the low-order byte of a register. 0=false, 1=true.
-//    Upper bytes are junk.
-//  - *const instructions may use a constant larger than the instruction can encode.
-//    In this case the assembler expands to multiple instructions and uses tmp
-//    register (R11).
-
-// Suffixes encode the bit width of various instructions.
-// W (word)      = 32 bit
-// H (half word) = 16 bit
-// HU            = 16 bit unsigned
-// B (byte)      = 8 bit
-// BU            = 8 bit unsigned
-// F (float)     = 32 bit float
-// D (double)    = 64 bit float
-
-var regNamesARM = []string{
-	"R0",
-	"R1",
-	"R2",
-	"R3",
-	"R4",
-	"R5",
-	"R6",
-	"R7",
-	"R8",
-	"R9",
-	"g",   // aka R10
-	"R11", // tmp
-	"R12",
-	"SP",  // aka R13
-	"R14", // link
-	"R15", // pc
-
-	"F0",
-	"F1",
-	"F2",
-	"F3",
-	"F4",
-	"F5",
-	"F6",
-	"F7",
-	"F8",
-	"F9",
-	"F10",
-	"F11",
-	"F12",
-	"F13",
-	"F14",
-	"F15", // tmp
-
-	// pseudo-registers
-	"SB",
-}
-
 func init() {
-	// Make map from reg names to reg integers.
-	if len(regNamesARM) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNamesARM {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	// Common individual register masks
 	var (
-		gp         = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12")
-		gpg        = gp | buildReg("g")
-		gpsp       = gp | buildReg("SP")
-		gpspg      = gpg | buildReg("SP")
-		gpspsbg    = gpspg | buildReg("SB")
-		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15")
-		callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
-	)
-	// Common regInfo
-	var (
-		gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{0, gp}}
-		gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-		gp1flags  = regInfo{inputs: []regMask{gpg}}
-		gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
-		gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-		gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{0, gp}}
-		gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
-		gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
-		gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
-		gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{0, gp}}
-		gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
-		gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-		gpstore   = regInfo{inputs: []regMask{gpspsbg, gpg}}
-		gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
-		gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
-		fp01      = regInfo{inputs: nil, outputs: []regMask{fp}}
-		fp11      = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-		fp1flags  = regInfo{inputs: []regMask{fp}}
-		fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
-		gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
-		fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-		fp2flags  = regInfo{inputs: []regMask{fp, fp}}
-		fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
-		fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
-		readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
+		gp01       = regInfo{inputs: []regMask{}, outputs: []regMask{31}}
+		gp11       = regInfo{inputs: []regMask{31}, outputs: []regMask{31}}
+		gp21       = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{31}}
+		gp2flags   = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{32}}
+		gpload     = regInfo{inputs: []regMask{31}, outputs: []regMask{31}}
+		gpstore    = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{}}
+		flagsgp    = regInfo{inputs: []regMask{32}, outputs: []regMask{31}}
+		callerSave = regMask(15)
 	)
 	ops := []opData{
-		// binary ops
-		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
-		{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int32"},   // arg0 + auxInt
-		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0 - arg1
-		{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int32"},     // arg0 - auxInt
-		{name: "RSB", argLength: 2, reg: gp21, asm: "RSB"},                        // arg1 - arg0
-		{name: "RSBconst", argLength: 1, reg: gp11, asm: "RSB", aux: "Int32"},     // auxInt - arg0
-		{name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true},     // arg0 * arg1
-		{name: "HMUL", argLength: 2, reg: gp21, asm: "MULL", commutative: true},   // (arg0 * arg1) >> 32, signed
-		{name: "HMULU", argLength: 2, reg: gp21, asm: "MULLU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
-		{name: "DIV", argLength: 2, reg: gp21, asm: "DIV", clobberFlags: true},    // arg0 / arg1, signed, soft div clobbers flags
-		{name: "DIVU", argLength: 2, reg: gp21, asm: "DIVU", clobberFlags: true},  // arg0 / arg1, unsigned
-		{name: "MOD", argLength: 2, reg: gp21, asm: "MOD", clobberFlags: true},    // arg0 % arg1, signed
-		{name: "MODU", argLength: 2, reg: gp21, asm: "MODU", clobberFlags: true},  // arg0 % arg1, unsigned
+		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},  // arg0 + arg1
+		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "SymOff"}, // arg0 + auxInt + aux.(*gc.Sym)

-		{name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag
-		{name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag
-		{name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true},  // arg0 + arg1 + carry, arg2=flags
-		{name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"},  // arg0 + auxInt + carry, arg1=flags
-		{name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"},                    // arg0 - arg1, set carry flag
-		{name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag
-		{name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag
-		{name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"},                     // arg0 - arg1 - carry, arg2=flags
-		{name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"},  // arg0 - auxInt - carry, arg1=flags
-		{name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"},  // auxInt - arg0 - carry, arg1=flags
+		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", rematerializeable: true}, // 32 low bits of auxint

-		{name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1
-		{name: "MULA", argLength: 3, reg: gp31, asm: "MULA"},                      // arg0 * arg1 + arg2
+		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1

-		{name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1
-		{name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1
-		{name: "SUBF", argLength: 2, reg: fp21, asm: "SUBF"},                    // arg0 - arg1
-		{name: "SUBD", argLength: 2, reg: fp21, asm: "SUBD"},                    // arg0 - arg1
-		{name: "MULF", argLength: 2, reg: fp21, asm: "MULF", commutative: true}, // arg0 * arg1
-		{name: "MULD", argLength: 2, reg: fp21, asm: "MULD", commutative: true}, // arg0 * arg1
-		{name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"},                    // arg0 / arg1
-		{name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"},                    // arg0 / arg1
+		{name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW"},   // load from arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.

-		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
-		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
-		{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true},  // arg0 | arg1
-		{name: "ORconst", argLength: 1, reg: gp11, asm: "ORR", aux: "Int32"},  // arg0 | auxInt
-		{name: "XOR", argLength: 2, reg: gp21, asm: "EOR", commutative: true}, // arg0 ^ arg1
-		{name: "XORconst", argLength: 1, reg: gp11, asm: "EOR", aux: "Int32"}, // arg0 ^ auxInt
-		{name: "BIC", argLength: 2, reg: gp21, asm: "BIC"},                    // arg0 &^ arg1
-		{name: "BICconst", argLength: 1, reg: gp11, asm: "BIC", aux: "Int32"}, // arg0 &^ auxInt
-
-		// unary ops
-		{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
-
-		{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"},   // -arg0, float32
-		{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"},   // -arg0, float64
-		{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
-
-		// shifts
-		{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"},                    // arg0 << arg1, shift amount is mod 256
-		{name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int32"}, // arg0 << auxInt
-		{name: "SRL", argLength: 2, reg: gp21, asm: "SRL"},                    // arg0 >> arg1, unsigned, shift amount is mod 256
-		{name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int32"}, // arg0 >> auxInt, unsigned
-		{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"},                    // arg0 >> arg1, signed, shift amount is mod 256
-		{name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed
-		{name: "SRRconst", argLength: 1, reg: gp11, aux: "Int32"},             // arg0 right rotate by auxInt bits
-
-		{name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1<<auxInt
-		{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, unsigned shift
-		{name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, signed shift
-		{name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1<<auxInt
-		{name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, unsigned shift
-		{name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, signed shift
-		{name: "RSBshiftLL", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1<<auxInt - arg0
-		{name: "RSBshiftRL", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, unsigned shift
-		{name: "RSBshiftRA", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, signed shift
-		{name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1<<auxInt)
-		{name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1>>auxInt), unsigned shift
-		{name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1>>auxInt), signed shift
-		{name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"},  // arg0 | arg1<<auxInt
-		{name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"},  // arg0 | arg1>>auxInt, unsigned shift
-		{name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"},  // arg0 | arg1>>auxInt, signed shift
-		{name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1<<auxInt
-		{name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1>>auxInt, unsigned shift
-		{name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1>>auxInt, signed shift
-		{name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1<<auxInt)
-		{name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1>>auxInt), unsigned shift
-		{name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1>>auxInt), signed shift
-		{name: "MVNshiftLL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0<<auxInt)
-		{name: "MVNshiftRL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0>>auxInt), unsigned shift
-		{name: "MVNshiftRA", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0>>auxInt), signed shift
-
-		{name: "ADCshiftLL", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1<<auxInt + carry, arg2=flags
-		{name: "ADCshiftRL", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1>>auxInt + carry, unsigned shift, arg2=flags
-		{name: "ADCshiftRA", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1>>auxInt + carry, signed shift, arg2=flags
-		{name: "SBCshiftLL", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1<<auxInt - carry, arg2=flags
-		{name: "SBCshiftRL", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1>>auxInt - carry, unsigned shift, arg2=flags
-		{name: "SBCshiftRA", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1>>auxInt - carry, signed shift, arg2=flags
-		{name: "RSCshiftLL", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1<<auxInt - arg0 - carry, arg2=flags
-		{name: "RSCshiftRL", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1>>auxInt - arg0 - carry, unsigned shift, arg2=flags
-		{name: "RSCshiftRA", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1>>auxInt - arg0 - carry, signed shift, arg2=flags
-
-		{name: "ADDSshiftLL", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1<<auxInt, set carry flag
-		{name: "ADDSshiftRL", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, unsigned shift, set carry flag
-		{name: "ADDSshiftRA", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, signed shift, set carry flag
-		{name: "SUBSshiftLL", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1<<auxInt, set carry flag
-		{name: "SUBSshiftRL", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, unsigned shift, set carry flag
-		{name: "SUBSshiftRA", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, signed shift, set carry flag
-		{name: "RSBSshiftLL", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1<<auxInt - arg0, set carry flag
-		{name: "RSBSshiftRL", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, unsigned shift, set carry flag
-		{name: "RSBSshiftRA", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, signed shift, set carry flag
-
-		{name: "ADDshiftLLreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1<<arg2
-		{name: "ADDshiftRLreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1>>arg2, unsigned shift
-		{name: "ADDshiftRAreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1>>arg2, signed shift
-		{name: "SUBshiftLLreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1<<arg2
-		{name: "SUBshiftRLreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1>>arg2, unsigned shift
-		{name: "SUBshiftRAreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1>>arg2, signed shift
-		{name: "RSBshiftLLreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1<<arg2 - arg0
-		{name: "RSBshiftRLreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1>>arg2 - arg0, unsigned shift
-		{name: "RSBshiftRAreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1>>arg2 - arg0, signed shift
-		{name: "ANDshiftLLreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1<<arg2)
-		{name: "ANDshiftRLreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1>>arg2), unsigned shift
-		{name: "ANDshiftRAreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1>>arg2), signed shift
-		{name: "ORshiftLLreg", argLength: 3, reg: gp31, asm: "ORR"},  // arg0 | arg1<<arg2
-		{name: "ORshiftRLreg", argLength: 3, reg: gp31, asm: "ORR"},  // arg0 | arg1>>arg2, unsigned shift
-		{name: "ORshiftRAreg", argLength: 3, reg: gp31, asm: "ORR"},  // arg0 | arg1>>arg2, signed shift
-		{name: "XORshiftLLreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1<<arg2
-		{name: "XORshiftRLreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1>>arg2, unsigned shift
-		{name: "XORshiftRAreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1>>arg2, signed shift
-		{name: "BICshiftLLreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1<<arg2)
-		{name: "BICshiftRLreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1>>arg2), unsigned shift
-		{name: "BICshiftRAreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1>>arg2), signed shift
-		{name: "MVNshiftLLreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0<<arg1)
-		{name: "MVNshiftRLreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0>>arg1), unsigned shift
-		{name: "MVNshiftRAreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0>>arg1), signed shift
-
-		{name: "ADCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1<<arg2 + carry, arg3=flags
-		{name: "ADCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1>>arg2 + carry, unsigned shift, arg3=flags
-		{name: "ADCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1>>arg2 + carry, signed shift, arg3=flags
-		{name: "SBCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1<<arg2 - carry, arg3=flags
-		{name: "SBCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1>>arg2 - carry, unsigned shift, arg3=flags
-		{name: "SBCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1>>arg2 - carry, signed shift, arg3=flags
-		{name: "RSCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1<<arg2 - arg0 - carry, arg3=flags
-		{name: "RSCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1>>arg2 - arg0 - carry, unsigned shift, arg3=flags
-		{name: "RSCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1>>arg2 - arg0 - carry, signed shift, arg3=flags
-
-		{name: "ADDSshiftLLreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1<<arg2, set carry flag
-		{name: "ADDSshiftRLreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1>>arg2, unsigned shift, set carry flag
-		{name: "ADDSshiftRAreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1>>arg2, signed shift, set carry flag
-		{name: "SUBSshiftLLreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1<<arg2, set carry flag
-		{name: "SUBSshiftRLreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1>>arg2, unsigned shift, set carry flag
-		{name: "SUBSshiftRAreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1>>arg2, signed shift, set carry flag
-		{name: "RSBSshiftLLreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1<<arg2 - arg0, set carry flag
-		{name: "RSBSshiftRLreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1>>arg2 - arg0, unsigned shift, set carry flag
-		{name: "RSBSshiftRAreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1>>arg2 - arg0, signed shift, set carry flag
-
-		// comparisons
-		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"},                    // arg0 compare to arg1
-		{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to auxInt
-		{name: "CMN", argLength: 2, reg: gp2flags, asm: "CMN", typ: "Flags"},                    // arg0 compare to -arg1
-		{name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt
-		{name: "TST", argLength: 2, reg: gp2flags, asm: "TST", typ: "Flags", commutative: true}, // arg0 & arg1 compare to 0
-		{name: "TSTconst", argLength: 1, reg: gp1flags, asm: "TST", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0
-		{name: "TEQ", argLength: 2, reg: gp2flags, asm: "TEQ", typ: "Flags", commutative: true}, // arg0 ^ arg1 compare to 0
-		{name: "TEQconst", argLength: 1, reg: gp1flags, asm: "TEQ", aux: "Int32", typ: "Flags"}, // arg0 ^ auxInt compare to 0
-		{name: "CMPF", argLength: 2, reg: fp2flags, asm: "CMPF", typ: "Flags"},                  // arg0 compare to arg1, float32
-		{name: "CMPD", argLength: 2, reg: fp2flags, asm: "CMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
-
-		{name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1<<auxInt
-		{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
-		{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
-
-		{name: "CMPshiftLLreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1<<arg2
-		{name: "CMPshiftRLreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1>>arg2, unsigned shift
-		{name: "CMPshiftRAreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1>>arg2, signed shift
-
-		{name: "CMPF0", argLength: 1, reg: fp1flags, asm: "CMPF", typ: "Flags"}, // arg0 compare to 0, float32
-		{name: "CMPD0", argLength: 1, reg: fp1flags, asm: "CMPD", typ: "Flags"}, // arg0 compare to 0, float64
-
-		// moves
-		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", typ: "UInt32", rematerializeable: true},    // 32 low bits of auxint
-		{name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
-		{name: "MOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float
-
-		{name: "MOVWaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVW", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
-
-		{name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8"},     // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16"},    // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16"}, // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "UInt32"},   // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVFload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVF", typ: "Float32"},  // load from arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVD", typ: "Float64"},  // load from arg0 + auxInt + aux.  arg1=mem.
-
-		{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVFstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVF", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-		{name: "MOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
-
-		{name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW"},                   // load from arg0 + arg1. arg2=mem
-		{name: "MOVWloadshiftLL", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32"}, // load from arg0 + arg1<<auxInt. arg2=mem
-		{name: "MOVWloadshiftRL", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32"}, // load from arg0 + arg1>>auxInt, unsigned shift. arg2=mem
-		{name: "MOVWloadshiftRA", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32"}, // load from arg0 + arg1>>auxInt, signed shift. arg2=mem
-
-		{name: "MOVWstoreidx", argLength: 4, reg: gp2store, asm: "MOVW"},                   // store arg2 to arg0 + arg1. arg3=mem
-		{name: "MOVWstoreshiftLL", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32"}, // store arg2 to arg0 + arg1<<auxInt. arg3=mem
-		{name: "MOVWstoreshiftRL", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32"}, // store arg2 to arg0 + arg1>>auxInt, unsigned shift. arg3=mem
-		{name: "MOVWstoreshiftRA", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32"}, // store arg2 to arg0 + arg1>>auxInt, signed shift. arg3=mem
-
-		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVBS"},  // move from arg0, sign-extended from byte
-		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, zero-extended from byte
-		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVHS"},  // move from arg0, sign-extended from half
-		{name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, zero-extended from half
-		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"},   // move from arg0
-
-		{name: "MOVWnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register
-
-		{name: "MOVWF", argLength: 1, reg: gpfp, asm: "MOVWF"},  // int32 -> float32
-		{name: "MOVWD", argLength: 1, reg: gpfp, asm: "MOVWD"},  // int32 -> float64
-		{name: "MOVWUF", argLength: 1, reg: gpfp, asm: "MOVWF"}, // uint32 -> float32, set U bit in the instruction
-		{name: "MOVWUD", argLength: 1, reg: gpfp, asm: "MOVWD"}, // uint32 -> float64, set U bit in the instruction
-		{name: "MOVFW", argLength: 1, reg: fpgp, asm: "MOVFW"},  // float32 -> int32
-		{name: "MOVDW", argLength: 1, reg: fpgp, asm: "MOVDW"},  // float64 -> int32
-		{name: "MOVFWU", argLength: 1, reg: fpgp, asm: "MOVFW"}, // float32 -> uint32, set U bit in the instruction
-		{name: "MOVDWU", argLength: 1, reg: fpgp, asm: "MOVDW"}, // float64 -> uint32, set U bit in the instruction
-		{name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"},  // float32 -> float64
-		{name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"},  // float64 -> float32
-
-		// conditional instructions, for lowering shifts
-		{name: "CMOVWHSconst", argLength: 2, reg: gp1flags1, asm: "MOVW", aux: "Int32", resultInArg0: true}, // replace arg0 w/ const if flags indicates HS, arg1=flags
-		{name: "CMOVWLSconst", argLength: 2, reg: gp1flags1, asm: "MOVW", aux: "Int32", resultInArg0: true}, // replace arg0 w/ const if flags indicates LS, arg1=flags
-		{name: "SRAcond", argLength: 3, reg: gp2flags1, asm: "SRA"},                                         // arg0 >> 31 if flags indicates HS, arg0 >> arg1 otherwise, signed shift, arg2=flags
-
-		// function calls
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                             // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R7"), 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                               // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                                  // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                        // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
+		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"}, // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem

 		// pseudo-ops
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}}, // panic if arg0 is nil.  arg1=mem.
-
-		{name: "Equal", argLength: 1, reg: readflags},         // bool, true flags encode x==y false otherwise.
-		{name: "NotEqual", argLength: 1, reg: readflags},      // bool, true flags encode x!=y false otherwise.
-		{name: "LessThan", argLength: 1, reg: readflags},      // bool, true flags encode signed x<y false otherwise.
-		{name: "LessEqual", argLength: 1, reg: readflags},     // bool, true flags encode signed x<=y false otherwise.
-		{name: "GreaterThan", argLength: 1, reg: readflags},   // bool, true flags encode signed x>y false otherwise.
-		{name: "GreaterEqual", argLength: 1, reg: readflags},  // bool, true flags encode signed x>=y false otherwise.
-		{name: "LessThanU", argLength: 1, reg: readflags},     // bool, true flags encode unsigned x<y false otherwise.
-		{name: "LessEqualU", argLength: 1, reg: readflags},    // bool, true flags encode unsigned x<=y false otherwise.
-		{name: "GreaterThanU", argLength: 1, reg: readflags},  // bool, true flags encode unsigned x>y false otherwise.
-		{name: "GreaterEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>=y false otherwise.
-
-		// duffzero (must be 4-byte aligned)
-		// arg0 = address of memory to zero (in R1, changed as side effect)
-		// arg1 = value to store (always zero)
-		// arg2 = mem
-		// auxint = offset into duffzero code to start executing
-		// returns mem
-		{
-			name:      "DUFFZERO",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R1"), buildReg("R0")},
-				clobbers: buildReg("R1"),
-			},
-		},
-
-		// duffcopy (must be 4-byte aligned)
-		// arg0 = address of dst memory (in R2, changed as side effect)
-		// arg1 = address of src memory (in R1, changed as side effect)
-		// arg2 = mem
-		// auxint = offset into duffcopy code to start executing
-		// returns mem
-		{
-			name:      "DUFFCOPY",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R2"), buildReg("R1")},
-				clobbers: buildReg("R0 R1 R2"),
-			},
-		},
-
-		// large or unaligned zeroing
-		// arg0 = address of memory to zero (in R1, changed as side effect)
-		// arg1 = address of the last element to zero
-		// arg2 = value to store (always zero)
-		// arg3 = mem
-		// returns mem
-		//	MOVW.P	Rarg2, 4(R1)
-		//	CMP	R1, Rarg1
-		//	BLE	-2(PC)
-		{
-			name:      "LoweredZero",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R1"), gp, gp},
-				clobbers: buildReg("R1"),
-			},
-			clobberFlags: true,
-		},
-
-		// large or unaligned move
-		// arg0 = address of dst memory (in R2, changed as side effect)
-		// arg1 = address of src memory (in R1, changed as side effect)
-		// arg2 = address of the last element of src
-		// arg3 = mem
-		// returns mem
-		//	MOVW.P	4(R1), Rtmp
-		//	MOVW.P	Rtmp, 4(R2)
-		//	CMP	R1, Rarg2
-		//	BLE	-3(PC)
-		{
-			name:      "LoweredMove",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R2"), buildReg("R1"), gp},
-				clobbers: buildReg("R1 R2"),
-			},
-			clobberFlags: true,
-		},
-
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of R7 (arm.REGCTXT, the closure pointer)
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R7")}}},
-
-		// MOVWconvert converts between pointers and integers.
-		// We have a special op for this so as to not confuse GC
-		// (particularly stack maps).  It takes a memory arg so it
-		// gets correctly ordered with respect to GC safepoints.
-		// arg0=ptr/int arg1=mem, output=int/ptr
-		{name: "MOVWconvert", argLength: 2, reg: gp11, asm: "MOVW"},
-
-		// Constant flag values. For any comparison, there are 5 possible
-		// outcomes: the three from the signed total order (<,==,>) and the
-		// three from the unsigned total order. The == cases overlap.
-		// Note: there's a sixth "unordered" outcome for floating-point
-		// comparisons, but we don't use such a beast yet.
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"},     // equal
-		{name: "FlagLT_ULT"}, // signed < and unsigned <
-		{name: "FlagLT_UGT"}, // signed < and unsigned >
-		{name: "FlagGT_UGT"}, // signed > and unsigned >
-		{name: "FlagGT_ULT"}, // signed > and unsigned <
-
-		// (InvertFlags (CMP a b)) == (CMP b a)
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
+		{name: "LessThan", argLength: 1, reg: flagsgp}, // bool, 1 if flags encode x<y, 0 otherwise.
 	}

 	blocks := []blockData{
@@ -496,15 +47,22 @@ func init() {
 		{name: "UGE"},
 	}

+	regNames := []string{
+		"R0",
+		"R1",
+		"R2",
+		"R3",
+		"SP",
+		"FLAGS",
+		"SB",
+	}
+
 	archs = append(archs, arch{
-		name:            "ARM",
-		pkg:             "cmd/internal/obj/arm",
-		genfile:         "../../arm/ssa.go",
-		ops:             ops,
-		blocks:          blocks,
-		regnames:        regNamesARM,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: -1, // not used
+		name:     "ARM",
+		pkg:      "cmd/internal/obj/arm",
+		genfile:  "../../arm/ssa.go",
+		ops:      ops,
+		blocks:   blocks,
+		regnames: regNames,
 	})
 }
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -1,573 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Lowering arithmetic
-(Add64  x y) -> (ADD  x y)
-(AddPtr x y) -> (ADD  x y)
-(Add32  x y) -> (ADD x y)
-(Add16  x y) -> (ADD x y)
-(Add8   x y) -> (ADD x y)
-(Add64F x y) -> (FADD x y)
-(Add32F x y) -> (FADDS x y)
-
-(Sub64  x y) -> (SUB  x y)
-(SubPtr x y) -> (SUB  x y)
-(Sub32  x y) -> (SUB x y)
-(Sub16  x y) -> (SUB x y)
-(Sub8   x y) -> (SUB x y)
-(Sub32F x y) -> (FSUBS x y)
-(Sub64F x y) -> (FSUB x y)
-
-(Mod16 x y) -> (Mod32 (SignExt16to32 x) (SignExt16to32 y))
-(Mod16u x y) -> (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) -> (Mod32 (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) -> (Mod32u (ZeroExt8to32 x) (ZeroExt8to32 y))
-(Mod64 x y) -> (SUB x (MULLD y (DIVD x y)))
-(Mod64u x y) -> (SUB x (MULLD y (DIVDU x y)))
-(Mod32 x y) -> (SUB x (MULLW y (DIVW x y)))
-(Mod32u x y) -> (SUB x (MULLW y (DIVWU x y)))
-
-(Avg64u <t> x y) -> (ADD (ADD <t> (SRD <t> x (MOVDconst <t> [1])) (SRD <t> y (MOVDconst <t> [1]))) (ANDconst <t> (AND <t> x y) [1]))
-
-(Mul64  x y) -> (MULLD  x y)
-(Mul32  x y) -> (MULLW  x y)
-(Mul16  x y) -> (MULLW x y)
-(Mul8   x y) -> (MULLW x y)
-
-(Div64  x y) -> (DIVD  x y)
-(Div64u x y) -> (DIVDU x y)
-(Div32  x y) -> (DIVW  x y)
-(Div32u x y) -> (DIVWU x y)
-(Div16  x y) -> (DIVW  (SignExt16to32 x) (SignExt16to32 y))
-(Div16u x y) -> (DIVWU (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Div8   x y) -> (DIVW  (SignExt8to32 x) (SignExt8to32 y))
-(Div8u  x y) -> (DIVWU (ZeroExt8to32 x) (ZeroExt8to32 y))
-
-(Hmul64  x y) -> (MULHD  x y)
-(Hmul64u  x y) -> (MULHDU x y)
-(Hmul32  x y) -> (MULHW  x y)
-(Hmul32u  x y) -> (MULHWU x y)
-(Hmul16 x y) -> (SRAWconst (MULLW <config.fe.TypeInt32()> (SignExt16to32 x) (SignExt16to32 y)) [16])
-(Hmul16u x y) -> (SRWconst (MULLW <config.fe.TypeUInt32()> (ZeroExt16to32 x) (ZeroExt16to32 y)) [16])
-(Hmul8 x y) -> (SRAWconst (MULLW <config.fe.TypeInt16()> (SignExt8to32 x) (SignExt8to32 y)) [8])
-(Hmul8u x y) -> (SRWconst (MULLW <config.fe.TypeUInt16()> (ZeroExt8to32 x) (ZeroExt8to32 y)) [8])
-
-(Mul32F x y) -> (FMULS x y)
-(Mul64F x y) -> (FMUL x y)
-
-(Div32F x y) -> (FDIVS x y)
-(Div64F x y) -> (FDIV x y)
-
-// Lowering float <-> int
-(Cvt32to32F x) -> (FRSP (FCFID (Xi2f64 (SignExt32to64 x))))
-(Cvt32to64F x) -> (FCFID (Xi2f64 (SignExt32to64 x)))
-(Cvt64to32F x) -> (FRSP (FCFID (Xi2f64 x)))
-(Cvt64to64F x) -> (FCFID (Xi2f64 x))
-
-(Cvt32Fto32 x) -> (Xf2i64 (FCTIWZ x))
-(Cvt32Fto64 x) -> (Xf2i64 (FCTIDZ x))
-(Cvt64Fto32 x) -> (Xf2i64 (FCTIWZ x))
-(Cvt64Fto64 x) -> (Xf2i64 (FCTIDZ x))
-
-(Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
-(Cvt64Fto32F x) -> (FRSP x)
-
-(Sqrt x) -> (FSQRT x)
-
-(Rsh64x64 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
-(Rsh64Ux64 x y) -> (SRD  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
-(Lsh64x64 x y)  -> (SLD  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
-
-(Rsh32x64 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
-(Rsh32Ux64 x y) -> (SRW  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
-(Lsh32x64 x y)  -> (SLW  x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
-
-(Rsh16x64 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
-(Rsh16Ux64 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
-(Lsh16x64 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
-
-(Rsh8x64 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
-(Rsh8Ux64 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
-(Lsh8x64 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
-
-
-(Rsh64x32 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
-(Rsh64Ux32 x y) -> (SRD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
-(Lsh64x32 x y)  -> (SLD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
-
-(Rsh32x32 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
-(Rsh32Ux32 x y) -> (SRW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
-(Lsh32x32 x y)  -> (SLW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
-
-(Rsh16x32 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
-(Rsh16Ux32 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
-(Lsh16x32 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
-
-(Rsh8x32 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
-(Rsh8Ux32 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
-(Lsh8x32 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
-
-
-(Rsh64x16 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
-(Rsh64Ux16 x y) -> (SRD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
-(Lsh64x16 x y)  -> (SLD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
-
-(Rsh32x16 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
-(Rsh32Ux16 x y) -> (SRW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
-(Lsh32x16 x y)  -> (SLW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
-
-(Rsh16x16 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
-(Rsh16Ux16 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
-(Lsh16x16 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
-
-(Rsh8x16 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
-(Rsh8Ux16 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
-(Lsh8x16 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
-
-
-(Rsh64x8 x y)  -> (SRAD x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
-(Rsh64Ux8 x y) -> (SRD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
-(Lsh64x8 x y)  -> (SLD x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
-
-(Rsh32x8 x y)  -> (SRAW x (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
-(Rsh32Ux8 x y) -> (SRW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
-(Lsh32x8 x y)  -> (SLW x  (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
-
-(Rsh16x8 x y)  -> (SRAW (SignExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
-(Rsh16Ux8 x y) -> (SRW  (ZeroExt16to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
-(Lsh16x8 x y)  -> (SLW  x                 (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
-
-(Rsh8x8 x y)  -> (SRAW (SignExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
-(Rsh8Ux8 x y) -> (SRW  (ZeroExt8to32 x) (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
-(Lsh8x8 x y)  -> (SLW  x                (ORN y <config.fe.TypeInt64()> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
-
-// Potentially useful optimizing rewrites.
-// (ADDconstForCarry [k] c), k < 0 && (c < 0 || k+c >= 0) -> CarrySet
-// (ADDconstForCarry [k] c), k < 0 && (c >= 0 && k+c < 0) -> CarryClear
-// (MaskIfNotCarry CarrySet) -> 0
-// (MaskIfNotCarry CarryClear) -> -1
-
-// Lowering constants
-(Const8   [val]) -> (MOVWconst [val])
-(Const16  [val]) -> (MOVWconst [val])
-(Const32  [val]) -> (MOVWconst [val])
-(Const64  [val]) -> (MOVDconst [val])
-(Const32F [val]) -> (FMOVSconst [val])
-(Const64F [val]) -> (FMOVDconst [val])
-(ConstNil) -> (MOVDconst [0])
-(ConstBool [b]) -> (MOVWconst [b])
-
-(Addr {sym} base) -> (MOVDaddr {sym} base)
-// (Addr {sym} base) -> (ADDconst {sym} base)
-(OffPtr [off] ptr) -> (ADD (MOVDconst <config.Frontend().TypeInt64()> [off]) ptr)
-
-(And64 x y) -> (AND x y)
-(And32 x y) -> (AND x y)
-(And16 x y) -> (AND x y)
-(And8  x y) -> (AND x y)
-
-(Or64 x y) -> (OR x y)
-(Or32 x y) -> (OR x y)
-(Or16 x y) -> (OR x y)
-(Or8  x y) -> (OR x y)
-
-(Xor64 x y) -> (XOR x y)
-(Xor32 x y) -> (XOR x y)
-(Xor16 x y) -> (XOR x y)
-(Xor8  x y) -> (XOR x y)
-
-(Neg64F x) -> (FNEG x)
-(Neg32F x) -> (FNEG x)
-(Neg64  x) -> (NEG x)
-(Neg32  x) -> (NEG x)
-(Neg16  x) -> (NEG x)
-(Neg8   x) -> (NEG x)
-
-(Com64 x) -> (XORconst [-1] x)
-(Com32 x) -> (XORconst [-1] x)
-(Com16 x) -> (XORconst [-1] x)
-(Com8  x) -> (XORconst [-1] x)
-
-// Lowering boolean ops
-(AndB x y) -> (AND x y)
-(OrB x y) -> (OR x y)
-(Not x) -> (XORconst [1] x)
-
-// Lowering comparisons
-(EqB x y)  -> (ANDconst [1] (EQV x y))
-(Eq8 x y)  -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Eq16 x y) -> (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Eq32 x y) -> (Equal (CMPW x y))
-(Eq64 x y) -> (Equal (CMP x y))
-(Eq32F x y) -> (Equal (FCMPU x y))
-(Eq64F x y) -> (Equal (FCMPU x y))
-(EqPtr x y) -> (Equal (CMP x y))
-
-(NeqB x y)  -> (XOR x y)
-(Neq8 x y)  -> (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Neq16 x y) -> (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Neq32 x y) -> (NotEqual (CMPW x y))
-(Neq64 x y) -> (NotEqual (CMP x y))
-(Neq32F x y) -> (NotEqual (FCMPU x y))
-(Neq64F x y) -> (NotEqual (FCMPU x y))
-(NeqPtr x y) -> (NotEqual (CMP x y))
-
-(Less8 x y)  -> (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Less16 x y) -> (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Less32 x y) -> (LessThan (CMPW x y))
-(Less64 x y) -> (LessThan (CMP x y))
-(Less32F x y) -> (FLessThan (FCMPU x y))
-(Less64F x y) -> (FLessThan (FCMPU x y))
-
-(Less8U x y)  -> (LessThan (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Less16U x y) -> (LessThan (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Less32U x y) -> (LessThan (CMPWU x y))
-(Less64U x y) -> (LessThan (CMPU x y))
-
-(Leq8 x y)  -> (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Leq16 x y) -> (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Leq32 x y) -> (LessEqual (CMPW x y))
-(Leq64 x y) -> (LessEqual (CMP x y))
-(Leq32F x y) -> (FLessEqual (FCMPU x y))
-(Leq64F x y) -> (FLessEqual (FCMPU x y))
-
-(Leq8U x y)  -> (LessEqual (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Leq16U x y) -> (LessEqual (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Leq32U x y) -> (LessEqual (CMPWU x y))
-(Leq64U x y) -> (LessEqual (CMPU x y))
-
-(Greater8 x y)  -> (GreaterThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Greater16 x y) -> (GreaterThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Greater32 x y) -> (GreaterThan (CMPW x y))
-(Greater64 x y) -> (GreaterThan (CMP x y))
-(Greater32F x y) -> (FGreaterThan (FCMPU x y))
-(Greater64F x y) -> (FGreaterThan (FCMPU x y))
-
-(Greater8U x y)  -> (GreaterThan (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Greater16U x y) -> (GreaterThan (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Greater32U x y) -> (GreaterThan (CMPWU x y))
-(Greater64U x y) -> (GreaterThan (CMPU x y))
-
-(Geq8 x y)  -> (GreaterEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Geq16 x y) -> (GreaterEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
-(Geq32 x y) -> (GreaterEqual (CMPW x y))
-(Geq64 x y) -> (GreaterEqual (CMP x y))
-(Geq32F x y) -> (FGreaterEqual (FCMPU x y))
-(Geq64F x y) -> (FGreaterEqual (FCMPU x y))
-
-(Geq8U x y)  -> (GreaterEqual (CMPU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Geq16U x y) -> (GreaterEqual (CMPU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Geq32U x y) -> (GreaterEqual (CMPU x y))
-(Geq64U x y) -> (GreaterEqual (CMPU x y))
-
-// Absorb pseudo-ops into blocks.
-(If (Equal cc) yes no) -> (EQ cc yes no)
-(If (NotEqual cc) yes no) -> (NE cc yes no)
-(If (LessThan cc) yes no) -> (LT cc yes no)
-(If (LessEqual cc) yes no) -> (LE cc yes no)
-(If (GreaterThan cc) yes no) -> (GT cc yes no)
-(If (GreaterEqual cc) yes no) -> (GE cc yes no)
-(If (FLessThan cc) yes no) -> (FLT cc yes no)
-(If (FLessEqual cc) yes no) -> (FLE cc yes no)
-(If (FGreaterThan cc) yes no) -> (FGT cc yes no)
-(If (FGreaterEqual cc) yes no) -> (FGE cc yes no)
-
-(If cond yes no) -> (NE (CMPWconst [0] cond) yes no)
-
-// Absorb boolean tests into block
-(NE (CMPWconst [0] (Equal cc)) yes no) -> (EQ cc yes no)
-(NE (CMPWconst [0] (NotEqual cc)) yes no) -> (NE cc yes no)
-(NE (CMPWconst [0] (LessThan cc)) yes no) -> (LT cc yes no)
-(NE (CMPWconst [0] (LessEqual cc)) yes no) -> (LE cc yes no)
-(NE (CMPWconst [0] (GreaterThan cc)) yes no) -> (GT cc yes no)
-(NE (CMPWconst [0] (GreaterEqual cc)) yes no) -> (GE cc yes no)
-// (NE (CMPWconst [0] (FLessThan cc)) yes no) -> (FLT cc yes no)
-// (NE (CMPWconst [0] (FLessEqual cc)) yes no) -> (FLE cc yes no)
-// (NE (CMPWconst [0] (FGreaterThan cc)) yes no) -> (FGT cc yes no)
-// (NE (CMPWconst [0] (FGreaterEqual cc)) yes no) -> (FGE cc yes no)
-
-// absorb flag constants into branches
-(EQ (FlagEQ) yes no) -> (First nil yes no)
-(EQ (FlagLT) yes no) -> (First nil no yes)
-(EQ (FlagGT) yes no) -> (First nil no yes)
-
-(NE (FlagEQ) yes no) -> (First nil no yes)
-(NE (FlagLT) yes no) -> (First nil yes no)
-(NE (FlagGT) yes no) -> (First nil yes no)
-
-(LT (FlagEQ) yes no) -> (First nil no yes)
-(LT (FlagLT) yes no) -> (First nil yes no)
-(LT (FlagGT) yes no) -> (First nil no yes)
-
-(LE (FlagEQ) yes no) -> (First nil yes no)
-(LE (FlagLT) yes no) -> (First nil yes no)
-(LE (FlagGT) yes no) -> (First nil no yes)
-
-(GT (FlagEQ) yes no) -> (First nil no yes)
-(GT (FlagLT) yes no) -> (First nil no yes)
-(GT (FlagGT) yes no) -> (First nil yes no)
-
-(GE (FlagEQ) yes no) -> (First nil yes no)
-(GE (FlagLT) yes no) -> (First nil no yes)
-(GE (FlagGT) yes no) -> (First nil yes no)
-
-// absorb InvertFlags into branches
-(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
-(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
-(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
-(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
-(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
-(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
-
-// (FLT (InvertFlags cmp) yes no) -> (FGT cmp yes no)
-// (FGT (InvertFlags cmp) yes no) -> (FLT cmp yes no)
-// (FLE (InvertFlags cmp) yes no) -> (FGE cmp yes no)
-// (FGE (InvertFlags cmp) yes no) -> (FLE cmp yes no)
-
-// constant comparisons
-(CMPWconst (MOVWconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
-(CMPWconst (MOVWconst [x]) [y]) && int32(x)<int32(y)  -> (FlagLT)
-(CMPWconst (MOVWconst [x]) [y]) && int32(x)>int32(y)  -> (FlagGT)
-
-(CMPconst (MOVDconst [x]) [y]) && int64(x)==int64(y) -> (FlagEQ)
-(CMPconst (MOVDconst [x]) [y]) && int64(x)<int64(y)  -> (FlagLT)
-(CMPconst (MOVDconst [x]) [y]) && int64(x)>int64(y)  -> (FlagGT)
-
-(CMPWUconst (MOVWconst [x]) [y]) && int32(x)==int32(y)  -> (FlagEQ)
-(CMPWUconst (MOVWconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT)
-(CMPWUconst (MOVWconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT)
-
-(CMPUconst (MOVDconst [x]) [y]) && int64(x)==int64(y)  -> (FlagEQ)
-(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT)
-(CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) -> (FlagGT)
-
-// other known comparisons
-//(CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT)
-//(CMPconst (MOVHUreg _) [c]) && 0xffff < c -> (FlagLT)
-//(CMPconst (ANDconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT)
-//(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint32(32-c)) <= uint32(n) -> (FlagLT)
-
-// absorb flag constants into boolean values
-(Equal (FlagEQ)) -> (MOVWconst [1])
-(Equal (FlagLT)) -> (MOVWconst [0])
-(Equal (FlagGT)) -> (MOVWconst [0])
-
-(NotEqual (FlagEQ)) -> (MOVWconst [0])
-(NotEqual (FlagLT)) -> (MOVWconst [1])
-(NotEqual (FlagGT)) -> (MOVWconst [1])
-
-(LessThan (FlagEQ)) -> (MOVWconst [0])
-(LessThan (FlagLT)) -> (MOVWconst [1])
-(LessThan (FlagGT)) -> (MOVWconst [0])
-
-(LessEqual (FlagEQ)) -> (MOVWconst [1])
-(LessEqual (FlagLT)) -> (MOVWconst [1])
-(LessEqual (FlagGT)) -> (MOVWconst [0])
-
-(GreaterThan (FlagEQ)) -> (MOVWconst [0])
-(GreaterThan (FlagLT)) -> (MOVWconst [0])
-(GreaterThan (FlagGT)) -> (MOVWconst [1])
-
-(GreaterEqual (FlagEQ)) -> (MOVWconst [1])
-(GreaterEqual (FlagLT)) -> (MOVWconst [0])
-(GreaterEqual (FlagGT)) -> (MOVWconst [1])
-
-// absorb InvertFlags into boolean values
-(Equal (InvertFlags x)) -> (Equal x)
-(NotEqual (InvertFlags x)) -> (NotEqual x)
-(LessThan (InvertFlags x)) -> (GreaterThan x)
-(GreaterThan (InvertFlags x)) -> (LessThan x)
-(LessEqual (InvertFlags x)) -> (GreaterEqual x)
-(GreaterEqual (InvertFlags x)) -> (LessEqual x)
-(FLessThan (InvertFlags x)) -> (FGreaterThan x)
-(FGreaterThan (InvertFlags x)) -> (FLessThan x)
-(FLessEqual (InvertFlags x)) -> (FGreaterEqual x)
-(FGreaterEqual (InvertFlags x)) -> (FLessEqual x)
-
-
-// Lowering loads
-(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
-(Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
-(Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
-(Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
-(Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
-(Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && isSigned(t))) -> (MOVBload ptr mem)
-(Load <t> ptr mem) && is8BitInt(t) && !isSigned(t) -> (MOVBZload ptr mem)
-
-(Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
-(Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
-
-(Store [8] ptr val mem) && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
-(Store [8] ptr val mem) && is32BitFloat(val.Type) -> (FMOVDstore ptr val mem) // glitch from (Cvt32Fto64F x) -> x -- type is wrong
-(Store [4] ptr val mem) && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)
-(Store [8] ptr val mem) && (is64BitInt(val.Type) || isPtr(val.Type)) -> (MOVDstore ptr val mem)
-(Store [4] ptr val mem) && is32BitInt(val.Type) -> (MOVWstore ptr val mem)
-(Store [2] ptr val mem) -> (MOVHstore ptr val mem)
-(Store [1] ptr val mem) -> (MOVBstore ptr val mem)
-
-(Zero [s] _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstorezero destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 2 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstorezero destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 2 ->
-	(MOVBstorezero [1] destptr
-		(MOVBstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstorezero destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstorezero [2] destptr
-		(MOVHstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 4 ->
-	(MOVBstorezero [3] destptr
-		(MOVBstorezero [2] destptr
-			(MOVBstorezero [1] destptr
-				(MOVBstorezero [0] destptr mem))))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [0] destptr mem)
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstorezero [4] destptr
-		(MOVWstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstorezero [6] destptr
-		(MOVHstorezero [4] destptr
-			(MOVHstorezero [2] destptr
-				(MOVHstorezero [0] destptr mem))))
-
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 3 ->
-	(MOVBstorezero [2] destptr
-		(MOVBstorezero [1] destptr
-			(MOVBstorezero [0] destptr mem)))
-
-// Zero small numbers of words directly.
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 16 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [8] destptr
-                (MOVDstorezero [0] destptr mem))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 24 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [16] destptr
-		(MOVDstorezero [8] destptr
-			(MOVDstorezero [0] destptr mem)))
-(Zero [s] destptr mem) && SizeAndAlign(s).Size() == 32 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstorezero [24] destptr
-		(MOVDstorezero [16] destptr
-			(MOVDstorezero [8] destptr
-				(MOVDstorezero [0] destptr mem))))
-
-// Large zeroing uses a loop
-(Zero [s] ptr mem)
-	&& (SizeAndAlign(s).Size() > 512 || config.noDuffDevice) || SizeAndAlign(s).Align()%8 != 0 ->
-	(LoweredZero [SizeAndAlign(s).Align()]
-		ptr
-		(ADDconst <ptr.Type> ptr [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)])
-		mem)
-
-// moves
-(Move [s] _ _ mem) && SizeAndAlign(s).Size() == 0 -> mem
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore dst (MOVBZload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstore dst (MOVHZload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 ->
-	(MOVBstore [1] dst (MOVBZload [1] src mem)
-		(MOVBstore dst (MOVBZload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstore dst (MOVWload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%2 == 0 ->
-	(MOVHstore [2] dst (MOVHZload [2] src mem)
-		(MOVHstore dst (MOVHZload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 ->
-	(MOVBstore [3] dst (MOVBZload [3] src mem)
-		(MOVBstore [2] dst (MOVBZload [2] src mem)
-			(MOVBstore [1] dst (MOVBZload [1] src mem)
-				(MOVBstore dst (MOVBZload src mem) mem))))
-
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%8 == 0 ->
-	(MOVDstore dst (MOVDload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%4 == 0 ->
-	(MOVWstore [4] dst (MOVWZload [4] src mem)
-		(MOVWstore dst (MOVWZload src mem) mem))
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%2 == 0->
-	(MOVHstore [6] dst (MOVHZload [6] src mem)
-		(MOVHstore [4] dst (MOVHZload [4] src mem)
-			(MOVHstore [2] dst (MOVHZload [2] src mem)
-				(MOVHstore dst (MOVHZload src mem) mem))))
-
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 3 ->
-	(MOVBstore [2] dst (MOVBZload [2] src mem)
-		(MOVBstore [1] dst (MOVBZload [1] src mem)
-			(MOVBstore dst (MOVBZload src mem) mem)))
-
-// Large move uses a loop
-(Move [s] dst src mem)
-	&& (SizeAndAlign(s).Size() > 512 || config.noDuffDevice) || SizeAndAlign(s).Align()%8 != 0 ->
-	(LoweredMove [SizeAndAlign(s).Align()]
-		dst
-		src
-		(ADDconst <src.Type> src [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)])
-		mem)
-
-// Calls
-// Lowering calls
-(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
-(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
-(DeferCall [argwid] mem) -> (CALLdefer [argwid] mem)
-(GoCall [argwid] mem) -> (CALLgo [argwid] mem)
-(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
-
-// Miscellaneous
-(Convert <t> x mem) -> (MOVDconvert <t> x mem)
-(GetClosurePtr) -> (LoweredGetClosurePtr)
-(IsNonNil ptr) -> (NotEqual (CMPconst [0] ptr))
-(IsInBounds idx len) -> (LessThan (CMPU idx len))
-(IsSliceInBounds idx len) -> (LessEqual (CMPU idx len))
-(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
-
-// Optimizations
-
-(ADD (MOVDconst [c]) x) && int64(int32(c)) == c -> (ADDconst [c] x)
-(ADD x (MOVDconst [c])) && int64(int32(c)) == c -> (ADDconst [c] x)
-
-// Fold offsets for stores.
-(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} x val mem)
-(MOVWstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} x val mem)
-(MOVHstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVHstore [off1+off2] {sym} x val mem)
-(MOVBstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} x val mem)
-
-// Store of zero -> storezero
-(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVDstorezero [off] {sym} ptr mem)
-(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVWstorezero [off] {sym} ptr mem)
-(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVHstorezero [off] {sym} ptr mem)
-(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && c == 0 -> (MOVBstorezero [off] {sym} ptr mem)
-
-// Fold offsets for storezero
-(MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVDstorezero [off1+off2] {sym} x mem)
-(MOVWstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVWstorezero [off1+off2] {sym} x mem)
-(MOVHstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVHstorezero [off1+off2] {sym} x mem)
-(MOVBstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
-    (MOVBstorezero [off1+off2] {sym} x mem)
-
-// Lowering extension
-// Note: we always extend to 64 bits even though some ops don't need that many result bits.
-(SignExt8to16  x) -> (MOVBreg x)
-(SignExt8to32  x) -> (MOVBreg x)
-(SignExt8to64  x) -> (MOVBreg x)
-(SignExt16to32 x) -> (MOVHreg x)
-(SignExt16to64 x) -> (MOVHreg x)
-(SignExt32to64 x) -> (MOVWreg x)
-
-(ZeroExt8to16  x) -> (MOVBZreg x)
-(ZeroExt8to32  x) -> (MOVBZreg x)
-(ZeroExt8to64  x) -> (MOVBZreg x)
-(ZeroExt16to32 x) -> (MOVHZreg x)
-(ZeroExt16to64 x) -> (MOVHZreg x)
-(ZeroExt32to64 x) -> (MOVWZreg x)
-
-(Trunc16to8  x) -> (MOVBreg x)
-(Trunc32to8  x) -> (MOVBreg x)
-(Trunc32to16 x) -> (MOVHreg x)
-(Trunc64to8  x) -> (MOVBreg x)
-(Trunc64to16 x) -> (MOVHreg x)
-(Trunc64to32 x) -> (MOVWreg x)
-
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -1,395 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import "strings"
-
-// Notes:
-//  - Less-than-64-bit integer types live in the low portion of registers.
-//    For now, the upper portion is junk; sign/zero-extension might be optimized in the future, but not yet.
-//  - Boolean types are zero or 1; stored in a byte, but loaded with AMOVBZ so the upper bytes of a register are zero.
-//  - *const instructions may use a constant larger than the instuction can encode.
-//    In this case the assembler expands to multiple instructions and uses tmp
-//    register (R31).
-
-var regNamesPPC64 = []string{
-	// "R0", // REGZERO
-	"SP", // REGSP
-	"SB", // REGSB
-	"R3",
-	"R4",
-	"R5",
-	"R6",
-	"R7",
-	"R8",
-	"R9",
-	"R10",
-	"R11", // REGCTXT for closures
-	"R12",
-	"R13", // REGTLS
-	"R14",
-	"R15",
-	"R16",
-	"R17",
-	"R18",
-	"R19",
-	"R20",
-	"R21",
-	"R22",
-	"R23",
-	"R24",
-	"R25",
-	"R26",
-	"R27",
-	"R28",
-	"R29",
-	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
-	"R31", // REGTMP
-
-	"F0",
-	"F1",
-	"F2",
-	"F3",
-	"F4",
-	"F5",
-	"F6",
-	"F7",
-	"F8",
-	"F9",
-	"F10",
-	"F11",
-	"F12",
-	"F13",
-	"F14",
-	"F15",
-	"F16",
-	"F17",
-	"F18",
-	"F19",
-	"F20",
-	"F21",
-	"F22",
-	"F23",
-	"F24",
-	"F25",
-	"F26",
-	// "F27", // reserved for "floating conversion constant"
-	// "F28", // 0.0
-	// "F29", // 0.5
-	// "F30", // 1.0
-	// "F31", // 2.0
-
-	// "CR0",
-	// "CR1",
-	// "CR2",
-	// "CR3",
-	// "CR4",
-	// "CR5",
-	// "CR6",
-	// "CR7",
-
-	// "CR",
-	// "XER",
-	// "LR",
-	// "CTR",
-}
-
-func init() {
-	// Make map from reg names to reg integers.
-	if len(regNamesPPC64) > 64 {
-		panic("too many registers")
-	}
-	num := map[string]int{}
-	for i, name := range regNamesPPC64 {
-		num[name] = i
-	}
-	buildReg := func(s string) regMask {
-		m := regMask(0)
-		for _, r := range strings.Split(s, " ") {
-			if n, ok := num[r]; ok {
-				m |= regMask(1) << uint(n)
-				continue
-			}
-			panic("register " + r + " not found")
-		}
-		return m
-	}
-
-	var (
-		gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
-		fp = buildReg("F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26")
-		sp = buildReg("SP")
-		sb = buildReg("SB")
-		// gr  = buildReg("g")
-		// cr  = buildReg("CR")
-		// ctr = buildReg("CTR")
-		// lr  = buildReg("LR")
-		tmp  = buildReg("R31")
-		ctxt = buildReg("R11")
-		// tls = buildReg("R13")
-		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
-		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
-		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
-		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
-		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
-		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
-		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
-		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
-		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
-		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
-		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
-		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
-		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
-		callerSave  = regMask(gp | fp)
-	)
-	ops := []opData{
-		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
-		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "SymOff"},    // arg0 + auxInt + aux.(*gc.Sym)
-		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},   // arg0+arg1
-		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
-		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0-arg1
-		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                      // arg0-arg1
-		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0-arg1
-
-		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
-		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
-
-		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
-		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
-		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
-		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
-
-		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
-		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
-
-		{name: "SRAD", argLength: 2, reg: gp21, asm: "SRAD"}, // arg0 >>a arg1, 64 bits (all sign if arg1 & 64 != 0)
-		{name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // arg0 >>a arg1, 32 bits (all sign if arg1 & 32 != 0)
-		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},   // arg0 >> arg1, 64 bits  (0 if arg1 & 64 != 0)
-		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},   // arg0 >> arg1, 32 bits  (0 if arg1 & 32 != 0)
-		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},   // arg0 << arg1, 64 bits  (0 if arg1 & 64 != 0)
-		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},   // arg0 << arg1, 32 bits  (0 if arg1 & 32 != 0)
-
-		{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
-		{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"},                                                                   // carry - 1 (if carry then 0 else -1)
-
-		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // arg0 >>a aux, 64 bits
-		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // arg0 >>a aux, 32 bits
-		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},   // arg0 >> aux, 64 bits
-		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},   // arg0 >> aux, 32 bits
-		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},   // arg0 << aux, 64 bits
-		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},   // arg0 << aux, 32 bits
-
-		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
-		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
-
-		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
-		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
-		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
-		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
-
-		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
-
-		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
-		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
-		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
-		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
-		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
-
-		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
-		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
-		// data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianess issues).
-		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
-		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
-
-		{name: "Xf2i64", argLength: 1, reg: fpgp, typ: "Int64"},   // move 64 bits of F register into G register
-		{name: "Xi2f64", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits of G register into F register
-
-		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},               // arg0&arg1
-		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                // arg0&^arg1
-		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                 // arg0|arg1
-		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                  // arg0|^arg1
-		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
-		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
-		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                  // -arg0 (integer)
-		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                // -arg0 (floating point)
-		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                              // sqrt(arg0) (floating point)
-		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                            // sqrt(arg0) (floating point, single precision)
-
-		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                     // arg0|aux
-		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                   // arg0^aux
-		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true}, // arg0&aux // and-immediate sets CC on PPC, always.
-
-		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},                      // sign extend int8 to int64
-		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"},                    // zero extend uint8 to uint64
-		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},                      // sign extend int16 to int64
-		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"},                    // zero extend uint16 to uint64
-		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},                      // sign extend int32 to int64
-		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"},                    // zero extend uint32 to uint64
-		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", typ: "Int8"},     // sign extend int8 to int64
-		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8"},  // zero extend uint8 to uint64
-		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16"},    // sign extend int16 to int64
-		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16"}, // zero extend uint16 to uint64
-		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32"},    // sign extend int32 to int64
-		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32"}, // zero extend uint32 to uint64
-		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64"},
-
-		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", typ: "Float64"},
-		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", typ: "Float32"},
-		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem"},
-		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem"},
-		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem"},
-		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem"},
-		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem"},
-		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem"},
-
-		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem"}, // store zero byte to arg0+aux.  arg1=mem
-		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem"}, // store zero 2 bytes to ...
-		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem"}, // store zero 4 bytes to ...
-		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem"}, // store zero 8 bytes to ...
-
-		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
-
-		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", rematerializeable: true},     //
-		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", rematerializeable: true},     // 32 low bits of auxint
-		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true}, //
-		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true}, //
-		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
-
-		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
-		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
-		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
-		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
-		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
-		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
-		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
-		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
-
-		// pseudo-ops
-		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
-		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
-		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
-		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
-		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
-		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
-		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
-		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
-		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
-		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
-
-		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
-		// and sorts it to the very beginning of the block to prevent other
-		// use of the closure pointer.
-		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}},
-
-		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
-		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true},
-
-		// Convert pointer to integer, takes a memory operand for ordering.
-		{name: "MOVDconvert", argLength: 2, reg: gp11, asm: "MOVD"},
-
-		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                      // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gp | sp, ctxt, 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
-		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                        // call deferproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64", clobberFlags: true},                                           // call newproc.  arg0=mem, auxint=argsize, returns mem
-		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true},                 // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
-
-		// large or unaligned zeroing
-		// arg0 = address of memory to zero (in R3, changed as side effect)
-		// arg1 = address of the last element to zero
-		// arg2 = mem
-		// returns mem
-		//  ADD -8,R3,R3 // intermediate value not valid GC ptr, cannot expose to opt+GC
-		//	MOVDU	R0, 8(R3)
-		//	CMP	R3, Rarg1
-		//	BLE	-2(PC)
-		{
-			name:      "LoweredZero",
-			aux:       "Int64",
-			argLength: 3,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R3"), gp},
-				clobbers: buildReg("R3"),
-			},
-			clobberFlags: true,
-			typ:          "Mem",
-		},
-
-		// large or unaligned move
-		// arg0 = address of dst memory (in R3, changed as side effect)
-		// arg1 = address of src memory (in R4, changed as side effect)
-		// arg2 = address of the last element of src
-		// arg3 = mem
-		// returns mem
-		//  ADD -8,R3,R3 // intermediate value not valid GC ptr, cannot expose to opt+GC
-		//  ADD -8,R4,R4 // intermediate value not valid GC ptr, cannot expose to opt+GC
-		//	MOVDU	8(R4), Rtmp
-		//	MOVDU	Rtmp, 8(R3)
-		//	CMP	R4, Rarg2
-		//	BLT	-3(PC)
-		{
-			name:      "LoweredMove",
-			aux:       "Int64",
-			argLength: 4,
-			reg: regInfo{
-				inputs:   []regMask{buildReg("R3"), buildReg("R4"), gp},
-				clobbers: buildReg("R3 R4"),
-			},
-			clobberFlags: true,
-			typ:          "Mem",
-		},
-
-		// (InvertFlags (CMP a b)) == (CMP b a)
-		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
-		// then we do (LessThan (InvertFlags (CMP b a))) instead.
-		// Rewrites will convert this to (GreaterThan (CMP b a)).
-		// InvertFlags is a pseudo-op which can't appear in assembly output.
-		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
-
-		// Constant flag values. For any comparison, there are 3 possible
-		// outcomes: either the three from the signed total order (<,==,>)
-		// or the three from the unsigned total order, depending on which
-		// comparison operation was used (CMP or CMPU -- PPC is different from
-		// the other architectures, which have a single comparison producing
-		// both signed and unsigned comparison results.)
-
-		// These ops are for temporary use by rewrite rules. They
-		// cannot appear in the generated assembly.
-		{name: "FlagEQ"}, // equal
-		{name: "FlagLT"}, // signed < or unsigned <
-		{name: "FlagGT"}, // signed > or unsigned >
-
-	}
-
-	blocks := []blockData{
-		{name: "EQ"},
-		{name: "NE"},
-		{name: "LT"},
-		{name: "LE"},
-		{name: "GT"},
-		{name: "GE"},
-		{name: "FLT"},
-		{name: "FLE"},
-		{name: "FGT"},
-		{name: "FGE"},
-	}
-
-	archs = append(archs, arch{
-		name:            "PPC64",
-		pkg:             "cmd/internal/obj/ppc64",
-		genfile:         "../../ppc64/ssa.go",
-		ops:             ops,
-		blocks:          blocks,
-		regnames:        regNamesPPC64,
-		gpregmask:       gp,
-		fpregmask:       fp,
-		framepointerreg: int8(num["SP"]),
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -1,407 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file contains rules to decompose [u]int64 types on 32-bit
-// architectures. These rules work together with the decomposeBuiltIn
-// pass which handles phis of these types.
-
-(Int64Hi (Int64Make hi _)) -> hi
-(Int64Lo (Int64Make _ lo)) -> lo
-
-// Assuming little endian (we don't support big endian 32-bit architecture yet)
-(Load <t> ptr mem) && is64BitInt(t) && t.IsSigned() ->
-	(Int64Make
-		(Load <config.fe.TypeInt32()> (OffPtr <config.fe.TypeInt32().PtrTo()> [4] ptr) mem)
-		(Load <config.fe.TypeUInt32()> ptr mem))
-(Load <t> ptr mem) && is64BitInt(t) && !t.IsSigned() ->
-	(Int64Make
-		(Load <config.fe.TypeUInt32()> (OffPtr <config.fe.TypeUInt32().PtrTo()> [4] ptr) mem)
-		(Load <config.fe.TypeUInt32()> ptr mem))
-
-(Store [8] dst (Int64Make hi lo) mem) ->
-	(Store [4]
-		(OffPtr <hi.Type.PtrTo()> [4] dst)
-		hi
-		(Store [4] dst lo mem))
-
-(Arg {n} [off]) && is64BitInt(v.Type) && v.Type.IsSigned() ->
-  (Int64Make
-    (Arg <config.fe.TypeInt32()> {n} [off+4])
-    (Arg <config.fe.TypeUInt32()> {n} [off]))
-(Arg {n} [off]) && is64BitInt(v.Type) && !v.Type.IsSigned() ->
-  (Int64Make
-    (Arg <config.fe.TypeUInt32()> {n} [off+4])
-    (Arg <config.fe.TypeUInt32()> {n} [off]))
-
-(Add64 x y) ->
-	(Int64Make
-		(Add32withcarry <config.fe.TypeInt32()>
-			(Int64Hi x)
-			(Int64Hi y)
-			(Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
-		(Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
-
-(Sub64 x y) ->
-	(Int64Make
-		(Sub32withcarry <config.fe.TypeInt32()>
-			(Int64Hi x)
-			(Int64Hi y)
-			(Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
-		(Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
-
-(Mul64 x y) ->
-	(Int64Make
-		(Add32 <config.fe.TypeUInt32()>
-			(Mul32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Hi y))
-			(Add32 <config.fe.TypeUInt32()>
-				(Mul32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Lo y))
-				(Select0 <config.fe.TypeUInt32()> (Mul32uhilo (Int64Lo x) (Int64Lo y)))))
-		(Select1 <config.fe.TypeUInt32()> (Mul32uhilo (Int64Lo x) (Int64Lo y))))
-
-(And64 x y) ->
-	(Int64Make
-		(And32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Hi y))
-		(And32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Lo y)))
-
-(Or64 x y) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Hi y))
-		(Or32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Lo y)))
-
-(Xor64 x y) ->
-	(Int64Make
-		(Xor32 <config.fe.TypeUInt32()> (Int64Hi x) (Int64Hi y))
-		(Xor32 <config.fe.TypeUInt32()> (Int64Lo x) (Int64Lo y)))
-
-(Neg64 <t> x) -> (Sub64 (Const64 <t> [0]) x)
-
-(Com64 x) ->
-	(Int64Make
-		(Com32 <config.fe.TypeUInt32()> (Int64Hi x))
-		(Com32 <config.fe.TypeUInt32()> (Int64Lo x)))
-
-(SignExt32to64 x) -> (Int64Make (Signmask x) x)
-(SignExt16to64 x) -> (SignExt32to64 (SignExt16to32 x))
-(SignExt8to64 x) -> (SignExt32to64 (SignExt8to32 x))
-
-(ZeroExt32to64 x) -> (Int64Make (Const32 <config.fe.TypeUInt32()> [0]) x)
-(ZeroExt16to64 x) -> (ZeroExt32to64 (ZeroExt16to32 x))
-(ZeroExt8to64 x) -> (ZeroExt32to64 (ZeroExt8to32 x))
-
-(Trunc64to32 (Int64Make _ lo)) -> lo
-(Trunc64to16 (Int64Make _ lo)) -> (Trunc32to16 lo)
-(Trunc64to8 (Int64Make _ lo)) -> (Trunc32to8 lo)
-
-(Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Signmask x)
-(Rsh32Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Lsh16x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Rsh16x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Signmask (SignExt16to32 x))
-(Rsh16Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Lsh8x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-(Rsh8x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Signmask (SignExt8to32 x))
-(Rsh8Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const32 [0])
-
-(Lsh32x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh32x32 x lo)
-(Rsh32x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh32x32 x lo)
-(Rsh32Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh32Ux32 x lo)
-(Lsh16x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh16x32 x lo)
-(Rsh16x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh16x32 x lo)
-(Rsh16Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh16Ux32 x lo)
-(Lsh8x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh8x32 x lo)
-(Rsh8x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh8x32 x lo)
-(Rsh8Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh8Ux32 x lo)
-
-(Lsh64x64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const64 [0])
-(Rsh64x64 x (Int64Make (Const32 [c]) _)) && c != 0 -> (Int64Make (Signmask (Int64Hi x)) (Signmask (Int64Hi x)))
-(Rsh64Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 -> (Const64 [0])
-
-(Lsh64x64 x (Int64Make (Const32 [0]) lo)) -> (Lsh64x32 x lo)
-(Rsh64x64 x (Int64Make (Const32 [0]) lo)) -> (Rsh64x32 x lo)
-(Rsh64Ux64 x (Int64Make (Const32 [0]) lo)) -> (Rsh64Ux32 x lo)
-
-// turn x64 non-constant shifts to x32 shifts
-// if high 32-bit of the shift is nonzero, make a huge shift
-(Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh64x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh64x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh64Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh32x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh32x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh32Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh16x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh16x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh16Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Lsh8x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh8x32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-(Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 ->
-	(Rsh8Ux32 x (Or32 <config.fe.TypeUInt32()> (Zeromask hi) lo))
-
-// 64x left shift
-// result.hi = hi<<s | lo>>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result 0
-// result.lo = lo<<s
-(Lsh64x32 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Lsh32x32 <config.fe.TypeUInt32()> hi s)
-				(Rsh32Ux32 <config.fe.TypeUInt32()>
-					lo
-					(Sub32 <config.fe.TypeUInt32()> (Const32 <config.fe.TypeUInt32()> [32]) s)))
-			(Lsh32x32 <config.fe.TypeUInt32()>
-				lo
-				(Sub32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [32]))))
-		(Lsh32x32 <config.fe.TypeUInt32()> lo s))
-(Lsh64x16 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Lsh32x16 <config.fe.TypeUInt32()> hi s)
-				(Rsh32Ux16 <config.fe.TypeUInt32()>
-					lo
-					(Sub16 <config.fe.TypeUInt16()> (Const16 <config.fe.TypeUInt16()> [32]) s)))
-			(Lsh32x16 <config.fe.TypeUInt32()>
-				lo
-				(Sub16 <config.fe.TypeUInt16()> s (Const16 <config.fe.TypeUInt16()> [32]))))
-		(Lsh32x16 <config.fe.TypeUInt32()> lo s))
-(Lsh64x8 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Lsh32x8 <config.fe.TypeUInt32()> hi s)
-				(Rsh32Ux8 <config.fe.TypeUInt32()>
-					lo
-					(Sub8 <config.fe.TypeUInt8()> (Const8 <config.fe.TypeUInt8()> [32]) s)))
-			(Lsh32x8 <config.fe.TypeUInt32()>
-				lo
-				(Sub8 <config.fe.TypeUInt8()> s (Const8 <config.fe.TypeUInt8()> [32]))))
-		(Lsh32x8 <config.fe.TypeUInt32()> lo s))
-
-// 64x unsigned right shift
-// result.hi = hi>>s
-// result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result 0
-(Rsh64Ux32 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32Ux32 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux32 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x32 <config.fe.TypeUInt32()>
-					hi
-					(Sub32 <config.fe.TypeUInt32()> (Const32 <config.fe.TypeUInt32()> [32]) s)))
-			(Rsh32Ux32 <config.fe.TypeUInt32()>
-				hi
-				(Sub32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [32])))))
-(Rsh64Ux16 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32Ux16 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux16 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x16 <config.fe.TypeUInt32()>
-					hi
-					(Sub16 <config.fe.TypeUInt16()> (Const16 <config.fe.TypeUInt16()> [32]) s)))
-			(Rsh32Ux16 <config.fe.TypeUInt32()>
-				hi
-				(Sub16 <config.fe.TypeUInt16()> s (Const16 <config.fe.TypeUInt16()> [32])))))
-(Rsh64Ux8 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32Ux8 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux8 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x8 <config.fe.TypeUInt32()>
-					hi
-					(Sub8 <config.fe.TypeUInt8()> (Const8 <config.fe.TypeUInt8()> [32]) s)))
-			(Rsh32Ux8 <config.fe.TypeUInt32()>
-				hi
-				(Sub8 <config.fe.TypeUInt8()> s (Const8 <config.fe.TypeUInt8()> [32])))))
-
-// 64x signed right shift
-// result.hi = hi>>s
-// result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result 0/-1
-(Rsh64x32 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32x32 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux32 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x32 <config.fe.TypeUInt32()>
-					hi
-					(Sub32 <config.fe.TypeUInt32()> (Const32 <config.fe.TypeUInt32()> [32]) s)))
-			(And32 <config.fe.TypeUInt32()>
-				(Rsh32x32 <config.fe.TypeUInt32()>
-					hi
-					(Sub32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [32])))
-				(Zeromask
-					(Rsh32Ux32 <config.fe.TypeUInt32()> s (Const32 <config.fe.TypeUInt32()> [5]))))))
-(Rsh64x16 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32x16 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux16 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x16 <config.fe.TypeUInt32()>
-					hi
-					(Sub16 <config.fe.TypeUInt16()> (Const16 <config.fe.TypeUInt16()> [32]) s)))
-			(And32 <config.fe.TypeUInt32()>
-				(Rsh32x16 <config.fe.TypeUInt32()>
-					hi
-					(Sub16 <config.fe.TypeUInt16()> s (Const16 <config.fe.TypeUInt16()> [32])))
-				(Zeromask
-					(ZeroExt16to32
-						(Rsh16Ux32 <config.fe.TypeUInt16()> s (Const32 <config.fe.TypeUInt32()> [5])))))))
-(Rsh64x8 (Int64Make hi lo) s) ->
-	(Int64Make
-		(Rsh32x8 <config.fe.TypeUInt32()> hi s)
-		(Or32 <config.fe.TypeUInt32()>
-			(Or32 <config.fe.TypeUInt32()>
-				(Rsh32Ux8 <config.fe.TypeUInt32()> lo s)
-				(Lsh32x8 <config.fe.TypeUInt32()>
-					hi
-					(Sub8 <config.fe.TypeUInt8()> (Const8 <config.fe.TypeUInt8()> [32]) s)))
-			(And32 <config.fe.TypeUInt32()>
-				(Rsh32x8 <config.fe.TypeUInt32()>
-					hi
-					(Sub8 <config.fe.TypeUInt8()> s (Const8 <config.fe.TypeUInt8()> [32])))
-				(Zeromask
-					(ZeroExt8to32
-						(Rsh8Ux32 <config.fe.TypeUInt8()> s (Const32 <config.fe.TypeUInt32()> [5])))))))
-
-// 64xConst32 shifts
-// we probably do not need them -- lateopt may take care of them just fine
-//(Lsh64x32 _ (Const32 [c])) && uint32(c) >= 64 -> (Const64 [0])
-//(Rsh64x32 x (Const32 [c])) && uint32(c) >= 64 -> (Int64Make (Signmask (Int64Hi x)) (Signmask (Int64Hi x)))
-//(Rsh64Ux32 _ (Const32 [c])) && uint32(c) >= 64 -> (Const64 [0])
-//
-//(Lsh64x32 x (Const32 [c])) && c < 64 && c > 32 ->
-//	(Int64Make
-//		(Lsh32x32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c-32]))
-//		(Const32 <config.fe.TypeUInt32()> [0]))
-//(Rsh64x32 x (Const32 [c])) && c < 64 && c > 32 ->
-//	(Int64Make
-//		(Signmask (Int64Hi x))
-//		(Rsh32x32 <config.fe.TypeInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c-32])))
-//(Rsh64Ux32 x (Const32 [c])) && c < 64 && c > 32 ->
-//	(Int64Make
-//		(Const32 <config.fe.TypeUInt32()> [0])
-//		(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c-32])))
-//
-//(Lsh64x32 x (Const32 [32])) -> (Int64Make (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [0]))
-//(Rsh64x32 x (Const32 [32])) -> (Int64Make (Signmask (Int64Hi x)) (Int64Hi x))
-//(Rsh64Ux32 x (Const32 [32])) -> (Int64Make (Const32 <config.fe.TypeUInt32()> [0]) (Int64Hi x))
-//
-//(Lsh64x32 x (Const32 [c])) && c < 32 && c > 0 ->
-//	(Int64Make
-//		(Or32 <config.fe.TypeUInt32()>
-//			(Lsh32x32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c]))
-//			(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [32-c])))
-//		(Lsh32x32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c])))
-//(Rsh64x32 x (Const32 [c])) && c < 32 && c > 0 ->
-//	(Int64Make
-//		(Rsh32x32 <config.fe.TypeInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c]))
-//		(Or32 <config.fe.TypeUInt32()>
-//			(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c]))
-//			(Lsh32x32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [32-c]))))
-//(Rsh64Ux32 x (Const32 [c])) && c < 32 && c > 0 ->
-//	(Int64Make
-//		(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [c]))
-//		(Or32 <config.fe.TypeUInt32()>
-//			(Rsh32Ux32 <config.fe.TypeUInt32()> (Int64Lo x) (Const32 <config.fe.TypeUInt32()> [c]))
-//			(Lsh32x32 <config.fe.TypeUInt32()> (Int64Hi x) (Const32 <config.fe.TypeUInt32()> [32-c]))))
-//
-//(Lsh64x32 x (Const32 [0])) -> x
-//(Rsh64x32 x (Const32 [0])) -> x
-//(Rsh64Ux32 x (Const32 [0])) -> x
-
-(Lrot64 (Int64Make hi lo) [c]) && c <= 32 ->
-	(Int64Make
-		(Or32 <config.fe.TypeUInt32()>
-			(Lsh32x32 <config.fe.TypeUInt32()> hi (Const32 <config.fe.TypeUInt32()> [c]))
-			(Rsh32Ux32 <config.fe.TypeUInt32()> lo (Const32 <config.fe.TypeUInt32()> [32-c])))
-		(Or32 <config.fe.TypeUInt32()>
-			(Lsh32x32 <config.fe.TypeUInt32()> lo (Const32 <config.fe.TypeUInt32()> [c]))
-			(Rsh32Ux32 <config.fe.TypeUInt32()> hi (Const32 <config.fe.TypeUInt32()> [32-c]))))
-(Lrot64 (Int64Make hi lo) [c]) && c > 32 -> (Lrot64 (Int64Make lo hi) [c-32])
-
-(Const64 <t> [c]) && t.IsSigned() ->
-	(Int64Make (Const32 <config.fe.TypeInt32()> [c>>32]) (Const32 <config.fe.TypeUInt32()> [int64(int32(c))]))
-(Const64 <t> [c]) && !t.IsSigned() ->
-	(Int64Make (Const32 <config.fe.TypeUInt32()> [c>>32]) (Const32 <config.fe.TypeUInt32()> [int64(int32(c))]))
-
-(Eq64 x y) ->
-	(AndB
-		(Eq32 (Int64Hi x) (Int64Hi y))
-		(Eq32 (Int64Lo x) (Int64Lo y)))
-
-(Neq64 x y) ->
-	(OrB
-		(Neq32 (Int64Hi x) (Int64Hi y))
-		(Neq32 (Int64Lo x) (Int64Lo y)))
-
-(Less64U x y) ->
-	(OrB
-		(Less32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Less32U (Int64Lo x) (Int64Lo y))))
-
-(Leq64U x y) ->
-	(OrB
-		(Less32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Leq32U (Int64Lo x) (Int64Lo y))))
-
-(Greater64U x y) ->
-	(OrB
-		(Greater32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Greater32U (Int64Lo x) (Int64Lo y))))
-
-(Geq64U x y) ->
-	(OrB
-		(Greater32U (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Geq32U (Int64Lo x) (Int64Lo y))))
-
-(Less64 x y) ->
-	(OrB
-		(Less32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Less32U (Int64Lo x) (Int64Lo y))))
-
-(Leq64 x y) ->
-	(OrB
-		(Less32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Leq32U (Int64Lo x) (Int64Lo y))))
-
-(Greater64 x y) ->
-	(OrB
-		(Greater32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Greater32U (Int64Lo x) (Int64Lo y))))
-
-(Geq64 x y) ->
-	(OrB
-		(Greater32 (Int64Hi x) (Int64Hi y))
-		(AndB
-			(Eq32 (Int64Hi x) (Int64Hi y))
-			(Geq32U (Int64Lo x) (Int64Lo y))))
--- a/src/cmd/compile/internal/ssa/gen/dec64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/dec64Ops.go
@@ -1,20 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-var dec64Ops = []opData{}
-
-var dec64Blocks = []blockData{}
-
-func init() {
-	archs = append(archs, arch{
-		name:    "dec64",
-		ops:     dec64Ops,
-		blocks:  dec64Blocks,
-		generic: true,
-	})
-}
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -67,12 +67,6 @@
        (Const32F [f2i(float64(i2f32(c) * i2f32(d)))])
 (Mul64F (Const64F [c]) (Const64F [d])) -> (Const64F [f2i(i2f(c) * i2f(d))])

-// Convert x * -1 to -x. The front-end catches some but not all of these.
-(Mul8  (Const8  [-1]) x) -> (Neg8  x)
-(Mul16 (Const16 [-1]) x) -> (Neg16 x)
-(Mul32 (Const32 [-1]) x) -> (Neg32 x)
-(Mul64 (Const64 [-1]) x) -> (Neg64 x)
-
 (Mod8  (Const8  [c]) (Const8  [d])) && d != 0 -> (Const8  [int64(int8(c % d))])
 (Mod16 (Const16 [c]) (Const16 [d])) && d != 0 -> (Const16 [int64(int16(c % d))])
 (Mod32 (Const32 [c]) (Const32 [d])) && d != 0 -> (Const32 [int64(int32(c % d))])
@@ -631,10 +625,8 @@
        (Store [t.FieldType(0).Size()] dst f0 mem))))

 // un-SSAable values use mem->mem copies
-(Store [size] dst (Load <t> src mem) mem) && !config.fe.CanSSA(t) ->
-	(Move [MakeSizeAndAlign(size, t.Alignment()).Int64()] dst src mem)
-(Store [size] dst (Load <t> src mem) (VarDef {x} mem)) && !config.fe.CanSSA(t) ->
-	(Move [MakeSizeAndAlign(size, t.Alignment()).Int64()] dst src (VarDef {x} mem))
+(Store [size] dst (Load <t> src mem) mem) && !config.fe.CanSSA(t) -> (Move [size] dst src mem)
+(Store [size] dst (Load <t> src mem) (VarDef {x} mem)) && !config.fe.CanSSA(t) -> (Move [size] dst src (VarDef {x} mem))

 // string ops
 // Decomposing StringMake and lowering of StringPtr and StringLen
@@ -840,23 +832,3 @@
  -> (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
 (Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && umagic64ok(c)
  -> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
-
-// floating point optimizations
-(Add32F x (Const32F [0])) -> x
-(Add32F (Const32F [0]) x) -> x
-(Add64F x (Const64F [0])) -> x
-(Add64F (Const64F [0]) x) -> x
-(Sub32F x (Const32F [0])) -> x
-(Sub64F x (Const64F [0])) -> x
-(Mul32F x (Const32F [f2i(1)])) -> x
-(Mul32F (Const32F [f2i(1)]) x) -> x
-(Mul64F x (Const64F [f2i(1)])) -> x
-(Mul64F (Const64F [f2i(1)]) x) -> x
-(Mul32F x (Const32F [f2i(-1)])) -> (Neg32F x)
-(Mul32F (Const32F [f2i(-1)]) x) -> (Neg32F x)
-(Mul64F x (Const64F [f2i(-1)])) -> (Neg64F x)
-(Mul64F (Const64F [f2i(-1)]) x) -> (Neg64F x)
-(Div32F x (Const32F [f2i(1)])) -> x
-(Div64F x (Const64F [f2i(1)])) -> x
-(Div32F x (Const32F [f2i(-1)])) -> (Neg32F x)
-(Div64F x (Const64F [f2i(-1)])) -> (Neg32F x)
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -173,76 +173,76 @@ var genericOps = []opData{
 	{name: "Lrot64", argLength: 1, aux: "Int64"},

 	// 2-input comparisons
-	{name: "Eq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 == arg1
-	{name: "Eq16", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Eq32", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Eq64", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "EqPtr", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "EqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "EqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "Eq32F", argLength: 2, typ: "Bool"},
-	{name: "Eq64F", argLength: 2, typ: "Bool"},
+	{name: "Eq8", argLength: 2, commutative: true}, // arg0 == arg1
+	{name: "Eq16", argLength: 2, commutative: true},
+	{name: "Eq32", argLength: 2, commutative: true},
+	{name: "Eq64", argLength: 2, commutative: true},
+	{name: "EqPtr", argLength: 2, commutative: true},
+	{name: "EqInter", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "EqSlice", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "Eq32F", argLength: 2},
+	{name: "Eq64F", argLength: 2},

-	{name: "Neq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 != arg1
-	{name: "Neq16", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Neq32", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "Neq64", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "NeqPtr", argLength: 2, commutative: true, typ: "Bool"},
-	{name: "NeqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "NeqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-	{name: "Neq32F", argLength: 2, typ: "Bool"},
+	{name: "Neq8", argLength: 2, commutative: true}, // arg0 != arg1
+	{name: "Neq16", argLength: 2, commutative: true},
+	{name: "Neq32", argLength: 2, commutative: true},
+	{name: "Neq64", argLength: 2, commutative: true},
+	{name: "NeqPtr", argLength: 2, commutative: true},
+	{name: "NeqInter", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "NeqSlice", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
+	{name: "Neq32F", argLength: 2},
 	{name: "Neq64F", argLength: 2},

-	{name: "Less8", argLength: 2, typ: "Bool"},  // arg0 < arg1, signed
-	{name: "Less8U", argLength: 2, typ: "Bool"}, // arg0 < arg1, unsigned
-	{name: "Less16", argLength: 2, typ: "Bool"},
-	{name: "Less16U", argLength: 2, typ: "Bool"},
-	{name: "Less32", argLength: 2, typ: "Bool"},
-	{name: "Less32U", argLength: 2, typ: "Bool"},
-	{name: "Less64", argLength: 2, typ: "Bool"},
-	{name: "Less64U", argLength: 2, typ: "Bool"},
-	{name: "Less32F", argLength: 2, typ: "Bool"},
-	{name: "Less64F", argLength: 2, typ: "Bool"},
+	{name: "Less8", argLength: 2},  // arg0 < arg1, signed
+	{name: "Less8U", argLength: 2}, // arg0 < arg1, unsigned
+	{name: "Less16", argLength: 2},
+	{name: "Less16U", argLength: 2},
+	{name: "Less32", argLength: 2},
+	{name: "Less32U", argLength: 2},
+	{name: "Less64", argLength: 2},
+	{name: "Less64U", argLength: 2},
+	{name: "Less32F", argLength: 2},
+	{name: "Less64F", argLength: 2},

-	{name: "Leq8", argLength: 2, typ: "Bool"},  // arg0 <= arg1, signed
-	{name: "Leq8U", argLength: 2, typ: "Bool"}, // arg0 <= arg1, unsigned
-	{name: "Leq16", argLength: 2, typ: "Bool"},
-	{name: "Leq16U", argLength: 2, typ: "Bool"},
-	{name: "Leq32", argLength: 2, typ: "Bool"},
-	{name: "Leq32U", argLength: 2, typ: "Bool"},
-	{name: "Leq64", argLength: 2, typ: "Bool"},
-	{name: "Leq64U", argLength: 2, typ: "Bool"},
-	{name: "Leq32F", argLength: 2, typ: "Bool"},
-	{name: "Leq64F", argLength: 2, typ: "Bool"},
+	{name: "Leq8", argLength: 2},  // arg0 <= arg1, signed
+	{name: "Leq8U", argLength: 2}, // arg0 <= arg1, unsigned
+	{name: "Leq16", argLength: 2},
+	{name: "Leq16U", argLength: 2},
+	{name: "Leq32", argLength: 2},
+	{name: "Leq32U", argLength: 2},
+	{name: "Leq64", argLength: 2},
+	{name: "Leq64U", argLength: 2},
+	{name: "Leq32F", argLength: 2},
+	{name: "Leq64F", argLength: 2},

-	{name: "Greater8", argLength: 2, typ: "Bool"},  // arg0 > arg1, signed
-	{name: "Greater8U", argLength: 2, typ: "Bool"}, // arg0 > arg1, unsigned
-	{name: "Greater16", argLength: 2, typ: "Bool"},
-	{name: "Greater16U", argLength: 2, typ: "Bool"},
-	{name: "Greater32", argLength: 2, typ: "Bool"},
-	{name: "Greater32U", argLength: 2, typ: "Bool"},
-	{name: "Greater64", argLength: 2, typ: "Bool"},
-	{name: "Greater64U", argLength: 2, typ: "Bool"},
-	{name: "Greater32F", argLength: 2, typ: "Bool"},
-	{name: "Greater64F", argLength: 2, typ: "Bool"},
+	{name: "Greater8", argLength: 2},  // arg0 > arg1, signed
+	{name: "Greater8U", argLength: 2}, // arg0 > arg1, unsigned
+	{name: "Greater16", argLength: 2},
+	{name: "Greater16U", argLength: 2},
+	{name: "Greater32", argLength: 2},
+	{name: "Greater32U", argLength: 2},
+	{name: "Greater64", argLength: 2},
+	{name: "Greater64U", argLength: 2},
+	{name: "Greater32F", argLength: 2},
+	{name: "Greater64F", argLength: 2},

-	{name: "Geq8", argLength: 2, typ: "Bool"},  // arg0 <= arg1, signed
-	{name: "Geq8U", argLength: 2, typ: "Bool"}, // arg0 <= arg1, unsigned
-	{name: "Geq16", argLength: 2, typ: "Bool"},
-	{name: "Geq16U", argLength: 2, typ: "Bool"},
-	{name: "Geq32", argLength: 2, typ: "Bool"},
-	{name: "Geq32U", argLength: 2, typ: "Bool"},
-	{name: "Geq64", argLength: 2, typ: "Bool"},
-	{name: "Geq64U", argLength: 2, typ: "Bool"},
-	{name: "Geq32F", argLength: 2, typ: "Bool"},
-	{name: "Geq64F", argLength: 2, typ: "Bool"},
+	{name: "Geq8", argLength: 2},  // arg0 >= arg1, signed
+	{name: "Geq8U", argLength: 2}, // arg0 >= arg1, unsigned
+	{name: "Geq16", argLength: 2},
+	{name: "Geq16U", argLength: 2},
+	{name: "Geq32", argLength: 2},
+	{name: "Geq32U", argLength: 2},
+	{name: "Geq64", argLength: 2},
+	{name: "Geq64U", argLength: 2},
+	{name: "Geq32F", argLength: 2},
+	{name: "Geq64F", argLength: 2},

 	// boolean ops
-	{name: "AndB", argLength: 2, typ: "Bool"}, // arg0 && arg1 (not shortcircuited)
-	{name: "OrB", argLength: 2, typ: "Bool"},  // arg0 || arg1 (not shortcircuited)
-	{name: "EqB", argLength: 2, typ: "Bool"},  // arg0 == arg1
-	{name: "NeqB", argLength: 2, typ: "Bool"}, // arg0 != arg1
-	{name: "Not", argLength: 1, typ: "Bool"},  // !arg0, boolean
+	{name: "AndB", argLength: 2}, // arg0 && arg1 (not shortcircuited)
+	{name: "OrB", argLength: 2},  // arg0 || arg1 (not shortcircuited)
+	{name: "EqB", argLength: 2},  // arg0 == arg1
+	{name: "NeqB", argLength: 2}, // arg0 != arg1
+	{name: "Not", argLength: 1},  // !arg0, boolean

 	// 1-input ops
 	{name: "Neg8", argLength: 1}, // -arg0
@@ -312,8 +312,8 @@ var genericOps = []opData{
 	// Memory operations
 	{name: "Load", argLength: 2},                            // Load from arg0.  arg1=memory
 	{name: "Store", argLength: 3, typ: "Mem", aux: "Int64"}, // Store arg1 to arg0.  arg2=memory, auxint=size.  Returns memory.
-	{name: "Move", argLength: 3, typ: "Mem", aux: "Int64"},  // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size.  Returns memory.
-	{name: "Zero", argLength: 2, typ: "Mem", aux: "Int64"},  // arg0=destptr, arg1=mem, auxint=size. Returns memory.
+	{name: "Move", argLength: 3, aux: "Int64"},              // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size.  Returns memory.
+	{name: "Zero", argLength: 2, aux: "Int64"},              // arg0=destptr, arg1=mem, auxint=size. Returns memory.

 	// Function calls. Arguments to the call have already been written to the stack.
 	// Return values appear on the stack. The method receiver, if any, is treated
@@ -326,17 +326,17 @@ var genericOps = []opData{

 	// Conversions: signed extensions, zero (unsigned) extensions, truncations
 	{name: "SignExt8to16", argLength: 1, typ: "Int16"},
-	{name: "SignExt8to32", argLength: 1, typ: "Int32"},
-	{name: "SignExt8to64", argLength: 1, typ: "Int64"},
-	{name: "SignExt16to32", argLength: 1, typ: "Int32"},
-	{name: "SignExt16to64", argLength: 1, typ: "Int64"},
-	{name: "SignExt32to64", argLength: 1, typ: "Int64"},
+	{name: "SignExt8to32", argLength: 1},
+	{name: "SignExt8to64", argLength: 1},
+	{name: "SignExt16to32", argLength: 1},
+	{name: "SignExt16to64", argLength: 1},
+	{name: "SignExt32to64", argLength: 1},
 	{name: "ZeroExt8to16", argLength: 1, typ: "UInt16"},
-	{name: "ZeroExt8to32", argLength: 1, typ: "UInt32"},
-	{name: "ZeroExt8to64", argLength: 1, typ: "UInt64"},
-	{name: "ZeroExt16to32", argLength: 1, typ: "UInt32"},
-	{name: "ZeroExt16to64", argLength: 1, typ: "UInt64"},
-	{name: "ZeroExt32to64", argLength: 1, typ: "UInt64"},
+	{name: "ZeroExt8to32", argLength: 1},
+	{name: "ZeroExt8to64", argLength: 1},
+	{name: "ZeroExt16to32", argLength: 1},
+	{name: "ZeroExt16to64", argLength: 1},
+	{name: "ZeroExt32to64", argLength: 1},
 	{name: "Trunc16to8", argLength: 1},
 	{name: "Trunc32to8", argLength: 1},
 	{name: "Trunc32to16", argLength: 1},
@@ -416,31 +416,6 @@ var genericOps = []opData{
 	{name: "VarKill", argLength: 1, aux: "Sym"},            // aux is a *gc.Node of a variable that is known to be dead.  arg0=mem, returns mem
 	{name: "VarLive", argLength: 1, aux: "Sym"},            // aux is a *gc.Node of a variable that must be kept live.  arg0=mem, returns mem
 	{name: "KeepAlive", argLength: 2, typ: "Mem"},          // arg[0] is a value that must be kept alive until this mark.  arg[1]=mem, returns mem
-
-	// Ops for breaking 64-bit operations on 32-bit architectures
-	{name: "Int64Make", argLength: 2, typ: "UInt64"}, // arg0=hi, arg1=lo
-	{name: "Int64Hi", argLength: 1, typ: "UInt32"},   // high 32-bit of arg0
-	{name: "Int64Lo", argLength: 1, typ: "UInt32"},   // low 32-bit of arg0
-
-	{name: "Add32carry", argLength: 2, commutative: true, typ: "(Flags,UInt32)"}, // arg0 + arg1, returns (carry, value)
-	{name: "Add32withcarry", argLength: 3, commutative: true},                    // arg0 + arg1 + arg2, arg2=carry (0 or 1)
-
-	{name: "Sub32carry", argLength: 2, typ: "(Flags,UInt32)"}, // arg0 - arg1, returns (carry, value)
-	{name: "Sub32withcarry", argLength: 3},                    // arg0 - arg1 - arg2, arg2=carry (0 or 1)
-
-	{name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
-
-	{name: "Signmask", argLength: 1, typ: "Int32"},  // 0 if arg0 >= 0, -1 if arg0 < 0
-	{name: "Zeromask", argLength: 1, typ: "UInt32"}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0
-
-	{name: "Cvt32Uto32F", argLength: 1}, // uint32 -> float32, only used on 32-bit arch
-	{name: "Cvt32Uto64F", argLength: 1}, // uint32 -> float64, only used on 32-bit arch
-	{name: "Cvt32Fto32U", argLength: 1}, // float32 -> uint32, only used on 32-bit arch
-	{name: "Cvt64Fto32U", argLength: 1}, // float64 -> uint32, only used on 32-bit arch
-
-	// pseudo-ops for breaking Tuple
-	{name: "Select0", argLength: 1}, // the first component of a tuple
-	{name: "Select1", argLength: 1}, // the second component of a tuple
 }

 //     kind           control    successors       implicit exit
--- a/src/cmd/compile/internal/ssa/gen/main.go
+++ b/src/cmd/compile/internal/ssa/gen/main.go
@@ -21,16 +21,13 @@ import (
 )

 type arch struct {
-	name            string
-	pkg             string // obj package to import for this arch.
-	genfile         string // source file containing opcode code generation.
-	ops             []opData
-	blocks          []blockData
-	regnames        []string
-	gpregmask       regMask
-	fpregmask       regMask
-	framepointerreg int8
-	generic         bool
+	name     string
+	pkg      string // obj package to import for this arch.
+	genfile  string // source file containing opcode code generation.
+	ops      []opData
+	blocks   []blockData
+	regnames []string
+	generic  bool
 }

 type opData struct {
@@ -41,9 +38,8 @@ type opData struct {
 	aux               string
 	rematerializeable bool
 	argLength         int32 // number of arguments, if -1, then this operation has a variable number of arguments
-	commutative       bool  // this operation is commutative on its first 2 arguments (e.g. addition)
-	resultInArg0      bool  // last output of v and v.Args[0] must be allocated to the same register
-	clobberFlags      bool  // this op clobbers flags register
+	commutative       bool  // this operation is commutative (e.g. addition)
+	resultInArg0      bool  // v and v.Args[0] must be allocated to the same register
 }

 type blockData struct {
@@ -77,7 +73,6 @@ var archs []arch

 func main() {
 	flag.Parse()
-	sort.Sort(ArchsByName(archs))
 	genOp()
 	genLower()
 }
@@ -160,16 +155,13 @@ func genOp() {
 			}
 			if v.resultInArg0 {
 				fmt.Fprintln(w, "resultInArg0: true,")
-				if v.reg.inputs[0] != v.reg.outputs[len(v.reg.outputs)-1] {
-					log.Fatalf("input[0] and last output register must be equal for %s", v.name)
+				if v.reg.inputs[0] != v.reg.outputs[0] {
+					log.Fatalf("input[0] and output registers must be equal for %s", v.name)
 				}
-				if v.commutative && v.reg.inputs[1] != v.reg.outputs[len(v.reg.outputs)-1] {
-					log.Fatalf("input[1] and last output register must be equal for %s", v.name)
+				if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
+					log.Fatalf("input[1] and output registers must be equal for %s", v.name)
 				}
 			}
-			if v.clobberFlags {
-				fmt.Fprintln(w, "clobberFlags: true,")
-			}
 			if a.name == "generic" {
 				fmt.Fprintln(w, "generic:true,")
 				fmt.Fprintln(w, "},") // close op
@@ -199,22 +191,14 @@ func genOp() {
 				}
 				fmt.Fprintln(w, "},")
 			}
-
 			if v.reg.clobbers > 0 {
 				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
 			}
-
 			// reg outputs
-			s = s[:0]
-			for i, r := range v.reg.outputs {
-				s = append(s, intPair{countRegs(r), i})
-			}
-			if len(s) > 0 {
-				sort.Sort(byKey(s))
-				fmt.Fprintln(w, "outputs: []outputInfo{")
-				for _, p := range s {
-					r := v.reg.outputs[p.val]
-					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
+			if len(v.reg.outputs) > 0 {
+				fmt.Fprintln(w, "outputs: []regMask{")
+				for _, r := range v.reg.outputs {
+					fmt.Fprintf(w, "%d,%s\n", r, a.regMaskComment(r))
 				}
 				fmt.Fprintln(w, "},")
 			}
@@ -239,9 +223,6 @@ func genOp() {
 			fmt.Fprintf(w, "  {%d, \"%s\"},\n", i, r)
 		}
 		fmt.Fprintln(w, "}")
-		fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask)
-		fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
-		fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
 	}

 	// gofmt result
@@ -317,9 +298,3 @@ type byKey []intPair
 func (a byKey) Len() int           { return len(a) }
 func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
 func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }
-
-type ArchsByName []arch
-
-func (x ArchsByName) Len() int           { return len(x) }
-func (x ArchsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
-func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name }
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -117,17 +117,15 @@ func genRules(arch arch) {
 		if unbalanced(rule) {
 			continue
 		}
-
+		op := strings.Split(rule, " ")[0][1:]
+		if op[len(op)-1] == ')' {
+			op = op[:len(op)-1] // rule has only opcode, e.g. (ConstNil) -> ...
+		}
 		loc := fmt.Sprintf("%s.rules:%d", arch.name, ruleLineno)
-		r := Rule{rule: rule, loc: loc}
-		if rawop := strings.Split(rule, " ")[0][1:]; isBlock(rawop, arch) {
-			blockrules[rawop] = append(blockrules[rawop], r)
+		if isBlock(op, arch) {
+			blockrules[op] = append(blockrules[op], Rule{rule: rule, loc: loc})
 		} else {
-			// Do fancier value op matching.
-			match, _, _ := r.parse()
-			op, oparch, _, _, _, _ := parseValue(match, arch, loc)
-			opname := fmt.Sprintf("Op%s%s", oparch, op.name)
-			oprules[opname] = append(oprules[opname], r)
+			oprules[op] = append(oprules[op], Rule{rule: rule, loc: loc})
 		}
 		rule = ""
 		ruleLineno = 0
@@ -159,8 +157,8 @@ func genRules(arch arch) {
 	fmt.Fprintf(w, "func rewriteValue%s(v *Value, config *Config) bool {\n", arch.name)
 	fmt.Fprintf(w, "switch v.Op {\n")
 	for _, op := range ops {
-		fmt.Fprintf(w, "case %s:\n", op)
-		fmt.Fprintf(w, "return rewriteValue%s_%s(v, config)\n", arch.name, op)
+		fmt.Fprintf(w, "case %s:\n", opName(op, arch))
+		fmt.Fprintf(w, "return rewriteValue%s_%s(v, config)\n", arch.name, opName(op, arch))
 	}
 	fmt.Fprintf(w, "}\n")
 	fmt.Fprintf(w, "return false\n")
@@ -169,7 +167,7 @@ func genRules(arch arch) {
 	// Generate a routine per op. Note that we don't make one giant routine
 	// because it is too big for some compilers.
 	for _, op := range ops {
-		fmt.Fprintf(w, "func rewriteValue%s_%s(v *Value, config *Config) bool {\n", arch.name, op)
+		fmt.Fprintf(w, "func rewriteValue%s_%s(v *Value, config *Config) bool {\n", arch.name, opName(op, arch))
 		fmt.Fprintln(w, "b := v.Block")
 		fmt.Fprintln(w, "_ = b")
 		var canFail bool
@@ -336,108 +334,141 @@ func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, t
 	}
 	canFail := false

-	op, oparch, typ, auxint, aux, args := parseValue(match, arch, loc)
+	// split body up into regions. Split by spaces/tabs, except those
+	// contained in () or {}.
+	s := split(match[1 : len(match)-1]) // remove parens, then split
+
+	// Find op record
+	var op opData
+	for _, x := range genericOps {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	for _, x := range arch.ops {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	if op.name == "" {
+		log.Fatalf("%s: unknown op %s", loc, s[0])
+	}

 	// check op
 	if !top {
-		fmt.Fprintf(w, "if %s.Op != Op%s%s {\nbreak\n}\n", v, oparch, op.name)
+		fmt.Fprintf(w, "if %s.Op != %s {\nbreak\n}\n", v, opName(s[0], arch))
 		canFail = true
 	}

-	if typ != "" {
-		if !isVariable(typ) {
-			// code. We must match the results of this code.
-			fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, typ)
-			canFail = true
-		} else {
-			// variable
-			if _, ok := m[typ]; ok {
-				// must match previous variable
-				fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, typ)
+	// check type/aux/args
+	argnum := 0
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction
+			t := a[1 : len(a)-1] // remove <>
+			if !isVariable(t) {
+				// code. We must match the results of this code.
+				fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, t)
 				canFail = true
 			} else {
-				m[typ] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.Type\n", typ, v)
+				// variable
+				if _, ok := m[t]; ok {
+					// must match previous variable
+					fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, t)
+					canFail = true
+				} else {
+					m[t] = struct{}{}
+					fmt.Fprintf(w, "%s := %s.Type\n", t, v)
+				}
 			}
-		}
-	}
-
-	if auxint != "" {
-		if !isVariable(auxint) {
-			// code
-			fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, auxint)
-			canFail = true
-		} else {
-			// variable
-			if _, ok := m[auxint]; ok {
-				fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, auxint)
+		} else if a[0] == '[' {
+			// auxint restriction
+			switch op.aux {
+			case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove []
+			if !isVariable(x) {
+				// code
+				fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, x)
 				canFail = true
 			} else {
-				m[auxint] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.AuxInt\n", auxint, v)
+				// variable
+				if _, ok := m[x]; ok {
+					fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, x)
+					canFail = true
+				} else {
+					m[x] = struct{}{}
+					fmt.Fprintf(w, "%s := %s.AuxInt\n", x, v)
+				}
 			}
-		}
-	}
-
-	if aux != "" {
-
-		if !isVariable(aux) {
-			// code
-			fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, aux)
-			canFail = true
-		} else {
-			// variable
-			if _, ok := m[aux]; ok {
-				fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, aux)
+		} else if a[0] == '{' {
+			// aux restriction
+			switch op.aux {
+			case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove {}
+			if !isVariable(x) {
+				// code
+				fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, x)
 				canFail = true
 			} else {
-				m[aux] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.Aux\n", aux, v)
+				// variable
+				if _, ok := m[x]; ok {
+					fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, x)
+					canFail = true
+				} else {
+					m[x] = struct{}{}
+					fmt.Fprintf(w, "%s := %s.Aux\n", x, v)
+				}
 			}
-		}
-	}
-
-	for i, arg := range args {
-		if arg == "_" {
-			continue
-		}
-		if !strings.Contains(arg, "(") {
+		} else if a == "_" {
+			argnum++
+		} else if !strings.Contains(a, "(") {
 			// leaf variable
-			if _, ok := m[arg]; ok {
+			if _, ok := m[a]; ok {
 				// variable already has a definition. Check whether
 				// the old definition and the new definition match.
 				// For example, (add x x).  Equality is just pointer equality
 				// on Values (so cse is important to do before lowering).
-				fmt.Fprintf(w, "if %s != %s.Args[%d] {\nbreak\n}\n", arg, v, i)
+				fmt.Fprintf(w, "if %s != %s.Args[%d] {\nbreak\n}\n", a, v, argnum)
 				canFail = true
 			} else {
 				// remember that this variable references the given value
-				m[arg] = struct{}{}
-				fmt.Fprintf(w, "%s := %s.Args[%d]\n", arg, v, i)
+				m[a] = struct{}{}
+				fmt.Fprintf(w, "%s := %s.Args[%d]\n", a, v, argnum)
 			}
-			continue
-		}
-		// compound sexpr
-		var argname string
-		colon := strings.Index(arg, ":")
-		openparen := strings.Index(arg, "(")
-		if colon >= 0 && openparen >= 0 && colon < openparen {
-			// rule-specified name
-			argname = arg[:colon]
-			arg = arg[colon+1:]
+			argnum++
 		} else {
-			// autogenerated name
-			argname = fmt.Sprintf("%s_%d", v, i)
-		}
-		fmt.Fprintf(w, "%s := %s.Args[%d]\n", argname, v, i)
-		if genMatch0(w, arch, arg, argname, m, false, loc) {
-			canFail = true
+			// compound sexpr
+			var argname string
+			colon := strings.Index(a, ":")
+			openparen := strings.Index(a, "(")
+			if colon >= 0 && openparen >= 0 && colon < openparen {
+				// rule-specified name
+				argname = a[:colon]
+				a = a[colon+1:]
+			} else {
+				// autogenerated name
+				argname = fmt.Sprintf("%s_%d", v, argnum)
+			}
+			fmt.Fprintf(w, "%s := %s.Args[%d]\n", argname, v, argnum)
+			if genMatch0(w, arch, a, argname, m, false, loc) {
+				canFail = true
+			}
+			argnum++
 		}
 	}
-
 	if op.argLength == -1 {
-		fmt.Fprintf(w, "if len(%s.Args) != %d {\nbreak\n}\n", v, len(args))
+		fmt.Fprintf(w, "if len(%s.Args) != %d {\nbreak\n}\n", v, argnum)
 		canFail = true
+	} else if int(op.argLength) != argnum {
+		log.Fatalf("%s: op %s should have %d args, has %d", loc, op.name, op.argLength, argnum)
 	}
 	return canFail
 }
@@ -469,44 +500,105 @@ func genResult0(w io.Writer, arch arch, result string, alloc *int, top, move boo
 		return result
 	}

-	op, oparch, typ, auxint, aux, args := parseValue(result, arch, loc)
+	s := split(result[1 : len(result)-1]) // remove parens, then split

-	// Find the type of the variable.
-	typeOverride := typ != ""
-	if typ == "" && op.typ != "" {
-		typ = typeName(op.typ)
+	// Find op record
+	var op opData
+	for _, x := range genericOps {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	for _, x := range arch.ops {
+		if x.name == s[0] {
+			op = x
+			break
+		}
+	}
+	if op.name == "" {
+		log.Fatalf("%s: unknown op %s", loc, s[0])
 	}

+	// Find the type of the variable.
+	var opType string
+	var typeOverride bool
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction
+			opType = a[1 : len(a)-1] // remove <>
+			typeOverride = true
+			break
+		}
+	}
+	if opType == "" {
+		// find default type, if any
+		for _, op := range arch.ops {
+			if op.name == s[0] && op.typ != "" {
+				opType = typeName(op.typ)
+				break
+			}
+		}
+	}
+	if opType == "" {
+		for _, op := range genericOps {
+			if op.name == s[0] && op.typ != "" {
+				opType = typeName(op.typ)
+				break
+			}
+		}
+	}
 	var v string
 	if top && !move {
 		v = "v"
-		fmt.Fprintf(w, "v.reset(Op%s%s)\n", oparch, op.name)
+		fmt.Fprintf(w, "v.reset(%s)\n", opName(s[0], arch))
 		if typeOverride {
-			fmt.Fprintf(w, "v.Type = %s\n", typ)
+			fmt.Fprintf(w, "v.Type = %s\n", opType)
 		}
 	} else {
-		if typ == "" {
-			log.Fatalf("sub-expression %s (op=Op%s%s) must have a type", result, oparch, op.name)
+		if opType == "" {
+			log.Fatalf("sub-expression %s (op=%s) must have a type", result, s[0])
 		}
 		v = fmt.Sprintf("v%d", *alloc)
 		*alloc++
-		fmt.Fprintf(w, "%s := b.NewValue0(v.Line, Op%s%s, %s)\n", v, oparch, op.name, typ)
+		fmt.Fprintf(w, "%s := b.NewValue0(v.Line, %s, %s)\n", v, opName(s[0], arch), opType)
 		if move && top {
 			// Rewrite original into a copy
 			fmt.Fprintf(w, "v.reset(OpCopy)\n")
 			fmt.Fprintf(w, "v.AddArg(%s)\n", v)
 		}
 	}
-
-	if auxint != "" {
-		fmt.Fprintf(w, "%s.AuxInt = %s\n", v, auxint)
+	argnum := 0
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction, handled above
+		} else if a[0] == '[' {
+			// auxint restriction
+			switch op.aux {
+			case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove []
+			fmt.Fprintf(w, "%s.AuxInt = %s\n", v, x)
+		} else if a[0] == '{' {
+			// aux restriction
+			switch op.aux {
+			case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32":
+			default:
+				log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux)
+			}
+			x := a[1 : len(a)-1] // remove {}
+			fmt.Fprintf(w, "%s.Aux = %s\n", v, x)
+		} else {
+			// regular argument (sexpr or variable)
+			x := genResult0(w, arch, a, alloc, false, move, loc)
+			fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x)
+			argnum++
+		}
 	}
-	if aux != "" {
-		fmt.Fprintf(w, "%s.Aux = %s\n", v, aux)
-	}
-	for _, arg := range args {
-		x := genResult0(w, arch, arg, alloc, false, move, loc)
-		fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x)
+	if op.argLength != -1 && int(op.argLength) != argnum {
+		log.Fatalf("%s: op %s should have %d args, has %d", loc, op.name, op.argLength, argnum)
 	}

 	return v
@@ -574,102 +666,16 @@ func isBlock(name string, arch arch) bool {
 	return false
 }

-// parseValue parses a parenthesized value from a rule.
-// The value can be from the match or the result side.
-// It returns the op and unparsed strings for typ, auxint, and aux restrictions and for all args.
-// oparch is the architecture that op is located in, or "" for generic.
-func parseValue(val string, arch arch, loc string) (op opData, oparch string, typ string, auxint string, aux string, args []string) {
-	val = val[1 : len(val)-1] // remove ()
-
-	// Split val up into regions.
-	// Split by spaces/tabs, except those contained in (), {}, [], or <>.
-	s := split(val)
-
-	// Extract restrictions and args.
-	for _, a := range s[1:] {
-		switch a[0] {
-		case '<':
-			typ = a[1 : len(a)-1] // remove <>
-		case '[':
-			auxint = a[1 : len(a)-1] // remove []
-		case '{':
-			aux = a[1 : len(a)-1] // remove {}
-		default:
-			args = append(args, a)
+// opName converts from an op name specified in a rule file to an Op enum.
+// if the name matches a generic op, returns "Op" plus the specified name.
+// Otherwise, returns "Op" plus arch name plus op name.
+func opName(name string, arch arch) string {
+	for _, op := range genericOps {
+		if op.name == name {
+			return "Op" + name
 		}
 	}
-
-	// Resolve the op.
-
-	// match reports whether x is a good op to select.
-	// If strict is true, rule generation might succeed.
-	// If strict is false, rule generation has failed,
-	// but we're trying to generate a useful error.
-	// Doing strict=true then strict=false allows
-	// precise op matching while retaining good error messages.
-	match := func(x opData, strict bool, archname string) bool {
-		if x.name != s[0] {
-			return false
-		}
-		if x.argLength != -1 && int(x.argLength) != len(args) {
-			if strict {
-				return false
-			} else {
-				log.Printf("%s: op %s (%s) should have %d args, has %d", loc, s[0], archname, op.argLength, len(args))
-			}
-		}
-		return true
-	}
-
-	for _, x := range genericOps {
-		if match(x, true, "generic") {
-			op = x
-			break
-		}
-	}
-	if arch.name != "generic" {
-		for _, x := range arch.ops {
-			if match(x, true, arch.name) {
-				if op.name != "" {
-					log.Fatalf("%s: matches for op %s found in both generic and %s", loc, op.name, arch.name)
-				}
-				op = x
-				oparch = arch.name
-				break
-			}
-		}
-	}
-
-	if op.name == "" {
-		// Failed to find the op.
-		// Run through everything again with strict=false
-		// to generate useful diagnosic messages before failing.
-		for _, x := range genericOps {
-			match(x, false, "generic")
-		}
-		for _, x := range arch.ops {
-			match(x, false, arch.name)
-		}
-		log.Fatalf("%s: unknown op %s", loc, s)
-	}
-
-	// Sanity check aux, auxint.
-	if auxint != "" {
-		switch op.aux {
-		case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32":
-		default:
-			log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux)
-		}
-	}
-	if aux != "" {
-		switch op.aux {
-		case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32":
-		default:
-			log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux)
-		}
-	}
-
-	return
+	return "Op" + arch.name + name
 }

 func blockName(name string, arch arch) string {
@@ -683,13 +689,6 @@ func blockName(name string, arch arch) string {

 // typeName returns the string to use to generate a type.
 func typeName(typ string) string {
-	if typ[0] == '(' {
-		ts := strings.Split(typ[1:len(typ)-1], ",")
-		if len(ts) != 2 {
-			panic("Tuple expect 2 arguments")
-		}
-		return "MakeTuple(" + typeName(ts[0]) + ", " + typeName(ts[1]) + ")"
-	}
 	switch typ {
 	case "Flags", "Mem", "Void", "Int128":
 		return "Type" + typ
--- a/src/cmd/compile/internal/ssa/html.go
+++ b/src/cmd/compile/internal/ssa/html.go
@@ -359,7 +359,7 @@ func (v *Value) LongHTML() string {
 	}
 	r := v.Block.Func.RegAlloc
 	if int(v.ID) < len(r) && r[v.ID] != nil {
-		s += " : " + html.EscapeString(r[v.ID].Name())
+		s += " : " + r[v.ID].Name()
 	}
 	s += "</span>"
 	return s
--- a/src/cmd/compile/internal/ssa/location.go
+++ b/src/cmd/compile/internal/ssa/location.go
@@ -36,16 +36,3 @@ func (s LocalSlot) Name() string {
 	}
 	return fmt.Sprintf("%s+%d[%s]", s.N, s.Off, s.Type)
 }
-
-type LocPair [2]Location
-
-func (t LocPair) Name() string {
-	n0, n1 := "nil", "nil"
-	if t[0] != nil {
-		n0 = t[0].Name()
-	}
-	if t[1] != nil {
-		n1 = t[1].Name()
-	}
-	return fmt.Sprintf("<%s,%s>", n0, n1)
-}
--- a/src/cmd/compile/internal/ssa/lower.go
+++ b/src/cmd/compile/internal/ssa/lower.go
@@ -21,15 +21,10 @@ func checkLower(f *Func) {
 				continue // lowered
 			}
 			switch v.Op {
-			case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive, OpKeepAlive, OpSelect0, OpSelect1:
+			case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive, OpKeepAlive:
 				continue // ok not to lower
-			case OpGetG:
-				if f.Config.hasGReg {
-					// has hardware g register, regalloc takes care of it
-					continue // ok not to lower
-				}
 			}
-			s := "not lowered: " + v.String() + ", " + v.Op.String() + " " + v.Type.SimpleString()
+			s := "not lowered: " + v.Op.String() + " " + v.Type.SimpleString()
 			for _, a := range v.Args {
 				s += " " + a.Type.SimpleString()
 			}
--- a/src/cmd/compile/internal/ssa/op.go
+++ b/src/cmd/compile/internal/ssa/op.go
@@ -26,8 +26,7 @@ type opInfo struct {
 	generic           bool // this is a generic (arch-independent) opcode
 	rematerializeable bool // this op is rematerializeable
 	commutative       bool // this operation is commutative (e.g. addition)
-	resultInArg0      bool // last output of v and v.Args[0] must be allocated to the same register
-	clobberFlags      bool // this op clobbers flags register
+	resultInArg0      bool // v and v.Args[0] must be allocated to the same register
 }

 type inputInfo struct {
@@ -35,15 +34,10 @@ type inputInfo struct {
 	regs regMask // allowed input registers
 }

-type outputInfo struct {
-	idx  int     // index in output tuple
-	regs regMask // allowed output registers
-}
-
 type regInfo struct {
 	inputs   []inputInfo // ordered in register allocation order
 	clobbers regMask
-	outputs  []outputInfo // ordered in register allocation order
+	outputs  []regMask // NOTE: values can only have 1 output for now.
 }

 type auxType int8
@@ -130,31 +124,3 @@ func (x ValAndOff) add(off int64) int64 {
 	}
 	return makeValAndOff(x.Val(), x.Off()+off)
 }
-
-// SizeAndAlign holds both the size and the alignment of a type,
-// used in Zero and Move ops.
-// The high 8 bits hold the alignment.
-// The low 56 bits hold the size.
-type SizeAndAlign int64
-
-func (x SizeAndAlign) Size() int64 {
-	return int64(x) & (1<<56 - 1)
-}
-func (x SizeAndAlign) Align() int64 {
-	return int64(uint64(x) >> 56)
-}
-func (x SizeAndAlign) Int64() int64 {
-	return int64(x)
-}
-func (x SizeAndAlign) String() string {
-	return fmt.Sprintf("size=%d,align=%d", x.Size(), x.Align())
-}
-func MakeSizeAndAlign(size, align int64) SizeAndAlign {
-	if size&^(1<<56-1) != 0 {
-		panic("size too big in SizeAndAlign")
-	}
-	if align >= 1<<8 {
-		panic("alignment too big in SizeAndAlign")
-	}
-	return SizeAndAlign(size | align<<56)
-}
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
--- a/src/cmd/compile/internal/ssa/opt.go
+++ b/src/cmd/compile/internal/ssa/opt.go
@@ -11,7 +11,4 @@ func opt(f *Func) {

 func dec(f *Func) {
 	applyRewrite(f, rewriteBlockdec, rewriteValuedec)
-	if f.Config.IntSize == 4 && f.Config.arch != "amd64p32" {
-		applyRewrite(f, rewriteBlockdec64, rewriteValuedec64)
-	}
 }
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -206,7 +206,6 @@ type regAllocState struct {
 	numRegs     register
 	SPReg       register
 	SBReg       register
-	GReg        register
 	allocatable regMask

 	// for each block, its primary predecessor.
@@ -333,14 +332,14 @@ func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
 	s.f.setHome(c, &s.registers[r])
 }

-// allocReg chooses a register from the set of registers in mask.
+// allocReg chooses a register for v from the set of registers in mask.
 // If there is no unused register, a Value will be kicked out of
 // a register to make room.
-func (s *regAllocState) allocReg(mask regMask, v *Value) register {
+func (s *regAllocState) allocReg(v *Value, mask regMask) register {
 	mask &= s.allocatable
 	mask &^= s.nospill
 	if mask == 0 {
-		s.f.Fatalf("no register available for %s", v)
+		s.f.Fatalf("no register available")
 	}

 	// Pick an unused register if one is available.
@@ -401,7 +400,7 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line
 	}

 	// Allocate a register.
-	r := s.allocReg(mask, v)
+	r := s.allocReg(v, mask)

 	// Allocate v to the new register.
 	var c *Value
@@ -439,76 +438,28 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line
 func (s *regAllocState) init(f *Func) {
 	s.f = f
 	s.registers = f.Config.registers
-	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
-		s.f.Fatalf("bad number of registers: %d", nr)
-	} else {
-		s.numRegs = register(nr)
+	s.numRegs = register(len(s.registers))
+	if s.numRegs > noRegister || s.numRegs > register(unsafe.Sizeof(regMask(0))*8) {
+		panic("too many registers")
 	}
-	// Locate SP, SB, and g registers.
-	s.SPReg = noRegister
-	s.SBReg = noRegister
-	s.GReg = noRegister
 	for r := register(0); r < s.numRegs; r++ {
-		switch s.registers[r].Name() {
-		case "SP":
+		if s.registers[r].Name() == "SP" {
 			s.SPReg = r
-		case "SB":
-			s.SBReg = r
-		case "g":
-			s.GReg = r
 		}
-	}
-	// Make sure we found all required registers.
-	switch noRegister {
-	case s.SPReg:
-		s.f.Fatalf("no SP register found")
-	case s.SBReg:
-		s.f.Fatalf("no SB register found")
-	case s.GReg:
-		if f.Config.hasGReg {
-			s.f.Fatalf("no g register found")
+		if s.registers[r].Name() == "SB" {
+			s.SBReg = r
 		}
 	}

 	// Figure out which registers we're allowed to use.
-	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask
+	s.allocatable = regMask(1)<<s.numRegs - 1
 	s.allocatable &^= 1 << s.SPReg
 	s.allocatable &^= 1 << s.SBReg
-	if s.f.Config.hasGReg {
-		s.allocatable &^= 1 << s.GReg
-	}
-	if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
-		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
+	if s.f.Config.ctxt.Framepointer_enabled {
+		s.allocatable &^= 1 << 5 // BP
 	}
 	if s.f.Config.ctxt.Flag_dynlink {
-		switch s.f.Config.arch {
-		case "amd64":
-			s.allocatable &^= 1 << 15 // R15
-		case "arm":
-			s.allocatable &^= 1 << 9 // R9
-		case "arm64":
-			// nothing to do?
-		case "386":
-			// nothing to do.
-			// Note that for Flag_shared (position independent code)
-			// we do need to be careful, but that carefulness is hidden
-			// in the rewrite rules so we always have a free register
-			// available for global load/stores. See gen/386.rules (search for Flag_shared).
-		default:
-			s.f.Config.fe.Unimplementedf(0, "arch %s not implemented", s.f.Config.arch)
-		}
-	}
-	if s.f.Config.nacl {
-		switch s.f.Config.arch {
-		case "arm":
-			s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm
-		case "amd64p32":
-			s.allocatable &^= 1 << 5  // BP - reserved for nacl
-			s.allocatable &^= 1 << 15 // R15 - reserved for nacl
-		}
-	}
-	if s.f.Config.use387 {
-		s.allocatable &^= 1 << 15 // X7 disallowed (one 387 register is used as scratch space during SSE->387 generation in ../x86/387.go)
+		s.allocatable &^= 1 << 15 // R15
 	}

 	s.regs = make([]regState, s.numRegs)
@@ -516,13 +467,11 @@ func (s *regAllocState) init(f *Func) {
 	s.orig = make([]*Value, f.NumValues())
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
-			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
+			if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() {
 				s.values[v.ID].needReg = true
 				s.values[v.ID].rematerializeable = v.rematerializeable()
 				s.orig[v.ID] = v
 			}
-			// Note: needReg is false for values returning Tuple types.
-			// Instead, we mark the corresponding Selects as needReg.
 		}
 	}
 	s.computeLive()
@@ -615,9 +564,9 @@ func (s *regAllocState) setState(regs []endReg) {
 func (s *regAllocState) compatRegs(t Type) regMask {
 	var m regMask
 	if t.IsFloat() || t == TypeInt128 {
-		m = s.f.Config.fpRegMask
+		m = 0xffff << 16 // X0-X15
 	} else {
-		m = s.f.Config.gpRegMask
+		m = 0xffff << 0 // AX-R15
 	}
 	return m & s.allocatable
 }
@@ -837,9 +786,6 @@ func (s *regAllocState) regalloc(f *Func) {
 				if phiRegs[i] != noRegister {
 					continue
 				}
-				if s.f.Config.use387 && v.Type.IsFloat() {
-					continue // 387 can't handle floats in registers between blocks
-				}
 				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
 				if m != 0 {
 					r := pickReg(m)
@@ -969,7 +915,6 @@ func (s *regAllocState) regalloc(f *Func) {
 			if s.f.pass.debug > regDebug {
 				fmt.Printf("  processing %s\n", v.LongString())
 			}
-			regspec := opcodeTable[v.Op].reg
 			if v.Op == OpPhi {
 				f.Fatalf("phi %s not at start of block", v)
 			}
@@ -985,28 +930,6 @@ func (s *regAllocState) regalloc(f *Func) {
 				s.advanceUses(v)
 				continue
 			}
-			if v.Op == OpSelect0 || v.Op == OpSelect1 {
-				if s.values[v.ID].needReg {
-					var i = 0
-					if v.Op == OpSelect1 {
-						i = 1
-					}
-					s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).Num), v, v)
-				}
-				b.Values = append(b.Values, v)
-				s.advanceUses(v)
-				goto issueSpill
-			}
-			if v.Op == OpGetG && s.f.Config.hasGReg {
-				// use hardware g register
-				if s.regs[s.GReg].v != nil {
-					s.freeReg(s.GReg) // kick out the old value
-				}
-				s.assignReg(s.GReg, v, v)
-				b.Values = append(b.Values, v)
-				s.advanceUses(v)
-				goto issueSpill
-			}
 			if v.Op == OpArg {
 				// Args are "pre-spilled" values. We don't allocate
 				// any register here. We just set up the spill pointer to
@@ -1034,6 +957,7 @@ func (s *regAllocState) regalloc(f *Func) {
 				b.Values = append(b.Values, v)
 				continue
 			}
+			regspec := opcodeTable[v.Op].reg
 			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
 				// No register allocation required (or none specified yet)
 				s.freeRegs(regspec.clobbers)
@@ -1078,6 +1002,10 @@ func (s *regAllocState) regalloc(f *Func) {
 			args = append(args[:0], v.Args...)
 			for _, i := range regspec.inputs {
 				mask := i.regs
+				if mask == flagRegMask {
+					// TODO: remove flag input from regspec.inputs.
+					continue
+				}
 				if mask&s.values[args[i.idx].ID].regs == 0 {
 					// Need a new register for the input.
 					mask &= s.allocatable
@@ -1187,73 +1115,49 @@ func (s *regAllocState) regalloc(f *Func) {
 			// Dump any registers which will be clobbered
 			s.freeRegs(regspec.clobbers)

-			// Pick registers for outputs.
-			{
-				outRegs := [2]register{noRegister, noRegister}
-				var used regMask
-				for _, out := range regspec.outputs {
-					mask := out.regs & s.allocatable &^ used
-					if mask == 0 {
-						continue
-					}
-					if opcodeTable[v.Op].resultInArg0 && out.idx == len(regspec.outputs)-1 {
-						if !opcodeTable[v.Op].commutative {
-							// Output must use the same register as input 0.
-							r := register(s.f.getHome(args[0].ID).(*Register).Num)
-							mask = regMask(1) << r
-						} else {
-							// Output must use the same register as input 0 or 1.
-							r0 := register(s.f.getHome(args[0].ID).(*Register).Num)
-							r1 := register(s.f.getHome(args[1].ID).(*Register).Num)
-							// Check r0 and r1 for desired output register.
-							found := false
-							for _, r := range dinfo[idx].out {
-								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
-									mask = regMask(1) << r
-									found = true
-									if r == r1 {
-										args[0], args[1] = args[1], args[0]
-									}
-									break
+			// Pick register for output.
+			if s.values[v.ID].needReg {
+				mask := regspec.outputs[0] & s.allocatable
+				if opcodeTable[v.Op].resultInArg0 {
+					if !opcodeTable[v.Op].commutative {
+						// Output must use the same register as input 0.
+						r := register(s.f.getHome(args[0].ID).(*Register).Num)
+						mask = regMask(1) << r
+					} else {
+						// Output must use the same register as input 0 or 1.
+						r0 := register(s.f.getHome(args[0].ID).(*Register).Num)
+						r1 := register(s.f.getHome(args[1].ID).(*Register).Num)
+						// Check r0 and r1 for desired output register.
+						found := false
+						for _, r := range dinfo[idx].out {
+							if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
+								mask = regMask(1) << r
+								found = true
+								if r == r1 {
+									args[0], args[1] = args[1], args[0]
 								}
-							}
-							if !found {
-								// Neither are desired, pick r0.
-								mask = regMask(1) << r0
+								break
 							}
 						}
-					}
-					for _, r := range dinfo[idx].out {
-						if r != noRegister && (mask&^s.used)>>r&1 != 0 {
-							// Desired register is allowed and unused.
-							mask = regMask(1) << r
-							break
+						if !found {
+							// Neither are desired, pick r0.
+							mask = regMask(1) << r0
 						}
 					}
-					// Avoid registers we're saving for other values.
-					if mask&^desired.avoid != 0 {
-						mask &^= desired.avoid
-					}
-					r := s.allocReg(mask, v)
-					outRegs[out.idx] = r
-					used |= regMask(1) << r
 				}
-				// Record register choices
-				if v.Type.IsTuple() {
-					var outLocs LocPair
-					if r := outRegs[0]; r != noRegister {
-						outLocs[0] = &s.registers[r]
-					}
-					if r := outRegs[1]; r != noRegister {
-						outLocs[1] = &s.registers[r]
-					}
-					s.f.setHome(v, outLocs)
-					// Note that subsequent SelectX instructions will do the assignReg calls.
-				} else {
-					if r := outRegs[0]; r != noRegister {
-						s.assignReg(r, v, v)
+				for _, r := range dinfo[idx].out {
+					if r != noRegister && (mask&^s.used)>>r&1 != 0 {
+						// Desired register is allowed and unused.
+						mask = regMask(1) << r
+						break
 					}
 				}
+				// Avoid registers we're saving for other values.
+				if mask&^desired.avoid != 0 {
+					mask &^= desired.avoid
+				}
+				r := s.allocReg(v, mask)
+				s.assignReg(r, v, v)
 			}

 			// Issue the Value itself.
@@ -1272,7 +1176,6 @@ func (s *regAllocState) regalloc(f *Func) {
 			//     f()
 			// }
 			// It would be good to have both spill and restore inside the IF.
-		issueSpill:
 			if s.values[v.ID].needReg {
 				spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
 				s.setOrig(spill, v)
@@ -1291,10 +1194,9 @@ func (s *regAllocState) regalloc(f *Func) {
 			if s.f.pass.debug > regDebug {
 				fmt.Printf("  processing control %s\n", v.LongString())
 			}
-			// We assume that a control input can be passed in any
-			// type-compatible register. If this turns out not to be true,
-			// we'll need to introduce a regspec for a block's control value.
-			s.allocValToReg(v, s.compatRegs(v.Type), false, b.Line)
+			// TODO: regspec for block control values, instead of using
+			// register set from the control op's output.
+			s.allocValToReg(v, opcodeTable[v.Op].reg.outputs[0], false, b.Line)
 			// Remove this use from the uses list.
 			vi := &s.values[v.ID]
 			u := vi.uses
@@ -1306,11 +1208,6 @@ func (s *regAllocState) regalloc(f *Func) {
 			s.freeUseRecords = u
 		}

-		// Spill any values that can't live across basic block boundaries.
-		if s.f.Config.use387 {
-			s.freeRegs(s.f.Config.fpRegMask)
-		}
-
 		// If we are approaching a merge point and we are the primary
 		// predecessor of it, find live values that we use soon after
 		// the merge point and promote them to registers now.
@@ -1334,9 +1231,6 @@ func (s *regAllocState) regalloc(f *Func) {
 					continue
 				}
 				v := s.orig[vid]
-				if s.f.Config.use387 && v.Type.IsFloat() {
-					continue // 387 can't handle floats in registers between blocks
-				}
 				m := s.compatRegs(v.Type) &^ s.used
 				if m&^desired.avoid != 0 {
 					m &^= desired.avoid
@@ -1875,9 +1769,6 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value) bool {
 			(*splice).Uses--
 			*splice = occupant.c
 			occupant.c.Uses++
-			if occupant.c.Op == OpStoreReg {
-				e.s.lateSpillUse(vid)
-			}
 		}
 		// Note: if splice==nil then c will appear dead. This is
 		// non-SSA formed code, so be careful after this pass not to run
@@ -2119,8 +2010,6 @@ func (e *edgeState) findRegFor(typ Type) Location {
 	return nil
 }

-// rematerializeable reports whether the register allocator should recompute
-// a value instead of spilling/restoring it.
 func (v *Value) rematerializeable() bool {
 	if !opcodeTable[v.Op].rematerializeable {
 		return false
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -205,11 +205,6 @@ func is32Bit(n int64) bool {
 	return n == int64(int32(n))
 }

-// is16Bit reports whether n can be represented as a signed 16 bit integer.
-func is16Bit(n int64) bool {
-	return n == int64(int16(n))
-}
-
 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 func b2i(b bool) int64 {
 	if b {
@@ -259,19 +254,6 @@ func isSamePtr(p1, p2 *Value) bool {
 	return false
 }

-// moveSize returns the number of bytes an aligned MOV instruction moves
-func moveSize(align int64, c *Config) int64 {
-	switch {
-	case align%8 == 0 && c.IntSize == 8:
-		return 8
-	case align%4 == 0:
-		return 4
-	case align%2 == 0:
-		return 2
-	}
-	return 1
-}
-
 // mergePoint finds a block among a's blocks which dominates b and is itself
 // dominated by all of a's blocks. Returns nil if it can't find one.
 // Might return nil even if one does exist.
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
--- a/src/cmd/compile/internal/ssa/rewriteARM.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM.go
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
--- a/src/cmd/compile/internal/ssa/rewritedec64.go
+++ b/src/cmd/compile/internal/ssa/rewritedec64.go
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -54,12 +54,8 @@ func rewriteValuegeneric(v *Value, config *Config) bool {
 		return rewriteValuegeneric_OpCvt32Fto64F(v, config)
 	case OpCvt64Fto32F:
 		return rewriteValuegeneric_OpCvt64Fto32F(v, config)
-	case OpDiv32F:
-		return rewriteValuegeneric_OpDiv32F(v, config)
 	case OpDiv64:
 		return rewriteValuegeneric_OpDiv64(v, config)
-	case OpDiv64F:
-		return rewriteValuegeneric_OpDiv64F(v, config)
 	case OpDiv64u:
 		return rewriteValuegeneric_OpDiv64u(v, config)
 	case OpEq16:
@@ -502,40 +498,6 @@ func rewriteValuegeneric_OpAdd32F(v *Value, config *Config) bool {
 		v.AuxInt = f2i(float64(i2f32(c) + i2f32(d)))
 		return true
 	}
-	// match: (Add32F x (Const32F [0]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst32F {
-			break
-		}
-		if v_1.AuxInt != 0 {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Add32F (Const32F [0]) x)
-	// cond:
-	// result: x
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst32F {
-			break
-		}
-		if v_0.AuxInt != 0 {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
 	return false
 }
 func rewriteValuegeneric_OpAdd64(v *Value, config *Config) bool {
@@ -620,40 +582,6 @@ func rewriteValuegeneric_OpAdd64F(v *Value, config *Config) bool {
 		v.AuxInt = f2i(i2f(c) + i2f(d))
 		return true
 	}
-	// match: (Add64F x (Const64F [0]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst64F {
-			break
-		}
-		if v_1.AuxInt != 0 {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Add64F (Const64F [0]) x)
-	// cond:
-	// result: x
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst64F {
-			break
-		}
-		if v_0.AuxInt != 0 {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
 	return false
 }
 func rewriteValuegeneric_OpAdd8(v *Value, config *Config) bool {
@@ -733,8 +661,8 @@ func rewriteValuegeneric_OpAddPtr(v *Value, config *Config) bool {
 		c := v_1.AuxInt
 		v.reset(OpOffPtr)
 		v.Type = t
-		v.AuxInt = c
 		v.AddArg(x)
+		v.AuxInt = c
 		return true
 	}
 	return false
@@ -1370,19 +1298,19 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// cond: v.Type.IsString()
 	// result: (StringMake     (Arg <config.fe.TypeBytePtr()> {n} [off])     (Arg <config.fe.TypeInt()> {n} [off+config.PtrSize]))
 	for {
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(v.Type.IsString()) {
 			break
 		}
 		v.reset(OpStringMake)
 		v0 := b.NewValue0(v.Line, OpArg, config.fe.TypeBytePtr())
-		v0.AuxInt = off
 		v0.Aux = n
+		v0.AuxInt = off
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, config.fe.TypeInt())
-		v1.AuxInt = off + config.PtrSize
 		v1.Aux = n
+		v1.AuxInt = off + config.PtrSize
 		v.AddArg(v1)
 		return true
 	}
@@ -1390,23 +1318,23 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// cond: v.Type.IsSlice()
 	// result: (SliceMake     (Arg <v.Type.ElemType().PtrTo()> {n} [off])     (Arg <config.fe.TypeInt()> {n} [off+config.PtrSize])     (Arg <config.fe.TypeInt()> {n} [off+2*config.PtrSize]))
 	for {
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(v.Type.IsSlice()) {
 			break
 		}
 		v.reset(OpSliceMake)
 		v0 := b.NewValue0(v.Line, OpArg, v.Type.ElemType().PtrTo())
-		v0.AuxInt = off
 		v0.Aux = n
+		v0.AuxInt = off
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, config.fe.TypeInt())
-		v1.AuxInt = off + config.PtrSize
 		v1.Aux = n
+		v1.AuxInt = off + config.PtrSize
 		v.AddArg(v1)
 		v2 := b.NewValue0(v.Line, OpArg, config.fe.TypeInt())
-		v2.AuxInt = off + 2*config.PtrSize
 		v2.Aux = n
+		v2.AuxInt = off + 2*config.PtrSize
 		v.AddArg(v2)
 		return true
 	}
@@ -1414,19 +1342,19 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// cond: v.Type.IsInterface()
 	// result: (IMake     (Arg <config.fe.TypeBytePtr()> {n} [off])     (Arg <config.fe.TypeBytePtr()> {n} [off+config.PtrSize]))
 	for {
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(v.Type.IsInterface()) {
 			break
 		}
 		v.reset(OpIMake)
 		v0 := b.NewValue0(v.Line, OpArg, config.fe.TypeBytePtr())
-		v0.AuxInt = off
 		v0.Aux = n
+		v0.AuxInt = off
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, config.fe.TypeBytePtr())
-		v1.AuxInt = off + config.PtrSize
 		v1.Aux = n
+		v1.AuxInt = off + config.PtrSize
 		v.AddArg(v1)
 		return true
 	}
@@ -1434,19 +1362,19 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// cond: v.Type.IsComplex() && v.Type.Size() == 16
 	// result: (ComplexMake     (Arg <config.fe.TypeFloat64()> {n} [off])     (Arg <config.fe.TypeFloat64()> {n} [off+8]))
 	for {
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(v.Type.IsComplex() && v.Type.Size() == 16) {
 			break
 		}
 		v.reset(OpComplexMake)
 		v0 := b.NewValue0(v.Line, OpArg, config.fe.TypeFloat64())
-		v0.AuxInt = off
 		v0.Aux = n
+		v0.AuxInt = off
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, config.fe.TypeFloat64())
-		v1.AuxInt = off + 8
 		v1.Aux = n
+		v1.AuxInt = off + 8
 		v.AddArg(v1)
 		return true
 	}
@@ -1454,19 +1382,19 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// cond: v.Type.IsComplex() && v.Type.Size() == 8
 	// result: (ComplexMake     (Arg <config.fe.TypeFloat32()> {n} [off])     (Arg <config.fe.TypeFloat32()> {n} [off+4]))
 	for {
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(v.Type.IsComplex() && v.Type.Size() == 8) {
 			break
 		}
 		v.reset(OpComplexMake)
 		v0 := b.NewValue0(v.Line, OpArg, config.fe.TypeFloat32())
-		v0.AuxInt = off
 		v0.Aux = n
+		v0.AuxInt = off
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, config.fe.TypeFloat32())
-		v1.AuxInt = off + 4
 		v1.Aux = n
+		v1.AuxInt = off + 4
 		v.AddArg(v1)
 		return true
 	}
@@ -1486,15 +1414,15 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// result: (StructMake1     (Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)]))
 	for {
 		t := v.Type
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(t.IsStruct() && t.NumFields() == 1 && config.fe.CanSSA(t)) {
 			break
 		}
 		v.reset(OpStructMake1)
 		v0 := b.NewValue0(v.Line, OpArg, t.FieldType(0))
-		v0.AuxInt = off + t.FieldOff(0)
 		v0.Aux = n
+		v0.AuxInt = off + t.FieldOff(0)
 		v.AddArg(v0)
 		return true
 	}
@@ -1503,19 +1431,19 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// result: (StructMake2     (Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)])     (Arg <t.FieldType(1)> {n} [off+t.FieldOff(1)]))
 	for {
 		t := v.Type
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(t.IsStruct() && t.NumFields() == 2 && config.fe.CanSSA(t)) {
 			break
 		}
 		v.reset(OpStructMake2)
 		v0 := b.NewValue0(v.Line, OpArg, t.FieldType(0))
-		v0.AuxInt = off + t.FieldOff(0)
 		v0.Aux = n
+		v0.AuxInt = off + t.FieldOff(0)
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, t.FieldType(1))
-		v1.AuxInt = off + t.FieldOff(1)
 		v1.Aux = n
+		v1.AuxInt = off + t.FieldOff(1)
 		v.AddArg(v1)
 		return true
 	}
@@ -1524,23 +1452,23 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// result: (StructMake3     (Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)])     (Arg <t.FieldType(1)> {n} [off+t.FieldOff(1)])     (Arg <t.FieldType(2)> {n} [off+t.FieldOff(2)]))
 	for {
 		t := v.Type
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(t.IsStruct() && t.NumFields() == 3 && config.fe.CanSSA(t)) {
 			break
 		}
 		v.reset(OpStructMake3)
 		v0 := b.NewValue0(v.Line, OpArg, t.FieldType(0))
-		v0.AuxInt = off + t.FieldOff(0)
 		v0.Aux = n
+		v0.AuxInt = off + t.FieldOff(0)
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, t.FieldType(1))
-		v1.AuxInt = off + t.FieldOff(1)
 		v1.Aux = n
+		v1.AuxInt = off + t.FieldOff(1)
 		v.AddArg(v1)
 		v2 := b.NewValue0(v.Line, OpArg, t.FieldType(2))
-		v2.AuxInt = off + t.FieldOff(2)
 		v2.Aux = n
+		v2.AuxInt = off + t.FieldOff(2)
 		v.AddArg(v2)
 		return true
 	}
@@ -1549,27 +1477,27 @@ func rewriteValuegeneric_OpArg(v *Value, config *Config) bool {
 	// result: (StructMake4     (Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)])     (Arg <t.FieldType(1)> {n} [off+t.FieldOff(1)])     (Arg <t.FieldType(2)> {n} [off+t.FieldOff(2)])     (Arg <t.FieldType(3)> {n} [off+t.FieldOff(3)]))
 	for {
 		t := v.Type
-		off := v.AuxInt
 		n := v.Aux
+		off := v.AuxInt
 		if !(t.IsStruct() && t.NumFields() == 4 && config.fe.CanSSA(t)) {
 			break
 		}
 		v.reset(OpStructMake4)
 		v0 := b.NewValue0(v.Line, OpArg, t.FieldType(0))
-		v0.AuxInt = off + t.FieldOff(0)
 		v0.Aux = n
+		v0.AuxInt = off + t.FieldOff(0)
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpArg, t.FieldType(1))
-		v1.AuxInt = off + t.FieldOff(1)
 		v1.Aux = n
+		v1.AuxInt = off + t.FieldOff(1)
 		v.AddArg(v1)
 		v2 := b.NewValue0(v.Line, OpArg, t.FieldType(2))
-		v2.AuxInt = off + t.FieldOff(2)
 		v2.Aux = n
+		v2.AuxInt = off + t.FieldOff(2)
 		v.AddArg(v2)
 		v3 := b.NewValue0(v.Line, OpArg, t.FieldType(3))
-		v3.AuxInt = off + t.FieldOff(3)
 		v3.Aux = n
+		v3.AuxInt = off + t.FieldOff(3)
 		v.AddArg(v3)
 		return true
 	}
@@ -1914,44 +1842,6 @@ func rewriteValuegeneric_OpCvt64Fto32F(v *Value, config *Config) bool {
 	}
 	return false
 }
-func rewriteValuegeneric_OpDiv32F(v *Value, config *Config) bool {
-	b := v.Block
-	_ = b
-	// match: (Div32F x (Const32F [f2i(1)]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst32F {
-			break
-		}
-		if v_1.AuxInt != f2i(1) {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Div32F x (Const32F [f2i(-1)]))
-	// cond:
-	// result: (Neg32F x)
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst32F {
-			break
-		}
-		if v_1.AuxInt != f2i(-1) {
-			break
-		}
-		v.reset(OpNeg32F)
-		v.AddArg(x)
-		return true
-	}
-	return false
-}
 func rewriteValuegeneric_OpDiv64(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -2107,44 +1997,6 @@ func rewriteValuegeneric_OpDiv64(v *Value, config *Config) bool {
 	}
 	return false
 }
-func rewriteValuegeneric_OpDiv64F(v *Value, config *Config) bool {
-	b := v.Block
-	_ = b
-	// match: (Div64F x (Const64F [f2i(1)]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst64F {
-			break
-		}
-		if v_1.AuxInt != f2i(1) {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Div64F x (Const64F [f2i(-1)]))
-	// cond:
-	// result: (Neg32F x)
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst64F {
-			break
-		}
-		if v_1.AuxInt != f2i(-1) {
-			break
-		}
-		v.reset(OpNeg32F)
-		v.AddArg(x)
-		return true
-	}
-	return false
-}
 func rewriteValuegeneric_OpDiv64u(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -5270,22 +5122,6 @@ func rewriteValuegeneric_OpMul16(v *Value, config *Config) bool {
 		v.AuxInt = int64(int16(c * d))
 		return true
 	}
-	// match: (Mul16 (Const16 [-1]) x)
-	// cond:
-	// result: (Neg16 x)
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst16 {
-			break
-		}
-		if v_0.AuxInt != -1 {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpNeg16)
-		v.AddArg(x)
-		return true
-	}
 	// match: (Mul16 x (Const16 <t> [c]))
 	// cond: x.Op != OpConst16
 	// result: (Mul16 (Const16 <t> [c]) x)
@@ -5345,22 +5181,6 @@ func rewriteValuegeneric_OpMul32(v *Value, config *Config) bool {
 		v.AuxInt = int64(int32(c * d))
 		return true
 	}
-	// match: (Mul32 (Const32 [-1]) x)
-	// cond:
-	// result: (Neg32 x)
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst32 {
-			break
-		}
-		if v_0.AuxInt != -1 {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpNeg32)
-		v.AddArg(x)
-		return true
-	}
 	// match: (Mul32 x (Const32 <t> [c]))
 	// cond: x.Op != OpConst32
 	// result: (Mul32 (Const32 <t> [c]) x)
@@ -5458,72 +5278,6 @@ func rewriteValuegeneric_OpMul32F(v *Value, config *Config) bool {
 		v.AuxInt = f2i(float64(i2f32(c) * i2f32(d)))
 		return true
 	}
-	// match: (Mul32F x (Const32F [f2i(1)]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst32F {
-			break
-		}
-		if v_1.AuxInt != f2i(1) {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Mul32F (Const32F [f2i(1)]) x)
-	// cond:
-	// result: x
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst32F {
-			break
-		}
-		if v_0.AuxInt != f2i(1) {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Mul32F x (Const32F [f2i(-1)]))
-	// cond:
-	// result: (Neg32F x)
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst32F {
-			break
-		}
-		if v_1.AuxInt != f2i(-1) {
-			break
-		}
-		v.reset(OpNeg32F)
-		v.AddArg(x)
-		return true
-	}
-	// match: (Mul32F (Const32F [f2i(-1)]) x)
-	// cond:
-	// result: (Neg32F x)
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst32F {
-			break
-		}
-		if v_0.AuxInt != f2i(-1) {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpNeg32F)
-		v.AddArg(x)
-		return true
-	}
 	return false
 }
 func rewriteValuegeneric_OpMul64(v *Value, config *Config) bool {
@@ -5547,22 +5301,6 @@ func rewriteValuegeneric_OpMul64(v *Value, config *Config) bool {
 		v.AuxInt = c * d
 		return true
 	}
-	// match: (Mul64 (Const64 [-1]) x)
-	// cond:
-	// result: (Neg64 x)
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst64 {
-			break
-		}
-		if v_0.AuxInt != -1 {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpNeg64)
-		v.AddArg(x)
-		return true
-	}
 	// match: (Mul64 x (Const64 <t> [c]))
 	// cond: x.Op != OpConst64
 	// result: (Mul64 (Const64 <t> [c]) x)
@@ -5660,72 +5398,6 @@ func rewriteValuegeneric_OpMul64F(v *Value, config *Config) bool {
 		v.AuxInt = f2i(i2f(c) * i2f(d))
 		return true
 	}
-	// match: (Mul64F x (Const64F [f2i(1)]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst64F {
-			break
-		}
-		if v_1.AuxInt != f2i(1) {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Mul64F (Const64F [f2i(1)]) x)
-	// cond:
-	// result: x
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst64F {
-			break
-		}
-		if v_0.AuxInt != f2i(1) {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
-	// match: (Mul64F x (Const64F [f2i(-1)]))
-	// cond:
-	// result: (Neg64F x)
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst64F {
-			break
-		}
-		if v_1.AuxInt != f2i(-1) {
-			break
-		}
-		v.reset(OpNeg64F)
-		v.AddArg(x)
-		return true
-	}
-	// match: (Mul64F (Const64F [f2i(-1)]) x)
-	// cond:
-	// result: (Neg64F x)
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst64F {
-			break
-		}
-		if v_0.AuxInt != f2i(-1) {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpNeg64F)
-		v.AddArg(x)
-		return true
-	}
 	return false
 }
 func rewriteValuegeneric_OpMul8(v *Value, config *Config) bool {
@@ -5749,22 +5421,6 @@ func rewriteValuegeneric_OpMul8(v *Value, config *Config) bool {
 		v.AuxInt = int64(int8(c * d))
 		return true
 	}
-	// match: (Mul8  (Const8  [-1]) x)
-	// cond:
-	// result: (Neg8  x)
-	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpConst8 {
-			break
-		}
-		if v_0.AuxInt != -1 {
-			break
-		}
-		x := v.Args[1]
-		v.reset(OpNeg8)
-		v.AddArg(x)
-		return true
-	}
 	// match: (Mul8  x (Const8  <t> [c]))
 	// cond: x.Op != OpConst8
 	// result: (Mul8  (Const8  <t> [c]) x)
@@ -6359,26 +6015,26 @@ func rewriteValuegeneric_OpOffPtr(v *Value, config *Config) bool {
 	// cond:
 	// result: (OffPtr p [a+b])
 	for {
-		a := v.AuxInt
 		v_0 := v.Args[0]
 		if v_0.Op != OpOffPtr {
 			break
 		}
-		b := v_0.AuxInt
 		p := v_0.Args[0]
+		b := v_0.AuxInt
+		a := v.AuxInt
 		v.reset(OpOffPtr)
-		v.AuxInt = a + b
 		v.AddArg(p)
+		v.AuxInt = a + b
 		return true
 	}
 	// match: (OffPtr p [0])
 	// cond: v.Type.Compare(p.Type) == CMPeq
 	// result: p
 	for {
+		p := v.Args[0]
 		if v.AuxInt != 0 {
 			break
 		}
-		p := v.Args[0]
 		if !(v.Type.Compare(p.Type) == CMPeq) {
 			break
 		}
@@ -9181,7 +8837,7 @@ func rewriteValuegeneric_OpStore(v *Value, config *Config) bool {
 	}
 	// match: (Store [size] dst (Load <t> src mem) mem)
 	// cond: !config.fe.CanSSA(t)
-	// result: (Move [MakeSizeAndAlign(size, t.Alignment()).Int64()] dst src mem)
+	// result: (Move [size] dst src mem)
 	for {
 		size := v.AuxInt
 		dst := v.Args[0]
@@ -9199,7 +8855,7 @@ func rewriteValuegeneric_OpStore(v *Value, config *Config) bool {
 			break
 		}
 		v.reset(OpMove)
-		v.AuxInt = MakeSizeAndAlign(size, t.Alignment()).Int64()
+		v.AuxInt = size
 		v.AddArg(dst)
 		v.AddArg(src)
 		v.AddArg(mem)
@@ -9207,7 +8863,7 @@ func rewriteValuegeneric_OpStore(v *Value, config *Config) bool {
 	}
 	// match: (Store [size] dst (Load <t> src mem) (VarDef {x} mem))
 	// cond: !config.fe.CanSSA(t)
-	// result: (Move [MakeSizeAndAlign(size, t.Alignment()).Int64()] dst src (VarDef {x} mem))
+	// result: (Move [size] dst src (VarDef {x} mem))
 	for {
 		size := v.AuxInt
 		dst := v.Args[0]
@@ -9230,7 +8886,7 @@ func rewriteValuegeneric_OpStore(v *Value, config *Config) bool {
 			break
 		}
 		v.reset(OpMove)
-		v.AuxInt = MakeSizeAndAlign(size, t.Alignment()).Int64()
+		v.AuxInt = size
 		v.AddArg(dst)
 		v.AddArg(src)
 		v0 := b.NewValue0(v.Line, OpVarDef, TypeMem)
@@ -9692,23 +9348,6 @@ func rewriteValuegeneric_OpSub32F(v *Value, config *Config) bool {
 		v.AuxInt = f2i(float64(i2f32(c) - i2f32(d)))
 		return true
 	}
-	// match: (Sub32F x (Const32F [0]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst32F {
-			break
-		}
-		if v_1.AuxInt != 0 {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
 	return false
 }
 func rewriteValuegeneric_OpSub64(v *Value, config *Config) bool {
@@ -9824,23 +9463,6 @@ func rewriteValuegeneric_OpSub64F(v *Value, config *Config) bool {
 		v.AuxInt = f2i(i2f(c) - i2f(d))
 		return true
 	}
-	// match: (Sub64F x (Const64F [0]))
-	// cond:
-	// result: x
-	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpConst64F {
-			break
-		}
-		if v_1.AuxInt != 0 {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
 	return false
 }
 func rewriteValuegeneric_OpSub8(v *Value, config *Config) bool {
--- a/src/cmd/compile/internal/ssa/schedule.go
+++ b/src/cmd/compile/internal/ssa/schedule.go
@@ -8,7 +8,6 @@ import "container/heap"

 const (
 	ScorePhi = iota // towards top of block
-	ScoreReadTuple
 	ScoreVarDef
 	ScoreMemory
 	ScoreDefault
@@ -84,7 +83,7 @@ func schedule(f *Func) {
 		// Compute score. Larger numbers are scheduled closer to the end of the block.
 		for _, v := range b.Values {
 			switch {
-			case v.Op == OpAMD64LoweredGetClosurePtr || v.Op == OpPPC64LoweredGetClosurePtr || v.Op == OpARMLoweredGetClosurePtr || v.Op == OpARM64LoweredGetClosurePtr || v.Op == Op386LoweredGetClosurePtr:
+			case v.Op == OpAMD64LoweredGetClosurePtr:
 				// We also score GetLoweredClosurePtr as early as possible to ensure that the
 				// context register is not stomped. GetLoweredClosurePtr should only appear
 				// in the entry block where there are no phi functions, so there is no
@@ -104,14 +103,7 @@ func schedule(f *Func) {
 				// reduce register pressure. It also helps make sure
 				// VARDEF ops are scheduled before the corresponding LEA.
 				score[v.ID] = ScoreMemory
-			case v.Op == OpSelect0 || v.Op == OpSelect1:
-				// Schedule the pseudo-op of reading part of a tuple
-				// immediately after the tuple-generating op, since
-				// this value is already live. This also removes its
-				// false dependency on the other part of the tuple.
-				// Also ensures tuple is never spilled.
-				score[v.ID] = ScoreReadTuple
-			case v.Type.IsFlags() || v.Type.IsTuple():
+			case v.Type.IsFlags():
 				// Schedule flag register generation as late as possible.
 				// This makes sure that we only have one live flags
 				// value at a time.
@@ -196,7 +188,6 @@ func schedule(f *Func) {

 		// Schedule highest priority value, update use counts, repeat.
 		order = order[:0]
-		tuples := make(map[ID][]*Value)
 		for {
 			// Find highest priority schedulable value.
 			// Note that schedule is assembled backwards.
@@ -208,31 +199,7 @@ func schedule(f *Func) {
 			v := heap.Pop(priq).(*Value)

 			// Add it to the schedule.
-			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
-			//TODO: maybe remove ReadTuple score above, if it does not help on performance
-			switch {
-			case v.Op == OpSelect0:
-				if tuples[v.Args[0].ID] == nil {
-					tuples[v.Args[0].ID] = make([]*Value, 2)
-				}
-				tuples[v.Args[0].ID][0] = v
-			case v.Op == OpSelect1:
-				if tuples[v.Args[0].ID] == nil {
-					tuples[v.Args[0].ID] = make([]*Value, 2)
-				}
-				tuples[v.Args[0].ID][1] = v
-			case v.Type.IsTuple() && tuples[v.ID] != nil:
-				if tuples[v.ID][1] != nil {
-					order = append(order, tuples[v.ID][1])
-				}
-				if tuples[v.ID][0] != nil {
-					order = append(order, tuples[v.ID][0])
-				}
-				delete(tuples, v.ID)
-				fallthrough
-			default:
-				order = append(order, v)
-			}
+			order = append(order, v)

 			// Update use counts of arguments.
 			for _, w := range v.Args {
--- a/src/cmd/compile/internal/ssa/tighten.go
+++ b/src/cmd/compile/internal/ssa/tighten.go
@@ -54,19 +54,13 @@ func tighten(f *Func) {
 		for _, b := range f.Blocks {
 			for i := 0; i < len(b.Values); i++ {
 				v := b.Values[i]
-				switch v.Op {
-				case OpPhi, OpGetClosurePtr, OpConvert, OpArg:
+				if v.Op == OpPhi || v.Op == OpGetClosurePtr || v.Op == OpConvert || v.Op == OpArg {
 					// GetClosurePtr & Arg must stay in entry block.
 					// OpConvert must not float over call sites.
 					// TODO do we instead need a dependence edge of some sort for OpConvert?
 					// Would memory do the trick, or do we need something else that relates
 					// to safe point operations?
 					continue
-				default:
-				}
-				if v.Op == OpSelect0 || v.Op == OpSelect1 {
-					// tuple selector must stay with tuple generator
-					continue
 				}
 				if len(v.Args) > 0 && v.Args[len(v.Args)-1].Type.IsMemory() {
 					// We can't move values which have a memory arg - it might
--- a/src/cmd/compile/internal/ssa/type.go
+++ b/src/cmd/compile/internal/ssa/type.go
@@ -27,7 +27,6 @@ type Type interface {
 	IsMemory() bool // special ssa-package-only types
 	IsFlags() bool
 	IsVoid() bool
-	IsTuple() bool

 	ElemType() Type // given []T or *T or [n]T, return T
 	PtrTo() Type    // given T, return *T
@@ -70,7 +69,6 @@ func (t *CompilerType) IsInterface() bool      { return false }
 func (t *CompilerType) IsMemory() bool         { return t.Memory }
 func (t *CompilerType) IsFlags() bool          { return t.Flags }
 func (t *CompilerType) IsVoid() bool           { return t.Void }
-func (t *CompilerType) IsTuple() bool          { return false }
 func (t *CompilerType) String() string         { return t.Name }
 func (t *CompilerType) SimpleString() string   { return t.Name }
 func (t *CompilerType) ElemType() Type         { panic("not implemented") }
@@ -81,38 +79,6 @@ func (t *CompilerType) FieldOff(i int) int64   { panic("not implemented") }
 func (t *CompilerType) FieldName(i int) string { panic("not implemented") }
 func (t *CompilerType) NumElem() int64         { panic("not implemented") }

-type TupleType struct {
-	first  Type
-	second Type
-}
-
-func (t *TupleType) Size() int64            { panic("not implemented") }
-func (t *TupleType) Alignment() int64       { panic("not implemented") }
-func (t *TupleType) IsBoolean() bool        { return false }
-func (t *TupleType) IsInteger() bool        { return false }
-func (t *TupleType) IsSigned() bool         { return false }
-func (t *TupleType) IsFloat() bool          { return false }
-func (t *TupleType) IsComplex() bool        { return false }
-func (t *TupleType) IsPtrShaped() bool      { return false }
-func (t *TupleType) IsString() bool         { return false }
-func (t *TupleType) IsSlice() bool          { return false }
-func (t *TupleType) IsArray() bool          { return false }
-func (t *TupleType) IsStruct() bool         { return false }
-func (t *TupleType) IsInterface() bool      { return false }
-func (t *TupleType) IsMemory() bool         { return false }
-func (t *TupleType) IsFlags() bool          { return false }
-func (t *TupleType) IsVoid() bool           { return false }
-func (t *TupleType) IsTuple() bool          { return true }
-func (t *TupleType) String() string         { return t.first.String() + "," + t.second.String() }
-func (t *TupleType) SimpleString() string   { return "Tuple" }
-func (t *TupleType) ElemType() Type         { panic("not implemented") }
-func (t *TupleType) PtrTo() Type            { panic("not implemented") }
-func (t *TupleType) NumFields() int         { panic("not implemented") }
-func (t *TupleType) FieldType(i int) Type   { panic("not implemented") }
-func (t *TupleType) FieldOff(i int) int64   { panic("not implemented") }
-func (t *TupleType) FieldName(i int) string { panic("not implemented") }
-func (t *TupleType) NumElem() int64         { panic("not implemented") }
-
 // Cmp is a comparison between values a and b.
 // -1 if a < b
 //  0 if a == b
@@ -150,25 +116,6 @@ func (t *CompilerType) Compare(u Type) Cmp {
 	return CMPlt
 }

-func (t *TupleType) Compare(u Type) Cmp {
-	// ssa.TupleType is greater than ssa.CompilerType
-	if _, ok := u.(*CompilerType); ok {
-		return CMPgt
-	}
-	// ssa.TupleType is smaller than any other type
-	x, ok := u.(*TupleType)
-	if !ok {
-		return CMPlt
-	}
-	if t == x {
-		return CMPeq
-	}
-	if c := t.first.Compare(x.first); c != CMPeq {
-		return c
-	}
-	return t.second.Compare(x.second)
-}
-
 var (
 	TypeInvalid = &CompilerType{Name: "invalid"}
 	TypeMem     = &CompilerType{Name: "mem", Memory: true}
@@ -176,7 +123,3 @@ var (
 	TypeVoid    = &CompilerType{Name: "void", Void: true}
 	TypeInt128  = &CompilerType{Name: "int128", size: 16, Int128: true}
 )
-
-func MakeTuple(t0, t1 Type) *TupleType {
-	return &TupleType{first: t0, second: t1}
-}
--- a/src/cmd/compile/internal/ssa/type_test.go
+++ b/src/cmd/compile/internal/ssa/type_test.go
@@ -39,7 +39,6 @@ func (t *TypeImpl) IsStruct() bool         { return t.struct_ }
 func (t *TypeImpl) IsInterface() bool      { return t.inter }
 func (t *TypeImpl) IsMemory() bool         { return false }
 func (t *TypeImpl) IsFlags() bool          { return false }
-func (t *TypeImpl) IsTuple() bool          { return false }
 func (t *TypeImpl) IsVoid() bool           { return false }
 func (t *TypeImpl) String() string         { return t.Name }
 func (t *TypeImpl) SimpleString() string   { return t.Name }
--- a/src/cmd/compile/internal/x86/387.go
+++ b/src/cmd/compile/internal/x86/387.go
@@ -1,386 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package x86
-
-import (
-	"cmd/compile/internal/gc"
-	"cmd/compile/internal/ssa"
-	"cmd/internal/obj"
-	"cmd/internal/obj/x86"
-	"math"
-)
-
-// Generates code for v using 387 instructions.  Reports whether
-// the instruction was handled by this routine.
-func ssaGenValue387(s *gc.SSAGenState, v *ssa.Value) bool {
-	// The SSA compiler pretends that it has an SSE backend.
-	// If we don't have one of those, we need to translate
-	// all the SSE ops to equivalent 387 ops. That's what this
-	// function does.
-
-	switch v.Op {
-	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
-		p := gc.Prog(loadPush(v.Type))
-		p.From.Type = obj.TYPE_FCONST
-		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_F0
-		popAndSave(s, v)
-		return true
-	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
-		p := gc.Prog(loadPush(v.Type))
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_F0
-		popAndSave(s, v)
-		return true
-
-	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1, ssa.Op386MOVSSloadidx4, ssa.Op386MOVSDloadidx8:
-		p := gc.Prog(loadPush(v.Type))
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.From, v)
-		switch v.Op {
-		case ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
-			p.From.Scale = 1
-			p.From.Index = gc.SSARegNum(v.Args[1])
-		case ssa.Op386MOVSSloadidx4:
-			p.From.Scale = 4
-			p.From.Index = gc.SSARegNum(v.Args[1])
-		case ssa.Op386MOVSDloadidx8:
-			p.From.Scale = 8
-			p.From.Index = gc.SSARegNum(v.Args[1])
-		}
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_F0
-		popAndSave(s, v)
-		return true
-
-	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore:
-		// Push to-be-stored value on top of stack.
-		push(s, v.Args[1])
-
-		// Pop and store value.
-		var op obj.As
-		switch v.Op {
-		case ssa.Op386MOVSSstore:
-			op = x86.AFMOVFP
-		case ssa.Op386MOVSDstore:
-			op = x86.AFMOVDP
-		}
-		p := gc.Prog(op)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.To, v)
-		return true
-
-	case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVSDstoreidx8:
-		push(s, v.Args[2])
-		var op obj.As
-		switch v.Op {
-		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSSstoreidx4:
-			op = x86.AFMOVFP
-		case ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSDstoreidx8:
-			op = x86.AFMOVDP
-		}
-		p := gc.Prog(op)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		p.To.Type = obj.TYPE_MEM
-		p.To.Reg = gc.SSARegNum(v.Args[0])
-		gc.AddAux(&p.To, v)
-		switch v.Op {
-		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
-			p.To.Scale = 1
-			p.To.Index = gc.SSARegNum(v.Args[1])
-		case ssa.Op386MOVSSstoreidx4:
-			p.To.Scale = 4
-			p.To.Index = gc.SSARegNum(v.Args[1])
-		case ssa.Op386MOVSDstoreidx8:
-			p.To.Scale = 8
-			p.To.Index = gc.SSARegNum(v.Args[1])
-		}
-		return true
-
-	case ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
-		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD:
-		if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) {
-			v.Fatalf("input[0] and output not in same register %s", v.LongString())
-		}
-
-		// Push arg1 on top of stack
-		push(s, v.Args[1])
-
-		// Set precision if needed.  64 bits is the default.
-		switch v.Op {
-		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
-			p := gc.Prog(x86.AFSTCW)
-			scratch387(s, &p.To)
-			p = gc.Prog(x86.AFLDCW)
-			p.From.Type = obj.TYPE_MEM
-			p.From.Name = obj.NAME_EXTERN
-			p.From.Sym = gc.Linksym(gc.Pkglookup("controlWord32", gc.Runtimepkg))
-		}
-
-		var op obj.As
-		switch v.Op {
-		case ssa.Op386ADDSS, ssa.Op386ADDSD:
-			op = x86.AFADDDP
-		case ssa.Op386SUBSS, ssa.Op386SUBSD:
-			op = x86.AFSUBDP
-		case ssa.Op386MULSS, ssa.Op386MULSD:
-			op = x86.AFMULDP
-		case ssa.Op386DIVSS, ssa.Op386DIVSD:
-			op = x86.AFDIVDP
-		}
-		p := gc.Prog(op)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = s.SSEto387[gc.SSARegNum(v)] + 1
-
-		// Restore precision if needed.
-		switch v.Op {
-		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
-			p := gc.Prog(x86.AFLDCW)
-			scratch387(s, &p.From)
-		}
-
-		return true
-
-	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
-		push(s, v.Args[0])
-
-		// Compare.
-		p := gc.Prog(x86.AFUCOMP)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = s.SSEto387[gc.SSARegNum(v.Args[1])] + 1
-
-		// Save AX.
-		p = gc.Prog(x86.AMOVL)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_AX
-		scratch387(s, &p.To)
-
-		// Move status word into AX.
-		p = gc.Prog(x86.AFSTSW)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_AX
-
-		// Then move the flags we need to the integer flags.
-		gc.Prog(x86.ASAHF)
-
-		// Restore AX.
-		p = gc.Prog(x86.AMOVL)
-		scratch387(s, &p.From)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_AX
-
-		return true
-
-	case ssa.Op386SQRTSD:
-		push(s, v.Args[0])
-		gc.Prog(x86.AFSQRT)
-		popAndSave(s, v)
-		return true
-
-	case ssa.Op386FCHS:
-		push(s, v.Args[0])
-		gc.Prog(x86.AFCHS)
-		popAndSave(s, v)
-		return true
-
-	case ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD:
-		p := gc.Prog(x86.AMOVL)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = gc.SSARegNum(v.Args[0])
-		scratch387(s, &p.To)
-		p = gc.Prog(x86.AFMOVL)
-		scratch387(s, &p.From)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_F0
-		popAndSave(s, v)
-		return true
-
-	case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL:
-		push(s, v.Args[0])
-
-		// Save control word.
-		p := gc.Prog(x86.AFSTCW)
-		scratch387(s, &p.To)
-		p.To.Offset += 4
-
-		// Load control word which truncates (rounds towards zero).
-		p = gc.Prog(x86.AFLDCW)
-		p.From.Type = obj.TYPE_MEM
-		p.From.Name = obj.NAME_EXTERN
-		p.From.Sym = gc.Linksym(gc.Pkglookup("controlWord64trunc", gc.Runtimepkg))
-
-		// Now do the conversion.
-		p = gc.Prog(x86.AFMOVLP)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		scratch387(s, &p.To)
-		p = gc.Prog(x86.AMOVL)
-		scratch387(s, &p.From)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum(v)
-
-		// Restore control word.
-		p = gc.Prog(x86.AFLDCW)
-		scratch387(s, &p.From)
-		p.From.Offset += 4
-		return true
-
-	case ssa.Op386CVTSS2SD:
-		// float32 -> float64 is a nop
-		push(s, v.Args[0])
-		popAndSave(s, v)
-		return true
-
-	case ssa.Op386CVTSD2SS:
-		// Round to nearest float32.
-		push(s, v.Args[0])
-		p := gc.Prog(x86.AFMOVFP)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		scratch387(s, &p.To)
-		p = gc.Prog(x86.AFMOVF)
-		scratch387(s, &p.From)
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_F0
-		popAndSave(s, v)
-		return true
-
-	case ssa.OpLoadReg:
-		if !v.Type.IsFloat() {
-			return false
-		}
-		// Load+push the value we need.
-		p := gc.Prog(loadPush(v.Type))
-		n, off := gc.AutoVar(v.Args[0])
-		p.From.Type = obj.TYPE_MEM
-		p.From.Node = n
-		p.From.Sym = gc.Linksym(n.Sym)
-		p.From.Offset = off
-		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
-			p.From.Name = obj.NAME_PARAM
-			p.From.Offset += n.Xoffset
-		} else {
-			p.From.Name = obj.NAME_AUTO
-		}
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_F0
-		// Move the value to its assigned register.
-		popAndSave(s, v)
-		return true
-
-	case ssa.OpStoreReg:
-		if !v.Type.IsFloat() {
-			return false
-		}
-		push(s, v.Args[0])
-		var op obj.As
-		switch v.Type.Size() {
-		case 4:
-			op = x86.AFMOVFP
-		case 8:
-			op = x86.AFMOVDP
-		}
-		p := gc.Prog(op)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		n, off := gc.AutoVar(v)
-		p.To.Type = obj.TYPE_MEM
-		p.To.Node = n
-		p.To.Sym = gc.Linksym(n.Sym)
-		p.To.Offset = off
-		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
-			p.To.Name = obj.NAME_PARAM
-			p.To.Offset += n.Xoffset
-		} else {
-			p.To.Name = obj.NAME_AUTO
-		}
-		return true
-
-	case ssa.OpCopy:
-		if !v.Type.IsFloat() {
-			return false
-		}
-		push(s, v.Args[0])
-		popAndSave(s, v)
-		return true
-
-	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLdefer, ssa.Op386CALLgo, ssa.Op386CALLinter:
-		flush387(s)  // Calls must empty the the FP stack.
-		return false // then issue the call as normal
-	}
-	return false
-}
-
-// push pushes v onto the floating-point stack.  v must be in a register.
-func push(s *gc.SSAGenState, v *ssa.Value) {
-	p := gc.Prog(x86.AFMOVD)
-	p.From.Type = obj.TYPE_REG
-	p.From.Reg = s.SSEto387[gc.SSARegNum(v)]
-	p.To.Type = obj.TYPE_REG
-	p.To.Reg = x86.REG_F0
-}
-
-// popAndSave pops a value off of the floating-point stack and stores
-// it in the reigster assigned to v.
-func popAndSave(s *gc.SSAGenState, v *ssa.Value) {
-	r := gc.SSARegNum(v)
-	if _, ok := s.SSEto387[r]; ok {
-		// Pop value, write to correct register.
-		p := gc.Prog(x86.AFMOVDP)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = s.SSEto387[gc.SSARegNum(v)] + 1
-	} else {
-		// Don't actually pop value. This 387 register is now the
-		// new home for the not-yet-assigned-a-home SSE register.
-		// Increase the register mapping of all other registers by one.
-		for rSSE, r387 := range s.SSEto387 {
-			s.SSEto387[rSSE] = r387 + 1
-		}
-		s.SSEto387[r] = x86.REG_F0
-	}
-}
-
-// loadPush returns the opcode for load+push of the given type.
-func loadPush(t ssa.Type) obj.As {
-	if t.Size() == 4 {
-		return x86.AFMOVF
-	}
-	return x86.AFMOVD
-}
-
-// flush387 removes all entries from the 387 floating-point stack.
-func flush387(s *gc.SSAGenState) {
-	for k := range s.SSEto387 {
-		p := gc.Prog(x86.AFMOVDP)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_F0
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_F0
-		delete(s.SSEto387, k)
-	}
-}
-
-// scratch387 initializes a to the scratch location used by some 387 rewrites.
-func scratch387(s *gc.SSAGenState, a *obj.Addr) {
-	a.Type = obj.TYPE_MEM
-	a.Name = obj.NAME_AUTO
-	a.Node = s.ScratchFpMem
-	a.Sym = gc.Linksym(s.ScratchFpMem.Sym)
-	a.Reg = x86.REG_SP
-}
--- a/src/cmd/compile/internal/x86/galign.go
+++ b/src/cmd/compile/internal/x86/galign.go
@@ -77,11 +77,6 @@ func Main() {
 	gc.Thearch.Doregbits = doregbits
 	gc.Thearch.Regnames = regnames

-	gc.Thearch.SSARegToReg = ssaRegToReg
-	gc.Thearch.SSAMarkMoves = ssaMarkMoves
-	gc.Thearch.SSAGenValue = ssaGenValue
-	gc.Thearch.SSAGenBlock = ssaGenBlock
-
 	gc.Main()
 	gc.Exit(0)
 }
--- a/src/cmd/compile/internal/x86/ssa.go
+++ b/src/cmd/compile/internal/x86/ssa.go
--- a/src/cmd/internal/obj/arm/a.out.go
+++ b/src/cmd/internal/obj/arm/a.out.go
@@ -234,8 +234,6 @@ const (
 	ASQRTD
 	AABSF
 	AABSD
-	ANEGF
-	ANEGD

 	ASRL
 	ASRA
--- a/src/cmd/internal/obj/arm/anames.go
+++ b/src/cmd/internal/obj/arm/anames.go
@@ -59,8 +59,6 @@ var Anames = []string{
 	"SQRTD",
 	"ABSF",
 	"ABSD",
-	"NEGF",
-	"NEGD",
 	"SRL",
 	"SRA",
 	"SLL",
--- a/src/cmd/internal/obj/arm/asm5.go
+++ b/src/cmd/internal/obj/arm/asm5.go
@@ -1434,8 +1434,6 @@ func buildop(ctxt *obj.Link) {
 			opset(AMOVDF, r0)
 			opset(AABSF, r0)
 			opset(AABSD, r0)
-			opset(ANEGF, r0)
-			opset(ANEGD, r0)

 		case ACMPF:
 			opset(ACMPD, r0)
@@ -1932,7 +1930,7 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 		r := int(p.Reg)
 		if r == 0 {
 			r = rt
-			if p.As == AMOVF || p.As == AMOVD || p.As == AMOVFD || p.As == AMOVDF || p.As == ASQRTF || p.As == ASQRTD || p.As == AABSF || p.As == AABSD || p.As == ANEGF || p.As == ANEGD {
+			if p.As == AMOVF || p.As == AMOVD || p.As == AMOVFD || p.As == AMOVDF || p.As == ASQRTF || p.As == ASQRTD || p.As == AABSF || p.As == AABSD {
 				r = 0
 			}
 		}
@@ -2510,10 +2508,6 @@ func oprrr(ctxt *obj.Link, a obj.As, sc int) uint32 {
 		return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xb<<8 | 0xc<<4
 	case AABSF:
 		return o | 0xe<<24 | 0xb<<20 | 0<<16 | 0xa<<8 | 0xc<<4
-	case ANEGD:
-		return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xb<<8 | 0x4<<4
-	case ANEGF:
-		return o | 0xe<<24 | 0xb<<20 | 1<<16 | 0xa<<8 | 0x4<<4
 	case ACMPD:
 		return o | 0xe<<24 | 0xb<<20 | 4<<16 | 0xb<<8 | 0xc<<4
 	case ACMPF:
--- a/src/cmd/internal/obj/arm/obj5.go
+++ b/src/cmd/internal/obj/arm/obj5.go
@@ -669,9 +669,7 @@ func softfloat(ctxt *obj.Link, cursym *obj.LSym) {
 			ASQRTF,
 			ASQRTD,
 			AABSF,
-			AABSD,
-			ANEGF,
-			ANEGD:
+			AABSD:
 			goto soft

 		default:
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -274,7 +274,6 @@ const (
 	C_ADDCON   // 12-bit unsigned, shifted left by 0 or 12
 	C_MOVCON   // generated by a 16-bit constant, optionally inverted and/or shifted by multiple of 16
 	C_BITCON   // bitfield and logical immediate masks
-	C_ABCON0   // could be C_ADDCON0 or C_BITCON
 	C_ABCON    // could be C_ADDCON or C_BITCON
 	C_MBCON    // could be C_MOVCON or C_BITCON
 	C_LCON     // 32-bit constant
@@ -714,10 +713,3 @@ const (
 	AB  = obj.AJMP
 	ABL = obj.ACALL
 )
-
-const (
-	// shift types
-	SHIFT_LL = 0 << 22
-	SHIFT_LR = 1 << 22
-	SHIFT_AR = 2 << 22
-)
--- a/src/cmd/internal/obj/arm64/anames7.go
+++ b/src/cmd/internal/obj/arm64/anames7.go
@@ -20,7 +20,6 @@ var cnames7 = []string{
 	"ADDCON",
 	"MOVCON",
 	"BITCON",
-	"ABCON0",
 	"ABCON",
 	"MBCON",
 	"LCON",
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -161,12 +161,10 @@ var optab = []Optab{
 	{AADD, C_ADDCON, C_RSP, C_RSP, 2, 4, 0, 0, 0},
 	{AADD, C_ADDCON, C_NONE, C_RSP, 2, 4, 0, 0, 0},
 	{ACMP, C_ADDCON, C_RSP, C_NONE, 2, 4, 0, 0, 0},
-	{AADD, C_MOVCON, C_RSP, C_RSP, 62, 8, 0, 0, 0},
-	{AADD, C_MOVCON, C_NONE, C_RSP, 62, 8, 0, 0, 0},
-	{ACMP, C_MOVCON, C_RSP, C_NONE, 62, 8, 0, 0, 0},
-	{AADD, C_BITCON, C_RSP, C_RSP, 62, 8, 0, 0, 0},
-	{AADD, C_BITCON, C_NONE, C_RSP, 62, 8, 0, 0, 0},
-	{ACMP, C_BITCON, C_RSP, C_NONE, 62, 8, 0, 0, 0},
+	// TODO: these don't work properly.
+	// {AADD, C_MBCON, C_RSP, C_RSP, 2, 4, 0, 0, 0},
+	// {AADD, C_MBCON, C_NONE, C_RSP, 2, 4, 0, 0, 0},
+	// {ACMP, C_MBCON, C_RSP, C_NONE, 2, 4, 0, 0, 0},
 	{AADD, C_VCON, C_RSP, C_RSP, 13, 8, 0, LFROM, 0},
 	{AADD, C_VCON, C_NONE, C_RSP, 13, 8, 0, LFROM, 0},
 	{ACMP, C_VCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0},
@@ -190,14 +188,11 @@ var optab = []Optab{
 	{AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
 	{ABIC, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
 	{ABIC, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
-	{AAND, C_BITCON, C_REG, C_REG, 53, 4, 0, 0, 0},
-	{AAND, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
-	{ABIC, C_BITCON, C_REG, C_REG, 53, 4, 0, 0, 0},
-	{ABIC, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
-	{AAND, C_MOVCON, C_REG, C_REG, 62, 8, 0, 0, 0},
-	{AAND, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0},
-	{ABIC, C_MOVCON, C_REG, C_REG, 62, 8, 0, 0, 0},
-	{ABIC, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0},
+	// TODO: these don't work properly.
+	// {AAND, C_BITCON, C_REG, C_REG, 53, 4, 0, 0, 0},
+	// {AAND, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
+	// {ABIC, C_BITCON, C_REG, C_REG, 53, 4, 0, 0, 0},
+	// {ABIC, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
 	{AAND, C_VCON, C_REG, C_REG, 28, 8, 0, LFROM, 0},
 	{AAND, C_VCON, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
 	{ABIC, C_VCON, C_REG, C_REG, 28, 8, 0, LFROM, 0},
@@ -221,8 +216,8 @@ var optab = []Optab{
 	// TODO: these don't work properly.
 	// { AMOVW,		C_ADDCON,	C_NONE,	C_REG,		2, 4, 0 , 0},
 	// { AMOVD,		C_ADDCON,	C_NONE,	C_REG,		2, 4, 0 , 0},
-	{AMOVW, C_BITCON, C_NONE, C_REG, 32, 4, 0, 0, 0},
-	{AMOVD, C_BITCON, C_NONE, C_REG, 32, 4, 0, 0, 0},
+	// { AMOVW,		C_BITCON,	C_NONE,	C_REG,		53, 4, 0 , 0},
+	// { AMOVD,		C_BITCON,	C_NONE,	C_REG,		53, 4, 0 , 0},

 	{AMOVK, C_VCON, C_NONE, C_REG, 33, 4, 0, 0, 0},
 	{AMOVD, C_AACON, C_NONE, C_REG, 4, 4, REGFROM, 0, 0},
@@ -720,18 +715,15 @@ func flushpool(ctxt *obj.Link, p *obj.Prog, skip int) {
 */
 func addpool(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
 	c := aclass(ctxt, a)
-	lit := ctxt.Instoffset
 	t := *ctxt.NewProg()
 	t.As = AWORD
 	sz := 4

-	// MOVD foo(SB), R is actually
-	//	MOVD addr, REGTMP
-	//	MOVD REGTMP, R
+	// MOVW foo(SB), R is actually
+	//	MOV addr, REGTMP
+	//	MOVW REGTMP, R
 	// where addr is the address of the DWORD containing the address of foo.
-	if p.As == AMOVD || c == C_ADDR || c == C_VCON || int64(lit) != int64(int32(lit)) || uint64(lit) != uint64(uint32(lit)) {
-		// conservative: don't know if we want signed or unsigned extension.
-		// in case of ambiguity, store 64-bit
+	if p.As == AMOVD || c == C_ADDR || c == C_VCON {
 		t.As = ADWORD
 		sz = 8
 	}
@@ -748,12 +740,29 @@ func addpool(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
 		t.To.Type = a.Type
 		t.To.Name = a.Name

-	/* This is here because MOV uint12<<12, R is disabled in optab.
-	Because of this, we need to load the constant from memory. */
-	case C_ADDCON:
-		fallthrough
+		/* This is here to work around a bug where we generate negative
+		operands that match C_MOVCON, but we use them with
+		instructions that only accept unsigned immediates. This
+		will cause oplook to return a variant of the instruction
+		that loads the negative constant from memory, rather than
+		using the immediate form. Because of that load, we get here,
+		so we need to know what to do with C_MOVCON.

-	case C_PSAUTO,
+		The correct fix is to use the "negation" instruction variant,
+		e.g. CMN $1, R instead of CMP $-1, R, or SUB $1, R instead
+		of ADD $-1, R. */
+	case C_MOVCON,
+
+		/* This is here because MOV uint12<<12, R is disabled in optab.
+		Because of this, we need to load the constant from memory. */
+		C_ADDCON,
+
+		/* These are here because they are disabled in optab.
+		Because of this, we need to load the constant from memory. */
+		C_BITCON,
+		C_ABCON,
+		C_MBCON,
+		C_PSAUTO,
 		C_PPAUTO,
 		C_UAUTO4K,
 		C_UAUTO8K,
@@ -781,7 +790,7 @@ func addpool(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
 		}

 		t.To.Type = obj.TYPE_CONST
-		t.To.Offset = lit
+		t.To.Offset = ctxt.Instoffset
 		break
 	}

@@ -835,137 +844,11 @@ func isaddcon(v int64) bool {
 	return v <= 0xFFF
 }

-// isbitcon returns whether a constant can be encoded into a logical instruction.
-// bitcon has a binary form of repetition of a bit sequence of length 2, 4, 8, 16, 32, or 64,
-// which itself is a rotate (w.r.t. the length of the unit) of a sequence of ones.
-// special cases: 0 and -1 are not bitcon.
-// this function needs to run against virtually all the constants, so it needs to be fast.
-// for this reason, bitcon testing and bitcon encoding are separate functions.
-func isbitcon(x uint64) bool {
-	if x == 1<<64-1 || x == 0 {
-		return false
-	}
-	// determine the period and sign-extend a unit to 64 bits
-	switch {
-	case x != x>>32|x<<32:
-		// period is 64
-		// nothing to do
-	case x != x>>16|x<<48:
-		// period is 32
-		x = uint64(int64(int32(x)))
-	case x != x>>8|x<<56:
-		// period is 16
-		x = uint64(int64(int16(x)))
-	case x != x>>4|x<<60:
-		// period is 8
-		x = uint64(int64(int8(x)))
-	default:
-		// period is 4 or 2, always true
-		// 0001, 0010, 0100, 1000 -- 0001 rotate
-		// 0011, 0110, 1100, 1001 -- 0011 rotate
-		// 0111, 1011, 1101, 1110 -- 0111 rotate
-		// 0101, 1010             -- 01   rotate, repeat
-		return true
-	}
-	return sequenceOfOnes(x) || sequenceOfOnes(^x)
-}
-
-// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros
-func sequenceOfOnes(x uint64) bool {
-	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
-	y += x
-	return (y-1)&y == 0
-}
-
-// bitconEncode returns the encoding of a bitcon used in logical instructions
-// x is known to be a bitcon
-// a bitcon is a sequence of n ones at low bits (i.e. 1<<n-1), right rotated
-// by R bits, and repeated with period of 64, 32, 16, 8, 4, or 2.
-// it is encoded in logical instructions with 3 bitfields
-// N (1 bit) : R (6 bits) : S (6 bits), where
-// N=1           -- period=64
-// N=0, S=0xxxxx -- period=32
-// N=0, S=10xxxx -- period=16
-// N=0, S=110xxx -- period=8
-// N=0, S=1110xx -- period=4
-// N=0, S=11110x -- period=2
-// R is the shift amount, low bits of S = n-1
-func bitconEncode(x uint64, mode int) uint32 {
-	var period uint32
-	// determine the period and sign-extend a unit to 64 bits
-	switch {
-	case x != x>>32|x<<32:
-		period = 64
-	case x != x>>16|x<<48:
-		period = 32
-		x = uint64(int64(int32(x)))
-	case x != x>>8|x<<56:
-		period = 16
-		x = uint64(int64(int16(x)))
-	case x != x>>4|x<<60:
-		period = 8
-		x = uint64(int64(int8(x)))
-	case x != x>>2|x<<62:
-		period = 4
-		x = uint64(int64(x<<60) >> 60)
-	default:
-		period = 2
-		x = uint64(int64(x<<62) >> 62)
-	}
-	neg := false
-	if int64(x) < 0 {
-		x = ^x
-		neg = true
-	}
-	y := x & -x // lowest set bit of x.
-	s := log2(y)
-	n := log2(x+y) - s // x (or ^x) is a sequence of n ones left shifted by s bits
-	if neg {
-		// ^x is a sequence of n ones left shifted by s bits
-		// adjust n, s for x
-		s = n + s
-		n = period - n
-	}
-
-	N := uint32(0)
-	if mode == 64 && period == 64 {
-		N = 1
-	}
-	R := (period - s) & (period - 1) & uint32(mode-1) // shift amount of right rotate
-	S := (n - 1) | 63&^(period<<1-1)                  // low bits = #ones - 1, high bits encodes period
-	return N<<22 | R<<16 | S<<10
-}
-
-func log2(x uint64) uint32 {
-	if x == 0 {
-		panic("log2 of 0")
-	}
-	n := uint32(0)
-	if x >= 1<<32 {
-		x >>= 32
-		n += 32
-	}
-	if x >= 1<<16 {
-		x >>= 16
-		n += 16
-	}
-	if x >= 1<<8 {
-		x >>= 8
-		n += 8
-	}
-	if x >= 1<<4 {
-		x >>= 4
-		n += 4
-	}
-	if x >= 1<<2 {
-		x >>= 2
-		n += 2
-	}
-	if x >= 1<<1 {
-		x >>= 1
-		n += 1
-	}
-	return n
+func isbitcon(v uint64) bool {
+	/*  fancy bimm32 or bimm64? */
+	// TODO(aram):
+	return false
+	// return findmask(v) != nil || (v>>32) == 0 && findmask(v|(v<<32)) != nil
 }

 func autoclass(l int64) int {
@@ -1136,9 +1019,6 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int {
 			}
 			if isaddcon(v) {
 				if v <= 0xFFF {
-					if isbitcon(uint64(v)) {
-						return C_ABCON0
-					}
 					return C_ADDCON0
 				}
 				if isbitcon(uint64(v)) {
@@ -1205,10 +1085,6 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int {
 	return C_GOK
 }

-func oclass(a *obj.Addr) int {
-	return int(a.Class) - 1
-}
-
 func oplook(ctxt *obj.Link, p *obj.Prog) *Optab {
 	a1 := int(p.Optab)
 	if a1 != 0 {
@@ -1275,17 +1151,17 @@ func cmp(a int, b int) bool {
 		}

 	case C_ADDCON0:
-		if b == C_ZCON || b == C_ABCON0 {
+		if b == C_ZCON {
 			return true
 		}

 	case C_ADDCON:
-		if b == C_ZCON || b == C_ABCON0 || b == C_ADDCON0 || b == C_ABCON {
+		if b == C_ZCON || b == C_ADDCON0 || b == C_ABCON {
 			return true
 		}

 	case C_BITCON:
-		if b == C_ABCON0 || b == C_ABCON || b == C_MBCON {
+		if b == C_ABCON || b == C_MBCON {
 			return true
 		}

@@ -1295,7 +1171,7 @@ func cmp(a int, b int) bool {
 		}

 	case C_LCON:
-		if b == C_ZCON || b == C_BITCON || b == C_ADDCON || b == C_ADDCON0 || b == C_ABCON || b == C_ABCON0 || b == C_MBCON || b == C_MOVCON {
+		if b == C_ZCON || b == C_BITCON || b == C_ADDCON || b == C_ADDCON0 || b == C_ABCON || b == C_MBCON || b == C_MOVCON {
 			return true
 		}

@@ -2430,7 +2306,34 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 		o2 = olsr12u(ctxt, int32(opldr12(ctxt, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.To.Reg))

 	case 32: /* mov $con, R -> movz/movn */
-		o1 = omovconst(ctxt, p.As, p, &p.From, int(p.To.Reg))
+		r := 32
+
+		if p.As == AMOVD {
+			r = 64
+		}
+		d := p.From.Offset
+		s := movcon(d)
+		if s < 0 || s >= r {
+			d = ^d
+			s = movcon(d)
+			if s < 0 || s >= r {
+				ctxt.Diag("impossible move wide: %#x\n%v", uint64(p.From.Offset), p)
+			}
+			if p.As == AMOVD {
+				o1 = opirr(ctxt, AMOVN)
+			} else {
+				o1 = opirr(ctxt, AMOVNW)
+			}
+		} else {
+			if p.As == AMOVD {
+				o1 = opirr(ctxt, AMOVZ)
+			} else {
+				o1 = opirr(ctxt, AMOVZW)
+			}
+		}
+
+		rt := int(p.To.Reg)
+		o1 |= uint32((((d >> uint(s*16)) & 0xFFFF) << 5) | int64((uint32(s)&3)<<21) | int64(rt&31))

 	case 33: /* movk $uimm16 << pos */
 		o1 = opirr(ctxt, p.As)
@@ -2698,26 +2601,8 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {

 		o1 |= uint32((p.From.Offset & 0x7F) << 5)

-	case 53: /* and/or/eor/bic/... $bitcon, Rn, Rd */
-		a := p.As
-		rt := int(p.To.Reg)
-		r := int(p.Reg)
-		if r == 0 {
-			r = rt
-		}
-		mode := 64
-		v := uint64(p.From.Offset)
-		switch p.As {
-		case AANDW, AORRW, AEORW, AANDSW:
-			mode = 32
-		case ABIC, AORN, AEON, ABICS:
-			v = ^v
-		case ABICW, AORNW, AEONW, ABICSW:
-			v = ^v
-			mode = 32
-		}
-		o1 = opirr(ctxt, a)
-		o1 |= bitconEncode(v, mode) | uint32(r&31)<<5 | uint32(rt&31)
+	case 53: /* and/or/eor/bic/... $bimmN, Rn, Rd -> op (N,r,s), Rn, Rd */
+		ctxt.Diag("bitmask immediate not implemented\n%v", p)

 	case 54: /* floating point arith */
 		o1 = oprrr(ctxt, p.As)
@@ -2809,31 +2694,6 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {

 		o1 = ADR(0, uint32(d), uint32(p.To.Reg))

-	case 62: /* op $movcon, [R], R -> mov $movcon, REGTMP + op REGTMP, [R], R */
-		if p.Reg == REGTMP {
-			ctxt.Diag("cannot use REGTMP as source: %v\n", p)
-		}
-		o1 = omovconst(ctxt, AMOVD, p, &p.From, REGTMP)
-
-		rt := int(p.To.Reg)
-		if p.To.Type == obj.TYPE_NONE {
-			rt = REGZERO
-		}
-		r := int(p.Reg)
-		if r == 0 {
-			r = rt
-		}
-		if p.To.Type != obj.TYPE_NONE && (p.To.Reg == REGSP || r == REGSP) {
-			o2 = opxrrr(ctxt, p.As)
-			o2 |= REGTMP & 31 << 16
-			o2 |= LSL0_64
-		} else {
-			o2 = oprrr(ctxt, p.As)
-			o2 |= REGTMP & 31 << 16 /* shift is 0 */
-		}
-		o2 |= uint32(r&31) << 5
-		o2 |= uint32(rt & 31)
-
 		/* reloc ops */
 	case 64: /* movT R,addr -> adrp + add + movT R, (REGTMP) */
 		o1 = ADR(1, 0, REGTMP)
@@ -3514,28 +3374,28 @@ func opirr(ctxt *obj.Link, a obj.As) uint32 {
 		return 1<<31 | 0x10<<24

 		/* op $bimm, Rn, Rd */
-	case AAND, ABIC:
+	case AAND:
 		return S64 | 0<<29 | 0x24<<23

-	case AANDW, ABICW:
+	case AANDW:
 		return S32 | 0<<29 | 0x24<<23 | 0<<22

-	case AORR, AORN:
+	case AORR:
 		return S64 | 1<<29 | 0x24<<23

-	case AORRW, AORNW:
+	case AORRW:
 		return S32 | 1<<29 | 0x24<<23 | 0<<22

-	case AEOR, AEON:
+	case AEOR:
 		return S64 | 2<<29 | 0x24<<23

-	case AEORW, AEONW:
+	case AEORW:
 		return S32 | 2<<29 | 0x24<<23 | 0<<22

-	case AANDS, ABICS:
+	case AANDS:
 		return S64 | 3<<29 | 0x24<<23

-	case AANDSW, ABICSW:
+	case AANDSW:
 		return S32 | 3<<29 | 0x24<<23 | 0<<22

 	case AASR:
@@ -4240,52 +4100,6 @@ func omovlit(ctxt *obj.Link, as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32
 	return uint32(o1)
 }

-// load a constant (MOVCON or BITCON) in a into rt
-func omovconst(ctxt *obj.Link, as obj.As, p *obj.Prog, a *obj.Addr, rt int) (o1 uint32) {
-	if c := oclass(a); c == C_BITCON || c == C_ABCON || c == C_ABCON0 {
-		// or $bitcon, REGZERO, rt
-		mode := 64
-		var as1 obj.As
-		switch as {
-		case AMOVW:
-			as1 = AORRW
-			mode = 32
-		case AMOVD:
-			as1 = AORR
-		}
-		o1 = opirr(ctxt, as1)
-		o1 |= bitconEncode(uint64(a.Offset), mode) | uint32(REGZERO&31)<<5 | uint32(rt&31)
-		return o1
-	}
-
-	r := 32
-	if as == AMOVD {
-		r = 64
-	}
-	d := a.Offset
-	s := movcon(d)
-	if s < 0 || s >= r {
-		d = ^d
-		s = movcon(d)
-		if s < 0 || s >= r {
-			ctxt.Diag("impossible move wide: %#x\n%v", uint64(a.Offset), p)
-		}
-		if as == AMOVD {
-			o1 = opirr(ctxt, AMOVN)
-		} else {
-			o1 = opirr(ctxt, AMOVNW)
-		}
-	} else {
-		if as == AMOVD {
-			o1 = opirr(ctxt, AMOVZ)
-		} else {
-			o1 = opirr(ctxt, AMOVZW)
-		}
-	}
-	o1 |= uint32((((d >> uint(s*16)) & 0xFFFF) << 5) | int64((uint32(s)&3)<<21) | int64(rt&31))
-	return o1
-}
-
 func opbfm(ctxt *obj.Link, a obj.As, r int, s int, rf int, rt int) uint32 {
 	var c uint32
 	o := opirr(ctxt, a)
--- a/src/cmd/internal/obj/arm64/obj7.go
+++ b/src/cmd/internal/obj/arm64/obj7.go
@@ -279,30 +279,20 @@ func progedit(ctxt *obj.Link, p *obj.Prog) {
 	// Rewrite negative immediates as positive immediates with
 	// complementary instruction.
 	switch p.As {
-	case AADD, ASUB, ACMP, ACMN:
-		if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && p.From.Offset != -1<<63 {
+	case AADD,
+		AADDW,
+		ASUB,
+		ASUBW,
+		ACMP,
+		ACMPW,
+		ACMN,
+		ACMNW:
+		if p.From.Type == obj.NAME_EXTERN && p.From.Offset < 0 {
 			p.From.Offset = -p.From.Offset
 			p.As = complements[p.As]
 		}
-	case AADDW, ASUBW, ACMPW, ACMNW:
-		if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && int32(p.From.Offset) != -1<<31 {
-			p.From.Offset = -p.From.Offset
-			p.As = complements[p.As]
-		}
-	}

-	// For 32-bit logical instruction with constant,
-	// rewrite the high 32-bit to be a repetition of
-	// the low 32-bit, so that the BITCON test can be
-	// shared for both 32-bit and 64-bit. 32-bit ops
-	// will zero the high 32-bit of the destination
-	// register anyway.
-	switch p.As {
-	case AANDW, AORRW, AEORW, AANDSW:
-		if p.From.Type == obj.TYPE_CONST {
-			v := p.From.Offset & 0xffffffff
-			p.From.Offset = v | v<<32
-		}
+		break
 	}

 	if ctxt.Flag_dynlink {
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -112,17 +112,13 @@ import (
 //			val = int32(y)
 //
 //	reg<<shift, reg>>shift, reg->shift, reg@>shift
-//		Shifted register value, for ARM and ARM64.
+//		Shifted register value, for ARM.
 //		In this form, reg must be a register and shift can be a register or an integer constant.
 //		Encoding:
 //			type = TYPE_SHIFT
-//		On ARM:
 //			offset = (reg&15) | shifttype<<5 | count
 //			shifttype = 0, 1, 2, 3 for <<, >>, ->, @>
 //			count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant.
-//		On ARM64:
-//			offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10
-//			shifttype = 0, 1, 2 for <<, >>, ->
 //
 //	(reg, reg)
 //		A destination register pair. When used as the last argument of an instruction,
--- a/src/cmd/internal/obj/ppc64/a.out.go
+++ b/src/cmd/internal/obj/ppc64/a.out.go
@@ -185,15 +185,6 @@ const (
 	NOSCHED = 1 << 9
 )

-// Bit settings from the CR
-
-const (
-	C_COND_LT = iota // 0 result is negative
-	C_COND_GT        // 1 result is positive
-	C_COND_EQ        // 2 result is zero
-	C_COND_SO        // 3 summary overflow
-)
-
 const (
 	C_NONE = iota
 	C_REG
@@ -219,8 +210,8 @@ const (
 	C_LAUTO
 	C_SEXT
 	C_LEXT
-	C_ZOREG // conjecture: either (1) register + zeroed offset, or (2) "R0" implies zero or C_REG
-	C_SOREG // register + signed offset
+	C_ZOREG
+	C_SOREG
 	C_LOREG
 	C_FPSCR
 	C_MSR
@@ -324,8 +315,6 @@ const (
 	AFMOVDU
 	AFMOVS
 	AFMOVSU
-	AFMOVSX
-	AFMOVSZ
 	AFMSUB
 	AFMSUBCC
 	AFMSUBS
--- a/src/cmd/internal/obj/ppc64/anames.go
+++ b/src/cmd/internal/obj/ppc64/anames.go
@@ -91,8 +91,6 @@ var Anames = []string{
 	"FMOVDU",
 	"FMOVS",
 	"FMOVSU",
-	"FMOVSX",
-	"FMOVSZ",
 	"FMSUB",
 	"FMSUBCC",
 	"FMSUBS",
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -53,7 +53,7 @@ type Optab struct {
 	a2    uint8
 	a3    uint8
 	a4    uint8
-	type_ int8 // cases in asmout below. E.g., 44 = st r,(ra+rb); 45 = ld (ra+rb), r
+	type_ int8
 	size  int8
 	param int16
 }
@@ -310,12 +310,6 @@ var optab = []Optab{
 	{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 35, 8, REGSP},
 	{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 35, 8, REGZERO},
 	{AFMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, 74, 8, 0},
-	{AFMOVSX, C_ZOREG, C_REG, C_NONE, C_FREG, 45, 4, 0},
-	{AFMOVSX, C_ZOREG, C_NONE, C_NONE, C_FREG, 45, 4, 0},
-	{AFMOVSX, C_FREG, C_REG, C_NONE, C_ZOREG, 44, 4, 0},
-	{AFMOVSX, C_FREG, C_NONE, C_NONE, C_ZOREG, 44, 4, 0},
-	{AFMOVSZ, C_ZOREG, C_REG, C_NONE, C_FREG, 45, 4, 0},
-	{AFMOVSZ, C_ZOREG, C_NONE, C_NONE, C_FREG, 45, 4, 0},
 	{ASYNC, C_NONE, C_NONE, C_NONE, C_NONE, 46, 4, 0},
 	{AWORD, C_LCON, C_NONE, C_NONE, C_NONE, 40, 4, 0},
 	{ADWORD, C_LCON, C_NONE, C_NONE, C_NONE, 31, 8, 0},
@@ -926,7 +920,7 @@ func buildop(ctxt *obj.Link) {
 		switch r {
 		default:
 			ctxt.Diag("unknown op in build: %v", obj.Aconv(r))
-			log.Fatalf("instruction missing from switch in asm9.go:buildop: %v", obj.Aconv(r))
+			log.Fatalf("bad code")

 		case ADCBF: /* unary indexed: op (b+a); op (b) */
 			opset(ADCBI, r0)
@@ -1271,8 +1265,6 @@ func buildop(ctxt *obj.Link) {

 		case AADD,
 			AANDCC, /* and. Rb,Rs,Ra; andi. $uimm,Rs,Ra; andis. $uimm,Rs,Ra */
-			AFMOVSX,
-			AFMOVSZ,
 			ALSW,
 			AMOVW,
 			/* load/store/move word with sign extension; special 32-bit move; move 32-bit literals */
@@ -3246,10 +3238,6 @@ func oploadx(ctxt *obj.Link, a obj.As) uint32 {
 		return OPVCC(31, 535, 0, 0) /* lfsx */
 	case AFMOVSU:
 		return OPVCC(31, 567, 0, 0) /* lfsux */
-	case AFMOVSX:
-		return OPVCC(31, 855, 0, 0) /* lfiwax - power6, isa 2.05 */
-	case AFMOVSZ:
-		return OPVCC(31, 887, 0, 0) /* lfiwzx - power7, isa 2.06 */
 	case AMOVH:
 		return OPVCC(31, 343, 0, 0) /* lhax */
 	case AMOVHU:
@@ -3344,8 +3332,6 @@ func opstorex(ctxt *obj.Link, a obj.As) uint32 {
 		return OPVCC(31, 663, 0, 0) /* stfsx */
 	case AFMOVSU:
 		return OPVCC(31, 695, 0, 0) /* stfsux */
-	case AFMOVSX:
-		return OPVCC(31, 983, 0, 0) /* stfiwx */

 	case AMOVHZ, AMOVH:
 		return OPVCC(31, 407, 0, 0) /* sthx */
--- a/src/cmd/internal/obj/util.go
+++ b/src/cmd/internal/obj/util.go
@@ -286,23 +286,14 @@ func Dconv(p *Prog, a *Addr) string {

 	case TYPE_SHIFT:
 		v := int(a.Offset)
-		ops := "<<>>->@>"
-		switch goarch := Getgoarch(); goarch {
-		case "arm":
-			op := ops[((v>>5)&3)<<1:]
-			if v&(1<<4) != 0 {
-				str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
-			} else {
-				str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
-			}
-			if a.Reg != 0 {
-				str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
-			}
-		case "arm64":
-			op := ops[((v>>22)&3)<<1:]
-			str = fmt.Sprintf("R%d%c%c%d", (v>>16)&31, op[0], op[1], (v>>10)&63)
-		default:
-			panic("TYPE_SHIFT is not supported on " + goarch)
+		op := "<<>>->@>"[((v>>5)&3)<<1:]
+		if v&(1<<4) != 0 {
+			str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
+		} else {
+			str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
+		}
+		if a.Reg != 0 {
+			str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
 		}

 	case TYPE_REGREG:
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -2835,9 +2835,7 @@ func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int)
 				goto bad
 			}
 			if p.Mode == 32 && ctxt.Flag_shared {
-				// The base register has already been set. It holds the PC
-				// of this instruction returned by a PC-reading thunk.
-				// See obj6.go:rewriteToPcrel.
+				base = REG_CX
 			} else {
 				base = REG_NONE
 			}
@@ -2882,9 +2880,7 @@ func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int)
 			ctxt.Diag("bad addr: %v", p)
 		}
 		if p.Mode == 32 && ctxt.Flag_shared {
-			// The base register has already been set. It holds the PC
-			// of this instruction returned by a PC-reading thunk.
-			// See obj6.go:rewriteToPcrel.
+			base = REG_CX
 		} else {
 			base = REG_NONE
 		}
@@ -4020,26 +4016,25 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
 							obj.Hnacl:
 							if ctxt.Flag_shared {
 								// Note that this is not generating the same insns as the other cases.
-								//     MOV TLS, dst
+								//     MOV TLS, R_to
 								// becomes
-								//     call __x86.get_pc_thunk.dst
-								//     movl (gotpc + g@gotntpoff)(dst), dst
+								//     call __x86.get_pc_thunk.cx
+								//     movl (gotpc + g@gotntpoff)(%ecx),$R_To
 								// which is encoded as
-								//     call __x86.get_pc_thunk.dst
-								//     movq 0(dst), dst
+								//     call __x86.get_pc_thunk.cx
+								//     movq 0(%ecx), R_to
 								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
 								// is g, which we can't check here, but will when we assemble the second
 								// instruction.
-								dst := p.To.Reg
 								ctxt.AsmBuf.Put1(0xe8)
 								r = obj.Addrel(ctxt.Cursym)
 								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
 								r.Type = obj.R_CALL
 								r.Siz = 4
-								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk."+strings.ToLower(Rconv(int(dst))), 0)
+								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
 								ctxt.AsmBuf.PutInt32(0)

-								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
+								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[REG_CX]|(reg[p.To.Reg]<<3)))
 								r = obj.Addrel(ctxt.Cursym)
 								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
 								r.Type = obj.R_TLS_IE
--- a/src/cmd/internal/obj/x86/obj6.go
+++ b/src/cmd/internal/obj/x86/obj6.go
@@ -36,7 +36,6 @@ import (
 	"fmt"
 	"log"
 	"math"
-	"strings"
 )

 func CanUse1InsnTLS(ctxt *obj.Link) bool {
@@ -334,13 +333,6 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) {
 		lea = ALEAL
 		mov = AMOVL
 		reg = REG_CX
-		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
-			// Special case: clobber the destination register with
-			// the PC so we don't have to clobber CX.
-			// The SSA backend depends on CX not being clobbered across LEAL.
-			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
-			reg = p.To.Reg
-		}
 	}

 	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
@@ -399,7 +391,7 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) {
 			dest = p.To
 			p.As = mov
 			p.To.Type = obj.TYPE_REG
-			p.To.Reg = reg
+			p.To.Reg = REG_CX
 			p.To.Sym = nil
 			p.To.Name = obj.NAME_NONE
 		}
@@ -420,7 +412,7 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) {
 			q.As = pAs
 			q.To = dest
 			q.From.Type = obj.TYPE_REG
-			q.From.Reg = reg
+			q.From.Reg = REG_CX
 		}
 	}
 	if p.From3 != nil && p.From3.Name == obj.NAME_EXTERN {
@@ -517,7 +509,7 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog) {
 		return
 	}
 	// Any Prog (aside from the above special cases) with an Addr with Name ==
-	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
+	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.cx
 	// inserted before it.
 	isName := func(a *obj.Addr) bool {
 		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
@@ -550,18 +542,12 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog) {
 	if !isName(&p.From) && !isName(&p.To) && (p.From3 == nil || !isName(p.From3)) {
 		return
 	}
-	var dst int16 = REG_CX
-	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
-		dst = p.To.Reg
-		// Why?  See the comment near the top of rewriteToUseGot above.
-		// AMOVLs might be introduced by the GOT rewrites.
-	}
 	q := obj.Appendp(ctxt, p)
 	q.RegTo2 = 1
 	r := obj.Appendp(ctxt, q)
 	r.RegTo2 = 1
 	q.As = obj.ACALL
-	q.To.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk."+strings.ToLower(Rconv(int(dst))), 0)
+	q.To.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
 	q.To.Type = obj.TYPE_MEM
 	q.To.Name = obj.NAME_EXTERN
 	q.To.Sym.Local = true
@@ -571,15 +557,6 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog) {
 	r.From3 = p.From3
 	r.Reg = p.Reg
 	r.To = p.To
-	if isName(&p.From) {
-		r.From.Reg = dst
-	}
-	if isName(&p.To) {
-		r.To.Reg = dst
-	}
-	if p.From3 != nil && isName(p.From3) {
-		r.From3.Reg = dst
-	}
 	obj.Nopout(p)
 }

--- a/src/cmd/link/internal/x86/asm.go
+++ b/src/cmd/link/internal/x86/asm.go
@@ -55,37 +55,21 @@ func gentext() {
 		return
 	}

-	// Generate little thunks that load the PC of the next instruction into a register.
-	for _, r := range [...]struct {
-		name string
-		num  uint8
-	}{
-		{"ax", 0},
-		{"cx", 1},
-		{"dx", 2},
-		{"bx", 3},
-		// sp
-		{"bp", 5},
-		{"si", 6},
-		{"di", 7},
-	} {
-		thunkfunc := ld.Linklookup(ld.Ctxt, "__x86.get_pc_thunk."+r.name, 0)
-		thunkfunc.Type = obj.STEXT
-		thunkfunc.Attr |= ld.AttrLocal
-		thunkfunc.Attr |= ld.AttrReachable //TODO: remove?
-		o := func(op ...uint8) {
-			for _, op1 := range op {
-				ld.Adduint8(ld.Ctxt, thunkfunc, op1)
-			}
+	thunkfunc := ld.Linklookup(ld.Ctxt, "__x86.get_pc_thunk.cx", 0)
+	thunkfunc.Type = obj.STEXT
+	thunkfunc.Attr |= ld.AttrLocal
+	thunkfunc.Attr |= ld.AttrReachable
+	o := func(op ...uint8) {
+		for _, op1 := range op {
+			ld.Adduint8(ld.Ctxt, thunkfunc, op1)
 		}
-		// 8b 04 24	mov    (%esp),%eax
-		// Destination register is in bits 3-5 of the middle byte, so add that in.
-		o(0x8b, 0x04+r.num<<3, 0x24)
-		// c3		ret
-		o(0xc3)
-
-		ld.Ctxt.Textp = append(ld.Ctxt.Textp, thunkfunc)
 	}
+	// 8b 0c 24	mov    (%esp),%ecx
+	o(0x8b, 0x0c, 0x24)
+	// c3		ret
+	o(0xc3)
+
+	ld.Ctxt.Textp = append(ld.Ctxt.Textp, thunkfunc)

 	addmoduledata := ld.Linklookup(ld.Ctxt, "runtime.addmoduledata", 0)
 	if addmoduledata.Type == obj.STEXT {
@@ -100,7 +84,7 @@ func gentext() {
 	initfunc.Type = obj.STEXT
 	initfunc.Attr |= ld.AttrLocal
 	initfunc.Attr |= ld.AttrReachable
-	o := func(op ...uint8) {
+	o = func(op ...uint8) {
 		for _, op1 := range op {
 			ld.Adduint8(ld.Ctxt, initfunc, op1)
 		}
--- a/src/compress/flate/deflate.go
+++ b/src/compress/flate/deflate.go
@@ -724,7 +724,7 @@ func (w *Writer) Close() error {
 // the result of NewWriter or NewWriterDict called with dst
 // and w's level and dictionary.
 func (w *Writer) Reset(dst io.Writer) {
-	if dw, ok := w.d.w.w.(*dictWriter); ok {
+	if dw, ok := w.d.w.writer.(*dictWriter); ok {
 		// w was created with NewWriterDict
 		dw.w = dst
 		w.d.reset(dw)
--- a/src/compress/flate/deflate_test.go
+++ b/src/compress/flate/deflate_test.go
@@ -6,6 +6,7 @@ package flate

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"internal/testenv"
 	"io"
@@ -631,3 +632,52 @@ func TestBestSpeed(t *testing.T) {
 		}
 	}
 }
+
+var errIO = errors.New("IO error")
+
+// failWriter fails with errIO exactly once: on the Write call after n successful calls.
+type failWriter struct{ n int }
+
+func (w *failWriter) Write(b []byte) (int, error) {
+	w.n--
+	if w.n == -1 {
+		return 0, errIO
+	}
+	return len(b), nil
+}
+
+func TestWriterPersistentError(t *testing.T) {
+	d, err := ioutil.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	d = d[:10000] // Keep this test short
+
+	zw, err := NewWriter(nil, DefaultCompression)
+	if err != nil {
+		t.Fatalf("NewWriter: %v", err)
+	}
+
+	// Sweep over the threshold at which an error is returned.
+	// The variable i makes it such that the ith call to failWriter.Write will
+	// return errIO. Since failWriter errors are not persistent, we must ensure
+	// that flate.Writer errors are persistent.
+	for i := 0; i < 1000; i++ {
+		fw := &failWriter{i}
+		zw.Reset(fw)
+
+		_, werr := zw.Write(d)
+		cerr := zw.Close()
+		if werr != errIO && werr != nil {
+			t.Errorf("test %d, mismatching Write error: got %v, want %v", i, werr, errIO)
+		}
+		if cerr != errIO && fw.n < 0 {
+			t.Errorf("test %d, mismatching Close error: got %v, want %v", i, cerr, errIO)
+		}
+		if fw.n >= 0 {
+			// At this point, the failure threshold was high enough
+			// that we wrote the whole stream without any errors.
+			return
+		}
+	}
+}
--- a/src/compress/flate/huffman_bit_writer.go
+++ b/src/compress/flate/huffman_bit_writer.go
@@ -77,7 +77,11 @@ var offsetBase = []uint32{
 var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}

 type huffmanBitWriter struct {
-	w io.Writer
+	// writer is the underlying writer.
+	// Do not use it directly; use the write method, which ensures
+	// that Write errors are sticky.
+	writer io.Writer
+
 	// Data waiting to be written is bytes[0:nbytes]
 	// and then the low nbits of bits.
 	bits            uint64
@@ -96,7 +100,7 @@ type huffmanBitWriter struct {

 func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
 	return &huffmanBitWriter{
-		w:               w,
+		writer:          w,
 		literalFreq:     make([]int32, maxNumLit),
 		offsetFreq:      make([]int32, offsetCodeCount),
 		codegen:         make([]uint8, maxNumLit+offsetCodeCount+1),
@@ -107,7 +111,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
 }

 func (w *huffmanBitWriter) reset(writer io.Writer) {
-	w.w = writer
+	w.writer = writer
 	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
 	w.bytes = [bufferSize]byte{}
 }
@@ -129,11 +133,21 @@ func (w *huffmanBitWriter) flush() {
 		n++
 	}
 	w.bits = 0
-	_, w.err = w.w.Write(w.bytes[:n])
+	w.write(w.bytes[:n])
 	w.nbytes = 0
 }

+func (w *huffmanBitWriter) write(b []byte) {
+	if w.err != nil {
+		return
+	}
+	_, w.err = w.writer.Write(b)
+}
+
 func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
+	if w.err != nil {
+		return
+	}
 	w.bits |= uint64(b) << w.nbits
 	w.nbits += nb
 	if w.nbits >= 48 {
@@ -150,7 +164,7 @@ func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
 		bytes[5] = byte(bits >> 40)
 		n += 6
 		if n >= bufferFlushSize {
-			_, w.err = w.w.Write(w.bytes[:n])
+			w.write(w.bytes[:n])
 			n = 0
 		}
 		w.nbytes = n
@@ -173,13 +187,10 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) {
 		n++
 	}
 	if n != 0 {
-		_, w.err = w.w.Write(w.bytes[:n])
-		if w.err != nil {
-			return
-		}
+		w.write(w.bytes[:n])
 	}
 	w.nbytes = 0
-	_, w.err = w.w.Write(bytes)
+	w.write(bytes)
 }

 // RFC 1951 3.2.7 specifies a special run-length encoding for specifying
@@ -341,7 +352,7 @@ func (w *huffmanBitWriter) writeCode(c hcode) {
 		bytes[5] = byte(bits >> 40)
 		n += 6
 		if n >= bufferFlushSize {
-			_, w.err = w.w.Write(w.bytes[:n])
+			w.write(w.bytes[:n])
 			n = 0
 		}
 		w.nbytes = n
@@ -572,6 +583,9 @@ func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets
 // writeTokens writes a slice of tokens to the output.
 // codes for literal and offset encoding must be supplied.
 func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
+	if w.err != nil {
+		return
+	}
 	for _, t := range tokens {
 		if t < matchType {
 			w.writeCode(leCodes[t.literal()])
@@ -676,9 +690,9 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
 		if n < bufferFlushSize {
 			continue
 		}
-		_, w.err = w.w.Write(w.bytes[:n])
+		w.write(w.bytes[:n])
 		if w.err != nil {
-			return
+			return // Return early in the event of write failures
 		}
 		n = 0
 	}
--- a/src/crypto/aes/cbc_s390x.go
+++ b/src/crypto/aes/cbc_s390x.go
@@ -48,7 +48,9 @@ func (x *cbc) CryptBlocks(dst, src []byte) {
 	if len(dst) < len(src) {
 		panic("crypto/cipher: output smaller than input")
 	}
-	cryptBlocksChain(x.c, &x.iv[0], &x.b.key[0], &dst[0], &src[0], len(src))
+	if len(src) > 0 {
+		cryptBlocksChain(x.c, &x.iv[0], &x.b.key[0], &dst[0], &src[0], len(src))
+	}
 }

 func (x *cbc) SetIV(iv []byte) {
--- a/src/crypto/cipher/cipher_test.go
+++ b/src/crypto/cipher/cipher_test.go
@@ -5,8 +5,10 @@
 package cipher_test

 import (
+	"bytes"
 	"crypto/aes"
 	"crypto/cipher"
+	"crypto/des"
 	"testing"
 )

@@ -34,3 +36,55 @@ func mustPanic(t *testing.T, msg string, f func()) {
 	}()
 	f()
 }
+
+func TestEmptyPlaintext(t *testing.T) {
+	var key [16]byte
+	a, err := aes.NewCipher(key[:16])
+	if err != nil {
+		t.Fatal(err)
+	}
+	d, err := des.NewCipher(key[:8])
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	s := 16
+	pt := make([]byte, s)
+	ct := make([]byte, s)
+	for i := 0; i < 16; i++ {
+		pt[i], ct[i] = byte(i), byte(i)
+	}
+
+	assertEqual := func(name string, got, want []byte) {
+		if !bytes.Equal(got, want) {
+			t.Fatalf("%s: got %v, want %v", name, got, want)
+		}
+	}
+
+	for _, b := range []cipher.Block{a, d} {
+		iv := make([]byte, b.BlockSize())
+		cbce := cipher.NewCBCEncrypter(b, iv)
+		cbce.CryptBlocks(ct, pt[:0])
+		assertEqual("CBC encrypt", ct, pt)
+
+		cbcd := cipher.NewCBCDecrypter(b, iv)
+		cbcd.CryptBlocks(ct, pt[:0])
+		assertEqual("CBC decrypt", ct, pt)
+
+		cfbe := cipher.NewCFBEncrypter(b, iv)
+		cfbe.XORKeyStream(ct, pt[:0])
+		assertEqual("CFB encrypt", ct, pt)
+
+		cfbd := cipher.NewCFBDecrypter(b, iv)
+		cfbd.XORKeyStream(ct, pt[:0])
+		assertEqual("CFB decrypt", ct, pt)
+
+		ctr := cipher.NewCTR(b, iv)
+		ctr.XORKeyStream(ct, pt[:0])
+		assertEqual("CTR", ct, pt)
+
+		ofb := cipher.NewOFB(b, iv)
+		ofb.XORKeyStream(ct, pt[:0])
+		assertEqual("OFB", ct, pt)
+	}
+}
--- a/src/crypto/tls/conn.go
+++ b/src/crypto/tls/conn.go
@@ -29,10 +29,14 @@ type Conn struct {

 	// constant after handshake; protected by handshakeMutex
 	handshakeMutex sync.Mutex // handshakeMutex < in.Mutex, out.Mutex, errMutex
-	handshakeErr   error      // error resulting from handshake
-	vers           uint16     // TLS version
-	haveVers       bool       // version has been negotiated
-	config         *Config    // configuration passed to constructor
+	// handshakeCond, if not nil, indicates that a goroutine is committed
+	// to running the handshake for this Conn. Other goroutines that need
+	// to wait for the handshake can wait on this, under handshakeMutex.
+	handshakeCond *sync.Cond
+	handshakeErr  error   // error resulting from handshake
+	vers          uint16  // TLS version
+	haveVers      bool    // version has been negotiated
+	config        *Config // configuration passed to constructor
 	// handshakeComplete is true if the connection is currently transfering
 	// application data (i.e. is not currently processing a handshake).
 	handshakeComplete bool
@@ -1206,26 +1210,50 @@ func (c *Conn) Handshake() error {
 	// need to check whether a handshake is pending (such as Write) to
 	// block.
 	//
-	// Thus we take c.handshakeMutex first and, if we find that a handshake
-	// is needed, then we unlock, acquire c.in and c.handshakeMutex in the
-	// correct order, and check again.
+	// Thus we first take c.handshakeMutex to check whether a handshake is
+	// needed.
+	//
+	// If so then, previously, this code would unlock handshakeMutex and
+	// then lock c.in and handshakeMutex in the correct order to run the
+	// handshake. The problem was that it was possible for a Read to
+	// complete the handshake once handshakeMutex was unlocked and then
+	// keep c.in while waiting for network data. Thus a concurrent
+	// operation could be blocked on c.in.
+	//
+	// Thus handshakeCond is used to signal that a goroutine is committed
+	// to running the handshake and other goroutines can wait on it if they
+	// need. handshakeCond is protected by handshakeMutex.
 	c.handshakeMutex.Lock()
 	defer c.handshakeMutex.Unlock()

-	for i := 0; i < 2; i++ {
-		if i == 1 {
-			c.handshakeMutex.Unlock()
-			c.in.Lock()
-			defer c.in.Unlock()
-			c.handshakeMutex.Lock()
-		}
-
+	for {
 		if err := c.handshakeErr; err != nil {
 			return err
 		}
 		if c.handshakeComplete {
 			return nil
 		}
+		if c.handshakeCond == nil {
+			break
+		}
+
+		c.handshakeCond.Wait()
+	}
+
+	// Set handshakeCond to indicate that this goroutine is committing to
+	// running the handshake.
+	c.handshakeCond = sync.NewCond(&c.handshakeMutex)
+	c.handshakeMutex.Unlock()
+
+	c.in.Lock()
+	defer c.in.Unlock()
+
+	c.handshakeMutex.Lock()
+
+	// The handshake cannot have completed when handshakeMutex was unlocked
+	// because this goroutine set handshakeCond.
+	if c.handshakeErr != nil || c.handshakeComplete {
+		panic("handshake should not have been able to complete after handshakeCond was set")
 	}

 	if c.isClient {
@@ -1236,6 +1264,16 @@ func (c *Conn) Handshake() error {
 	if c.handshakeErr == nil {
 		c.handshakes++
 	}
+
+	if c.handshakeErr == nil && !c.handshakeComplete {
+		panic("handshake should have had a result.")
+	}
+
+	// Wake any other goroutines that are waiting for this handshake to
+	// complete.
+	c.handshakeCond.Broadcast()
+	c.handshakeCond = nil
+
 	return c.handshakeErr
 }

--- a/src/crypto/tls/handshake_client_test.go
+++ b/src/crypto/tls/handshake_client_test.go
@@ -1045,3 +1045,57 @@ func TestBuffering(t *testing.T) {
 		t.Errorf("expected server handshake to complete with only two writes, but saw %d", n)
 	}
 }
+
+func TestHandshakeRace(t *testing.T) {
+	// This test races a Read and Write to try and complete a handshake in
+	// order to provide some evidence that there are no races or deadlocks
+	// in the handshake locking.
+	for i := 0; i < 32; i++ {
+		c, s := net.Pipe()
+
+		go func() {
+			server := Server(s, testConfig)
+			if err := server.Handshake(); err != nil {
+				panic(err)
+			}
+
+			var request [1]byte
+			if n, err := server.Read(request[:]); err != nil || n != 1 {
+				panic(err)
+			}
+
+			server.Write(request[:])
+			server.Close()
+		}()
+
+		startWrite := make(chan struct{})
+		startRead := make(chan struct{})
+		readDone := make(chan struct{})
+
+		client := Client(c, testConfig)
+		go func() {
+			<-startWrite
+			var request [1]byte
+			client.Write(request[:])
+		}()
+
+		go func() {
+			<-startRead
+			var reply [1]byte
+			if n, err := client.Read(reply[:]); err != nil || n != 1 {
+				panic(err)
+			}
+			c.Close()
+			readDone <- struct{}{}
+		}()
+
+		if i&1 == 1 {
+			startWrite <- struct{}{}
+			startRead <- struct{}{}
+		} else {
+			startRead <- struct{}{}
+			startWrite <- struct{}{}
+		}
+		<-readDone
+	}
+}
--- a/src/hash/crc32/crc32_s390x.go
+++ b/src/hash/crc32/crc32_s390x.go
@@ -4,14 +4,9 @@

 package crc32

-import (
-	"unsafe"
-)
-
 const (
 	vxMinLen    = 64
-	vxAlignment = 16
-	vxAlignMask = vxAlignment - 1
+	vxAlignMask = 15 // align to 16 bytes
 )

 // hasVectorFacility reports whether the machine has the z/Architecture
@@ -49,20 +44,13 @@ func genericIEEE(crc uint32, p []byte) uint32 {
 	return update(crc, IEEETable, p)
 }

-// updateCastagnoli calculates the checksum of p using genericCastagnoli to
-// align the data appropriately for vectorCastagnoli. It avoids using
-// vectorCastagnoli entirely if the length of p is less than or equal to
-// vxMinLen.
+// updateCastagnoli calculates the checksum of p using
+// vectorizedCastagnoli if possible and falling back onto
+// genericCastagnoli as needed.
 func updateCastagnoli(crc uint32, p []byte) uint32 {
 	// Use vectorized function if vector facility is available and
 	// data length is above threshold.
-	if hasVX && len(p) > vxMinLen {
-		pAddr := uintptr(unsafe.Pointer(&p[0]))
-		if pAddr&vxAlignMask != 0 {
-			prealign := vxAlignment - int(pAddr&vxAlignMask)
-			crc = genericCastagnoli(crc, p[:prealign])
-			p = p[prealign:]
-		}
+	if hasVX && len(p) >= vxMinLen {
 		aligned := len(p) & ^vxAlignMask
 		crc = vectorizedCastagnoli(crc, p[:aligned])
 		p = p[aligned:]
@@ -75,19 +63,12 @@ func updateCastagnoli(crc uint32, p []byte) uint32 {
 	return genericCastagnoli(crc, p)
 }

-// updateIEEE calculates the checksum of p using genericIEEE to align the data
-// appropriately for vectorIEEE. It avoids using vectorIEEE entirely if the length
-// of p is less than or equal to vxMinLen.
+// updateIEEE calculates the checksum of p using vectorizedIEEE if
+// possible and falling back onto genericIEEE as needed.
 func updateIEEE(crc uint32, p []byte) uint32 {
 	// Use vectorized function if vector facility is available and
 	// data length is above threshold.
-	if hasVX && len(p) > vxMinLen {
-		pAddr := uintptr(unsafe.Pointer(&p[0]))
-		if pAddr&vxAlignMask != 0 {
-			prealign := vxAlignment - int(pAddr&vxAlignMask)
-			crc = genericIEEE(crc, p[:prealign])
-			p = p[prealign:]
-		}
+	if hasVX && len(p) >= vxMinLen {
 		aligned := len(p) & ^vxAlignMask
 		crc = vectorizedIEEE(crc, p[:aligned])
 		p = p[aligned:]
--- a/src/hash/crc32/crc32_s390x.s
+++ b/src/hash/crc32/crc32_s390x.s
@@ -128,6 +128,10 @@ TEXT vectorizedBody<>(SB),NOSPLIT,$0
 	VZERO   V0
 	VLVGF   $3, R2, V0

+	// Crash if the input size is less than 64 bytes.
+	CMP     R4, $64
+	BLT     crash
+
 	// Load a 64-byte data chunk and XOR with CRC
 	VLM     0(R3), V1, V4    // 64-bytes into V1..V4

@@ -243,3 +247,6 @@ done:
 	XOR     $0xffffffff, R2 // NOTW R2
 	MOVWZ   R2, ret + 32(FP)
 	RET
+
+crash:
+	MOVD    $0, (R0) // input size is less than 64-bytes
--- a/src/io/multi.go
+++ b/src/io/multi.go
@@ -18,15 +18,16 @@ func (mr *multiReader) Read(p []byte) (n int, err error) {
 			}
 		}
 		n, err = mr.readers[0].Read(p)
+		if err == EOF {
+			mr.readers = mr.readers[1:]
+		}
 		if n > 0 || err != EOF {
-			if err == EOF {
-				// Don't return EOF yet. There may be more bytes
-				// in the remaining readers.
+			if err == EOF && len(mr.readers) > 0 {
+				// Don't return EOF yet. More readers remain.
 				err = nil
 			}
 			return
 		}
-		mr.readers = mr.readers[1:]
 	}
 	return 0, EOF
 }
--- a/src/io/multi_test.go
+++ b/src/io/multi_test.go
@@ -196,3 +196,41 @@ func TestMultiReaderFlatten(t *testing.T) {
 			myDepth+2, readDepth)
 	}
 }
+
+// byteAndEOFReader is a Reader which reads one byte (the underlying
+// byte) and io.EOF at once in its Read call.
+type byteAndEOFReader byte
+
+func (b byteAndEOFReader) Read(p []byte) (n int, err error) {
+	if len(p) == 0 {
+		// Read(0 bytes) is useless. We expect no such useless
+		// calls in this test.
+		panic("unexpected call")
+	}
+	p[0] = byte(b)
+	return 1, EOF
+}
+
+// In Go 1.7, this yielded bytes forever.
+func TestMultiReaderSingleByteWithEOF(t *testing.T) {
+	got, err := ioutil.ReadAll(LimitReader(MultiReader(byteAndEOFReader('a'), byteAndEOFReader('b')), 10))
+	if err != nil {
+		t.Fatal(err)
+	}
+	const want = "ab"
+	if string(got) != want {
+		t.Errorf("got %q; want %q", got, want)
+	}
+}
+
+// Test that a reader returning (n, EOF) at the end of a MultiReader
+// chain continues to return EOF on its final read, rather than
+// yielding a (0, EOF).
+func TestMultiReaderFinalEOF(t *testing.T) {
+	r := MultiReader(bytes.NewReader(nil), byteAndEOFReader('a'))
+	buf := make([]byte, 2)
+	n, err := r.Read(buf)
+	if n != 1 || err != EOF {
+		t.Errorf("got %v, %v; want 1, EOF", n, err)
+	}
+}
--- a/src/net/dnsclient_unix.go
+++ b/src/net/dnsclient_unix.go
@@ -141,7 +141,7 @@ func (d *Dialer) dialDNS(ctx context.Context, network, server string) (dnsConn,
 }

 // exchange sends a query on the connection and hopes for a response.
-func exchange(ctx context.Context, server, name string, qtype uint16) (*dnsMsg, error) {
+func exchange(ctx context.Context, server, name string, qtype uint16, timeout time.Duration) (*dnsMsg, error) {
 	d := testHookDNSDialer()
 	out := dnsMsg{
 		dnsMsgHdr: dnsMsgHdr{
@@ -152,6 +152,12 @@ func exchange(ctx context.Context, server, name string, qtype uint16) (*dnsMsg,
 		},
 	}
 	for _, network := range []string{"udp", "tcp"} {
+		// TODO(mdempsky): Refactor so defers from UDP-based
+		// exchanges happen before TCP-based exchange.
+
+		ctx, cancel := context.WithDeadline(ctx, time.Now().Add(timeout))
+		defer cancel()
+
 		c, err := d.dialDNS(ctx, network, server)
 		if err != nil {
 			return nil, err
@@ -180,17 +186,10 @@ func tryOneName(ctx context.Context, cfg *dnsConfig, name string, qtype uint16)
 		return "", nil, &DNSError{Err: "no DNS servers", Name: name}
 	}

-	deadline := time.Now().Add(cfg.timeout)
-	if old, ok := ctx.Deadline(); !ok || deadline.Before(old) {
-		var cancel context.CancelFunc
-		ctx, cancel = context.WithDeadline(ctx, deadline)
-		defer cancel()
-	}
-
 	var lastErr error
 	for i := 0; i < cfg.attempts; i++ {
 		for _, server := range cfg.servers {
-			msg, err := exchange(ctx, server, name, qtype)
+			msg, err := exchange(ctx, server, name, qtype, cfg.timeout)
 			if err != nil {
 				lastErr = &DNSError{
 					Err:    err.Error(),
@@ -338,8 +337,9 @@ func lookup(ctx context.Context, name string, qtype uint16) (cname string, rrs [
 }

 // avoidDNS reports whether this is a hostname for which we should not
-// use DNS. Currently this includes only .onion and .local names,
-// per RFC 7686 and RFC 6762, respectively. See golang.org/issue/13705.
+// use DNS. Currently this includes only .onion, per RFC 7686. See
+// golang.org/issue/13705. Does not cover .local names (RFC 6762),
+// see golang.org/issue/16739.
 func avoidDNS(name string) bool {
 	if name == "" {
 		return true
@@ -347,7 +347,7 @@ func avoidDNS(name string) bool {
 	if name[len(name)-1] == '.' {
 		name = name[:len(name)-1]
 	}
-	return stringsHasSuffixFold(name, ".onion") || stringsHasSuffixFold(name, ".local")
+	return stringsHasSuffixFold(name, ".onion")
 }

 // nameList returns a list of names for sequential DNS queries.
--- a/src/net/dnsclient_unix_test.go
+++ b/src/net/dnsclient_unix_test.go
@@ -40,9 +40,9 @@ func TestDNSTransportFallback(t *testing.T) {
 	testenv.MustHaveExternalNetwork(t)

 	for _, tt := range dnsTransportFallbackTests {
-		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(tt.timeout)*time.Second)
+		ctx, cancel := context.WithCancel(context.Background())
 		defer cancel()
-		msg, err := exchange(ctx, tt.server, tt.name, tt.qtype)
+		msg, err := exchange(ctx, tt.server, tt.name, tt.qtype, time.Second)
 		if err != nil {
 			t.Error(err)
 			continue
@@ -82,9 +82,9 @@ func TestSpecialDomainName(t *testing.T) {

 	server := "8.8.8.8:53"
 	for _, tt := range specialDomainNameTests {
-		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+		ctx, cancel := context.WithCancel(context.Background())
 		defer cancel()
-		msg, err := exchange(ctx, server, tt.name, tt.qtype)
+		msg, err := exchange(ctx, server, tt.name, tt.qtype, 3*time.Second)
 		if err != nil {
 			t.Error(err)
 			continue
@@ -112,10 +112,11 @@ func TestAvoidDNSName(t *testing.T) {
 		{"foo.ONION", true},
 		{"foo.ONION.", true},

-		{"foo.local.", true},
-		{"foo.local", true},
-		{"foo.LOCAL", true},
-		{"foo.LOCAL.", true},
+		// But do resolve *.local address; Issue 16739
+		{"foo.local.", false},
+		{"foo.local", false},
+		{"foo.LOCAL", false},
+		{"foo.LOCAL.", false},

 		{"", true}, // will be rejected earlier too

@@ -500,7 +501,7 @@ func TestErrorForOriginalNameWhenSearching(t *testing.T) {
 	d := &fakeDNSDialer{}
 	testHookDNSDialer = func() dnsDialer { return d }

-	d.rh = func(s string, q *dnsMsg) (*dnsMsg, error) {
+	d.rh = func(s string, q *dnsMsg, _ time.Time) (*dnsMsg, error) {
 		r := &dnsMsg{
 			dnsMsgHdr: dnsMsgHdr{
 				id: q.id,
@@ -539,14 +540,15 @@ func TestIgnoreLameReferrals(t *testing.T) {
 	}
 	defer conf.teardown()

-	if err := conf.writeAndUpdate([]string{"nameserver 192.0.2.1", "nameserver 192.0.2.2"}); err != nil {
+	if err := conf.writeAndUpdate([]string{"nameserver 192.0.2.1", // the one that will give a lame referral
+		"nameserver 192.0.2.2"}); err != nil {
 		t.Fatal(err)
 	}

 	d := &fakeDNSDialer{}
 	testHookDNSDialer = func() dnsDialer { return d }

-	d.rh = func(s string, q *dnsMsg) (*dnsMsg, error) {
+	d.rh = func(s string, q *dnsMsg, _ time.Time) (*dnsMsg, error) {
 		t.Log(s, q)
 		r := &dnsMsg{
 			dnsMsgHdr: dnsMsgHdr{
@@ -633,28 +635,30 @@ func BenchmarkGoLookupIPWithBrokenNameServer(b *testing.B) {

 type fakeDNSDialer struct {
 	// reply handler
-	rh func(s string, q *dnsMsg) (*dnsMsg, error)
+	rh func(s string, q *dnsMsg, t time.Time) (*dnsMsg, error)
 }

 func (f *fakeDNSDialer) dialDNS(_ context.Context, n, s string) (dnsConn, error) {
-	return &fakeDNSConn{f.rh, s}, nil
+	return &fakeDNSConn{f.rh, s, time.Time{}}, nil
 }

 type fakeDNSConn struct {
-	rh func(s string, q *dnsMsg) (*dnsMsg, error)
+	rh func(s string, q *dnsMsg, t time.Time) (*dnsMsg, error)
 	s  string
+	t  time.Time
 }

 func (f *fakeDNSConn) Close() error {
 	return nil
 }

-func (f *fakeDNSConn) SetDeadline(time.Time) error {
+func (f *fakeDNSConn) SetDeadline(t time.Time) error {
+	f.t = t
 	return nil
 }

 func (f *fakeDNSConn) dnsRoundTrip(q *dnsMsg) (*dnsMsg, error) {
-	return f.rh(f.s, q)
+	return f.rh(f.s, q, f.t)
 }

 // UDP round-tripper algorithm should ignore invalid DNS responses (issue 13281).
@@ -724,3 +728,72 @@ func TestIgnoreDNSForgeries(t *testing.T) {
 		t.Errorf("got address %v, want %v", got, TestAddr)
 	}
 }
+
+// Issue 16865. If a name server times out, continue to the next.
+func TestRetryTimeout(t *testing.T) {
+	origTestHookDNSDialer := testHookDNSDialer
+	defer func() { testHookDNSDialer = origTestHookDNSDialer }()
+
+	conf, err := newResolvConfTest()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer conf.teardown()
+
+	if err := conf.writeAndUpdate([]string{"nameserver 192.0.2.1", // the one that will timeout
+		"nameserver 192.0.2.2"}); err != nil {
+		t.Fatal(err)
+	}
+
+	d := &fakeDNSDialer{}
+	testHookDNSDialer = func() dnsDialer { return d }
+
+	var deadline0 time.Time
+
+	d.rh = func(s string, q *dnsMsg, deadline time.Time) (*dnsMsg, error) {
+		t.Log(s, q, deadline)
+
+		if deadline.IsZero() {
+			t.Error("zero deadline")
+		}
+
+		if s == "192.0.2.1:53" {
+			deadline0 = deadline
+			time.Sleep(10 * time.Millisecond)
+			return nil, errTimeout
+		}
+
+		if deadline == deadline0 {
+			t.Error("deadline didn't change")
+		}
+
+		r := &dnsMsg{
+			dnsMsgHdr: dnsMsgHdr{
+				id:                  q.id,
+				response:            true,
+				recursion_available: true,
+			},
+			question: q.question,
+			answer: []dnsRR{
+				&dnsRR_CNAME{
+					Hdr: dnsRR_Header{
+						Name:   q.question[0].Name,
+						Rrtype: dnsTypeCNAME,
+						Class:  dnsClassINET,
+					},
+					Cname: "golang.org",
+				},
+			},
+		}
+		return r, nil
+	}
+
+	_, err = goLookupCNAME(context.Background(), "www.golang.org")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if deadline0.IsZero() {
+		t.Error("deadline0 still zero", deadline0)
+	}
+}
--- a/Show More
+++ b/Show More