Compare commits

...

30 Commits

Author SHA1 Message Date
Matthew Dempsky
e809658d77 misc/nacl: include parser.go for cmd/compile/internal/syntax tests
Fix suggested by Minux.

Change-Id: Ia7aa8ccccc16535af4ec3ad23830ef0aa5d776ac
Reviewed-on: https://go-review.googlesource.com/27193
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Minux Ma <minux@golang.org>
2016-08-16 22:42:13 +00:00
Matthew Dempsky
504a104a2c cmd/compile/internal/syntax: match old parser errors and line numbers
This makes a bunch of changes to package syntax to tweak line numbers
for AST nodes. For example, short variable declaration statements are
now associated with the location of the ":=" token, and function calls
are associated with the location of the final ")" token. These help
satisfy many unit tests that assume the old parser's behavior.

Because many of these changes are questionable, they're guarded behind
a new "gcCompat" const to make them easy to identify and revisit in
the future.

A handful of remaining tests are too difficult to make behave
identically. These have been updated to execute with -newparser=0 and
comments explaining why they need to be fixed.

all.bash now passes with both the old and new parsers.

Change-Id: Iab834b71ca8698d39269f261eb5c92a0d55a3bf4
2016-08-16 14:32:09 -07:00
Matthew Dempsky
29cfbcfef4 cmd/compile/internal/gc: use new AST parser
Introduce a new noder type to transform package syntax's AST into gc's
Node tree. Hidden behind a new -newparser flag.

Change-Id: Id0e862ef6196c41533876afc4ec289e21d422d18
2016-08-16 14:32:09 -07:00
Matthew Dempsky
55b9234023 cmd/dist: build cmd/compile/internal/syntax
Change-Id: Ie6dd2318e031be445c0b1ae65d4c78723d5a1167
2016-08-16 14:32:09 -07:00
Matthew Dempsky
d6d493fc56 cmd/compile/internal/syntax: add a Line method to interface Node
Change-Id: I64baf334a35c72336d26fa6755c67eb9d6f4e93c
2016-08-16 14:32:09 -07:00
Matthew Dempsky
4bc85b2eb8 cmd/compile/internal/syntax: refactor DeclStmt parsing
No behavior change.

Change-Id: I93c7cb7ab76b5afa74e6a1f092b7045fb96467b1
2016-08-16 14:32:09 -07:00
Matthew Dempsky
83153e7f37 cmd/compile/internal/syntax: emit errors for unexpected top-level tokens
Fixes cmd/go's TestIssue7108.

Change-Id: I8436b0e3e5a9b36649d46a9c2c741d820115a5d1
2016-08-16 14:32:09 -07:00
Matthew Dempsky
7a8201b924 cmd/compile/internal/syntax: change Read to still return the AST
If the caller provided an error handler, still return our best-effort
parsed AST tree.
2016-08-16 14:32:09 -07:00
Matthew Dempsky
38d52ef0c4 cmd/compile/internal/syntax: fix off-by-1 for implicit semicolons
pos and line already refer to the position immediately before the
returned newline character, so no need to decrement them further.

Change-Id: I1d7a32cde55a5b8a1c6fd882ec8ba532869cc858
2016-08-16 14:32:08 -07:00
Robert Griesemer
f82bd3cc7d cmd/compile/internal/syntax: better error positions for scanner errors
For string/comment not terminated errors, report error at the
start of the string/comment rather than the end of the file.
2016-08-16 14:32:08 -07:00
Robert Griesemer
3c72ed3bfa cmd/compile/internal/syntax: provide LitKind to identify literals
- various minor cleanups related to literal scanning
- report newlines in character and regular string literals (plus tests)
2016-08-16 14:32:08 -07:00
Matthew Dempsky
a17ec6b30c cmd/compile/internal/syntax: simplify BOM handling
Change-Id: I5e2522d34720d7687e1e9ff0831936702976a1c7
2016-08-16 14:32:08 -07:00
Matthew Dempsky
0d8ac5ecf1 cmd/compile/internal/syntax: cleanup TypeSwitchGuard handling
Fixes typeswitch3.go and at least recognizes the error in typesw.go
(albeit wrong line number and non-pretty-printed syntax.Expr output).

Change-Id: I38b8e923265b0e7b3c301aea2f4a901bbabc24b5
2016-08-16 14:32:08 -07:00
Robert Griesemer
573afbcdb6 cmd/compile/internal/syntax: export ImplicitOne and use to identify x++/x--
+ minor documentation improvements
2016-08-16 14:32:08 -07:00
Robert Griesemer
a6a4701053 cmd/compile/internal/syntax: minor cleanups in source handling
2016-08-16 14:32:08 -07:00
Matthew Dempsky
81839a93a9 cmd/compile/internal/syntax: represent empty statements with EmptyStmt
Fixes fixedbugs/bug031.go.

Change-Id: Icc1846368802d71ade14e1a6b79cb6ee651b663e
2016-08-16 14:32:07 -07:00
Robert Griesemer
3a7da56582 cmd/compile/internal/syntax: fix many string/rune literal corner cases
+ many more test cases
2016-08-16 14:32:07 -07:00
Robert Griesemer
f363ad2a55 cmd/compile/internal/syntax: fix source.getr
The source's rune reader logic was overly clever and tried to
determine if an invalid rune was due to an actual source error
or due to not enough bytes in the buffer.

Replaced with slightly optimized version of bufio Reader's
ReadRune method for now. Performance may be a tiny bit lower
but we can always optimize later.

Fixes endless loops when parsing certain source files.
Added test case to ScanError test.
2016-08-16 14:32:07 -07:00
Robert Griesemer
fe52bcc8e6 cmd/compile/internal/syntax: rudimentary support for error reporting
If an ErrorHandler is provided with a syntax.ReadXXX
function, it is invoked for each error encountered.
Will need to be refined, but should enable progress
with all.bash.

Also:
- added tests for lexical errors
- fixed endless loops when encountering non-terminated
  strings and comments
2016-08-16 14:32:07 -07:00
Robert Griesemer
f50329534a cmd/compile/internal/syntax: skip verbose tests in -short mode
2016-08-16 14:32:07 -07:00
Matthew Dempsky
1d8cdaf6ce cmd/compile/internal/syntax: insert implicit semicolons before EOF
Fixes test/eof.go.

Change-Id: Idaa2713bb1669ec165f3a2687fcfa2f8d8c70c74
2016-08-16 14:32:07 -07:00
Matthew Dempsky
49fb8d3cdb cmd/compile/internal/syntax: fix general comment lexing
Previously, we failed to recognize "**/" as the end of a general
comment.

Fixes ken/embed.go.

Change-Id: Ibed746da105453420ec2d184e7be1a5de15927a6
2016-08-16 14:32:06 -07:00
Robert Griesemer
9be3d08e6a cmd/compile/internal/syntax: fix lexing of ".45e1" and add token tests
2016-08-16 14:32:06 -07:00
Matthew Dempsky
b3efd7d6e8 cmd/compile/internal/syntax: fix dot-import parsing
Change-Id: Iba8c9d9c01516706a2fc6ca5502401cfc063f210
2016-08-16 14:32:06 -07:00
Matthew Dempsky
1a5acb4b3a cmd/compile/internal/syntax: pragma support for new parser
Change-Id: Id4d9b40900b97708d2b4586f5a745dcb8b0eb8bd
2016-08-16 14:32:06 -07:00
Matthew Dempsky
4b1cc51518 cmd/compile/internal/syntax: simplify IfStmt
Change IfStmt's Else field from []Stmt to Stmt like in go/ast.

Change-Id: I835577beaf12b6e5895bc93041c13403143b4d2d
2016-08-16 14:32:06 -07:00
Matthew Dempsky
4f989a487d cmd/compile/internal/syntax: various tweaks
Export token constants needed by users (Break, Continue, Defer,
Fallthrough, Go, and Goto).

Fix parsing of Continue statements: previously they would be parsed as
a BranchStmt with Break as the token.

Change aNode, aDecl, aStmt, aExpr, etc.'s tag methods to take a
pointer receiver to prevent accidents like trying to type assert from
syntax.Expr to syntax.ListExpr instead of *syntax.ListExpr.

Relocate pointer fields to the end of AST nodes so the GC can
short-circuit scanning objects.

Change-Id: Ib7505e75726816e260b9b29a2f726f76bf1a38b4
2016-08-16 14:32:02 -07:00
Robert Griesemer
d5bb1db3ec cmd/compile/internal/syntax: don't allocate a string for each keyword
$ go test -run StdLib -fast
parsed 1074061 lines (2828 files) in 571.1019ms (1880681 lines/s)
allocated 263.676Mb (461.696Mb/s)
PASS
2016-08-16 10:48:20 -07:00
Robert Griesemer
c7cc983097 cmd/compile/internal/syntax: implement buffered reading from io.Reader
No performance impact:

$ go test -run StdLib -fast
parsed 1073074 lines (2823 files) in 575.606804ms (1864248 lines/s)
allocated 263.956Mb (458.570Mb/s)
PASS
2016-08-16 10:48:01 -07:00
Robert Griesemer
a85b9c5467 cmd/compile/internal/syntax: fast Go syntax trees, initial commit.
Syntax tree nodes, scanner, parser, basic printers.

Builds syntax trees for entire Go std lib at a rate of ~1.8M lines/s
in warmed up state (MacMini, 2.3 GHz Intel Core i7, 8GB RAM):

$ go test -run StdLib -fast
parsed 1074617 lines (2832 files) in 579.66364ms (1853863 lines/s)
allocated 282.212Mb (486.854Mb/s)
PASS
2016-08-16 10:45:05 -07:00
22 changed files with 6600 additions and 31 deletions


@@ -18,6 +18,10 @@ go src=..
asm
testdata
+
compile
internal
syntax
parser.go
doc
main.go
pkg.go


@@ -29,6 +29,8 @@ var (
goarch string
goroot string
buildid string
flag_newparser bool
)
var (
@@ -182,6 +184,7 @@ func Main() {
obj.Flagcount("live", "debug liveness analysis", &debuglive)
obj.Flagcount("m", "print optimization decisions", &Debug['m'])
flag.BoolVar(&flag_msan, "msan", false, "build code compatible with C/C++ memory sanitizer")
flag.BoolVar(&flag_newparser, "newparser", false, "use new parser")
flag.BoolVar(&newexport, "newexport", true, "use new export format") // TODO(gri) remove eventually (issue 15323)
flag.BoolVar(&nolocalimports, "nolocalimports", false, "reject local (relative) imports")
flag.StringVar(&outfile, "o", "", "write output to `file`")
@@ -311,25 +314,14 @@ func Main() {
}
linehistpush(infile)
f, err := os.Open(infile)
if err != nil {
fmt.Printf("open %s: %v\n", infile, err)
errorexit()
}
bin := bufio.NewReader(f)
// Skip initial BOM if present.
if r, _, _ := bin.ReadRune(); r != BOM {
bin.UnreadRune()
}
block = 1
iota_ = -1000000
imported_unsafe = false
parse_file(bin)
if flag_newparser {
parseFile(infile)
} else {
oldParseFile(infile)
}
if nsyntaxerrors != 0 {
errorexit()
}
@@ -338,9 +330,7 @@ func Main() {
// for the line history to work, and which then has to be corrected elsewhere,
// just add a line here.
lexlineno++
linehistpop()
f.Close()
}
testdclstack()

File diff suppressed because it is too large.


@@ -15,6 +15,7 @@ package gc
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
@@ -26,8 +27,20 @@ func parse_import(bin *bufio.Reader, indent []byte) {
newparser(bin, indent).import_package()
}
// parse_file parses a single Go source file.
func parse_file(bin *bufio.Reader) {
// oldParseFile parses a single Go source file.
func oldParseFile(infile string) {
f, err := os.Open(infile)
if err != nil {
fmt.Printf("open %s: %v\n", infile, err)
errorexit()
}
defer f.Close()
bin := bufio.NewReader(f)
// Skip initial BOM if present.
if r, _, _ := bin.ReadRune(); r != BOM {
bin.UnreadRune()
}
newparser(bin, nil).file()
}


@@ -0,0 +1,212 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements printing of syntax tree structures.
package syntax
import (
"fmt"
"io"
"reflect"
"unicode"
"unicode/utf8"
)
// Fdump dumps the structure of the syntax tree rooted at n to w.
// It is intended for debugging purposes; no specific output format
// is guaranteed.
func Fdump(w io.Writer, n Node) (err error) {
p := dumper{
output: w,
ptrmap: make(map[Node]int),
last: '\n', // force printing of line number on first line
}
defer func() {
if e := recover(); e != nil {
err = e.(localError).err // re-panics if it's not a localError
}
}()
if n == nil {
p.printf("nil\n")
return
}
p.dump(reflect.ValueOf(n), n)
p.printf("\n")
return
}
type dumper struct {
output io.Writer
ptrmap map[Node]int // node -> dump line number
indent int // current indentation level
last byte // last byte processed by Write
line int // current line number
}
var indentBytes = []byte(". ")
func (p *dumper) Write(data []byte) (n int, err error) {
var m int
for i, b := range data {
// invariant: data[0:n] has been written
if b == '\n' {
m, err = p.output.Write(data[n : i+1])
n += m
if err != nil {
return
}
} else if p.last == '\n' {
p.line++
_, err = fmt.Fprintf(p.output, "%6d ", p.line)
if err != nil {
return
}
for j := p.indent; j > 0; j-- {
_, err = p.output.Write(indentBytes)
if err != nil {
return
}
}
}
p.last = b
}
if len(data) > n {
m, err = p.output.Write(data[n:])
n += m
}
return
}
// localError wraps locally caught errors so we can distinguish
// them from genuine panics which we don't want to return as errors.
type localError struct {
err error
}
// printf is a convenience wrapper that takes care of print errors.
func (p *dumper) printf(format string, args ...interface{}) {
if _, err := fmt.Fprintf(p, format, args...); err != nil {
panic(localError{err})
}
}
// dump prints the contents of x.
// If x is the reflect.Value of a struct s, where &s
// implements Node, then &s should be passed for n -
// this permits printing of the unexported span and
// comments fields of the embedded isNode field by
// calling the Span() and Comment() instead of using
// reflection.
func (p *dumper) dump(x reflect.Value, n Node) {
switch x.Kind() {
case reflect.Interface:
if x.IsNil() {
p.printf("nil")
return
}
p.dump(x.Elem(), nil)
case reflect.Ptr:
if x.IsNil() {
p.printf("nil")
return
}
// special cases for identifiers w/o attached comments (common case)
if x, ok := x.Interface().(*Name); ok {
p.printf(x.Value)
return
}
p.printf("*")
// Fields may share type expressions, and declarations
// may share the same group - use ptrmap to keep track
// of nodes that have been printed already.
if ptr, ok := x.Interface().(Node); ok {
if line, exists := p.ptrmap[ptr]; exists {
p.printf("(Node @ %d)", line)
return
}
p.ptrmap[ptr] = p.line
n = ptr
}
p.dump(x.Elem(), n)
case reflect.Slice:
if x.IsNil() {
p.printf("nil")
return
}
p.printf("%s (%d entries) {", x.Type(), x.Len())
if x.Len() > 0 {
p.indent++
p.printf("\n")
for i, n := 0, x.Len(); i < n; i++ {
p.printf("%d: ", i)
p.dump(x.Index(i), nil)
p.printf("\n")
}
p.indent--
}
p.printf("}")
case reflect.Struct:
typ := x.Type()
// if span, ok := x.Interface().(lexical.Span); ok {
// p.printf("%s", &span)
// return
// }
p.printf("%s {", typ)
p.indent++
first := true
if n != nil {
p.printf("\n")
first = false
// p.printf("Span: %s\n", n.Span())
// if c := *n.Comments(); c != nil {
// p.printf("Comments: ")
// p.dump(reflect.ValueOf(c), nil) // a Comment is not a Node
// p.printf("\n")
// }
}
for i, n := 0, typ.NumField(); i < n; i++ {
// Exclude non-exported fields because their
// values cannot be accessed via reflection.
if name := typ.Field(i).Name; isExported(name) {
if first {
p.printf("\n")
first = false
}
p.printf("%s: ", name)
p.dump(x.Field(i), nil)
p.printf("\n")
}
}
p.indent--
p.printf("}")
default:
switch x := x.Interface().(type) {
case string:
// print strings in quotes
p.printf("%q", x)
default:
p.printf("%v", x)
}
}
}
func isExported(name string) bool {
ch, _ := utf8.DecodeRuneInString(name)
return unicode.IsUpper(ch)
}


@@ -0,0 +1,22 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"os"
"testing"
)
func TestDump(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
ast, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}
Fdump(os.Stdout, ast)
}


@@ -0,0 +1,437 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
// ----------------------------------------------------------------------------
// Nodes
type Node interface {
Line() uint32
aNode()
}
type node struct {
doc *Comment // nil means no comment(s) attached
pos uint32
line uint32
}
func (*node) aNode() {}
func (n *node) Line() uint32 {
return n.line
}
func (n *node) init(p *parser) {
n.pos = uint32(p.pos)
n.line = uint32(p.line)
}
// ----------------------------------------------------------------------------
// Files
type File struct {
PkgName *Name
DeclList []Decl
Pragmas []Pragma
Lines int
node
}
type Pragma struct {
Line int
Text string
}
// ----------------------------------------------------------------------------
// Declarations
type (
Decl interface {
Node
aDecl()
}
ImportDecl struct {
LocalPkgName *Name // including "."; nil means no rename present
Path *BasicLit
Group *Group // nil means not part of a group
decl
}
ConstDecl struct {
NameList []*Name
Type Expr // nil means no type
Values Expr // nil means no values
Group *Group // nil means not part of a group
decl
}
TypeDecl struct {
Name *Name
Type Expr
Group *Group // nil means not part of a group
decl
}
VarDecl struct {
NameList []*Name
Type Expr // nil means no type
Values Expr // nil means no values
Group *Group // nil means not part of a group
decl
}
FuncDecl struct {
Attr map[string]bool // go:attr map
Recv *Field // nil means regular function
Name *Name
Type *FuncType
Body []Stmt // nil means no body (forward declaration)
decl
EndLine uint32 // TODO(mdempsky): Cleaner solution.
}
)
type decl struct{ node }
func (*decl) aDecl() {}
// All declarations belonging to the same group point to the same Group node.
type Group struct {
dummy int // not empty so we are guaranteed different Group instances
}
// ----------------------------------------------------------------------------
// Expressions
type (
Expr interface {
Node
aExpr()
}
// Value
Name struct {
Value string
expr
}
// Value
BasicLit struct {
Value string
Kind LitKind
expr
}
// Type { ElemList[0], ElemList[1], ... }
CompositeLit struct {
Type Expr // nil means no literal type
ElemList []Expr
NKeys int // number of elements with keys
expr
}
// Key: Value
KeyValueExpr struct {
Key, Value Expr
expr
}
// func Type { Body }
FuncLit struct {
Type *FuncType
Body []Stmt
EndLine uint32 // TODO(mdempsky): Cleaner solution.
expr
}
// (X)
ParenExpr struct {
X Expr
expr
}
// X.Sel
SelectorExpr struct {
X Expr
Sel *Name
expr
}
// X[Index]
IndexExpr struct {
X Expr
Index Expr
expr
}
// X[Index[0] : Index[1] : Index[2]]
SliceExpr struct {
X Expr
Index [3]Expr
// Full indicates whether this is a simple or full slice expression.
// In a valid AST, this is equivalent to Index[2] != nil.
// TODO(mdempsky): This is only needed to report the "3-index
// slice of string" error when Index[2] is missing.
Full bool
expr
}
// X.(Type)
AssertExpr struct {
X Expr
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Type Expr
expr
}
Operation struct {
Op Operator
X, Y Expr // Y == nil means unary expression
expr
}
// Fun(ArgList[0], ArgList[1], ...)
CallExpr struct {
Fun Expr
ArgList []Expr
HasDots bool // last argument is followed by ...
expr
}
// ElemList[0], ElemList[1], ...
ListExpr struct {
ElemList []Expr
expr
}
// [Len]Elem
ArrayType struct {
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Len Expr // nil means Len is ...
Elem Expr
expr
}
// []Elem
SliceType struct {
Elem Expr
expr
}
// ...Elem
DotsType struct {
Elem Expr
expr
}
// struct { FieldList[0] TagList[0]; FieldList[1] TagList[1]; ... }
StructType struct {
FieldList []*Field
TagList []*BasicLit // i >= len(TagList) || TagList[i] == nil means no tag for field i
expr
}
// Name Type
// Type
Field struct {
Name *Name // nil means anonymous field/parameter (structs/parameters), or embedded interface (interfaces)
Type Expr // field names declared in a list share the same Type (identical pointers)
node
}
// interface { MethodList[0]; MethodList[1]; ... }
InterfaceType struct {
MethodList []*Field
expr
}
FuncType struct {
ParamList []*Field
ResultList []*Field
expr
}
// map[Key]Value
MapType struct {
Key Expr
Value Expr
expr
}
// chan Elem
// <-chan Elem
// chan<- Elem
ChanType struct {
Dir ChanDir // 0 means no direction
Elem Expr
expr
}
)
type expr struct{ node }
func (*expr) aExpr() {}
type ChanDir uint
const (
_ ChanDir = iota
SendOnly
RecvOnly
)
// ----------------------------------------------------------------------------
// Statements
type (
Stmt interface {
Node
aStmt()
}
SimpleStmt interface {
Stmt
aSimpleStmt()
}
EmptyStmt struct {
simpleStmt
}
LabeledStmt struct {
Label *Name
Stmt Stmt
stmt
}
BlockStmt struct {
Body []Stmt
stmt
}
ExprStmt struct {
X Expr
simpleStmt
}
SendStmt struct {
Chan, Value Expr // Chan <- Value
simpleStmt
}
DeclStmt struct {
DeclList []Decl
stmt
}
AssignStmt struct {
Op Operator // 0 means no operation
Lhs, Rhs Expr // Rhs == ImplicitOne means Lhs++ (Op == Add) or Lhs-- (Op == Sub)
simpleStmt
}
BranchStmt struct {
Tok token // Break, Continue, Fallthrough, or Goto
Label *Name
stmt
}
CallStmt struct {
Tok token // Go or Defer
Call *CallExpr
stmt
}
ReturnStmt struct {
Results Expr // nil means no explicit return values
stmt
}
IfStmt struct {
Init SimpleStmt
Cond Expr
Then []Stmt
Else Stmt // either *IfStmt or *BlockStmt
stmt
}
ForStmt struct {
Init SimpleStmt // incl. *RangeClause
Cond Expr
Post SimpleStmt
Body []Stmt
stmt
}
SwitchStmt struct {
Init SimpleStmt
Tag Expr
Body []*CaseClause
stmt
}
SelectStmt struct {
Body []*CommClause
stmt
}
)
type (
RangeClause struct {
Lhs Expr // nil means no Lhs = or Lhs :=
Def bool // means :=
X Expr // range X
simpleStmt
}
TypeSwitchGuard struct {
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Lhs *Name // nil means no Lhs :=
X Expr // X.(type)
expr
}
CaseClause struct {
Cases Expr // nil means default clause
Body []Stmt
node
}
CommClause struct {
Comm SimpleStmt // send or receive stmt; nil means default clause
Body []Stmt
node
}
)
type stmt struct{ node }
func (stmt) aStmt() {}
type simpleStmt struct {
stmt
}
func (simpleStmt) aSimpleStmt() {}
// ----------------------------------------------------------------------------
// Comments
type CommentKind uint
const (
Above CommentKind = iota
Below
Left
Right
)
type Comment struct {
Kind CommentKind
Text string
Next *Comment
}

File diff suppressed because it is too large.


@@ -0,0 +1,157 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"path/filepath"
"runtime"
"strings"
"sync"
"testing"
"time"
)
var fast = flag.Bool("fast", false, "parse package files in parallel")
var src = flag.String("src", "parser.go", "source file to parse")
var verify = flag.Bool("verify", false, "verify idempotent printing")
func TestParse(t *testing.T) {
_, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}
}
func TestStdLib(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
var m1 runtime.MemStats
runtime.ReadMemStats(&m1)
start := time.Now()
type parseResult struct {
filename string
lines int
}
results := make(chan parseResult)
go func() {
for _, dir := range []string{
runtime.GOROOT(),
//"/Users/gri/src",
} {
walkDirs(t, dir, func(filename string) {
if debug {
fmt.Printf("parsing %s\n", filename)
}
ast, err := ReadFile(filename, nil, 0)
if err != nil {
t.Fatal(err)
}
if *verify {
verifyPrint(filename, ast)
}
results <- parseResult{filename, ast.Lines}
})
}
close(results)
}()
var count, lines int
for res := range results {
count++
lines += res.lines
if testing.Verbose() {
fmt.Printf("%5d %s (%d lines)\n", count, res.filename, res.lines)
}
}
dt := time.Since(start)
var m2 runtime.MemStats
runtime.ReadMemStats(&m2)
dm := float64(m2.TotalAlloc-m1.TotalAlloc) / 1e6
fmt.Printf("parsed %d lines (%d files) in %v (%d lines/s)\n", lines, count, dt, int64(float64(lines)/dt.Seconds()))
fmt.Printf("allocated %.3fMb (%.3fMb/s)\n", dm, dm/dt.Seconds())
}
func walkDirs(t *testing.T, dir string, action func(string)) {
fis, err := ioutil.ReadDir(dir)
if err != nil {
t.Error(err)
return
}
var files, dirs []string
for _, fi := range fis {
if fi.Mode().IsRegular() {
if strings.HasSuffix(fi.Name(), ".go") {
path := filepath.Join(dir, fi.Name())
files = append(files, path)
}
} else if fi.IsDir() && fi.Name() != "testdata" {
path := filepath.Join(dir, fi.Name())
if !strings.Contains(path, "go/test") {
dirs = append(dirs, path)
}
}
}
if *fast {
var wg sync.WaitGroup
wg.Add(len(files))
for _, filename := range files {
go func(filename string) {
defer wg.Done()
action(filename)
}(filename)
}
wg.Wait()
} else {
for _, filename := range files {
action(filename)
}
}
for _, dir := range dirs {
walkDirs(t, dir, action)
}
}
func verifyPrint(filename string, ast1 *File) {
var buf1 bytes.Buffer
_, err := Fprint(&buf1, ast1, true)
if err != nil {
panic(err)
}
ast2, err := ReadBytes(buf1.Bytes(), nil, 0)
if err != nil {
panic(err)
}
var buf2 bytes.Buffer
_, err = Fprint(&buf2, ast2, true)
if err != nil {
panic(err)
}
if bytes.Compare(buf1.Bytes(), buf2.Bytes()) != 0 {
fmt.Printf("--- %s ---\n", filename)
fmt.Printf("%s\n", buf1.Bytes())
fmt.Println()
fmt.Printf("--- %s ---\n", filename)
fmt.Printf("%s\n", buf2.Bytes())
fmt.Println()
panic("not equal")
}
}


@@ -0,0 +1,942 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements printing of syntax trees in source format.
package syntax
import (
"bytes"
"fmt"
"io"
"strings"
)
// TODO(gri) Consider removing the linebreaks flag from this signature.
// It's likely rarely used in common cases.
func Fprint(w io.Writer, x Node, linebreaks bool) (n int, err error) {
p := printer{
output: w,
linebreaks: linebreaks,
}
defer func() {
n = p.written
if e := recover(); e != nil {
err = e.(localError).err // re-panics if it's not a localError
}
}()
p.print(x)
p.flush(_EOF)
return
}
func String(n Node) string {
var buf bytes.Buffer
_, err := Fprint(&buf, n, false)
if err != nil {
panic(err) // TODO(gri) print something sensible into buf instead
}
return buf.String()
}
type ctrlSymbol int
const (
none ctrlSymbol = iota
semi
blank
newline
indent
outdent
// comment
// eolComment
)
type whitespace struct {
last token
kind ctrlSymbol
//text string // comment text (possibly ""); valid if kind == comment
}
type printer struct {
output io.Writer
written int // number of bytes written
linebreaks bool // print linebreaks instead of semis
indent int // current indentation level
nlcount int // number of consecutive newlines
pending []whitespace // pending whitespace
lastTok token // last token (after any pending semi) processed by print
}
// write is a thin wrapper around p.output.Write
// that takes care of accounting and error handling.
func (p *printer) write(data []byte) {
n, err := p.output.Write(data)
p.written += n
if err != nil {
panic(localError{err})
}
}
var (
tabBytes = []byte("\t\t\t\t\t\t\t\t")
newlineByte = []byte("\n")
blankByte = []byte(" ")
)
func (p *printer) writeBytes(data []byte) {
if len(data) == 0 {
panic("expected non-empty []byte")
}
if p.nlcount > 0 && p.indent > 0 {
// write indentation
n := p.indent
for n > len(tabBytes) {
p.write(tabBytes)
n -= len(tabBytes)
}
p.write(tabBytes[:n])
}
p.write(data)
p.nlcount = 0
}
func (p *printer) writeString(s string) {
p.writeBytes([]byte(s))
}
// If impliesSemi returns true for a non-blank line's final token tok,
// a semicolon is automatically inserted. Vice versa, a semicolon may
// be omitted in those cases.
func impliesSemi(tok token) bool {
switch tok {
case _Name,
_Break, _Continue, _Fallthrough, _Return,
/*_Inc, _Dec,*/ _Rparen, _Rbrack, _Rbrace: // TODO(gri) fix this
return true
}
return false
}
// TODO(gri) provide table of []byte values for all tokens to avoid repeated string conversion
func lineComment(text string) bool {
return strings.HasPrefix(text, "//")
}
func (p *printer) addWhitespace(kind ctrlSymbol, text string) {
p.pending = append(p.pending, whitespace{p.lastTok, kind /*text*/})
switch kind {
case semi:
p.lastTok = _Semi
case newline:
p.lastTok = 0
// TODO(gri) do we need to handle /*-style comments containing newlines here?
}
}
func (p *printer) flush(next token) {
// eliminate semis and redundant whitespace
sawNewline := next == _EOF
sawParen := next == _Rparen || next == _Rbrace
for i := len(p.pending) - 1; i >= 0; i-- {
switch p.pending[i].kind {
case semi:
k := semi
if sawParen {
sawParen = false
k = none // eliminate semi
} else if sawNewline && impliesSemi(p.pending[i].last) {
sawNewline = false
k = none // eliminate semi
}
p.pending[i].kind = k
case newline:
sawNewline = true
case blank, indent, outdent:
// nothing to do
// case comment:
// // A multi-line comment acts like a newline; and a ""
// // comment implies by definition at least one newline.
// if text := p.pending[i].text; strings.HasPrefix(text, "/*") && strings.ContainsRune(text, '\n') {
// sawNewline = true
// }
// case eolComment:
// // TODO(gri) act depending on sawNewline
default:
panic("unreachable")
}
}
// print pending
prev := none
for i := range p.pending {
switch p.pending[i].kind {
case none:
// nothing to do
case semi:
p.writeString(";")
p.nlcount = 0
prev = semi
case blank:
if prev != blank {
// at most one blank
p.writeBytes(blankByte)
p.nlcount = 0
prev = blank
}
case newline:
const maxEmptyLines = 1
if p.nlcount <= maxEmptyLines {
p.write(newlineByte)
p.nlcount++
prev = newline
}
case indent:
p.indent++
case outdent:
p.indent--
if p.indent < 0 {
panic("negative indentation")
}
// case comment:
// if text := p.pending[i].text; text != "" {
// p.writeString(text)
// p.nlcount = 0
// prev = comment
// }
// // TODO(gri) should check that line comments are always followed by newline
default:
panic("unreachable")
}
}
p.pending = p.pending[:0] // re-use underlying array
}
func mayCombine(prev token, next byte) (b bool) {
return // for now
// switch prev {
// case lexical.Int:
// b = next == '.' // 1.
// case lexical.Add:
// b = next == '+' // ++
// case lexical.Sub:
// b = next == '-' // --
// case lexical.Quo:
// b = next == '*' // /*
// case lexical.Lss:
// b = next == '-' || next == '<' // <- or <<
// case lexical.And:
// b = next == '&' || next == '^' // && or &^
// }
// return
}
func (p *printer) print(args ...interface{}) {
for i := 0; i < len(args); i++ {
switch x := args[i].(type) {
case nil:
// we should not reach here but don't crash
case Node:
p.printNode(x)
case token:
// _Name implies an immediately following string
// argument which is the actual value to print.
var s string
if x == _Name {
i++
if i >= len(args) {
panic("missing string argument after _Name")
}
s = args[i].(string)
} else {
s = x.String()
}
// TODO(gri) This check seems at the wrong place since it doesn't
// take into account pending white space.
if mayCombine(p.lastTok, s[0]) {
panic("adjacent tokens combine without whitespace")
}
if x == _Semi {
// delay printing of semi
p.addWhitespace(semi, "")
} else {
p.flush(x)
p.writeString(s)
p.nlcount = 0
p.lastTok = x
}
case Operator:
if x != 0 {
p.flush(_Operator)
p.writeString(x.String())
}
case ctrlSymbol:
switch x {
case none, semi /*, comment*/ :
panic("unreachable")
case newline:
// TODO(gri) need to handle mandatory newlines after a //-style comment
if !p.linebreaks {
x = blank
}
}
p.addWhitespace(x, "")
// case *Comment: // comments are not Nodes
// p.addWhitespace(comment, x.Text)
default:
panic(fmt.Sprintf("unexpected argument %v (%T)", x, x))
}
}
}
func (p *printer) printNode(n Node) {
// ncom := *n.Comments()
// if ncom != nil {
// // TODO(gri) in general we cannot make assumptions about whether
// // a comment is a /*- or a //-style comment since the syntax
// // tree may have been manipulated. Need to make sure the correct
// // whitespace is emitted.
// for _, c := range ncom.Alone {
// p.print(c, newline)
// }
// for _, c := range ncom.Before {
// if c.Text == "" || lineComment(c.Text) {
// panic("unexpected empty line or //-style 'before' comment")
// }
// p.print(c, blank)
// }
// }
p.printRawNode(n)
// if ncom != nil && len(ncom.After) > 0 {
// for i, c := range ncom.After {
// if i+1 < len(ncom.After) {
// if c.Text == "" || lineComment(c.Text) {
// panic("unexpected empty line or //-style non-final 'after' comment")
// }
// }
// p.print(blank, c)
// }
// //p.print(newline)
// }
}
func (p *printer) printRawNode(n Node) {
switch n := n.(type) {
// expressions and types
case *Name:
p.print(_Name, n.Value) // _Name requires actual value following immediately
case *BasicLit:
p.print(_Name, n.Value) // _Name requires actual value following immediately
case *FuncLit:
p.print(n.Type, blank)
p.printBody(n.Body)
case *CompositeLit:
if n.Type != nil {
p.print(n.Type)
}
p.print(_Lbrace)
if n.NKeys > 0 && n.NKeys == len(n.ElemList) {
p.printExprLines(n.ElemList)
} else {
p.printExprList(n.ElemList)
}
p.print(_Rbrace)
case *ParenExpr:
p.print(_Lparen, n.X, _Rparen)
case *SelectorExpr:
p.print(n.X, _Dot, n.Sel)
case *IndexExpr:
p.print(n.X, _Lbrack, n.Index, _Rbrack)
case *SliceExpr:
p.print(n.X, _Lbrack)
if i := n.Index[0]; i != nil {
p.printNode(i)
}
p.print(_Colon)
if j := n.Index[1]; j != nil {
p.printNode(j)
}
if k := n.Index[2]; k != nil {
p.print(_Colon, k)
}
p.print(_Rbrack)
case *AssertExpr:
p.print(n.X, _Dot, _Lparen)
if n.Type != nil {
p.printNode(n.Type)
} else {
p.print(_Type)
}
p.print(_Rparen)
case *CallExpr:
p.print(n.Fun, _Lparen)
p.printExprList(n.ArgList)
if n.HasDots {
p.print(_DotDotDot)
}
p.print(_Rparen)
case *Operation:
if n.Y == nil {
// unary expr
p.print(n.Op)
// if n.Op == lexical.Range {
// p.print(blank)
// }
p.print(n.X)
} else {
// binary expr
// TODO(gri) eventually take precedence into account
// to control possibly missing parentheses
p.print(n.X, blank, n.Op, blank, n.Y)
}
case *KeyValueExpr:
p.print(n.Key, _Colon, blank, n.Value)
case *ListExpr:
p.printExprList(n.ElemList)
case *ArrayType:
var len interface{} = _DotDotDot
if n.Len != nil {
len = n.Len
}
p.print(_Lbrack, len, _Rbrack, n.Elem)
case *SliceType:
p.print(_Lbrack, _Rbrack, n.Elem)
case *DotsType:
p.print(_DotDotDot, n.Elem)
case *StructType:
p.print(_Struct)
if len(n.FieldList) > 0 && p.linebreaks {
p.print(blank)
}
p.print(_Lbrace)
if len(n.FieldList) > 0 {
p.print(newline, indent)
p.printFieldList(n.FieldList, n.TagList)
p.print(outdent, newline)
}
p.print(_Rbrace)
case *FuncType:
p.print(_Func)
p.printSignature(n)
case *InterfaceType:
p.print(_Interface)
if len(n.MethodList) > 0 && p.linebreaks {
p.print(blank)
}
p.print(_Lbrace)
if len(n.MethodList) > 0 {
p.print(newline, indent)
p.printMethodList(n.MethodList)
p.print(outdent, newline)
}
p.print(_Rbrace)
case *MapType:
p.print(_Map, _Lbrack, n.Key, _Rbrack, n.Value)
case *ChanType:
if n.Dir == RecvOnly {
p.print(_Arrow)
}
p.print(_Chan)
if n.Dir == SendOnly {
p.print(_Arrow)
}
p.print(blank, n.Elem)
// statements
case *DeclStmt:
p.printDecl(n.DeclList)
case *EmptyStmt:
// nothing to print
case *LabeledStmt:
p.print(outdent, n.Label, _Colon, indent, newline, n.Stmt)
case *ExprStmt:
p.print(n.X)
case *SendStmt:
p.print(n.Chan, blank, _Arrow, blank, n.Value)
case *AssignStmt:
p.print(n.Lhs)
if n.Rhs == ImplicitOne {
// TODO(gri) This is going to break the mayCombine
// check once we enable that again.
p.print(n.Op, n.Op) // ++ or --
} else {
p.print(blank, n.Op, _Assign, blank)
p.print(n.Rhs)
}
case *CallStmt:
p.print(n.Tok, blank, n.Call)
case *ReturnStmt:
p.print(_Return)
if n.Results != nil {
p.print(blank, n.Results)
}
case *BranchStmt:
p.print(n.Tok)
if n.Label != nil {
p.print(blank, n.Label)
}
case *BlockStmt:
p.printBody(n.Body)
case *IfStmt:
p.print(_If, blank)
if n.Init != nil {
p.print(n.Init, _Semi, blank)
}
p.print(n.Cond, blank)
p.printBody(n.Then)
if n.Else != nil {
p.print(blank, _Else, blank, n.Else)
}
case *SwitchStmt:
p.print(_Switch, blank)
if n.Init != nil {
p.print(n.Init, _Semi, blank)
}
if n.Tag != nil {
p.print(n.Tag, blank)
}
p.printSwitchBody(n.Body)
case *TypeSwitchGuard:
if n.Lhs != nil {
p.print(n.Lhs, blank, _Define, blank)
}
p.print(n.X, _Dot, _Lparen, _Type, _Rparen)
case *SelectStmt:
p.print(_Select, blank) // for now
p.printSelectBody(n.Body)
case *RangeClause:
if n.Lhs != nil {
tok := _Assign
if n.Def {
tok = _Define
}
p.print(n.Lhs, blank, tok, blank)
}
p.print(_Range, blank, n.X)
case *ForStmt:
p.print(_For, blank)
if n.Init == nil && n.Post == nil {
if n.Cond != nil {
p.print(n.Cond, blank)
}
} else {
if n.Init != nil {
p.print(n.Init)
// TODO(gri) clean this up
if _, ok := n.Init.(*RangeClause); ok {
p.print(blank)
p.printBody(n.Body)
break
}
}
p.print(_Semi, blank)
if n.Cond != nil {
p.print(n.Cond)
}
p.print(_Semi, blank)
if n.Post != nil {
p.print(n.Post, blank)
}
}
p.printBody(n.Body)
case *ImportDecl:
if n.Group == nil {
p.print(_Import, blank)
}
if n.LocalPkgName != nil {
p.print(n.LocalPkgName, blank)
}
p.print(n.Path)
case *ConstDecl:
if n.Group == nil {
p.print(_Const, blank)
}
p.printNameList(n.NameList)
if n.Type != nil {
p.print(blank, n.Type)
}
if n.Values != nil {
p.print(blank, _Assign, blank, n.Values)
}
case *TypeDecl:
if n.Group == nil {
p.print(_Type, blank)
}
p.print(n.Name, blank, n.Type)
case *VarDecl:
if n.Group == nil {
p.print(_Var, blank)
}
p.printNameList(n.NameList)
if n.Type != nil {
p.print(blank, n.Type)
}
if n.Values != nil {
p.print(blank, _Assign, blank, n.Values)
}
case *FuncDecl:
p.print(_Func, blank)
if r := n.Recv; r != nil {
p.print(_Lparen)
if r.Name != nil {
p.print(r.Name, blank)
}
p.printNode(r.Type)
p.print(_Rparen, blank)
}
p.print(n.Name)
p.printSignature(n.Type)
if n.Body != nil {
p.print(blank)
p.printBody(n.Body)
}
case *printGroup:
p.print(n.Tok, blank, _Lparen)
if len(n.Decls) > 0 {
p.print(newline, indent)
for _, d := range n.Decls {
p.printNode(d)
p.print(_Semi, newline)
}
p.print(outdent)
}
p.print(_Rparen)
// files
case *File:
p.print(_Package, blank, n.PkgName)
if len(n.DeclList) > 0 {
p.print(_Semi, newline, newline)
p.printDeclList(n.DeclList)
}
default:
panic(fmt.Sprintf("syntax.printRawNode: unexpected node type %T", n))
}
}
func (p *printer) printFields(fields []*Field, tags []*BasicLit, i, j int) {
if i+1 == j && fields[i].Name == nil {
// anonymous field
p.printNode(fields[i].Type)
} else {
for k, f := range fields[i:j] {
if k > 0 {
p.print(_Comma, blank)
}
p.printNode(f.Name)
}
p.print(blank)
p.printNode(fields[i].Type)
}
if i < len(tags) && tags[i] != nil {
p.print(blank)
p.printNode(tags[i])
}
}
func (p *printer) printFieldList(fields []*Field, tags []*BasicLit) {
i0 := 0
var typ Expr
for i, f := range fields {
if f.Name == nil || f.Type != typ {
if i0 < i {
p.printFields(fields, tags, i0, i)
p.print(_Semi, newline)
i0 = i
}
typ = f.Type
}
}
p.printFields(fields, tags, i0, len(fields))
}
func (p *printer) printMethodList(methods []*Field) {
for i, m := range methods {
if i > 0 {
p.print(_Semi, newline)
}
if m.Name != nil {
p.printNode(m.Name)
p.printSignature(m.Type.(*FuncType))
} else {
p.printNode(m.Type)
}
}
}
func (p *printer) printNameList(list []*Name) {
for i, x := range list {
if i > 0 {
p.print(_Comma, blank)
}
p.printNode(x)
}
}
func (p *printer) printExprList(list []Expr) {
for i, x := range list {
if i > 0 {
p.print(_Comma, blank)
}
p.printNode(x)
}
}
func (p *printer) printExprLines(list []Expr) {
if len(list) > 0 {
p.print(newline, indent)
for _, x := range list {
p.print(x, _Comma, newline)
}
p.print(outdent)
}
}
func groupFor(d Decl) (token, *Group) {
switch d := d.(type) {
case *ImportDecl:
return _Import, d.Group
case *ConstDecl:
return _Const, d.Group
case *TypeDecl:
return _Type, d.Group
case *VarDecl:
return _Var, d.Group
case *FuncDecl:
return _Func, nil
default:
panic("unreachable")
}
}
type printGroup struct {
node
Tok token
Decls []Decl
}
func (p *printer) printDecl(list []Decl) {
tok, group := groupFor(list[0])
if group == nil {
if len(list) != 1 {
panic("unreachable")
}
p.printNode(list[0])
return
}
// if _, ok := list[0].(*EmptyDecl); ok {
// if len(list) != 1 {
// panic("unreachable")
// }
// // TODO(gri) if there are comments inside the empty
// // group, we may need to keep the list non-nil
// list = nil
// }
// printGroup is here for consistent comment handling
// (this is not yet used)
var pg printGroup
// *pg.Comments() = *group.Comments()
pg.Tok = tok
pg.Decls = list
p.printNode(&pg)
}
func (p *printer) printDeclList(list []Decl) {
i0 := 0
var tok token
var group *Group
for i, x := range list {
if s, g := groupFor(x); g == nil || g != group {
if i0 < i {
p.printDecl(list[i0:i])
p.print(_Semi, newline)
// print empty line between different declaration groups,
// different kinds of declarations, or between functions
if g != group || s != tok || s == _Func {
p.print(newline)
}
i0 = i
}
tok, group = s, g
}
}
p.printDecl(list[i0:])
}
func (p *printer) printSignature(sig *FuncType) {
p.printParameterList(sig.ParamList)
if list := sig.ResultList; list != nil {
p.print(blank)
if len(list) == 1 && list[0].Name == nil {
p.printNode(list[0].Type)
} else {
p.printParameterList(list)
}
}
}
func (p *printer) printParameterList(list []*Field) {
p.print(_Lparen)
if len(list) > 0 {
for i, f := range list {
if i > 0 {
p.print(_Comma, blank)
}
if f.Name != nil {
p.printNode(f.Name)
if i+1 < len(list) {
f1 := list[i+1]
if f1.Name != nil && f1.Type == f.Type {
continue // no need to print type
}
}
p.print(blank)
}
p.printNode(f.Type)
}
}
p.print(_Rparen)
}
func (p *printer) printStmtList(list []Stmt, braces bool) {
for i, x := range list {
p.print(x, _Semi)
if i+1 < len(list) {
p.print(newline)
} else if braces {
// Print an extra semicolon if the last statement is
// an empty statement and we are in a braced block
// because one semicolon is automatically removed.
if _, ok := x.(*EmptyStmt); ok {
p.print(x, _Semi)
}
}
}
}
func (p *printer) printBody(list []Stmt) {
p.print(_Lbrace)
if len(list) > 0 {
p.print(newline, indent)
p.printStmtList(list, true)
p.print(outdent, newline)
}
p.print(_Rbrace)
}
func (p *printer) printSwitchBody(list []*CaseClause) {
p.print(_Lbrace)
if len(list) > 0 {
p.print(newline)
for i, c := range list {
p.printCaseClause(c, i+1 == len(list))
p.print(newline)
}
}
p.print(_Rbrace)
}
func (p *printer) printSelectBody(list []*CommClause) {
p.print(_Lbrace)
if len(list) > 0 {
p.print(newline)
for i, c := range list {
p.printCommClause(c, i+1 == len(list))
p.print(newline)
}
}
p.print(_Rbrace)
}
func (p *printer) printCaseClause(c *CaseClause, braces bool) {
if c.Cases != nil {
p.print(_Case, blank, c.Cases)
} else {
p.print(_Default)
}
p.print(_Colon)
if len(c.Body) > 0 {
p.print(newline, indent)
p.printStmtList(c.Body, braces)
p.print(outdent)
}
}
func (p *printer) printCommClause(c *CommClause, braces bool) {
if c.Comm != nil {
p.print(_Case, blank)
p.print(c.Comm)
} else {
p.print(_Default)
}
p.print(_Colon)
if len(c.Body) > 0 {
p.print(newline, indent)
p.printStmtList(c.Body, braces)
p.print(outdent)
}
}
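As an illustration (not part of the reviewed change), the field-grouping loop in printFieldList above can be sketched standalone. The `field` type here is a hypothetical stand-in for syntax.Field; like the printer, it compares type expressions by identity (a pointer here), and for brevity it ignores the anonymous-field case:

```go
package main

import "fmt"

// field is a hypothetical stand-in for syntax.Field; Type is compared
// by identity in printFieldList, so a pointer is used here as well.
type field struct {
	name string
	typ  *string
}

// groupFields mirrors the loop in printFieldList: consecutive named
// fields sharing the identical type expression are emitted as one
// "a, b T" run.
func groupFields(fields []field) []string {
	var out []string
	i0 := 0
	var typ *string
	flush := func(i, j int) {
		names := ""
		for k := i; k < j; k++ {
			if k > i {
				names += ", "
			}
			names += fields[k].name
		}
		out = append(out, names+" "+*fields[i].typ)
	}
	for i, f := range fields {
		if f.typ != typ {
			if i0 < i {
				flush(i0, i)
				i0 = i
			}
			typ = f.typ
		}
	}
	flush(i0, len(fields))
	return out
}

func main() {
	tInt, tStr := "int", "string"
	fs := []field{{"x", &tInt}, {"y", &tInt}, {"s", &tStr}}
	fmt.Println(groupFields(fs)) // two runs: "x, y int" and "s string"
}
```

Consecutive fields with the identical type expression collapse into a single `x, y int` run, which is what the printer emits for struct types.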

@@ -0,0 +1,24 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"os"
"testing"
)
func TestPrint(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
ast, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}
Fprint(os.Stdout, ast, true)
fmt.Println()
}

@@ -0,0 +1,673 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"io"
"strings"
"unicode"
"unicode/utf8"
)
type scanner struct {
source
nlsemi bool // if set '\n' and EOF translate to ';'
// current token, valid after calling next()
pos, line int
tok token
lit string // valid if tok is _Name or _Literal
kind LitKind // valid if tok is _Literal
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
pragmas []Pragma
}
func (s *scanner) init(src io.Reader, errh ErrorHandler) {
s.source.init(src, errh)
s.nlsemi = false
}
func (s *scanner) next() {
nlsemi := s.nlsemi
s.nlsemi = false
redo:
// skip white space
c := s.getr()
for c == ' ' || c == '\t' || c == '\n' && !nlsemi || c == '\r' {
c = s.getr()
}
// token start
s.pos, s.line = s.source.pos0(), s.source.line0
if isLetter(c) || c >= utf8.RuneSelf && (unicode.IsLetter(c) || s.isCompatRune(c, true)) {
s.ident()
return
}
switch c {
case -1:
if nlsemi {
s.tok = _Semi
break
}
s.tok = _EOF
case '\n':
s.tok = _Semi
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
s.number(c)
case '"':
s.stdString()
case '`':
s.rawString()
case '\'':
s.rune()
case '(':
s.tok = _Lparen
case '[':
s.tok = _Lbrack
case '{':
s.tok = _Lbrace
case ',':
s.tok = _Comma
case ';':
s.tok = _Semi
case ')':
s.nlsemi = true
s.tok = _Rparen
case ']':
s.nlsemi = true
s.tok = _Rbrack
case '}':
s.nlsemi = true
s.tok = _Rbrace
case ':':
if s.getr() == '=' {
s.tok = _Define
break
}
s.ungetr()
s.tok = _Colon
case '.':
c = s.getr()
if isDigit(c) {
s.ungetr()
s.source.r0-- // make sure '.' is part of literal (line cannot have changed)
s.number('.')
break
}
if c == '.' {
c = s.getr()
if c == '.' {
s.tok = _DotDotDot
break
}
s.ungetr()
s.source.r0-- // make next ungetr work (line cannot have changed)
}
s.ungetr()
s.tok = _Dot
case '+':
s.op, s.prec = Add, precAdd
c = s.getr()
if c != '+' {
goto assignop
}
s.nlsemi = true
s.tok = _IncOp
case '-':
s.op, s.prec = Sub, precAdd
c = s.getr()
if c != '-' {
goto assignop
}
s.nlsemi = true
s.tok = _IncOp
case '*':
s.op, s.prec = Mul, precMul
// don't goto assignop - want _Star token
if s.getr() == '=' {
s.tok = _AssignOp
break
}
s.ungetr()
s.tok = _Star
case '/':
c = s.getr()
if c == '/' {
s.lineComment()
goto redo
}
if c == '*' {
s.fullComment()
if s.source.line > s.line && nlsemi {
// A multi-line comment acts like a newline;
// it translates to a ';' if nlsemi is set.
s.tok = _Semi
break
}
goto redo
}
s.op, s.prec = Div, precMul
goto assignop
case '%':
s.op, s.prec = Rem, precMul
c = s.getr()
goto assignop
case '&':
c = s.getr()
if c == '&' {
s.op, s.prec = AndAnd, precAndAnd
s.tok = _Operator
break
}
s.op, s.prec = And, precMul
if c == '^' {
s.op = AndNot
c = s.getr()
}
goto assignop
case '|':
c = s.getr()
if c == '|' {
s.op, s.prec = OrOr, precOrOr
s.tok = _Operator
break
}
s.op, s.prec = Or, precAdd
goto assignop
case '~':
s.error("bitwise complement operator is ^")
fallthrough
case '^':
s.op, s.prec = Xor, precAdd
c = s.getr()
goto assignop
case '<':
c = s.getr()
if c == '=' {
s.op, s.prec = Leq, precCmp
s.tok = _Operator
break
}
if c == '<' {
s.op, s.prec = Shl, precMul
c = s.getr()
goto assignop
}
if c == '-' {
s.tok = _Arrow
break
}
s.ungetr()
s.op, s.prec = Lss, precCmp
s.tok = _Operator
case '>':
c = s.getr()
if c == '=' {
s.op, s.prec = Geq, precCmp
s.tok = _Operator
break
}
if c == '>' {
s.op, s.prec = Shr, precMul
c = s.getr()
goto assignop
}
s.ungetr()
s.op, s.prec = Gtr, precCmp
s.tok = _Operator
case '=':
if s.getr() == '=' {
s.op, s.prec = Eql, precCmp
s.tok = _Operator
break
}
s.ungetr()
s.tok = _Assign
case '!':
if s.getr() == '=' {
s.op, s.prec = Neq, precCmp
s.tok = _Operator
break
}
s.ungetr()
s.op, s.prec = Not, 0
s.tok = _Operator
default:
s.tok = 0
s.error(fmt.Sprintf("illegal character %#U", c))
goto redo
}
return
assignop:
if c == '=' {
s.tok = _AssignOp
return
}
s.ungetr()
s.tok = _Operator
}
func isLetter(c rune) bool {
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
}
func isDigit(c rune) bool {
return '0' <= c && c <= '9'
}
func (s *scanner) ident() {
s.startLit()
// accelerate common case (7bit ASCII)
c := s.getr()
for isLetter(c) || isDigit(c) {
c = s.getr()
}
// general case
if c >= utf8.RuneSelf {
for unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || s.isCompatRune(c, false) {
c = s.getr()
}
}
s.ungetr()
lit := s.stopLit()
// possibly a keyword
if len(lit) >= 2 {
if tok := keywordMap[hash(lit)]; tok != 0 && strbyteseql(tokstrings[tok], lit) {
s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
s.tok = tok
return
}
}
s.nlsemi = true
s.lit = string(lit)
s.tok = _Name
}
func (s *scanner) isCompatRune(c rune, start bool) bool {
if !gcCompat || c < utf8.RuneSelf {
return false
}
if start && unicode.IsNumber(c) {
s.error(fmt.Sprintf("identifier cannot begin with digit %#U", c))
} else {
s.error(fmt.Sprintf("invalid identifier character %#U", c))
}
return true
}
// hash is a perfect hash function for keywords.
// It assumes that s has at least length 2.
func hash(s []byte) uint {
return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
}
func strbyteseql(s string, b []byte) bool {
if len(s) == len(b) {
for i, b := range b {
if s[i] != b {
return false
}
}
return true
}
return false
}
var keywordMap [1 << 6]token // size must be power of two
func init() {
// populate keywordMap
for tok := _Break; tok <= _Var; tok++ {
h := hash([]byte(tokstrings[tok]))
if keywordMap[h] != 0 {
panic("imperfect hash")
}
keywordMap[h] = tok
}
}
func (s *scanner) number(c rune) {
s.startLit()
if c != '.' {
s.kind = IntLit // until proven otherwise
if c == '0' {
c = s.getr()
if c == 'x' || c == 'X' {
// hex
c = s.getr()
hasDigit := false
for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
c = s.getr()
hasDigit = true
}
if !hasDigit {
s.error("malformed hex constant")
}
goto done
}
// decimal 0, octal, or float
has8or9 := false
for isDigit(c) {
if c > '7' {
has8or9 = true
}
c = s.getr()
}
if c != '.' && c != 'e' && c != 'E' && c != 'i' {
// octal
if has8or9 {
s.error("malformed octal constant")
}
goto done
}
} else {
// decimal or float
for isDigit(c) {
c = s.getr()
}
}
}
// float
if c == '.' {
s.kind = FloatLit
c = s.getr()
for isDigit(c) {
c = s.getr()
}
}
// exponent
if c == 'e' || c == 'E' {
s.kind = FloatLit
c = s.getr()
if c == '-' || c == '+' {
c = s.getr()
}
if !isDigit(c) {
s.error("malformed floating-point constant exponent")
}
for isDigit(c) {
c = s.getr()
}
}
// complex
if c == 'i' {
s.kind = ImagLit
s.getr()
}
done:
s.ungetr()
s.nlsemi = true
s.lit = string(s.stopLit())
s.tok = _Literal
}
func (s *scanner) stdString() {
s.startLit()
for {
r := s.getr()
if r == '"' {
break
}
if r == '\\' {
s.escape('"')
continue
}
if r == '\n' {
s.ungetr() // assume newline is not part of literal
s.error("newline in string")
break
}
if r < 0 {
s.error_at(s.pos, s.line, "string not terminated")
break
}
}
s.nlsemi = true
s.lit = string(s.stopLit())
s.kind = StringLit
s.tok = _Literal
}
func (s *scanner) rawString() {
s.startLit()
for {
r := s.getr()
if r == '`' {
break
}
if r < 0 {
s.error_at(s.pos, s.line, "string not terminated")
break
}
}
// We leave CRs in the string since they are part of the
// literal (even though they are not part of the literal
// value).
s.nlsemi = true
s.lit = string(s.stopLit())
s.kind = StringLit
s.tok = _Literal
}
func (s *scanner) rune() {
s.startLit()
r := s.getr()
ok := false
if r == '\'' {
s.error("empty character literal or unescaped ' in character literal")
} else if r == '\n' {
s.ungetr() // assume newline is not part of literal
s.error("newline in character literal")
} else {
ok = true
if r == '\\' {
ok = s.escape('\'')
}
}
r = s.getr()
if r != '\'' {
// only report error if we're ok so far
if ok {
s.error("missing '")
}
s.ungetr()
}
s.nlsemi = true
s.lit = string(s.stopLit())
s.kind = RuneLit
s.tok = _Literal
}
func (s *scanner) lineComment() {
// recognize pragmas
var prefix string
r := s.getr()
switch r {
case 'g':
prefix = "go:"
case 'l':
prefix = "line "
default:
goto skip
}
s.startLit()
for _, m := range prefix {
if r != m {
s.stopLit()
goto skip
}
r = s.getr()
}
for r >= 0 {
if r == '\n' {
s.ungetr()
break
}
r = s.getr()
}
s.pragmas = append(s.pragmas, Pragma{
Line: s.line,
Text: strings.TrimSuffix(string(s.stopLit()), "\r"),
})
return
skip:
// consume line
for r != '\n' && r >= 0 {
r = s.getr()
}
s.ungetr() // don't consume '\n' - needed for nlsemi logic
}
func (s *scanner) fullComment() {
for {
r := s.getr()
for r == '*' {
r = s.getr()
if r == '/' {
return
}
}
if r < 0 {
s.error_at(s.pos, s.line, "comment not terminated")
return
}
}
}
func (s *scanner) escape(quote rune) bool {
var n int
var base, max uint32
c := s.getr()
switch c {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
return true
case '0', '1', '2', '3', '4', '5', '6', '7':
n, base, max = 3, 8, 255
case 'x':
c = s.getr()
n, base, max = 2, 16, 255
case 'u':
c = s.getr()
n, base, max = 4, 16, unicode.MaxRune
case 'U':
c = s.getr()
n, base, max = 8, 16, unicode.MaxRune
default:
if c < 0 {
return true // complain in caller about EOF
}
s.error("unknown escape sequence")
return false
}
var x uint32
for i := n; i > 0; i-- {
d := base
switch {
case isDigit(c):
d = uint32(c) - '0'
case 'a' <= c && c <= 'f':
d = uint32(c) - ('a' - 10)
case 'A' <= c && c <= 'F':
d = uint32(c) - ('A' - 10)
}
if d >= base {
if c < 0 {
return true // complain in caller about EOF
}
if gcCompat {
name := "hex"
if base == 8 {
name = "octal"
}
s.error(fmt.Sprintf("non-%s character in escape sequence: %c", name, c))
} else {
if c != quote {
s.error(fmt.Sprintf("illegal character %#U in escape sequence", c))
} else {
s.error("escape sequence incomplete")
}
}
s.ungetr()
return false
}
// d < base
x = x*base + d
c = s.getr()
}
s.ungetr()
if x > max && base == 8 {
s.error(fmt.Sprintf("octal escape value > 255: %d", x))
return false
}
if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
s.error("escape sequence is invalid Unicode code point")
return false
}
return true
}
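The keyword lookup above depends on `hash` being a perfect hash over the keyword spellings in a 64-slot table; the init function panics otherwise. The same property can be checked outside the package with a standalone sketch (the keyword list and table size below are copied by hand rather than taken from tokstrings):

```go
package main

import "fmt"

// keywords lists the spellings of the tokens _Break.._Var that the
// scanner hashes (copied by hand; assumed to match tokstrings).
var keywords = []string{
	"break", "case", "chan", "const", "continue", "default", "defer",
	"else", "fallthrough", "for", "func", "go", "goto", "if", "import",
	"interface", "map", "package", "range", "return", "select",
	"struct", "switch", "type", "var",
}

const tableSize = 64 // must be a power of two, as in keywordMap

// hash is the same formula as in the scanner: it mixes the first two
// bytes and the length, then masks to the table size.
func hash(s []byte) uint {
	return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(tableSize-1)
}

// collisions reports how many table slots would be claimed twice.
func collisions() int {
	var table [tableSize]bool
	n := 0
	for _, kw := range keywords {
		h := hash([]byte(kw))
		if table[h] {
			n++
		}
		table[h] = true
	}
	return n
}

func main() {
	fmt.Println("collisions:", collisions()) // 0 for a perfect hash
}
```

Since the init check in the package panics on any collision, the count here must come out zero for the scanner to work at all.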

@@ -0,0 +1,354 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"os"
"testing"
)
func TestScanner(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
src, err := os.Open("parser.go")
if err != nil {
t.Fatal(err)
}
defer src.Close()
var s scanner
s.init(src, nil)
for {
s.next()
if s.tok == _EOF {
break
}
switch s.tok {
case _Name:
fmt.Println(s.line, s.tok, "=>", s.lit)
case _Operator:
fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
default:
fmt.Println(s.line, s.tok)
}
}
}
func TestTokens(t *testing.T) {
// make source
var buf []byte
for i, s := range sampleTokens {
buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation
buf = append(buf, s.src...) // token
buf = append(buf, "        "[:i&7]...) // trailing spaces
buf = append(buf, "/* foo */ // bar\n"...) // comments
}
// scan source
var got scanner
got.init(&bytesReader{buf}, nil)
got.next()
for i, want := range sampleTokens {
nlsemi := false
if got.line != i+1 {
t.Errorf("got line %d; want %d", got.line, i+1)
}
if got.tok != want.tok {
t.Errorf("got tok = %s; want %s", got.tok, want.tok)
continue
}
switch want.tok {
case _Name, _Literal:
if got.lit != want.src {
t.Errorf("got lit = %q; want %q", got.lit, want.src)
continue
}
nlsemi = true
case _Operator, _AssignOp, _IncOp:
if got.op != want.op {
t.Errorf("got op = %s; want %s", got.op, want.op)
continue
}
if got.prec != want.prec {
t.Errorf("got prec = %d; want %d", got.prec, want.prec)
continue
}
nlsemi = want.tok == _IncOp
case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
nlsemi = true
}
if nlsemi {
got.next()
if got.tok != _Semi {
t.Errorf("got tok = %s; want ;", got.tok)
continue
}
}
got.next()
}
if got.tok != _EOF {
t.Errorf("got %q; want _EOF", got.tok)
}
}
var sampleTokens = [...]struct {
tok token
src string
op Operator
prec int
}{
// name samples
{_Name, "x", 0, 0},
{_Name, "X123", 0, 0},
{_Name, "foo", 0, 0},
{_Name, "Foo123", 0, 0},
{_Name, "foo_bar", 0, 0},
{_Name, "_", 0, 0},
{_Name, "_foobar", 0, 0},
{_Name, "a۰۱۸", 0, 0},
{_Name, "foo६४", 0, 0},
{_Name, "bar", 0, 0},
{_Name, "ŝ", 0, 0},
{_Name, "ŝfoo", 0, 0},
// literal samples
{_Literal, "0", 0, 0},
{_Literal, "1", 0, 0},
{_Literal, "12345", 0, 0},
{_Literal, "123456789012345678890123456789012345678890", 0, 0},
{_Literal, "01234567", 0, 0},
{_Literal, "0x0", 0, 0},
{_Literal, "0xcafebabe", 0, 0},
{_Literal, "0.", 0, 0},
{_Literal, "0.e0", 0, 0},
{_Literal, "0.e-1", 0, 0},
{_Literal, "0.e+123", 0, 0},
{_Literal, ".0", 0, 0},
{_Literal, ".0E00", 0, 0},
{_Literal, ".0E-0123", 0, 0},
{_Literal, ".0E+12345678901234567890", 0, 0},
{_Literal, ".45e1", 0, 0},
{_Literal, "3.14159265", 0, 0},
{_Literal, "1e0", 0, 0},
{_Literal, "1e+100", 0, 0},
{_Literal, "1e-100", 0, 0},
{_Literal, "2.71828e-1000", 0, 0},
{_Literal, "0i", 0, 0},
{_Literal, "1i", 0, 0},
{_Literal, "012345678901234567889i", 0, 0},
{_Literal, "123456789012345678890i", 0, 0},
{_Literal, "0.i", 0, 0},
{_Literal, ".0i", 0, 0},
{_Literal, "3.14159265i", 0, 0},
{_Literal, "1e0i", 0, 0},
{_Literal, "1e+100i", 0, 0},
{_Literal, "1e-100i", 0, 0},
{_Literal, "2.71828e-1000i", 0, 0},
{_Literal, "'a'", 0, 0},
{_Literal, "'\\000'", 0, 0},
{_Literal, "'\\xFF'", 0, 0},
{_Literal, "'\\uff16'", 0, 0},
{_Literal, "'\\U0000ff16'", 0, 0},
{_Literal, "`foobar`", 0, 0},
{_Literal, "`foo\tbar`", 0, 0},
{_Literal, "`\r`", 0, 0},
// operators
{_Operator, "||", OrOr, precOrOr},
{_Operator, "&&", AndAnd, precAndAnd},
{_Operator, "==", Eql, precCmp},
{_Operator, "!=", Neq, precCmp},
{_Operator, "<", Lss, precCmp},
{_Operator, "<=", Leq, precCmp},
{_Operator, ">", Gtr, precCmp},
{_Operator, ">=", Geq, precCmp},
{_Operator, "+", Add, precAdd},
{_Operator, "-", Sub, precAdd},
{_Operator, "|", Or, precAdd},
{_Operator, "^", Xor, precAdd},
{_Star, "*", Mul, precMul},
{_Operator, "/", Div, precMul},
{_Operator, "%", Rem, precMul},
{_Operator, "&", And, precMul},
{_Operator, "&^", AndNot, precMul},
{_Operator, "<<", Shl, precMul},
{_Operator, ">>", Shr, precMul},
// assignment operations
{_AssignOp, "+=", Add, precAdd},
{_AssignOp, "-=", Sub, precAdd},
{_AssignOp, "|=", Or, precAdd},
{_AssignOp, "^=", Xor, precAdd},
{_AssignOp, "*=", Mul, precMul},
{_AssignOp, "/=", Div, precMul},
{_AssignOp, "%=", Rem, precMul},
{_AssignOp, "&=", And, precMul},
{_AssignOp, "&^=", AndNot, precMul},
{_AssignOp, "<<=", Shl, precMul},
{_AssignOp, ">>=", Shr, precMul},
// other operations
{_IncOp, "++", Add, precAdd},
{_IncOp, "--", Sub, precAdd},
{_Assign, "=", 0, 0},
{_Define, ":=", 0, 0},
{_Arrow, "<-", 0, 0},
// delimiters
{_Lparen, "(", 0, 0},
{_Lbrack, "[", 0, 0},
{_Lbrace, "{", 0, 0},
{_Rparen, ")", 0, 0},
{_Rbrack, "]", 0, 0},
{_Rbrace, "}", 0, 0},
{_Comma, ",", 0, 0},
{_Semi, ";", 0, 0},
{_Colon, ":", 0, 0},
{_Dot, ".", 0, 0},
{_DotDotDot, "...", 0, 0},
// keywords
{_Break, "break", 0, 0},
{_Case, "case", 0, 0},
{_Chan, "chan", 0, 0},
{_Const, "const", 0, 0},
{_Continue, "continue", 0, 0},
{_Default, "default", 0, 0},
{_Defer, "defer", 0, 0},
{_Else, "else", 0, 0},
{_Fallthrough, "fallthrough", 0, 0},
{_For, "for", 0, 0},
{_Func, "func", 0, 0},
{_Go, "go", 0, 0},
{_Goto, "goto", 0, 0},
{_If, "if", 0, 0},
{_Import, "import", 0, 0},
{_Interface, "interface", 0, 0},
{_Map, "map", 0, 0},
{_Package, "package", 0, 0},
{_Range, "range", 0, 0},
{_Return, "return", 0, 0},
{_Select, "select", 0, 0},
{_Struct, "struct", 0, 0},
{_Switch, "switch", 0, 0},
{_Type, "type", 0, 0},
{_Var, "var", 0, 0},
}
func TestScanErrors(t *testing.T) {
for _, test := range []struct {
src, msg string
pos, line int
}{
// Note: Positions for lexical errors are the earliest position
// where the error is apparent, not the beginning of the respective
// token.
// rune-level errors
{"fo\x00o", "invalid NUL character", 2, 1},
{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 4, 2},
{"foo\n\n\xff ", "invalid UTF-8 encoding", 5, 3},
// token-level errors
{"x + ~y", "bitwise complement operator is ^", 4, 1},
{"foo$bar = 0", "illegal character U+0024 '$'", 3, 1},
{"const x = 0xyz", "malformed hex constant", 12, 1},
{"0123456789", "malformed octal constant", 10, 1},
{"0123456789. /* foobar", "comment not terminated", 12, 1}, // valid float constant
{"0123456789e0 /*\nfoobar", "comment not terminated", 13, 1}, // valid float constant
{"var a, b = 08, 07\n", "malformed octal constant", 13, 1},
{"(x + 1.0e+x)", "malformed floating-point constant exponent", 10, 1},
{`''`, "empty character literal or unescaped ' in character literal", 1, 1},
{"'\n", "newline in character literal", 1, 1},
{`'\`, "missing '", 2, 1},
{`'\'`, "missing '", 3, 1},
{`'\x`, "missing '", 3, 1},
{`'\x'`, "non-hex character in escape sequence: '", 3, 1},
{`'\y'`, "unknown escape sequence", 2, 1},
{`'\x0'`, "non-hex character in escape sequence: '", 4, 1},
{`'\00'`, "non-octal character in escape sequence: '", 4, 1},
{`'\377' /*`, "comment not terminated", 7, 1}, // valid octal escape
{`'\378`, "non-octal character in escape sequence: 8", 4, 1},
{`'\400'`, "octal escape value > 255: 256", 5, 1},
{`'xx`, "missing '", 2, 1},
{"\"\n", "newline in string", 1, 1},
{`"`, "string not terminated", 0, 1},
{`"foo`, "string not terminated", 0, 1},
{"`", "string not terminated", 0, 1},
{"`foo", "string not terminated", 0, 1},
{"/*/", "comment not terminated", 0, 1},
{"/*\n\nfoo", "comment not terminated", 0, 1},
{"/*\n\nfoo", "comment not terminated", 0, 1},
{`"\`, "string not terminated", 0, 1},
{`"\"`, "string not terminated", 0, 1},
{`"\x`, "string not terminated", 0, 1},
{`"\x"`, "non-hex character in escape sequence: \"", 3, 1},
{`"\y"`, "unknown escape sequence", 2, 1},
{`"\x0"`, "non-hex character in escape sequence: \"", 4, 1},
{`"\00"`, "non-octal character in escape sequence: \"", 4, 1},
{`"\377" /*`, "comment not terminated", 7, 1}, // valid octal escape
{`"\378"`, "non-octal character in escape sequence: 8", 4, 1},
{`"\400"`, "octal escape value > 255: 256", 5, 1},
{`s := "foo\z"`, "unknown escape sequence", 10, 1},
{`s := "foo\z00\nbar"`, "unknown escape sequence", 10, 1},
{`"\x`, "string not terminated", 0, 1},
{`"\x"`, "non-hex character in escape sequence: \"", 3, 1},
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 18, 1},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 18, 1},
// former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 11, 3},
} {
var s scanner
nerrors := 0
s.init(&bytesReader{[]byte(test.src)}, func(pos, line int, msg string) {
nerrors++
// only check the first error
if nerrors == 1 {
if msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
}
if pos != test.pos {
t.Errorf("%q: got pos = %d; want %d", test.src, pos, test.pos)
}
if line != test.line {
t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
}
} else if nerrors > 1 {
t.Errorf("%q: got unexpected %q at pos = %d, line = %d", test.src, msg, pos, line)
}
})
for {
s.next()
if s.tok == _EOF {
break
}
}
if nerrors == 0 {
t.Errorf("%q: got no error; want %q", test.src, test.msg)
}
}
}
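The digit loop in scanner.escape accumulates `x = x*base + d` and rejects any digit with `d >= base`, which is exactly how the `non-hex`/`non-octal` cases in the error table above arise. A minimal standalone sketch of that loop (the helper name is invented for illustration):

```go
package main

import "fmt"

// decodeNumericEscape mirrors the digit loop in scanner.escape: it
// consumes the given digits in the given base, accumulating
// x = x*base + d, and reports whether every digit was valid.
func decodeNumericEscape(digits string, base uint32) (uint32, bool) {
	var x uint32
	for i := 0; i < len(digits); i++ {
		c := digits[i]
		d := base // sentinel: stays >= base for non-digits
		switch {
		case '0' <= c && c <= '9':
			d = uint32(c) - '0'
		case 'a' <= c && c <= 'f':
			d = uint32(c) - ('a' - 10)
		case 'A' <= c && c <= 'F':
			d = uint32(c) - ('A' - 10)
		}
		if d >= base {
			return 0, false // e.g. '8' in an octal escape
		}
		x = x*base + d
	}
	return x, true
}

func main() {
	v, ok := decodeNumericEscape("41", 16) // as in "\x41"
	fmt.Println(v, ok)                     // 65 true ('A')
	v, ok = decodeNumericEscape("377", 8)  // as in "\377"
	fmt.Println(v, ok)                     // 255 true (the octal maximum)
	_, ok = decodeNumericEscape("378", 8)  // '8' is not an octal digit
	fmt.Println(ok)                        // false
}
```

This also shows why `'\400'` is rejected afterwards: the loop itself happily produces 256, and the `x > max` check for base 8 catches it.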

@@ -0,0 +1,177 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"io"
"unicode/utf8"
)
// buf [...read...|...|...unread...|s|...free...]
//          ^       ^  ^            ^
//          |       |  |            |
//         suf      r0 r            w
type source struct {
src io.Reader
errh ErrorHandler
// source buffer
buf [4 << 10]byte
offs int // source offset of buf
r0, r, w int // previous/current read and write buf positions, excluding sentinel
line0, line int // previous/current line
err error // pending io error
// literal buffer
lit []byte // literal prefix
suf int // literal suffix; suf >= 0 means we are scanning a literal
}
func (s *source) init(src io.Reader, errh ErrorHandler) {
s.src = src
s.errh = errh
s.buf[0] = utf8.RuneSelf // terminate with sentinel
s.offs = 0
s.r0, s.r, s.w = 0, 0, 0
s.line0, s.line = 1, 1
s.err = nil
s.lit = s.lit[:0]
s.suf = -1
}
func (s *source) error(msg string) {
s.error_at(s.pos0(), s.line0, msg)
}
func (s *source) error_at(pos, line int, msg string) {
if s.errh != nil {
s.errh(pos, line, msg)
return
}
panic(fmt.Sprintf("%d: %s", line, msg))
}
// pos0 returns the byte position of the last character read.
func (s *source) pos0() int {
return s.offs + s.r0
}
func (s *source) ungetr() {
s.r, s.line = s.r0, s.line0
}
func (s *source) getr() rune {
redo:
s.r0, s.line0 = s.r, s.line
// We could avoid at least one test that is always taken in the
// for loop below by duplicating the common case code (ASCII)
// here since we always have at least the sentinel (utf8.RuneSelf)
// in the buffer. Measure and optimize if necessary.
// make sure we have at least one rune in buffer, or we are at EOF
for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
s.fill() // s.w-s.r < len(s.buf) => buffer is not full
}
// common case: ASCII and enough bytes
// (invariant: s.buf[s.w] == utf8.RuneSelf)
if b := s.buf[s.r]; b < utf8.RuneSelf {
s.r++
if b == 0 {
s.error("invalid NUL character")
goto redo
}
if b == '\n' {
s.line++
}
return rune(b)
}
// EOF
if s.r == s.w {
if s.err != io.EOF {
s.error(s.err.Error())
}
return -1
}
// uncommon case: not ASCII
r, w := utf8.DecodeRune(s.buf[s.r:s.w])
s.r += w
if r == utf8.RuneError && w == 1 {
s.error("invalid UTF-8 encoding")
goto redo
}
// BOMs are only allowed as the first character in a file
const BOM = 0xfeff
if r == BOM {
if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
s.error("invalid BOM in the middle of the file")
}
goto redo
}
return r
}
func (s *source) fill() {
// Slide unread bytes to beginning but preserve last read char
// (for one ungetr call) plus one extra byte (for a 2nd ungetr
// call, only for ".." character sequence and float literals
// starting with ".").
if s.r0 > 1 {
// save literal prefix, if any
// (We see at most one ungetr call while reading
// a literal, so make sure s.r0 remains in buf.)
if s.suf >= 0 {
s.lit = append(s.lit, s.buf[s.suf:s.r0]...)
s.suf = 1 // == s.r0 after slide below
}
s.offs += s.r0 - 1
r := s.r - s.r0 + 1 // last read char plus one byte
s.w = r + copy(s.buf[r:], s.buf[s.r:s.w])
s.r = r
s.r0 = 1
}
// read more data: try a limited number of times
for i := 100; i > 0; i-- {
n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel
if n < 0 {
panic("negative read") // incorrect underlying io.Reader implementation
}
s.w += n
if n > 0 || err != nil {
s.buf[s.w] = utf8.RuneSelf // sentinel
if err != nil {
s.err = err
}
return
}
}
s.err = io.ErrNoProgress
}
func (s *source) startLit() {
s.suf = s.r0
s.lit = s.lit[:0] // reuse lit
}
func (s *source) stopLit() []byte {
lit := s.buf[s.suf:s.r]
if len(s.lit) > 0 {
lit = append(s.lit, lit...)
}
s.suf = -1 // no pending literal
return lit
}


@@ -0,0 +1,61 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"io"
"os"
)
type Mode uint
type ErrorHandler func(pos, line int, msg string)
// TODO(gri) These need a lot more work.
func ReadFile(filename string, errh ErrorHandler, mode Mode) (*File, error) {
src, err := os.Open(filename)
if err != nil {
return nil, err
}
defer src.Close()
return Read(src, errh, mode)
}
type bytesReader struct {
data []byte
}
func (r *bytesReader) Read(p []byte) (int, error) {
if len(r.data) > 0 {
n := copy(p, r.data)
r.data = r.data[n:]
return n, nil
}
return 0, io.EOF
}
func ReadBytes(src []byte, errh ErrorHandler, mode Mode) (*File, error) {
return Read(&bytesReader{src}, errh, mode)
}
func Read(src io.Reader, errh ErrorHandler, mode Mode) (*File, error) {
var p parser
p.init(src, errh)
p.next()
ast := p.file()
if errh == nil && p.nerrors > 0 {
return nil, fmt.Errorf("%d syntax errors", p.nerrors)
}
return ast, nil
}
func Write(w io.Writer, n *File) error {
panic("unimplemented")
}


@@ -0,0 +1,263 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import "fmt"
type token uint
const (
_ token = iota
_EOF
// names and literals
_Name
_Literal
// operators and operations
_Operator // excluding '*' (_Star)
_AssignOp
_IncOp
_Assign
_Define
_Arrow
_Star
// delimiters
_Lparen
_Lbrack
_Lbrace
_Rparen
_Rbrack
_Rbrace
_Comma
_Semi
_Colon
_Dot
_DotDotDot
// keywords
_Break
_Case
_Chan
_Const
_Continue
_Default
_Defer
_Else
_Fallthrough
_For
_Func
_Go
_Goto
_If
_Import
_Interface
_Map
_Package
_Range
_Return
_Select
_Struct
_Switch
_Type
_Var
tokenCount
)
const (
// for BranchStmt
Break = _Break
Continue = _Continue
Fallthrough = _Fallthrough
Goto = _Goto
// for CallStmt
Go = _Go
Defer = _Defer
)
var tokstrings = [...]string{
// source control
_EOF: "EOF",
// names and literals
_Name: "name",
_Literal: "literal",
// operators and operations
_Operator: "op",
_AssignOp: "op=",
_IncOp: "opop",
_Assign: "=",
_Define: ":=",
_Arrow: "<-",
_Star: "*",
// delimiters
_Lparen: "(",
_Lbrack: "[",
_Lbrace: "{",
_Rparen: ")",
_Rbrack: "]",
_Rbrace: "}",
_Comma: ",",
_Semi: ";",
_Colon: ":",
_Dot: ".",
_DotDotDot: "...",
// keywords
_Break: "break",
_Case: "case",
_Chan: "chan",
_Const: "const",
_Continue: "continue",
_Default: "default",
_Defer: "defer",
_Else: "else",
_Fallthrough: "fallthrough",
_For: "for",
_Func: "func",
_Go: "go",
_Goto: "goto",
_If: "if",
_Import: "import",
_Interface: "interface",
_Map: "map",
_Package: "package",
_Range: "range",
_Return: "return",
_Select: "select",
_Struct: "struct",
_Switch: "switch",
_Type: "type",
_Var: "var",
}
func (tok token) String() string {
var s string
if 0 <= tok && int(tok) < len(tokstrings) {
s = tokstrings[tok]
}
if s == "" {
s = fmt.Sprintf("<tok-%d>", tok)
}
return s
}
// Make sure we have at most 64 tokens so we can use them in a set.
const _ uint64 = 1 << (tokenCount - 1)
// contains reports whether tok is in tokset.
func contains(tokset uint64, tok token) bool {
return tokset&(1<<tok) != 0
}
type LitKind uint
const (
IntLit LitKind = iota
FloatLit
ImagLit
RuneLit
StringLit
)
type Operator uint
const (
_ Operator = iota
Def // :=
Not // !
Recv // <-
// precOrOr
OrOr // ||
// precAndAnd
AndAnd // &&
// precCmp
Eql // ==
Neq // !=
Lss // <
Leq // <=
Gtr // >
Geq // >=
// precAdd
Add // +
Sub // -
Or // |
Xor // ^
// precMul
Mul // *
Div // /
Rem // %
And // &
AndNot // &^
Shl // <<
Shr // >>
)
var opstrings = [...]string{
// prec == 0
Def: ":", // : in :=
Not: "!",
Recv: "<-",
// precOrOr
OrOr: "||",
// precAndAnd
AndAnd: "&&",
// precCmp
Eql: "==",
Neq: "!=",
Lss: "<",
Leq: "<=",
Gtr: ">",
Geq: ">=",
// precAdd
Add: "+",
Sub: "-",
Or: "|",
Xor: "^",
// precMul
Mul: "*",
Div: "/",
Rem: "%",
And: "&",
AndNot: "&^",
Shl: "<<",
Shr: ">>",
}
func (op Operator) String() string {
var s string
if 0 <= op && int(op) < len(opstrings) {
s = opstrings[op]
}
if s == "" {
s = fmt.Sprintf("<op-%d>", op)
}
return s
}
// Operator precedences
const (
_ = iota
precOrOr
precAndAnd
precCmp
precAdd
precMul
)


@@ -36,9 +36,10 @@ var bootstrapDirs = []string{
"compile/internal/gc",
"compile/internal/mips64",
"compile/internal/ppc64",
"compile/internal/ssa",
"compile/internal/x86",
"compile/internal/s390x",
"compile/internal/ssa",
"compile/internal/syntax",
"compile/internal/x86",
"internal/bio",
"internal/gcprog",
"internal/obj",


@@ -1,4 +1,4 @@
// errorcheck
// errorcheck -newparser=0
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
@@ -7,6 +7,9 @@
// Test an internal compiler error on ? symbol in declaration
// following an empty import.
// TODO(mdempsky): Update for new parser. New parser recovers more
// gracefully and doesn't trigger the "cannot declare name" error.
package a
import"" // ERROR "import path is empty"
var? // ERROR "illegal character U\+003F '\?'"


@@ -1,4 +1,4 @@
// errorcheckoutput
// errorcheckoutput -newparser=0
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
@@ -6,6 +6,10 @@
// Test source files and strings containing NUL and invalid UTF-8.
// TODO(mdempsky): Update error expectations for -newparser=1. The new
// lexer skips over NUL and invalid UTF-8 sequences, so they don't emit
// "illegal character" or "invalid identifier character" errors.
package main
import (
@@ -53,4 +57,3 @@ var z` + "\xc1\x81" + ` int // ERROR "UTF-8" "invalid identifier character"
`)
}


@@ -11,11 +11,11 @@ package main
func f() {
switch {
case 0; // ERROR "expecting := or = or : or comma"
case 0; // ERROR "expecting := or = or : or comma|expecting :"
}
switch {
case 0; // ERROR "expecting := or = or : or comma"
case 0; // ERROR "expecting := or = or : or comma|expecting :"
default:
}


@@ -1,9 +1,13 @@
// errorcheck
// errorcheck -newparser=0
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// TODO(mdempsky): Update for new parser or delete.
// Like go/parser, the new parser doesn't specially recognize
// send statements misused in an expression context.
package main
var c chan int


@@ -1,14 +1,17 @@
// errorcheck
// errorcheck -newparser=0
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// TODO(mdempsky): Update error expectations for new parser.
// The new parser emits an extra "missing { after for clause" error.
// The old parser is supposed to emit this too, but it panics first
// due to a nil pointer dereference.
package main
func main() {
for x // GCCGO_ERROR "undefined"
{ // ERROR "missing .*{.* after for clause|missing operand"
z // GCCGO_ERROR "undefined"