// Copyright 2014 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// X86map constructs the x86 opcode map from the instruction set CSV file.
//
// Usage:
//
//	x86map [-fmt=format] x86.csv
//
// The known output formats are:
//
//	text (default) - print decoding tree in text form
//	decoder - print decoding tables for the x86asm package
//	scanner - print scanning tables for x86scan package
package main

import (
	"bufio"
	"bytes"
	"encoding/csv"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"sort"
	"strconv"
	"strings"
)

// format is the -fmt flag selecting the output format. main maps its
// value to one of the print functions and rejects anything else.
var format = flag.String("fmt", "text", "output format: text, decoder, scanner")

// inputFile is the CSV path from the command line. It is kept in a
// package variable so that printDecoder can name it in the header of
// the generated code.
var inputFile string

// usage writes the expected command line to standard error and
// terminates the program with exit status 2.
func usage() {
	const synopsis = "usage: x86map [-fmt=format] x86.csv\n"
	fmt.Fprint(os.Stderr, synopsis)
	os.Exit(2)
}

// main parses the command line, resolves the printer for the requested
// output format, builds the decoding tree from the CSV file, and prints it.
func main() {
	log.SetFlags(0)
	log.SetPrefix("x86map: ")

	flag.Usage = usage
	flag.Parse()
	if flag.NArg() != 1 {
		usage()
	}
	inputFile = flag.Arg(0)

	// Resolve the printer before reading the input so that an unknown
	// format is rejected without touching the file.
	printers := map[string]func(*Prog){
		"text":    printText,
		"decoder": printDecoder,
		"scanner": printScanner,
	}
	emit, known := printers[*format]
	if !known {
		log.Fatalf("unknown output format %q", *format)
	}

	prog, err := readCSV(inputFile)
	if err != nil {
		log.Fatal(err)
	}

	//prog = mergeTail(prog)

	emit(prog)
}

// readCSV reads the CSV file and returns the corresponding Prog.
// It may print details about problems to standard error using the log package.
//
// Leading blank lines and lines starting with '#' are skipped before the
// remainder is handed to the CSV parser. Every row must have at least six
// columns; the first six are passed to add in order. An error is returned
// if the file cannot be opened, cannot be parsed, is empty, or is too narrow.
func readCSV(file string) (*Prog, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	// The file is only read, so an error from Close carries no information.
	defer f.Close()

	// Skip leading blank and # comment lines.
	b := bufio.NewReader(f)
	for {
		c, err := b.ReadByte()
		if err != nil {
			// Nothing but blank/comment lines; the CSV reader below
			// will see an empty input and we report that.
			break
		}
		if c == '\n' {
			continue
		}
		if c == '#' {
			// A read error here (typically EOF on an unterminated
			// comment) surfaces again on the next ReadByte.
			b.ReadBytes('\n')
			continue
		}
		// First byte of real data: push it back for the CSV reader.
		b.UnreadByte()
		break
	}

	table, err := csv.NewReader(b).ReadAll()
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %v", file, err)
	}
	if len(table) == 0 {
		return nil, fmt.Errorf("empty csv input")
	}
	// csv.Reader rejects rows whose width differs from the first row,
	// so checking the first row covers the whole table.
	if len(table[0]) < 6 {
		return nil, fmt.Errorf("csv too narrow: need at least six columns")
	}

	p := &Prog{}
	for _, row := range table {
		add(p, row[0], row[1], row[2], row[3], row[4], row[5])
	}

	check(p)

	return p, nil
}

// A Prog is a single node in the tree representing the instruction format.
// Collectively the tree of nodes form a kind of program for decoding.
// Each Prog has a single action, identifying the kind of node it is,
// and then children to be executed according to the action.
// For example, the Prog with Action="decode" has children named for each
// possible next byte in the input, and those children are the decoding
// tree to execute for the corresponding bytes.
type Prog struct {
	// Path describes how the node was reached from the root: walk builds
	// it as the parent's Path followed by "|action key". It is used in
	// diagnostics.
	Path string
	// Action is the kind of node ("decode", "is64", "prefix", ...).
	// It is empty until the first walk through the node records one.
	Action string
	// Child maps each key of the action to the subtree for that key.
	// It is nil until the first child is added.
	Child map[string]*Prog
	// PC is the program counter assigned by printDecoderPass;
	// zero means no PC has been assigned yet.
	PC int
	// TailID is assigned by mergeTail to identify canonical subtrees.
	TailID int
}

// keys reports the names of p's children in ascending string order.
// A node without children yields a nil slice.
func (p *Prog) keys() []string {
	n := len(p.Child)
	if n == 0 {
		return nil
	}
	names := make([]string, 0, n)
	for name := range p.Child {
		names = append(names, name)
	}
	sort.Strings(names)
	return names
}

// findChildLeaf descends from p, always taking the child with the
// smallest key, until it reaches a node without children, and returns
// that node's Path. The result serves as an example in error messages
// of where the subtree rooted at p leads.
func (p *Prog) findChildLeaf() string {
	node := p
	for len(node.Child) > 0 {
		first := node.keys()[0]
		node = node.Child[first]
	}
	return node.Path
}

// walk steps from p along the edge labelled key under the given action
// and returns the node at the other end.
//
// A node carries exactly one action. The first walk through a node fixes
// its action; a later walk with a different action is a conflict, which
// is logged (text and opcode identify the offending instruction) and
// answered with a fresh node that is not attached to the tree.
// When the action agrees, the existing child for key is returned if
// there is one; otherwise a new child is created, linked in, and returned.
func (p *Prog) walk(action, key, text, opcode string) *Prog {
	if p.Action == "" {
		p.Action = action
	}
	if p.Action != action {
		log.Printf("%s; %s: conflicting paths %s and %s|%s %s\n", text, opcode, p.findChildLeaf(), p.Path, action, key)
		return new(Prog)
	}

	if existing := p.Child[key]; existing != nil {
		return existing
	}

	if p.Child == nil {
		p.Child = make(map[string]*Prog)
	}
	child := &Prog{Path: fmt.Sprintf("%s|%s %s", p.Path, action, key)}
	p.Child[key] = child
	return child
}

// add adds a single instruction to the tree rooted at root.
// The string arguments match the CSV: instruction mnemonic,
// opcode encoding, validity in 32- and 64-bit modes, CPUID
// feature set (ignored), and additional tags.
//
// In effect, add adds a new path through the tree leading to
// the given instruction, but it reuses as much of the existing
// tree structure as possible. For example if there have already
// been instructions added starting with 0F and this instruction
// also starts with 0F, that 0F subtree node is reused instead of
// allocating a parallel one. To maximize the reuse, the check action
// sequence along the path being added is the same for every instruction:
// encoding pieces needed to make a decision, 64-bit mode check,
// rex check, prefix check, address size check, data size check,
// register vs memory argument check. Once all those checks have
// been applied, the assumption is that we have uniquely identified
// an instruction, and at that point it is okay to diverge from the
// uniform pattern to set the opcode and read the specific arguments
// corresponding to the instruction at hand.
//
// The maximal reuse of the existing tree means that the tree
// resulting once all adds have been done amounts to a decision tree.
// There is one detail that makes it non-deterministic: some checks
// do not matter to some instructions and those are recorded as "any" keys.
// If you are decoding and there is a key for the specific thing you are
// seeing as well as the "any" key, both must be considered. To avoid
// adding complexity to the decoder execution, the 'check' function
// removes this case by merging "any" trees into specific keys when
// present.
//
// Problems with an individual instruction are reported with log.Printf
// and the instruction is skipped or added partially; an unrecognized
// tag containing "operand" is fatal.
func add(root *Prog, text, opcode, valid32, valid64, cpuid, tags string) {
	// These are not real instructions: they are either
	// prefixes for other instructions, composite instructions
	// built from multiple individual instructions, or alternate
	// mnemonics of other encodings.
	// Discard for disassembly, because we want a unique decoding.
	if strings.Contains(tags, "pseudo") {
		return
	}

	// Treat REX.W + opcode as being like having an "operand64" tag.
	// The REX.W flag sets the operand size to 64 bits; in this way it is
	// not much different than the 66 prefix that inverts 32 vs 16 bits.
	if strings.Contains(opcode, "REX.W") {
		if !strings.Contains(tags, "operand64") {
			if tags != "" {
				tags += ","
			}
			tags += "operand64"
		}
	}

	// If there is more than one operand size given, we need to do
	// a separate add for each size, because we need multiple
	// keys to be added in the operand size branch, and the code makes
	// a linear pass through the tree adding just one key to each node.
	// We would need to do the same for any other possible repeated tag
	// (for example, if an instruction could have multiple address sizes)
	// but so far operand size is the only tag we have needed to repeat.
	if strings.Count(tags, "operand") > 1 {
		f := strings.Split(tags, ",")
		var ops []string
		// Filter f in place: operand tags move to ops, everything else
		// is compacted to the front of f.
		w := 0
		for _, tag := range f {
			if strings.HasPrefix(tag, "operand") {
				ops = append(ops, tag)
			} else {
				if strings.Contains(tag, "operand") {
					log.Fatalf("unknown tag %q", tag)
				}
				f[w] = tag
				w++
			}
		}
		f = f[:w]
		for _, op := range ops {
			// append may write into f's spare capacity, but the joined
			// string is built before the next iteration reuses the slot.
			add(root, text, opcode, valid32, valid64, cpuid, strings.Join(append(f, op), ","))
		}
		return
	}

	// p is the current position in the tree; walk advances it one step.
	p := root
	walk := func(action, item string) {
		p = p.walk(action, item, text, opcode)
	}

	// Ignore VEX instructions for now, except for a small allow-list
	// of mnemonics in their VEX.256 form (and VZEROUPPER).
	if strings.HasPrefix(opcode, "VEX") {
		if !strings.HasPrefix(text, "VMOVNTDQ") &&
			!strings.HasPrefix(text, "VMOVDQA") &&
			!strings.HasPrefix(text, "VMOVDQU") &&
			!strings.HasPrefix(text, "VZEROUPPER") {
			return
		}
		if !strings.HasPrefix(opcode, "VEX.256") && !strings.HasPrefix(text, "VZEROUPPER") {
			return
		}
		// Each accepted VEX instruction is added twice: once tagged
		// VEXC4 (via this recursive call) and once without the tag,
		// which takes the C5 branch below.
		if !strings.Contains(tags, "VEXC4") {
			add(root, text, opcode, valid32, valid64, cpuid, tags+",VEXC4")
		}
		encoding := strings.Fields(opcode)
		walk("decode", encoding[1])
		walk("is64", "any")
		if strings.Contains(tags, "VEXC4") {
			walk("prefix", "C4")
		} else {
			walk("prefix", "C5")
		}
		for _, pref := range strings.Split(encoding[0], ".") {
			if isVexEncodablePrefix[pref] {
				walk("prefix", pref)
			}
		}
		// Execution continues below with p positioned after the VEX
		// prefix checks.
	}

	// Peel REX and legacy prefix tokens off the front of the encoding.
	// REX may appear before or after the prefix byte, optionally
	// followed by a "+" token.
	var rex, prefix string
	encoding := strings.Fields(opcode)
	if len(encoding) > 0 && strings.HasPrefix(encoding[0], "REX") {
		rex = encoding[0]
		encoding = encoding[1:]
		if len(encoding) > 0 && encoding[0] == "+" {
			encoding = encoding[1:]
		}
	}
	if len(encoding) > 0 && isPrefix[encoding[0]] {
		prefix = encoding[0]
		encoding = encoding[1:]
	}
	if rex == "" && len(encoding) > 0 && strings.HasPrefix(encoding[0], "REX") {
		rex = encoding[0]
		if rex == "REX" {
			log.Printf("REX without REX.W: %s %s", text, opcode)
		}
		encoding = encoding[1:]
		if len(encoding) > 0 && encoding[0] == "+" {
			encoding = encoding[1:]
		}
	}
	if len(encoding) > 0 && isPrefix[encoding[0]] {
		log.Printf("%s %s: too many prefixes", text, opcode)
		return
	}

	// Consume the leading opcode bytes and /n modrm checks, adding a
	// "decode" step for each. A "XX+foo" byte is keyed as "XX+".
	var haveModRM, havePlus bool
	var usedReg string
	for len(encoding) > 0 && (isHex(encoding[0]) || isSlashNum(encoding[0])) {
		key := encoding[0]
		if isSlashNum(key) {
			if usedReg != "" {
				log.Printf("%s %s: multiple modrm checks", text, opcode)
			}
			haveModRM = true
			usedReg = key
		}
		if i := strings.Index(key, "+"); i >= 0 {
			key = key[:i+1]
			havePlus = true
		}
		walk("decode", key)
		encoding = encoding[1:]
	}

	// From here on the checks are applied in the same fixed order for
	// every instruction so that paths share as many nodes as possible.
	if valid32 != "V" {
		walk("is64", "1")
	} else if valid64 != "V" {
		walk("is64", "0")
	} else {
		walk("is64", "any")
	}

	// "0" stands for "no prefix".
	if prefix == "" {
		prefix = "0"
	}
	walk("prefix", prefix)

	if strings.Contains(tags, "address16") {
		walk("addrsize", "16")
	} else if strings.Contains(tags, "address32") {
		walk("addrsize", "32")
	} else if strings.Contains(tags, "address64") {
		walk("addrsize", "64")
	} else {
		walk("addrsize", "any")
	}

	if strings.Contains(tags, "operand16") {
		walk("datasize", "16")
	} else if strings.Contains(tags, "operand32") {
		walk("datasize", "32")
	} else if strings.Contains(tags, "operand64") {
		walk("datasize", "64")
	} else {
		walk("datasize", "any")
	}

	if len(encoding) > 0 && encoding[0] == "/r" {
		haveModRM = true
	}
	if haveModRM {
		if strings.Contains(tags, "modrm_regonly") {
			walk("ismem", "0")
		} else if strings.Contains(tags, "modrm_memonly") {
			walk("ismem", "1")
		} else {
			walk("ismem", "any")
		}
	}

	// The instruction is now considered identified: record its mnemonic.
	walk("op", strings.Fields(text)[0])

	// Every remaining encoding token becomes a "read" step. For a VEX
	// encoding the first two tokens (the VEX descriptor and the opcode
	// byte, already handled above) are skipped.
	if len(encoding) > 0 && strings.HasPrefix(encoding[0], "VEX") {
		for _, field := range encoding[2:] {
			walk("read", field)
		}
	} else {
		for _, field := range encoding {
			walk("read", field)
		}
	}

	// Each argument in the instruction text becomes an "arg" step, after
	// checking that the modrm reg and r/m fields are available and are
	// each claimed at most once. Arguments that fail a check are logged
	// and left out.
	var usedRM string
	for _, arg := range strings.Fields(text)[1:] {
		arg = strings.TrimRight(arg, ",")
		if usesReg[arg] && !haveModRM && !havePlus {
			log.Printf("%s %s: no modrm field to use for %s", text, opcode, arg)
			continue
		}
		if usesRM[arg] && !haveModRM {
			log.Printf("%s %s: no modrm field to use for %s", text, opcode, arg)
			continue
		}
		if usesReg[arg] {
			// usedReg may already hold a /n token from the decode loop.
			if usedReg != "" {
				log.Printf("%s %s: modrm reg field used by both %s and %s", text, opcode, usedReg, arg)
				continue
			}
			usedReg = arg
		}
		if usesRM[arg] {
			if usedRM != "" {
				log.Printf("%s %s: modrm r/m field used by both %s and %s", text, opcode, usedRM, arg)
				continue
			}
			usedRM = arg
		}
		walk("arg", arg)
	}

	walk("match", "!")
}

// allKeys records the list of all possible child keys for actions that support "any".
// check uses it to give every specific key a subtree when an "any"
// subtree is expanded; an action missing from this map is reported there.
var allKeys = map[string][]string{
	"is64":     {"0", "1"},
	"ismem":    {"0", "1"},
	"addrsize": {"16", "32", "64"},
	"datasize": {"16", "32", "64"},
}

// check validates the program tree rooted at p and normalizes its
// "any" keys.
//
// After check, a node either has "any" as its only child (the check is
// a no-op) or has no "any" child at all. Where "any" coexists with
// specific keys, the "any" subtree is merged into each specific sibling,
// installed under every key of the action's key space that is still
// missing, and then removed. See the discussion of "any" in the comment
// for add.
//
// check also logs nodes whose action ("op", "read", "arg") allows only
// one child but that ended up with several.
func check(p *Prog) {
	if wild := p.Child["any"]; wild != nil && len(p.Child) > 1 {
		for _, key := range p.keys() {
			if key == "any" {
				continue
			}
			mergeCopy(p.Child[key], wild)
		}

		space := allKeys[p.Action]
		if space == nil {
			log.Printf("%s: unknown key space for %s=any", p.Path, p.Action)
		}
		for _, key := range space {
			if p.Child[key] == nil {
				p.Child[key] = wild
			}
		}
		delete(p.Child, "any")
	}

	for _, child := range p.Child {
		check(child)
	}

	if len(p.Child) > 1 {
		switch p.Action {
		case "op", "read", "arg":
			log.Printf("%s: multiple children for action=%s: %v", p.Path, p.Action, p.keys())
		}
	}
}

// mergeCopy folds the tree rooted at src into dst.
//
// Children of src that dst lacks are shared by reference rather than
// copied, which turns the program into a dag. That is acceptable
// because mergeCopy runs only after the last path has been added.
// Children present on both sides are merged recursively. Nodes whose
// actions differ cannot be merged; the mismatch is logged and dst is
// left unchanged.
func mergeCopy(dst, src *Prog) {
	if src.Action != dst.Action {
		log.Printf("cannot merge %s|%s and %s|%s", dst.Path, dst.Action, src.Path, src.Action)
		return
	}

	for _, key := range src.keys() {
		from := src.Child[key]
		if into := dst.Child[key]; into != nil {
			// Both sides have this key: descend.
			mergeCopy(into, from)
			continue
		}
		// Only src has this key: cross-link its subtree.
		dst.Child[key] = from
	}
}

// set splits all on white space and returns a map in which every
// resulting word is present with the value true.
func set(all string) map[string]bool {
	words := strings.Fields(all)
	members := make(map[string]bool, len(words))
	for i := range words {
		members[words[i]] = true
	}
	return members
}

// isPrefix records the x86 opcode prefix bytes.
// add uses it to recognize a prefix token at the front of an encoding.
var isPrefix = set(`
	26
	2E
	36
	3E
	64
	65
	66
	67
	F0
	F2
	F3
`)

// usesReg records the argument codes that use the modrm reg field.
var usesReg = set(`
	r8
	r16
	r32
	r64
`)

// usesRM records the argument codes that use the modrm r/m field.
var usesRM = set(`
	r/m8
	r/m16
	r/m32
	r/m64
`)

// isVexEncodablePrefix records the dot-separated components of a VEX
// encoding descriptor that add turns into "prefix" checks; all other
// components of the descriptor are ignored there.
var isVexEncodablePrefix = set(`
	0F
	0F38
	0F3A
	66
	F3
	F2
`)

// isHex reports whether s consists of exactly two hexadecimal digits,
// optionally followed by a "+" and an arbitrary suffix (as in "40+rd").
// Only the digits 0-9 and the upper case letters A-F are accepted.
func isHex(s string) bool {
	digits := s
	if plus := strings.IndexByte(s, '+'); plus >= 0 {
		digits = s[:plus]
	}
	if len(digits) != 2 {
		return false
	}
	for _, c := range []byte(digits) {
		isDigit := '0' <= c && c <= '9'
		isUpperHex := 'A' <= c && c <= 'F'
		if !isDigit && !isUpperHex {
			return false
		}
	}
	return true
}

// isSlashNum reports whether s has the form /n with n a single digit
// in the range 0 through 7.
func isSlashNum(s string) bool {
	if len(s) != 2 || s[0] != '/' {
		return false
	}
	digit := s[1]
	return digit >= '0' && digit <= '7'
}

// mergeTail is supposed to merge common subtrees (program tails),
// reducing the size of the final program code.
// It identifies the subtrees using a bottom-up canonicalization.
//
// The emitted map records the canonical node for each distinct
// (action, child keys, child TailIDs) signature; passing nil starts
// a fresh map. The return value is the node to use in place of p.
//
// THIS CODE DOES NOT WORK. IT NEEDS TO BE DEBUGGED.
// The only call site, in main, is commented out.
func mergeTail(p *Prog, emitted map[string]*Prog) *Prog {
	if emitted == nil {
		emitted = make(map[string]*Prog)
	}

	if p.Action == "match" {
		return p
	}

	// Canonicalize the children first (bottom-up).
	for _, key := range p.keys() {
		p.Child[key] = mergeTail(p.Child[key], emitted)
	}

	// Determine whether every child is an "op" node with the same
	// single opcode; op stays empty if not.
	op := ""
	for _, key := range p.keys() {
		q := p.Child[key]
		if q.Action != "op" || len(q.Child) > 1 {
			op = ""
			break
		}
		// NOTE(review): q.keys() is empty when q has no children, in
		// which case this index would panic.
		qop := q.keys()[0]
		if op == "" {
			op = qop
		} else if op != qop {
			op = ""
			break
		}
	}

	if op != "" {
		// Pull 'op x' up above the discriminator.
		p1 := new(Prog)
		*p1 = *p
		// NOTE(review): the struct copy above shares the Child map, so
		// these writes also go through p.Child until p.Child is
		// replaced below.
		for _, key := range p.keys() {
			p1.Child[key] = p.Child[key].Child[op]
		}
		p.Action = "op"
		p.Child = map[string]*Prog{op: p1}
	}

	// Build the signature of p from its action and its children's IDs.
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "%s\n", p.Action)
	for _, key := range p.keys() {
		fmt.Fprintf(&buf, "%s %d\n", key, p.Child[key].TailID)
	}
	key := buf.String()

	if q := emitted[key]; q != nil {
		return q
	}
	emitted[key] = p
	p.TailID = len(emitted)
	return p
}

// printText writes the tree rooted at p to standard output in its
// indented, non-compact textual form.
func printText(p *Prog) {
	const (
		rootDepth = 0
		compact   = false
	)
	printTree(os.Stdout, p, rootDepth, compact)
}

// tabs is a run of spaces from which printTree takes its indentation:
// four spaces per level, enough for 100 levels.
var tabs = strings.Repeat("    ", 100)

// printTree writes the subtree rooted at p to w. A node's action is
// indented by depth levels, each child key is printed one level deeper,
// and the child's own subtree one level deeper again.
//
// In compact mode a chain of nodes that each have exactly one child is
// printed on a single line as "action key action key ...", and the
// regular layout resumes at the first node that does not have exactly
// one child.
func printTree(w io.Writer, p *Prog, depth int, compact bool) {
	indent := 4 * depth
	if !compact || len(p.Child) != 1 {
		fmt.Fprintf(w, "%.*s%s\n", indent, tabs, p.Action)
	} else {
		fmt.Fprintf(w, "%.*s%s", indent, tabs, p.Action)
		// Follow the single-child chain, leaving p at its end.
		for len(p.Child) == 1 {
			only := p.keys()[0]
			next := p.Child[only]
			fmt.Fprintf(w, " %s %s", only, next.Action)
			p = next
		}
		fmt.Fprintf(w, "\n")
	}

	keyIndent := indent + 4
	for _, key := range p.keys() {
		fmt.Fprintf(w, "%.*s%s\n", keyIndent, tabs, key)
		printTree(w, p.Child[key], depth+2, compact)
	}
}

// printDecoder writes Go source for the x86asm decoder tables to
// standard output: the decoder program array, the Op constants,
// maxOp, and the opNames table.
//
// The program is traversed twice. The first traversal prints nothing
// and only assigns a PC to every node (and collects the opcode names);
// the second prints the program, using those PCs as jump targets where
// the program is a dag rather than a tree.
func printDecoder(p *Prog) {
	// PAUSE is listed unconditionally, in addition to whatever
	// opcodes the traversal finds.
	seen := map[string]bool{"PAUSE": true}

	// Pass 1: compute PCs only.
	printDecoderPass(p, 1, false, seen)

	// Pass 2: emit the program. Slot 0 holds xFail.
	fmt.Printf("// Code generated by x86map -fmt=decoder %s DO NOT EDIT.\n\n", inputFile)
	fmt.Print("package x86asm\n\n")
	fmt.Print("var decoder = [...]uint16{\n\tuint16(xFail),\n")
	printDecoderPass(p, 1, true, seen)
	fmt.Print("}\n\n")

	names := make([]string, 0, len(seen))
	for name := range seen {
		names = append(names, name)
	}
	sort.Strings(names)

	fmt.Print("const (\n\t_ Op = iota\n\n")
	for _, name := range names {
		fmt.Printf("\t%s\n", name)
	}
	fmt.Print(")\n\n")

	// maxOp is the last name in sorted order.
	maxOp := ""
	if len(names) > 0 {
		maxOp = names[len(names)-1]
	}
	fmt.Printf("const maxOp = %s\n\n", maxOp)

	fmt.Print("var opNames = [...]string{\n")
	for _, name := range names {
		fmt.Printf("\t%[1]s: \"%[1]s\",\n", name)
	}
	fmt.Print("}\n")
}

// printScanner prints the decoding table for a scanner.
// The scanner can identify instruction boundaries but does not do
// full decoding. It is meant to be lighter weight than the x86asm
// decoder tables.
//
// The tree is first simplified in place by walkScanTree and compiled
// into a flat []uint16 program by emitScanFunc. The program is then
// printed to standard output as the Go variable scanProg, one
// instruction per group of lines, each prefixed by its offset in a
// comment. Every emitted line ends in a comma so that the output is a
// valid composite literal. An unknown opcode in the compiled program
// is fatal.
func printScanner(p *Prog) {
	walkScanTree(p, -1)
	var out []uint16
	out = append(out, 0) // offset 0 is the dead state
	emitScanFunc(p, &out)
	fmt.Printf("var scanProg = []uint16{\n")
	fmt.Printf("\t/*0*/ 0, // dead\n")
	// Each case advances i past the instruction's operands; the loop
	// increment then steps over the opcode itself.
	for i := 1; i < len(out); i++ {
		fmt.Printf("\t/*%d*/ ", i)
		switch out[i] {
		default:
			log.Fatalf("malformed program %#x", out[i])
		case scanMatch:
			fmt.Printf("scanMatch,\n")
		case scanJump:
			// Operand: jump target.
			fmt.Printf("scanJump, %d,\n", out[i+1])
			i++
		case scanSwitchByte:
			// Operands: 256 targets, printed eight per line.
			fmt.Printf("scanSwitchByte,\n")
			for j := 0; j < 256/8; j++ {
				fmt.Printf("\t")
				fmt.Printf("/* %#02x-%#02x */", j*8, j*8+7)
				for k := 0; k < 8; k++ {
					fmt.Printf(" %d,", out[i+1+j*8+k])
				}
				fmt.Printf("\n")
			}
			i += 256
		case scanSwitchSlash:
			// Operands: count n, n (byte, target) pairs, 8 /n targets.
			fmt.Printf("scanSwitchSlash, %d,\n", out[i+1])
			n := int(out[i+1])
			for j := 0; j < n; j++ {
				fmt.Printf("\t/* byte */ %#x, %d,\n", out[i+2+2*j], out[i+2+2*j+1])
			}
			for j := 0; j < 8; j++ {
				fmt.Printf("\t/* /%d */ %d,\n", j, out[i+2+2*n+j])
			}
			i += 1 + 2*n + 8
		case scanSwitchPrefix:
			// Operands: count n, n (prefix, target) pairs.
			fmt.Printf("scanSwitchPrefix, %d,\n", out[i+1])
			n := int(out[i+1])
			for j := 0; j < n; j++ {
				fmt.Printf("\t/* prefix */ %#x, %d,\n", out[i+2+2*j], out[i+2+2*j+1])
			}
			i += 1 + 2*n
		case scanSwitchIs64:
			// Operands: targets for is64=0 and is64=1.
			fmt.Printf("scanSwitchIs64, %d, %d,\n", out[i+1], out[i+2])
			i += 2
		case scanSwitchDatasize:
			// Operands: targets for data size 16, 32 and 64.
			fmt.Printf("scanSwitchDatasize, %d, %d, %d,\n", out[i+1], out[i+2], out[i+3])
			i += 3
		case scanSwitchIsMem:
			// Operands: targets for ismem=0 and ismem=1.
			fmt.Printf("scanSwitchIsMem, %d, %d,\n", out[i+1], out[i+2])
			i += 2
		case scanReadModRM:
			fmt.Printf("scanReadModRM,\n")
		case scanReadIB:
			fmt.Printf("scanReadIB,\n")
		case scanReadIW:
			fmt.Printf("scanReadIW,\n")
		case scanReadIWD:
			fmt.Printf("scanReadIWD,\n")
		case scanReadIWDO:
			fmt.Printf("scanReadIWDO,\n")
		case scanReadCWD:
			fmt.Printf("scanReadCWD,\n")
		case scanReadCB:
			fmt.Printf("scanReadCB,\n")
		case scanReadCDP:
			fmt.Printf("scanReadCDP,\n")
		case scanReadCM:
			fmt.Printf("scanReadCM,\n")
		}
	}
	fmt.Printf("}\n")
}

// walkScanTree simplifies the tree rooted at p, in place, for use by
// the scanner, which only needs instruction lengths. Children are
// simplified before their parent. The rewrites applied to a node are:
//
//   - "op" and "arg" nodes are replaced by their (first) child;
//   - a "decode" node whose children are exactly /0../7 and all alike
//     becomes a plain "read /r";
//   - "prefix", "is64", "addrsize", "datasize" and "ismem" nodes are
//     replaced by their first child when the choice makes no difference;
//   - "datasize" nodes whose branches differ only in the size of an
//     immediate or displacement are folded into one combined read
//     ("iwd/d", "iwdo/d", "cwd/d", "cdp/d");
//   - certain "is64" nodes over such combined reads are collapsed.
//
// is64 is the 64-bit mode known from an enclosing "is64" check:
// 0 or 1 once a specific key has been seen on the way down, and
// whatever the caller passed otherwise (printScanner passes -1).
func walkScanTree(p *Prog, is64 int) {
	keys := p.keys()
	for _, key := range keys {
		if p.Action == "is64" {
			switch key {
			case "0":
				is64 = 0
			case "1":
				is64 = 1
			}
		}
		// NOTE(review): is64 is not restored between iterations, so
		// the value set for one key is still in effect for later keys
		// and for the code after the loop.
		walkScanTree(p.Child[key], is64)
	}

	switch p.Action {
	case "read", "match":
		// keep
		return
	case "decode":
		// All eight /n cases lead to the same subtree: the reg field
		// does not matter, so just read the modrm byte.
		if len(keys) >= 8 && keys[0] == "/0" && keys[7] == "/7" && allSame(p, keys) {
			p.Action = "read"
			p.Child = map[string]*Prog{"/r": p.Child[keys[0]]}
			return
		}
	case "op", "arg":
		// drop
		*p = *p.Child[keys[0]]
		return
	case "prefix":
		// Every prefix, including "0" (no prefix), leads to the same
		// subtree: skip the check.
		if len(keys) >= 1 && keys[0] == "0" && allSame(p, keys) {
			*p = *p.Child[keys[0]]
			return
		}
	case "is64", "addrsize", "datasize", "ismem":
		if len(keys) == 1 && keys[0] == "any" {
			*p = *p.Child[keys[0]]
			return
		}
		// nkey is the number of keys that must be present, and alike,
		// for the check to be dropped: by default the full key space
		// of the action, but only two for addrsize, and two for
		// datasize when is64 is 0.
		nkey := len(allKeys[p.Action])
		if p.Action == "addrsize" {
			nkey = 2
		}
		if p.Action == "datasize" && is64 == 0 {
			nkey = 2
		}
		if len(keys) == nkey && allSame(p, keys) {
			*p = *p.Child[keys[0]]
			return
		}
	}

	// The node survived the generic simplifications. Try the
	// pattern-specific ones, which compare the compact text of the
	// children (see treeText) against known shapes.
	switch p.Action {
	case "datasize":
		if len(keys) == 2 && is64 == 0 || len(keys) == 3 {
			// 16 -> iw, 32 -> id, 64 (if present) -> id.
			if treeText(p.Child["16"]) == "read iw match ! \n" && treeText(p.Child["32"]) == "read id match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read id match ! \n") {
				p.Action = "read"
				p.Child = map[string]*Prog{"iwd/d": p.Child["16"].Child["iw"]}
				return
			}
			// 16 -> iw, 32 -> id, 64 -> io.
			if len(keys) == 3 && treeText(p.Child["16"]) == "read iw match ! \n" && treeText(p.Child["32"]) == "read id match ! \n" && treeText(p.Child["64"]) == "read io match ! \n" {
				p.Action = "read"
				p.Child = map[string]*Prog{"iwdo/d": p.Child["16"].Child["iw"]}
				return
			}
			// Same as the first pattern, but preceded by a modrm read.
			if treeText(p.Child["16"]) == "read /r read iw match ! \n" && treeText(p.Child["32"]) == "read /r read id match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read /r read id match ! \n") {
				p.Action = "read"
				p.Child = map[string]*Prog{"/r": {Action: "read", Child: map[string]*Prog{"iwd/d": p.Child["16"].Child["/r"].Child["iw"]}}}
				return
			}
			// 16 -> cw, 32 -> cd, 64 (if present) -> cd.
			if treeText(p.Child["16"]) == "read cw match ! \n" && treeText(p.Child["32"]) == "read cd match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read cd match ! \n") {
				p.Action = "read"
				p.Child = map[string]*Prog{"cwd/d": p.Child["16"].Child["cw"]}
				return
			}
			// 16 -> cd, 32 -> cp, 64 (if present) -> cp.
			if treeText(p.Child["16"]) == "read cd match ! \n" && treeText(p.Child["32"]) == "read cp match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read cp match ! \n") {
				p.Action = "read"
				p.Child = map[string]*Prog{"cdp/d": p.Child["16"].Child["cd"]}
				return
			}
			// No known pattern matched: print the 16-bit branch to
			// standard output as a diagnostic.
			fmt.Printf("!! %q\n", treeText(p.Child["16"]))
		}

	case "is64":
		// The two modes differ only in reads that one of the branches
		// is taken to cover; keep that branch.
		if len(keys) == 2 && treeText(p.Child["0"]) == "read cwd/d match ! \n" && treeText(p.Child["1"]) == "read cd match ! \n" {
			*p = *p.Child["0"]
			return
		}
		if len(keys) == 2 && treeText(p.Child["0"]) == "read iwd/d match ! \n" && treeText(p.Child["1"]) == "read iwdo/d match ! \n" {
			*p = *p.Child["1"]
			return
		}
	}

	/*
		match := make(map[string][]string)
		for _, key := range keys {
			text := treeText(p.Child[key])
			match[text] = append(match[text], key)
		}
		child := make(map[string]*Prog)
		for _, keys := range match {
			child[strings.Join(keys, ",")] = p.Child[keys[0]]
		}
		p.Child = child
	*/
}

// treeText returns the compact textual form of the subtree rooted at
// p, exactly as printTree writes it at depth 0. Two subtrees with the
// same text are treated as interchangeable by the scanner code.
func treeText(p *Prog) string {
	out := new(bytes.Buffer)
	printTree(out, p, 0, true)
	return out.String()
}

// allSame reports whether the children of p named by keys all have
// the same treeText. It is true for an empty or single-element keys.
func allSame(p *Prog, keys []string) bool {
	if len(keys) == 0 {
		return true
	}
	want := treeText(p.Child[keys[0]])
	for _, key := range keys[1:] {
		if treeText(p.Child[key]) != want {
			return false
		}
	}
	return true
}

// scanCache maps the treeText of a subtree to the offset at which
// emitScanFunc emitted it, so that identical subtrees are emitted once
// and shared.
var scanCache = map[string]uint16{}

// Opcodes of the scanner program built by emitScanFunc and printed by
// printScanner. The value 0 is not an opcode; offset 0 of the program
// is the dead state.
const (
	_ uint16 = iota
	scanMatch          // no operands
	scanJump           // operand: target
	scanSwitchByte     // operands: 256 targets
	scanSwitchSlash    // operands: count n, n (byte, target) pairs, 8 targets for /0../7
	scanSwitchIs64     // operands: targets for "0" and "1"
	scanSwitchDatasize // operands: targets for "16", "32" and "64"
	scanSwitchIsMem    // operands: targets for "0" and "1"
	scanSwitchPrefix   // operands: count n, n (prefix, target) pairs
	scanReadModRM      // emitted for read "/r"
	scanReadIB         // emitted for read "ib"
	scanReadIW         // emitted for read "iw"
	scanReadIWD        // emitted for read "iwd/d"
	scanReadIWDO       // emitted for read "iwdo/d"
	scanReadCWD        // emitted for read "cwd/d"
	scanReadCB         // emitted for read "cb"
	scanReadCDP        // emitted for read "cdp/d"
	scanReadCM         // emitted for read "cm"
)

// decodeKeyPlus parses a decode key of the form "XX" or "XX+", where
// XX is a hexadecimal byte value. It returns the byte value and the
// number of consecutive byte values the key stands for: 8 for the "+"
// form, 1 otherwise. A key that is not a valid hexadecimal byte is fatal.
func decodeKeyPlus(key string) (val, n int) {
	hex, span := key, 1
	if strings.HasSuffix(key, "+") {
		hex, span = strings.TrimSuffix(key, "+"), 8
	}
	parsed, err := strconv.ParseUint(hex, 16, 8)
	if err != nil {
		log.Fatalf("unexpected decode key %q", hex)
	}
	return int(parsed), span
}

// decodeKey parses a decode key that must denote a single byte value
// and returns that value. A key of the "XX+" form panics.
func decodeKey(key string) int {
	val, span := decodeKeyPlus(key)
	if span == 1 {
		return val
	}
	log.Panicf("unexpected decode key+ %q", key)
	return val
}

// emitScanFunc appends the scanner program for the subtree rooted at p
// to *out and returns the offset at which that program starts.
//
// Subtrees with identical treeText are emitted only once: the offset of
// the first emission is remembered in scanCache and returned for every
// later request. The layout of each instruction is the one printScanner
// expects (see the scan* opcode constants). Branch targets are first
// written as 0 and patched once the target subtree has been emitted.
// An action or read key the scanner does not know is fatal.
func emitScanFunc(p *Prog, out *[]uint16) uint16 {
	keys := p.keys()
	text := treeText(p)
	if off, ok := scanCache[text]; ok {
		return off
	}
	start := uint16(len(*out))
	scanCache[text] = start

	// patch emits the program for next and stores its offset in slot.
	// The recursive call appends to *out and may reallocate it, so the
	// call must complete before *out is indexed; writing
	// (*out)[slot] = emitScanFunc(...) would leave that order to the
	// compiler.
	patch := func(slot int, next *Prog) {
		targ := emitScanFunc(next, out)
		(*out)[slot] = targ
	}

	switch p.Action {
	case "decode":
		if keys[0][0] != '/' {
			// Only byte keys: a 256-entry jump table. Keys sort with
			// "/n" before hex digits, so a non-slash first key means
			// there are no slash keys at all.
			*out = append(*out, scanSwitchByte)
			off := len(*out)
			for i := 0; i < 256; i++ {
				*out = append(*out, 0)
			}
			for _, key := range keys {
				val, n := decodeKeyPlus(key)
				dst := emitScanFunc(p.Child[key], out)
				for j := 0; j < n; j++ {
					(*out)[off+val+j] = dst
				}
			}
			return start
		}

		// Slash keys, possibly followed by byte keys.
		// n is the number of leading slash keys.
		n := len(keys)
		for n > 0 && keys[n-1][0] != '/' {
			n--
		}
		// total is the number of individual byte values covered by
		// the byte keys ("XX+" counts for 8).
		total := 0
		for i := n; i < len(keys); i++ {
			key := keys[i]
			_, n := decodeKeyPlus(key)
			total += n
		}
		*out = append(*out, scanSwitchSlash, uint16(total))
		off := len(*out)
		for i := 0; i < total; i++ {
			*out = append(*out, 0, 0)
		}
		for i := 0; i < 8; i++ {
			*out = append(*out, 0)
		}
		// Fill in the (byte, target) pairs; off ends up at the first
		// of the eight /n slots.
		for i := n; i < len(keys); i++ {
			key := keys[i]
			val, valn := decodeKeyPlus(key)
			targ := emitScanFunc(p.Child[key], out)
			for j := 0; j < valn; j++ {
				(*out)[off] = uint16(val + j)
				off++
				(*out)[off] = targ
				off++
			}
		}
		for i := 0; i < n; i++ {
			key := keys[i]
			if len(key) != 2 || key[0] != '/' || key[1] < '0' || '8' <= key[1] {
				log.Fatalf("unexpected decode key %q", key)
			}
			patch(off+int(key[1]-'0'), p.Child[key])
		}
		return start

	case "read":
		switch keys[0] {
		default:
			log.Fatalf("unexpected read %q", keys[0])
		case "/r":
			*out = append(*out, scanReadModRM)
		case "ib":
			*out = append(*out, scanReadIB)
		case "iw":
			*out = append(*out, scanReadIW)
		case "cb":
			*out = append(*out, scanReadCB)
		case "cm":
			*out = append(*out, scanReadCM)
		case "iwd/d":
			*out = append(*out, scanReadIWD)
		case "iwdo/d":
			*out = append(*out, scanReadIWDO)
		case "cwd/d":
			*out = append(*out, scanReadCWD)
		case "cdp/d":
			*out = append(*out, scanReadCDP)
		}
		next := p.Child[keys[0]]
		if next.Action == "match" {
			// Inline the match instead of jumping to one.
			*out = append(*out, scanMatch)
		} else {
			*out = append(*out, scanJump, 0)
			patch(len(*out)-1, next)
		}
		return start

	case "match":
		*out = append(*out, scanMatch)
		return start

	case "is64":
		*out = append(*out, scanSwitchIs64, 0, 0)
		if next := p.Child["0"]; next != nil {
			patch(int(start)+1, next)
		}
		if next := p.Child["1"]; next != nil {
			patch(int(start)+2, next)
		}
		return start

	case "ismem":
		*out = append(*out, scanSwitchIsMem, 0, 0)
		if next := p.Child["0"]; next != nil {
			patch(int(start)+1, next)
		}
		if next := p.Child["1"]; next != nil {
			patch(int(start)+2, next)
		}
		return start

	case "datasize":
		*out = append(*out, scanSwitchDatasize, 0, 0, 0)
		if next := p.Child["16"]; next != nil {
			patch(int(start)+1, next)
		}
		if next := p.Child["32"]; next != nil {
			patch(int(start)+2, next)
		}
		if next := p.Child["64"]; next != nil {
			patch(int(start)+3, next)
		}
		return start

	case "prefix":
		*out = append(*out, scanSwitchPrefix, uint16(len(keys)))
		n := len(keys)
		for i := 0; i < n; i++ {
			*out = append(*out, uint16(decodeKey(keys[i])), 0)
		}
		for i := 0; i < n; i++ {
			patch(int(start)+2+2*i+1, p.Child[keys[i]])
		}
		return start

	}

	log.Fatalf("unexpected action %q", p.Action)
	return start
}

// printDecoderPass prints the decoding table program for p,
// assuming that we are emitting code at the given program counter.
// It returns the new current program counter, that is, the program
// counter after the printed instructions.
// If printing==false, printDecoderPass does not print the actual
// code words but still does the PC computation.
//
// The names of all opcodes set by "op" nodes are recorded in ops.
// The function is meant to be run twice over the same tree with the
// same starting pc: the first run (printing==false) assigns PCs, the
// second (printing==true) prints, reading child PCs assigned by the first.
func printDecoderPass(p *Prog, pc int, printing bool, ops map[string]bool) int {
	// Record PC on first pass.
	if p.PC == 0 {
		p.PC = pc
	}

	// If PC doesn't match, we've already printed this code
	// because it was reached some other way. Jump to that copy.
	if p.PC != pc {
		if printing {
			fmt.Printf("/*%d*/\tuint16(xJump), %d,\n", pc, p.PC)
		}
		return pc + 2
	}

	// Otherwise, emit the code for the given action.

	// At the bottom, if next is non-nil, emit code for next.
	// Then emit the code for the children named by the keys.
	keys := p.keys()
	var next *Prog

	switch p.Action {
	default:
		log.Printf("printDecoderPass: unknown action %q: %s", p.Action, p.Path)

	case "decode":
		// Decode hex bytes or /n modrm op checks.
		// Hex bytes take priority, so do them first.
		// Hex bytes of the form "40+" indicate an
		// 8 entry-wide swath of codes: 40, 41, ..., 47.
		hex := 0
		slash := 0
		for _, key := range keys {
			if isHex(key) {
				if strings.Contains(key, "+") {
					hex += 8
				} else {
					hex++
				}
			}
			if isSlashNum(key) {
				slash++
			}
		}
		if hex > 0 {
			// TODO(rsc): Introduce an xCondByte256 that has 256 child entries
			// and no explicit keys. That will cut the size in half for large
			// tables.
			if printing {
				fmt.Printf("/*%d*/\tuint16(xCondByte), %d,\n", pc, hex)
				for _, key := range keys {
					if !isHex(key) {
						continue
					}
					if i := strings.Index(key, "+"); i >= 0 {
						// Expand "XX+" into eight entries sharing one target.
						nextPC := p.Child[key].PC
						n, _ := strconv.ParseUint(key[:i], 16, 0)
						for j := 0; j < 8; j++ {
							fmt.Printf("\t%#02x, %d,\n", int(n)+j, nextPC)
						}
						continue
					}
					fmt.Printf("\t0x%s, %d,\n", key, p.Child[key].PC)
				}
			}
			// Opcode, count, and one (byte, target) pair per entry.
			pc += 2 + 2*hex

			// All other condition checks fail the decoding if nothing is found,
			// but this one falls through so that we can then do /n checks.
			// If there are no upcoming /n checks, insert an explicit failure.
			if slash == 0 {
				if printing {
					fmt.Printf("\tuint16(xFail),\n")
				}
				pc++
			}
		}
		if slash > 0 {
			if printing {
				fmt.Printf("/*%d*/\tuint16(xCondSlashR),\n", pc)
				for i := 0; i < 8; i++ {
					fmt.Printf("\t%d, // %d\n", p.childPC(fmt.Sprintf("/%d", i)), i)
				}
			}
			// Opcode plus eight targets.
			pc += 1 + 8
		}

	case "is64":
		// Decode based on processor mode: 64-bit or not.
		if len(keys) == 1 && keys[0] == "any" {
			// No-op check: emit the child directly, no instruction.
			next = p.Child["any"]
			break
		}
		if p.Child["any"] != nil {
			log.Printf("%s: mixed is64 keys: %v", p.Path, keys)
		}

		if printing {
			fmt.Printf("/*%d*/\tuint16(xCondIs64), %d, %d,\n", pc, p.childPC("0"), p.childPC("1"))
		}
		pc += 3

	case "prefix":
		// Decode based on presence of prefix.
		// The "0" prefix means "none of the above", so if there's
		// nothing else, it's the same as "any".
		if len(keys) == 1 && (keys[0] == "any" || keys[0] == "0") {
			// When the only key is "0", p.Child["any"] is nil and next
			// stays nil; the "0" child is then emitted by the loop over
			// keys at the bottom of the function.
			next = p.Child["any"]
			break
		}
		if p.Child["any"] != nil {
			log.Printf("%s: mixed prefix keys: %v", p.Path, keys)
		}

		// Emit the prefixes in reverse sorted order, so that F3 and F2 are
		// considered before 66, and the fallback 0 is considered last.
		if printing {
			fmt.Printf("/*%d*/\tuint16(xCondPrefix), %d,\n", pc, len(keys))
			for i := len(keys) - 1; i >= 0; i-- {
				key := keys[i]
				nextPC := p.Child[key].PC
				fmt.Printf("\t0x%s, %d,\n", key, nextPC)
			}
		}
		pc += 2 + 2*len(keys)

	case "addrsize":
		// Decode based on address size attribute.
		if len(keys) == 1 && keys[0] == "any" {
			next = p.Child["any"]
			break
		}
		if p.Child["any"] != nil {
			log.Printf("%s: mixed addrsize keys: %v", p.Path, keys)
		}

		if printing {
			fmt.Printf("/*%d*/\tuint16(xCondAddrSize), %d, %d, %d,\n", pc, p.childPC("16"), p.childPC("32"), p.childPC("64"))
		}
		pc += 4

	case "datasize":
		// Decode based on operand size attribute.
		if len(keys) == 1 && keys[0] == "any" {
			next = p.Child["any"]
			break
		}
		if p.Child["any"] != nil {
			log.Printf("%s: mixed datasize keys: %v", p.Path, keys)
		}

		if printing {
			fmt.Printf("/*%d*/\tuint16(xCondDataSize), %d, %d, %d,\n", pc, p.childPC("16"), p.childPC("32"), p.childPC("64"))
		}
		pc += 4

	case "ismem":
		// Decode based on modrm form: memory or register reference.
		if len(keys) == 1 && keys[0] == "any" {
			next = p.Child["any"]
			break
		}
		if p.Child["any"] != nil {
			log.Printf("%s: mixed ismem keys: %v", p.Path, keys)
		}

		if printing {
			fmt.Printf("/*%d*/\tuint16(xCondIsMem), %d, %d,\n", pc, p.childPC("0"), p.childPC("1"))
		}
		pc += 3

	case "op":
		// Set opcode.
		ops[keys[0]] = true
		if printing {
			fmt.Printf("/*%d*/\tuint16(xSetOp), uint16(%s),\n", pc, keys[0])
		}
		next = p.Child[keys[0]]
		pc += 2

	case "read":
		// Read argument bytes.
		if printing {
			fmt.Printf("/*%d*/\tuint16(xRead%s),\n", pc, xOp(keys[0]))
		}
		next = p.Child[keys[0]]
		pc++

	case "arg":
		// Record instruction argument (interpret bytes loaded with read).
		if printing {
			fmt.Printf("/*%d*/\tuint16(xArg%s),\n", pc, xOp(keys[0]))
		}
		next = p.Child[keys[0]]
		pc++

	case "match":
		// Finish match.
		if printing {
			fmt.Printf("/*%d*/\tuint16(xMatch),\n", pc)
		}
		pc++
		return pc
	}

	// Emit the fall-through successor immediately after this node.
	if next != nil {
		pc = printDecoderPass(next, pc, printing, ops)
	}

	// Emit each child that has no code yet (PC == 0, first pass) or
	// whose code belongs exactly here (PC == pc, second pass). Children
	// already placed elsewhere are reached through the PCs printed above.
	for _, key := range keys {
		q := p.Child[key]
		if q.PC == 0 || q.PC == pc {
			pc = printDecoderPass(q, pc, printing, ops)
		}
	}

	return pc
}

// childPC reports the PC of p's child named key. A missing child
// yields 0, the PC of the xFail instruction at the start of the
// decoder program.
func (p *Prog) childPC(key string) int {
	if child := p.Child[key]; child != nil {
		return child.PC
	}
	return 0
}

// isLower reports whether c is one of the ASCII letters a through z.
func isLower(c byte) bool {
	return !(c < 'a' || c > 'z')
}

// isLetterDigit reports whether c is an ASCII letter (either case)
// or an ASCII decimal digit.
func isLetterDigit(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	return false
}

// xOp converts arg, an Intel manual shorthand, into a decoder opcode suffix.
// The standard form is LeadingUpperLetter with a few punctuation symbols
// turned into purely lower case words: M16and32, M16colon32, CR0dashCR7.
func xOp(arg string) string {
	var buf []byte
	for i := 0; i < len(arg); i++ {
		c := arg[i]
		if isLower(c) && (i == 0 || !isLetterDigit(arg[i-1])) {
			c -= 'a' - 'A'
		}
		buf = append(buf, c)
	}
	return argFix.Replace(string(buf))
}

// argFix rewrites the punctuation left in a capitalized argument name
// (see xOp) so that the result is usable in a Go identifier.
//
// When several old strings match at the same position, the one listed
// first wins, so a longer pattern must come before any pattern that is
// a prefix of it: "/R" before "/", and "ST(I)+Op" before "ST(I)".
var argFix = strings.NewReplacer(
	"/R", "SlashR",
	"/", "",
	"<", "",
	">", "",
	"+", "plus",
	"-", "dash",
	":", "colon",
	"&", "and",
	"ST(0)", "ST",
	"ST(I)+Op", "STi",
	"ST(I)", "STi",
)