// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Armmap constructs the ARM opcode map from the instruction set CSV file. // // Usage: // // armmap [-fmt=format] arm.csv // // The known output formats are: // // text (default) - print decoding tree in text form // decoder - print decoding tables for the armasm package package main import ( "bufio" "encoding/csv" "flag" "fmt" "log" "os" "sort" "strconv" "strings" ) var format = flag.String("fmt", "text", "output format: text, decoder") var inputFile string func usage() { fmt.Fprintf(os.Stderr, "usage: armmap [-fmt=format] x86.csv\n") os.Exit(2) } func main() { log.SetFlags(0) log.SetPrefix("armmap: ") flag.Usage = usage flag.Parse() if flag.NArg() != 1 { usage() } inputFile = flag.Arg(0) var print func(*Prog) switch *format { default: log.Fatalf("unknown output format %q", *format) case "text": print = printText case "decoder": print = printDecoder } p, err := readCSV(flag.Arg(0)) if err != nil { log.Fatal(err) } print(p) } // readCSV reads the CSV file and returns the corresponding Prog. // It may print details about problems to standard error using the log package. func readCSV(file string) (*Prog, error) { // Read input. // Skip leading blank and # comment lines. f, err := os.Open(file) if err != nil { return nil, err } b := bufio.NewReader(f) for { c, err := b.ReadByte() if err != nil { break } if c == '\n' { continue } if c == '#' { b.ReadBytes('\n') continue } b.UnreadByte() break } table, err := csv.NewReader(b).ReadAll() if err != nil { return nil, fmt.Errorf("parsing %s: %v", file, err) } if len(table) == 0 { return nil, fmt.Errorf("empty csv input") } if len(table[0]) < 5 { return nil, fmt.Errorf("csv too narrow: need at least five columns") } p := &Prog{} for _, row := range table { add(p, row[0], row[1], row[2], row[3], row[4]) } return p, nil } type Prog struct { Inst []Inst OpRanges map[string]string } type Inst struct { Text string Encoding string Mask uint32 Value uint32 Priority int OpBase string OpBits uint64 Args []string } type Arg struct { Name string Bits uint64 } // add adds the entry from the CSV described by maskstr, valuestr, text, encoding, tags // to the program p. func add(p *Prog, maskstr, valuestr, text, encoding, tags string) { if strings.Contains(tags, "pseudo") { return } // For now, ignore the VFP floating point instructions. if strings.HasPrefix(text, "V") && !strings.Contains(tags, "vfp") { // TODO return } mask, err := strconv.ParseUint(maskstr, 0, 32) if err != nil { log.Printf("invalid mask %q", maskstr) return } value, err := strconv.ParseUint(valuestr, 0, 32) if err != nil { log.Printf("invalid value %q", valuestr) return } // Parse encoding, building size and offset of each field. // The first field in the encoding is the largest offset. fuzzy := uint32(0) // mask of 'should be' bits fieldOffset := map[string]int{} fieldWidth := map[string]int{} off := 32 for _, f := range strings.Split(encoding, "|") { n := 1 if i := strings.Index(f, ":"); i >= 0 { n, _ = strconv.Atoi(f[i+1:]) } off -= n fieldOffset[f] = off fieldWidth[f] = n if f == "(0)" || f == "(1)" { fuzzy |= 1 << uint(off) } } if off != 0 { fmt.Fprintf(os.Stderr, "%s: counted %d bits in %s\n", text, 32-off, encoding) } // Track which encoding fields we found uses for. // If we do not find a use for a field, that's an error in the input tables. fieldUsed := map[string]bool{} // Split text into opcode and arguments. var op, argstr string if i := strings.Index(text, " "); i >= 0 { op = text[:i] argstr = text[i:] } else { op = text } op = strings.TrimSpace(op) argstr = strings.TrimSpace(argstr) // Parse opcode suffixes. i := strings.Index(op, "<") if i < 0 { i = len(op) } if j := strings.Index(op, "{"); j >= 0 && j < i { i = j } op, suffix := op[:i], op[i:] if suffix != "" && opSuffix[suffix] == "" { fmt.Fprintf(os.Stderr, "%s: invalid op suffix %q in %s\n", text, suffix, op+suffix) } // Make sure fields needed by opcode suffix are available. for _, f := range strings.Split(opSuffix[suffix], ",") { if f != "" && fieldWidth[f] == 0 { fmt.Fprintf(os.Stderr, "%s: opsuffix %s missing %s in encoding %s\n", text, suffix, f, encoding) } fieldUsed[f] = true } // Build list of opcodes that can be generated by this suffix. // For example, the opcodes generated by ADD are ADD.EQ, ADD.NE, etc. // To simplify the decoding of instruction opcodes, we arrange that this // sequence aligns with the encoding, so that decoding amounts to extracting // the right bits, concatenating them, and adding them to the first opcode in // the sequence. If the condition code is present, we always place it in the // low order bits, so that x&^15 == FOO_EQ tests whether x is any of the // conditional FOO instructions. ops := []string{op} opBits := uint64(0) // record of bits to extract and add to opcode base opFields := strings.Split(opSuffix[suffix], ",") // First the optional elements, like {S} meaning "" or ".S". for strings.HasPrefix(suffix, "{") { i := strings.Index(suffix, "}") var f, option string option, suffix = suffix[1:i], suffix[i+1:] f, opFields = opFields[0], opFields[1:] if option == "W" { // The {W} option on PLD{W} uses the R bit which is !W. ops = cross(ops, "."+option, "") } else { ops = cross(ops, "", "."+option) } if fieldWidth[f] != 1 { fmt.Fprintf(os.Stderr, "%s: have %d bits for {%s}\n", text, fieldWidth[f], option) } // opBits is a sequence of 16-bit chunks describing contiguous bit sections. // Each chunk is 8-bit offset followed by 8-bit size. opBits = opBits<<16 | uint64(fieldOffset[f])<<8 | 1 } // Then the true field substitutions. haveCond := false for strings.Contains(suffix, "<") { var f, literal, x string if len(opFields) == 0 { fmt.Fprintf(os.Stderr, "%s: ran out of suffix fields for <%s>\n", text, x) break } f, opFields = opFields[0], opFields[1:] i := strings.Index(suffix, "<") j := strings.Index(suffix, ">") literal, x, suffix = suffix[:i], suffix[i+1:j], suffix[j+1:] // Add leading literal text to all opcodes. ops = cross(ops, literal) // The condition can happen anywhere in the opcode text // but we want to generate the actual variation in the low bits // of the list index. Remember when and where we've seen and apply // it after the loop has finished. if x == "c" && f == "cond:4" { haveCond = true ops = cross(ops, "_COND_") continue } // Otherwise, choices[x] lists the possible expansions of . // If is of the form the choices are A, B, and C. expand := choices[x] if expand == nil && strings.Contains(x, ",") { expand = strings.Split(x, ",") } if expand == nil { fmt.Fprintf(os.Stderr, "%s: unknown choices for <%s>\n", text, x) expand = []string{x} } else if len(expand) != 1< but %d bits\n", text, len(expand), x, fieldWidth[f]) } opBits = opBits<<16 | uint64(fieldOffset[f])<<8 | uint64(fieldWidth[f]) ops = cross(ops, expand...) } if haveCond { // Apply condtional suffix last. opBits = opBits<<16 | 28<<8 | 4 ops = crossCond(ops) } ops = cross(ops, suffix) // Now ops is a list of opcodes generated by this opcode pattern. // We want to make sure that we can arrange for those opcodes to // happen consecutively in the final opcode numbering. // Record in p.OpRanges[op] the required consecutive sequence of // opcode that includes op. To make searches easier, we record // the sequence as a comma-separated list of strings with commas // on both ends: [A, B] encodes as ",A,B,". if p.OpRanges == nil { p.OpRanges = make(map[string]string) } opstr := "," + strings.Join(ops, ",") + "," for _, op := range ops { if old := p.OpRanges[op]; old != "" && old != opstr { if strings.Contains(old, opstr) { opstr = old } else if strings.Contains(opstr, old) { // great, do nothing } else { // It would also be okay if there is some subsequence s such that // old = x+s and opstr = s+y (or vice versa), in which case we should // record opstr = x+s+y. However, this has not come up in practice. // Failing that, we can't satisfy the sequencing requirements. fmt.Fprintf(os.Stderr, "%s: %s appears in both %s and %s\n", text, op, old, opstr) } } } for _, op := range strings.Split(opstr, ",") { if op != "" { p.OpRanges[op] = opstr } } // Process the arguments, building a list of argument descriptions. // Each argument description has the form |field@off|field@off... // where the |field@off suffixes give the name and location of the fields // needed by the argument. Each such string maps to a different decoding // type in the generated table, according to the argOps map. var args []string for argstr != "" { // Find longest match among argSuffixes pieces. best := 0 for a := range argSuffixes { if argstr == a || strings.HasPrefix(argstr, a+",") { if best < len(a) { best = len(a) } } } if best == 0 { fmt.Fprintf(os.Stderr, "%s: unknown arg %s\n", text, argstr) break } var arg, desc string arg, argstr = argstr[:best], strings.TrimSpace(strings.TrimLeft(argstr[best:], ",")) desc = arg for _, f := range strings.Split(argSuffixes[desc], ",") { if f == "" { continue } if fieldWidth[f] == 0 { fmt.Fprintf(os.Stderr, "%s: arg %s missing %s in encoding %s\n", text, arg, f, encoding) } fieldUsed[f] = true desc += fmt.Sprintf("|%s@%d", f, fieldOffset[f]) } args = append(args, desc) } // Check that all encoding fields were used by suffix or argument decoding. for f := range fieldWidth { switch f { case "0", "1", "(0)", "(1)": // ok default: if !fieldUsed[f] { fmt.Fprintf(os.Stderr, "%s: encoding field %s not used in %s\n", text, f, encoding) } } } // Determine decoding priority. Instructions that say 'SEE X' in the tag // are considered lower priority than ones that don't. In theory the // structure described by the SEE tags might be richer than that, but // in practice it only has those two levels. // We leave space for two more priorities according to whether the // fuzzy bits are set correctly. The full set of priorities then is: // // 4 - no SEE tag, fuzzy bits all match // 3 - no SEE tag, some fuzzy bits don't match // 2 - SEE tag, fuzzy bits all match // 1 - SEE tag, some fuzzy bits don't match // // You could argue for swapping the middle two levels but so far // it has not been an issue. pri := 4 if strings.Contains(tags, "SEE") { pri = 2 } inst := Inst{ Text: text, Encoding: encoding, Mask: uint32(mask), Value: uint32(value), Priority: pri, OpBase: ops[0], OpBits: opBits, Args: args, } p.Inst = append(p.Inst, inst) if fuzzy != 0 { inst.Mask &^= fuzzy inst.Priority-- p.Inst = append(p.Inst, inst) } } // opSuffix describes the encoding fields used to resolve a given opcode suffix. var opSuffix = map[string]string{ "": "op", "": "op:2", ".F<32,64>": "op,cond:4,sz", ".F<32,64>": "op,cond:4,sz", "": "tb,cond:4", ".8": "op", "": "cond:4", ".32": "cond:4", ".F<32,64>": "cond:4,sz", "": "N,M,cond:4", "": "M,cond:4", "{B}": "B,cond:4", "{E}.F<32,64>": "E,cond:4,sz", "{R}": "R,cond:4", ".F<32,64>.32": "cond:4,sz,op", ".32.F<32,64>": "op,cond:4,signed,sz", "{S}": "S,cond:4", "{W}": "R", "{X}": "M,cond:4", ".": "T,cond:4,op", ".": "cond:4,sz", ".FX<16,32>.F<32,64>": "cond:4,U,sx,sz", ".F<32,64>.FX<16,32>": "cond:4,sz,U,sx", } // choices[x] describes the choices for filling in "<"+x+">" in an opcode suffix. // Opcodes that end up containing ZZ take up a numeric sequence value but are // not exported in the package API. var choices = map[string][]string{ "c": {".EQ", ".NE", ".CS", ".CC", ".MI", ".PL", ".VS", ".VC", ".HI", ".LS", ".GE", ".LT", ".GT", ".LE", "", ".ZZ"}, "x": {"B", "T"}, "y": {"B", "T"}, } // argOps maps from argument descriptions to internal decoder name. var argOps = map[string]string{ // 4-bit register encodings "|Rm:4@0": "arg_R_0", "|Rn:4@0": "arg_R_0", "|Rt:4@0": "arg_R_0", "|Rm:4@8": "arg_R_8", "|Ra:4@12": "arg_R_12", "|Rd:4@12": "arg_R_12", "|RdLo:4@12": "arg_R_12", "|Rt:4@12": "arg_R_12", "|Rt:4@12": "arg_R_12_nzcv", "|Rd:4@16": "arg_R_16", "|RdHi:4@16": "arg_R_16", "|Rn:4@16": "arg_R_16", // first and second of consecutive register pair "|Rt:4@0": "arg_R1_0", "|Rt:4@12": "arg_R1_12", "|Rt:4@0": "arg_R2_0", "|Rt:4@12": "arg_R2_12", // register arithmetic ", |Rm:4@0|Rs:4@8|type:2@5": "arg_R_shift_R", "{,}|Rm:4@0|imm5:5@7|type:2@5": "arg_R_shift_imm", "{,}|Rn:4@0|imm5:5@7|sh@6": "arg_R_shift_imm", "{,LSL #}|Rm:4@0|imm5:5@7": "arg_R_shift_imm", "{,}|Rm:4@0|rotate:2@10": "arg_R_rotate", // memory references "{!}|Rn:4@16|W@21": "arg_R_16_WB", "[]|Rn:4@16": "arg_mem_R", "[,+/-{, }]{!}|Rn:4@16|U@23|Rm:4@0|type:2@5|imm5:5@7|P@24|W@21": "arg_mem_R_pm_R_shift_imm_W", "[{,#+/-}]{!}|Rn:4@16|P@24|U@23|W@21|imm4H:4@8|imm4L:4@0": "arg_mem_R_pm_imm8_W", "[] {,#+/-}|Rn:4@16|U@23|imm4H:4@8|imm4L:4@0": "arg_mem_R_pm_imm8_postindex", "[{,#+/-}]{!}|Rn:4@16|P@24|U@23|W@21|imm12:12@0": "arg_mem_R_pm_imm12_W", "[],#+/-|Rn:4@16|imm12:12@0|U@23": "arg_mem_R_pm_imm12_postindex", "[,#+/-]|Rn:4@16|U@23|imm12:12@0": "arg_mem_R_pm_imm12_offset", "[] {,#+/-}|Rn:4@16|U@23|imm12:12@0": "arg_mem_R_pm_imm12_postindex", "[], +/-|Rn:4@16|U@23|Rm:4@0": "arg_mem_R_pm_R_postindex", "[,+/-]{!}|Rn:4@16|U@23|Rm:4@0|P@24|W@21": "arg_mem_R_pm_R_W", "[],+/-{, }|Rn:4@16|Rm:4@0|imm5:5@7|type:2@5|U@23": "arg_mem_R_pm_R_shift_imm_postindex", "[,+/-{, }]|Rn:4@16|U@23|Rm:4@0|type:2@5|imm5:5@7": "arg_mem_R_pm_R_shift_imm_offset", "[{,#+/-}]|Rn:4@16|U@23|imm8:8@0": "arg_mem_R_pm_imm8at0_offset", // pc-relative constants "|imm12:12@0": "arg_label_p_12", "|imm12:12@0": "arg_label_m_12", "|imm12:12@0|U@23": "arg_label_pm_12", "|imm4H:4@8|imm4L:4@0|U@23": "arg_label_pm_4_4", // constants "#|imm12:12@0": "arg_const", "#|imm5:5@7": "arg_imm5", "#|imm5:5@7": "arg_imm5_nz", "#|imm5:5@7": "arg_imm5_32", "|imm24:24@0": "arg_label24", "#|lsb:5@7": "arg_imm5", "#|lsb:5@7|msb:5@16": "arg_lsb_width", "#|imm12:12@8|imm4:4@0": "arg_imm_12at8_4at0", "#|imm12:12@0|imm4:4@16": "arg_imm_4at16_12at0", "|imm24:24@0|H@24": "arg_label24H", "#