// Copyright 2017 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Support for testing against external disassembler program. // Copied and simplified from ../../arm/armasm/ext_test.go. package arm64asm import ( "bufio" "bytes" "encoding/hex" "encoding/json" "flag" "fmt" "io" "io/ioutil" "log" "math/rand" "os" "os/exec" "path/filepath" "regexp" "strconv" "strings" "testing" "time" ) var ( dumpTest = flag.Bool("dump", false, "dump all encodings") mismatch = flag.Bool("mismatch", false, "log allowed mismatches") longTest = flag.Bool("long", false, "long test") keep = flag.Bool("keep", false, "keep object files around") debug = false ) // An ExtInst represents a single decoded instruction parsed // from an external disassembler's output. type ExtInst struct { addr uint64 enc [4]byte nenc int text string } func (r ExtInst) String() string { return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) } // An ExtDis is a connection between an external disassembler and a test. type ExtDis struct { Arch Mode Dec chan ExtInst File *os.File Size int KeepFile bool Cmd *exec.Cmd } // InstJson describes instruction fields value got from ARMv8-A Reference Manual type InstJson struct { Name string Bits string Arch string Syntax string Code string Alias string Enc uint32 } // A Mode is an instruction execution mode. type Mode int const ( _ Mode = iota ModeARM64 ) // Run runs the given command - the external disassembler - and returns // a buffered reader of its standard output. func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { if *keep { log.Printf("%s\n", strings.Join(cmd, " ")) } ext.Cmd = exec.Command(cmd[0], cmd[1:]...) out, err := ext.Cmd.StdoutPipe() if err != nil { return nil, fmt.Errorf("stdoutpipe: %v", err) } if err := ext.Cmd.Start(); err != nil { return nil, fmt.Errorf("exec: %v", err) } b := bufio.NewReaderSize(out, 1<<20) return b, nil } // Wait waits for the command started with Run to exit. func (ext *ExtDis) Wait() error { return ext.Cmd.Wait() } // testExtDis tests a set of byte sequences against an external disassembler. // The disassembler is expected to produce the given syntax and run // in the given architecture mode (16, 32, or 64-bit). // The extdis function must start the external disassembler // and then parse its output, sending the parsed instructions on ext.Dec. // The generate function calls its argument f once for each byte sequence // to be tested. The generate function itself will be called twice, and it must // make the same sequence of calls to f each time. // When a disassembly does not match the internal decoding, // allowedMismatch determines whether this mismatch should be // allowed, or else considered an error. func testExtDis( t *testing.T, syntax string, arch Mode, extdis func(ext *ExtDis) error, generate func(f func([]byte)), allowedMismatch func(text string, inst *Inst, dec ExtInst) bool, ) { start := time.Now() ext := &ExtDis{ Dec: make(chan ExtInst), Arch: arch, } errc := make(chan error) // First pass: write instructions to input file for external disassembler. file, f, size, err := writeInst(generate) if err != nil { t.Fatal(err) } ext.Size = size ext.File = f defer func() { f.Close() if !*keep { os.Remove(file) } }() // Second pass: compare disassembly against our decodings. var ( totalTests = 0 totalSkips = 0 totalErrors = 0 errors = make([]string, 0, 100) // Sampled errors, at most cap ) go func() { errc <- extdis(ext) }() generate(func(enc []byte) { dec, ok := <-ext.Dec if !ok { t.Errorf("decoding stream ended early") return } inst, text := disasm(syntax, pad(enc)) totalTests++ if *dumpTest { fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) } if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { suffix := "" if allowedMismatch(text, &inst, dec) { totalSkips++ if !*mismatch { return } suffix += " (allowed mismatch)" } totalErrors++ cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix) if len(errors) >= cap(errors) { j := rand.Intn(totalErrors) if j >= cap(errors) { return } errors = append(errors[:j], errors[j+1:]...) } errors = append(errors, cmp) } }) if *mismatch { totalErrors -= totalSkips } for _, b := range errors { t.Log(b) } if totalErrors > 0 { t.Fail() } t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage()) if err := <-errc; err != nil { t.Fatalf("external disassembler: %v", err) } } // Start address of text. const start = 0x8000 // writeInst writes the generated byte sequences to a new file // starting at offset start. That file is intended to be the input to // the external disassembler. func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { f, err = ioutil.TempFile("", "arm64asm") if err != nil { return } file = f.Name() f.Seek(start, io.SeekStart) w := bufio.NewWriter(f) defer w.Flush() size = 0 generate(func(x []byte) { if debug { fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) } w.Write(x) w.Write(zeros[len(x):]) size += len(zeros) }) return file, f, size, nil } var zeros = []byte{0, 0, 0, 0} // pad pads the code sequence with pops. func pad(enc []byte) []byte { if len(enc) < 4 { enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) } return enc } // disasm returns the decoded instruction and text // for the given source bytes, using the given syntax and mode. func disasm(syntax string, src []byte) (inst Inst, text string) { var err error inst, err = Decode(src) if err != nil { text = "error: " + err.Error() return } text = inst.String() switch syntax { case "gnu": text = GNUSyntax(inst) case "plan9": // [sic] text = GoSyntax(inst, 0, nil, nil) default: text = "error: unknown syntax " + syntax } return } // decodecoverage returns a floating point number denoting the // decoder coverage. func decodeCoverage() float64 { n := 0 for _, t := range decoderCover { if t { n++ } } return 100 * float64(1+n) / float64(1+len(decoderCover)) } // Helpers for writing disassembler output parsers. // hasPrefix reports whether any of the space-separated words in the text s // begins with any of the given prefixes. func hasPrefix(s string, prefixes ...string) bool { for _, prefix := range prefixes { for cur_s := s; cur_s != ""; { if strings.HasPrefix(cur_s, prefix) { return true } i := strings.Index(cur_s, " ") if i < 0 { break } cur_s = cur_s[i+1:] } } return false } // isHex reports whether b is a hexadecimal character (0-9a-fA-F). func isHex(b byte) bool { return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F') } // parseHex parses the hexadecimal byte dump in hex, // appending the parsed bytes to raw and returning the updated slice. // The returned bool reports whether any invalid hex was found. // Spaces and tabs between bytes are okay but any other non-hex is not. func parseHex(hex []byte, raw []byte) ([]byte, bool) { hex = bytes.TrimSpace(hex) for j := 0; j < len(hex); { for hex[j] == ' ' || hex[j] == '\t' { j++ } if j >= len(hex) { break } if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { return nil, false } raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1])) j += 2 } return raw, true } func unhex(b byte) byte { if '0' <= b && b <= '9' { return b - '0' } else if 'A' <= b && b <= 'F' { return b - 'A' + 10 } else if 'a' <= b && b <= 'f' { return b - 'a' + 10 } return 0 } // index is like bytes.Index(s, []byte(t)) but avoids the allocation. func index(s []byte, t string) int { i := 0 for { j := bytes.IndexByte(s[i:], t[0]) if j < 0 { return -1 } i = i + j if i+len(t) > len(s) { return -1 } for k := 1; k < len(t); k++ { if s[i+k] != t[k] { goto nomatch } } return i nomatch: i++ } } // fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. // If s must be rewritten, it is rewritten in place. func fixSpace(s []byte) []byte { s = bytes.TrimSpace(s) for i := 0; i < len(s); i++ { if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { goto Fix } } return s Fix: b := s w := 0 for i := 0; i < len(s); i++ { c := s[i] if c == '\t' || c == '\n' { c = ' ' } if c == ' ' && w > 0 && b[w-1] == ' ' { continue } b[w] = c w++ } if w > 0 && b[w-1] == ' ' { w-- } return b[:w] } // Fllowing regular expressions matches instructions using relative addressing mode. // pcrel matches B instructions and BL instructions. // pcrelr matches instrucions which consisted of register arguments and label arguments. // pcrelim matches instructions which consisted of register arguments, immediate // arguments and lable arguments. // pcrelrzr and prcelimzr matches instructions when register arguments is zero register. // pcrelprfm matches PRFM instructions when arguments consisted of register and lable. // pcrelprfmim matches PRFM instructions when arguments consisted of immediate and lable. var ( pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:\.)?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|nv)?) 0x([0-9a-f]+)$`) pcrelr = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w|s|d|q)(?:[0-9]+,)) 0x([0-9a-f]+)$`) pcrelrzr = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w)zr,) 0x([0-9a-f]+)$`) pcrelim = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)(?:[0-9]+,) (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`) pcrelimzr = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)zr, (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`) pcrelprfm = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:[0-9a-z]+,)) 0x([0-9a-f]+)$`) pcrelprfmim = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:#0x[0-9a-f]+,)) 0x([0-9a-f]+)$`) ) // Round is the multiple of the number of instructions that read from Json file. // Round used as seed value for pseudo-random number generator provides the same sequence // in the same round run for the external disassembler and decoder. var Round int // condmark is used to mark conditional instructions when need to generate and test // conditional instructions. var condmark bool = false // Generate instruction binary according to Json file // Encode variable field of instruction with random value func doFuzzy(inst *InstJson, Ninst int) { var testdata uint32 var NonDigRE = regexp.MustCompile(`[\D]`) rand.Seed(int64(Round + Ninst)) off := 0 DigBit := "" if condmark == true && !strings.Contains(inst.Bits, "cond") { inst.Enc = 0xffffffff } else { for _, f := range strings.Split(inst.Bits, "|") { if i := strings.Index(f, ":"); i >= 0 { // consider f contains "01:2" and "Rm:5" DigBit = f[:i] m := NonDigRE.FindStringSubmatch(DigBit) if m == nil { DigBit = strings.TrimSpace(DigBit) s := strings.Split(DigBit, "") for i := 0; i < len(s); i++ { switch s[i] { case "1", "(1)": testdata |= 1 << uint(31-off) } off++ } } else { // DigBit is "Rn" or "imm3" n, _ := strconv.Atoi(f[i+1:]) if DigBit == "cond" && condmark == true { r := uint8(Round) for i := n - 1; i >= 0; i-- { switch (r >> uint(i)) & 1 { case 1: testdata |= 1 << uint(31-off) } off++ } } else { for i := 0; i < n; i++ { r := rand.Intn(2) switch r { case 1: testdata |= 1 << uint(31-off) } off++ } } } continue } for _, bit := range strings.Fields(f) { switch bit { case "0", "(0)": off++ continue case "1", "(1)": testdata |= 1 << uint(31-off) default: r := rand.Intn(2) switch r { case 1: testdata |= 1 << uint(31-off) } } off++ } } if off != 32 { log.Printf("incorrect bit count for %s %s: have %d", inst.Name, inst.Bits, off) } inst.Enc = testdata } } // Generators. // // The test cases are described as functions that invoke a callback repeatedly, // with a new input sequence each time. These helpers make writing those // a little easier. // JSONCases generates ARM64 instructions according to inst.json. func JSONCases(t *testing.T) func(func([]byte)) { return func(try func([]byte)) { data, err := ioutil.ReadFile("inst.json") if err != nil { t.Fatal(err) } var insts []InstJson var instsN []InstJson // Change N value to get more cases only when condmark=false. N := 100 if condmark == true { N = 16 } if err := json.Unmarshal(data, &insts); err != nil { t.Fatal(err) } // Append instructions to get more test cases. for i := 0; i < N; { for _, inst := range insts { instsN = append(instsN, inst) } i++ } Round = 0 for i := range instsN { if i%len(insts) == 0 { Round++ } doFuzzy(&instsN[i], i) } for _, inst := range instsN { if condmark == true && inst.Enc == 0xffffffff { continue } enc := inst.Enc try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)}) } } } // condCases generates conditional instructions. func condCases(t *testing.T) func(func([]byte)) { return func(try func([]byte)) { condmark = true JSONCases(t)(func(enc []byte) { try(enc) }) } } // hexCases generates the cases written in hexadecimal in the encoded string. // Spaces in 'encoded' separate entire test cases, not individual bytes. func hexCases(t *testing.T, encoded string) func(func([]byte)) { return func(try func([]byte)) { for _, x := range strings.Fields(encoded) { src, err := hex.DecodeString(x) if err != nil { t.Errorf("parsing %q: %v", x, err) } try(src) } } } // testdataCases generates the test cases recorded in testdata/cases.txt. // It only uses the inputs; it ignores the answers recorded in that file. func testdataCases(t *testing.T, syntax string) func(func([]byte)) { var codes [][]byte input := filepath.Join("testdata", syntax+"cases.txt") data, err := ioutil.ReadFile(input) if err != nil { t.Fatal(err) } for _, line := range strings.Split(string(data), "\n") { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue } f := strings.Fields(line)[0] i := strings.Index(f, "|") if i < 0 { t.Errorf("parsing %q: missing | separator", f) continue } if i%2 != 0 { t.Errorf("parsing %q: misaligned | separator", f) } code, err := hex.DecodeString(f[:i] + f[i+1:]) if err != nil { t.Errorf("parsing %q: %v", f, err) continue } codes = append(codes, code) } return func(try func([]byte)) { for _, code := range codes { try(code) } } }