...

Source file src/golang.org/x/arch/arm64/arm64asm/ext_test.go

Documentation: golang.org/x/arch/arm64/arm64asm

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Support for testing against external disassembler program.
     6  // Copied and simplified from ../../arm/armasm/ext_test.go.
     7  
     8  package arm64asm
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"encoding/hex"
    14  	"encoding/json"
    15  	"flag"
    16  	"fmt"
    17  	"io"
    18  	"io/ioutil"
    19  	"log"
    20  	"math/rand"
    21  	"os"
    22  	"os/exec"
    23  	"path/filepath"
    24  	"regexp"
    25  	"strconv"
    26  	"strings"
    27  	"testing"
    28  	"time"
    29  )
    30  
// Command-line flags and switches that control the external-disassembler tests.
var (
	// dumpTest makes testExtDis print every tested encoding together with
	// the external disassembler's text and reported encoding length.
	dumpTest = flag.Bool("dump", false, "dump all encodings")
	// mismatch makes testExtDis report allowed mismatches in its log
	// instead of silently skipping them.
	mismatch = flag.Bool("mismatch", false, "log allowed mismatches")
	// longTest is not referenced in this part of the file.
	// NOTE(review): presumably consulted by the test drivers elsewhere — confirm.
	longTest = flag.Bool("long", false, "long test")
	// keep preserves the temporary input file written by writeInst and
	// makes Run log the command line it executes.
	keep     = flag.Bool("keep", false, "keep object files around")
	// debug, when set to true in the source, makes writeInst print each
	// byte sequence as it is written.
	debug    = false
)
    38  
    39  // An ExtInst represents a single decoded instruction parsed
    40  // from an external disassembler's output.
    41  type ExtInst struct {
    42  	addr uint64
    43  	enc  [4]byte
    44  	nenc int
    45  	text string
    46  }
    47  
    48  func (r ExtInst) String() string {
    49  	return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
    50  }
    51  
// An ExtDis is a connection between an external disassembler and a test.
// testExtDis creates one per run and hands it to the extdis callback.
type ExtDis struct {
	// Arch is the execution mode requested by the test.
	Arch     Mode
	// Dec carries the parsed instructions from the disassembler driver
	// to testExtDis, which receives one value per generated byte sequence.
	Dec      chan ExtInst
	// File is the temporary input file produced by writeInst.
	File     *os.File
	// Size is the number of instruction bytes written to File.
	Size     int
	// KeepFile is not referenced in this part of the file.
	// NOTE(review): presumably mirrors the -keep flag — confirm against callers.
	KeepFile bool
	// Cmd is the command started by Run; Wait waits for it to exit.
	Cmd      *exec.Cmd
}
    61  
// InstJson describes instruction fields value got from ARMv8-A Reference Manual.
// JSONCases fills a slice of these by unmarshalling inst.json; doFuzzy then
// derives Enc from Bits.
type InstJson struct {
	// Name identifies the instruction; doFuzzy uses it in diagnostics.
	Name   string
	// Bits is the "|"-separated bit layout that doFuzzy parses, with
	// fields such as "01:2" (literal bits) or "Rm:5" (named field and width).
	Bits   string
	// Arch, Syntax, Code and Alias are not read in this part of the file.
	// NOTE(review): presumably descriptive columns from the manual — confirm.
	Arch   string
	Syntax string
	Code   string
	Alias  string
	// Enc is the 32-bit encoding generated by doFuzzy. In conditional mode
	// doFuzzy stores 0xffffffff for instructions without a "cond" field and
	// JSONCases skips those entries.
	Enc    uint32
}
    72  
// A Mode is an instruction execution mode.
type Mode int

const (
	// The iota value 0 is discarded, so the zero Mode names no mode
	// and ModeARM64 has the value 1.
	_ Mode = iota
	// ModeARM64 is the only execution mode declared here.
	ModeARM64
)
    80  
    81  // Run runs the given command - the external disassembler - and returns
    82  // a buffered reader of its standard output.
    83  func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
    84  	if *keep {
    85  		log.Printf("%s\n", strings.Join(cmd, " "))
    86  	}
    87  	ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
    88  	out, err := ext.Cmd.StdoutPipe()
    89  	if err != nil {
    90  		return nil, fmt.Errorf("stdoutpipe: %v", err)
    91  	}
    92  	if err := ext.Cmd.Start(); err != nil {
    93  		return nil, fmt.Errorf("exec: %v", err)
    94  	}
    95  
    96  	b := bufio.NewReaderSize(out, 1<<20)
    97  	return b, nil
    98  }
    99  
   100  // Wait waits for the command started with Run to exit.
   101  func (ext *ExtDis) Wait() error {
   102  	return ext.Cmd.Wait()
   103  }
   104  
// testExtDis tests a set of byte sequences against an external disassembler.
// The disassembler is expected to produce the given syntax and run
// in the given architecture mode.
// The extdis function must start the external disassembler
// and then parse its output, sending the parsed instructions on ext.Dec.
// The generate function calls its argument f once for each byte sequence
// to be tested. The generate function itself will be called twice, and it must
// make the same sequence of calls to f each time.
// When a disassembly does not match the internal decoding,
// allowedMismatch determines whether this mismatch should be
// allowed, or else considered an error.
//
// Texts are only compared when syntax is "gnu" and the external text does not
// contain "unknown". The test is marked failed if any disallowed mismatch is
// found, and fails fatally if extdis returns an error.
func testExtDis(
	t *testing.T,
	syntax string,
	arch Mode,
	extdis func(ext *ExtDis) error,
	generate func(f func([]byte)),
	allowedMismatch func(text string, inst *Inst, dec ExtInst) bool,
) {
	// Note: this local shadows the package-level start constant.
	start := time.Now()
	ext := &ExtDis{
		Dec:  make(chan ExtInst),
		Arch: arch,
	}
	// errc is unbuffered: the goroutine running extdis blocks on its send
	// until the result is received at the end of this function.
	errc := make(chan error)

	// First pass: write instructions to input file for external disassembler.
	file, f, size, err := writeInst(generate)
	if err != nil {
		t.Fatal(err)
	}
	ext.Size = size
	ext.File = f
	// Close the input file on exit and delete it unless -keep was given.
	defer func() {
		f.Close()
		if !*keep {
			os.Remove(file)
		}
	}()

	// Second pass: compare disassembly against our decodings.
	var (
		totalTests  = 0
		totalSkips  = 0
		totalErrors = 0

		errors = make([]string, 0, 100) // Sampled errors, at most cap
	)
	// Run the external disassembler concurrently; it feeds ext.Dec while the
	// generate callback below consumes one decoded instruction per sequence.
	go func() {
		errc <- extdis(ext)
	}()

	generate(func(enc []byte) {
		dec, ok := <-ext.Dec
		if !ok {
			// ext.Dec was closed before every generated sequence was matched.
			t.Errorf("decoding stream ended early")
			return
		}
		// Decode the same bytes (zero-padded to 4 bytes) with our decoder.
		inst, text := disasm(syntax, pad(enc))

		totalTests++
		if *dumpTest {
			fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
		}
		if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" {
			suffix := ""
			if allowedMismatch(text, &inst, dec) {
				totalSkips++
				// Allowed mismatches are only reported when -mismatch is set.
				if !*mismatch {
					return
				}
				suffix += " (allowed mismatch)"
			}
			totalErrors++
			cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix)

			// Once the sample is full, pick a random index below the running
			// error count: an index inside the sample evicts that entry to
			// make room for the new message, any other index drops the new
			// message instead.
			if len(errors) >= cap(errors) {
				j := rand.Intn(totalErrors)
				if j >= cap(errors) {
					return
				}
				errors = append(errors[:j], errors[j+1:]...)
			}
			errors = append(errors, cmp)
		}
	})

	// With -mismatch, allowed mismatches were counted as errors above so that
	// they would be logged; remove them from the failure count again.
	if *mismatch {
		totalErrors -= totalSkips
	}

	for _, b := range errors {
		t.Log(b)
	}

	if totalErrors > 0 {
		t.Fail()
	}
	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
	t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage())
	// Collect the result of the disassembler goroutine.
	// NOTE(review): if extdis still has instructions to send on ext.Dec after
	// generate has finished, nothing receives them and this receive would
	// block — confirm that drivers never emit more than was generated.
	if err := <-errc; err != nil {
		t.Fatalf("external disassembler: %v", err)
	}

}
   210  
// start is the start address of text: the file offset at which writeInst
// begins writing the generated instruction bytes, and the base of the
// addresses it prints in debug mode.
const start = 0x8000
   213  
   214  // writeInst writes the generated byte sequences to a new file
   215  // starting at offset start. That file is intended to be the input to
   216  // the external disassembler.
   217  func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
   218  	f, err = ioutil.TempFile("", "arm64asm")
   219  	if err != nil {
   220  		return
   221  	}
   222  
   223  	file = f.Name()
   224  
   225  	f.Seek(start, io.SeekStart)
   226  	w := bufio.NewWriter(f)
   227  	defer w.Flush()
   228  	size = 0
   229  	generate(func(x []byte) {
   230  		if debug {
   231  			fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
   232  		}
   233  		w.Write(x)
   234  		w.Write(zeros[len(x):])
   235  		size += len(zeros)
   236  	})
   237  	return file, f, size, nil
   238  }
   239  
   240  var zeros = []byte{0, 0, 0, 0}
   241  
   242  // pad pads the code sequence with pops.
   243  func pad(enc []byte) []byte {
   244  	if len(enc) < 4 {
   245  		enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...)
   246  	}
   247  	return enc
   248  }
   249  
   250  // disasm returns the decoded instruction and text
   251  // for the given source bytes, using the given syntax and mode.
   252  func disasm(syntax string, src []byte) (inst Inst, text string) {
   253  	var err error
   254  	inst, err = Decode(src)
   255  	if err != nil {
   256  		text = "error: " + err.Error()
   257  		return
   258  	}
   259  	text = inst.String()
   260  	switch syntax {
   261  	case "gnu":
   262  		text = GNUSyntax(inst)
   263  	case "plan9": // [sic]
   264  		text = GoSyntax(inst, 0, nil, nil)
   265  	default:
   266  		text = "error: unknown syntax " + syntax
   267  	}
   268  	return
   269  }
   270  
   271  // decodecoverage returns a floating point number denoting the
   272  // decoder coverage.
   273  func decodeCoverage() float64 {
   274  	n := 0
   275  	for _, t := range decoderCover {
   276  		if t {
   277  			n++
   278  		}
   279  	}
   280  	return 100 * float64(1+n) / float64(1+len(decoderCover))
   281  }
   282  
   283  // Helpers for writing disassembler output parsers.
   284  
   285  // hasPrefix reports whether any of the space-separated words in the text s
   286  // begins with any of the given prefixes.
   287  func hasPrefix(s string, prefixes ...string) bool {
   288  	for _, prefix := range prefixes {
   289  		for cur_s := s; cur_s != ""; {
   290  			if strings.HasPrefix(cur_s, prefix) {
   291  				return true
   292  			}
   293  			i := strings.Index(cur_s, " ")
   294  			if i < 0 {
   295  				break
   296  			}
   297  			cur_s = cur_s[i+1:]
   298  		}
   299  	}
   300  	return false
   301  }
   302  
   303  // isHex reports whether b is a hexadecimal character (0-9a-fA-F).
   304  func isHex(b byte) bool {
   305  	return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F')
   306  }
   307  
   308  // parseHex parses the hexadecimal byte dump in hex,
   309  // appending the parsed bytes to raw and returning the updated slice.
   310  // The returned bool reports whether any invalid hex was found.
   311  // Spaces and tabs between bytes are okay but any other non-hex is not.
   312  func parseHex(hex []byte, raw []byte) ([]byte, bool) {
   313  	hex = bytes.TrimSpace(hex)
   314  	for j := 0; j < len(hex); {
   315  		for hex[j] == ' ' || hex[j] == '\t' {
   316  			j++
   317  		}
   318  		if j >= len(hex) {
   319  			break
   320  		}
   321  		if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
   322  			return nil, false
   323  		}
   324  		raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1]))
   325  		j += 2
   326  	}
   327  	return raw, true
   328  }
   329  
   330  func unhex(b byte) byte {
   331  	if '0' <= b && b <= '9' {
   332  		return b - '0'
   333  	} else if 'A' <= b && b <= 'F' {
   334  		return b - 'A' + 10
   335  	} else if 'a' <= b && b <= 'f' {
   336  		return b - 'a' + 10
   337  	}
   338  	return 0
   339  }
   340  
   341  // index is like bytes.Index(s, []byte(t)) but avoids the allocation.
   342  func index(s []byte, t string) int {
   343  	i := 0
   344  	for {
   345  		j := bytes.IndexByte(s[i:], t[0])
   346  		if j < 0 {
   347  			return -1
   348  		}
   349  		i = i + j
   350  		if i+len(t) > len(s) {
   351  			return -1
   352  		}
   353  		for k := 1; k < len(t); k++ {
   354  			if s[i+k] != t[k] {
   355  				goto nomatch
   356  			}
   357  		}
   358  		return i
   359  	nomatch:
   360  		i++
   361  	}
   362  }
   363  
   364  // fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
   365  // If s must be rewritten, it is rewritten in place.
   366  func fixSpace(s []byte) []byte {
   367  	s = bytes.TrimSpace(s)
   368  	for i := 0; i < len(s); i++ {
   369  		if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
   370  			goto Fix
   371  		}
   372  	}
   373  	return s
   374  
   375  Fix:
   376  	b := s
   377  	w := 0
   378  	for i := 0; i < len(s); i++ {
   379  		c := s[i]
   380  		if c == '\t' || c == '\n' {
   381  			c = ' '
   382  		}
   383  		if c == ' ' && w > 0 && b[w-1] == ' ' {
   384  			continue
   385  		}
   386  		b[w] = c
   387  		w++
   388  	}
   389  	if w > 0 && b[w-1] == ' ' {
   390  		w--
   391  	}
   392  	return b[:w]
   393  }
   394  
// The following regular expressions match instructions that use the relative addressing mode.
// In each pattern the first group captures the text before the target and the
// second group captures the hexadecimal digits of the target address.
// pcrel matches B instructions and BL instructions.
// pcrelr matches instructions which consist of a register argument and a label argument.
// pcrelim matches instructions which consist of a register argument, an immediate
// argument and a label argument.
// pcrelrzr and pcrelimzr match the same instructions when the register argument is the zero register.
// pcrelprfm matches PRFM instructions whose arguments consist of a register and a label.
// pcrelprfmim matches PRFM instructions whose arguments consist of an immediate and a label.
var (
	pcrel       = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:\.)?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|nv)?) 0x([0-9a-f]+)$`)
	pcrelr      = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w|s|d|q)(?:[0-9]+,)) 0x([0-9a-f]+)$`)
	pcrelrzr    = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w)zr,) 0x([0-9a-f]+)$`)
	pcrelim     = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)(?:[0-9]+,) (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)
	pcrelimzr   = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)zr, (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)
	pcrelprfm   = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:[0-9a-z]+,)) 0x([0-9a-f]+)$`)
	pcrelprfmim = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:#0x[0-9a-f]+,)) 0x([0-9a-f]+)$`)
)
   412  
// Round counts the passes over the instruction list read from the JSON file:
// JSONCases resets it to 0 and increments it each time it starts another copy
// of the list. doFuzzy adds Round to the instruction index to seed the
// pseudo-random number generator, so the same round produces the same bit
// sequence for the external disassembler and for the decoder. In conditional
// mode doFuzzy also uses the low bits of Round as the value of the "cond" field.
var Round int

// condmark is used to mark conditional instructions when need to generate and test
// conditional instructions. While it is true, doFuzzy only generates encodings
// for instructions that have a "cond" field.
var condmark bool = false
   421  
   422  // Generate instruction binary according to Json file
   423  // Encode variable field of instruction with random value
   424  func doFuzzy(inst *InstJson, Ninst int) {
   425  	var testdata uint32
   426  	var NonDigRE = regexp.MustCompile(`[\D]`)
   427  	rand.Seed(int64(Round + Ninst))
   428  	off := 0
   429  	DigBit := ""
   430  	if condmark == true && !strings.Contains(inst.Bits, "cond") {
   431  		inst.Enc = 0xffffffff
   432  	} else {
   433  		for _, f := range strings.Split(inst.Bits, "|") {
   434  			if i := strings.Index(f, ":"); i >= 0 {
   435  				// consider f contains "01:2" and "Rm:5"
   436  				DigBit = f[:i]
   437  				m := NonDigRE.FindStringSubmatch(DigBit)
   438  				if m == nil {
   439  					DigBit = strings.TrimSpace(DigBit)
   440  					s := strings.Split(DigBit, "")
   441  					for i := 0; i < len(s); i++ {
   442  						switch s[i] {
   443  						case "1", "(1)":
   444  							testdata |= 1 << uint(31-off)
   445  						}
   446  						off++
   447  					}
   448  				} else {
   449  					// DigBit is "Rn" or "imm3"
   450  					n, _ := strconv.Atoi(f[i+1:])
   451  					if DigBit == "cond" && condmark == true {
   452  						r := uint8(Round)
   453  						for i := n - 1; i >= 0; i-- {
   454  							switch (r >> uint(i)) & 1 {
   455  							case 1:
   456  								testdata |= 1 << uint(31-off)
   457  							}
   458  							off++
   459  						}
   460  					} else {
   461  						for i := 0; i < n; i++ {
   462  							r := rand.Intn(2)
   463  							switch r {
   464  							case 1:
   465  								testdata |= 1 << uint(31-off)
   466  							}
   467  							off++
   468  						}
   469  					}
   470  				}
   471  				continue
   472  			}
   473  			for _, bit := range strings.Fields(f) {
   474  				switch bit {
   475  				case "0", "(0)":
   476  					off++
   477  					continue
   478  				case "1", "(1)":
   479  					testdata |= 1 << uint(31-off)
   480  				default:
   481  					r := rand.Intn(2)
   482  					switch r {
   483  					case 1:
   484  						testdata |= 1 << uint(31-off)
   485  					}
   486  				}
   487  				off++
   488  			}
   489  		}
   490  		if off != 32 {
   491  			log.Printf("incorrect bit count for %s %s: have %d", inst.Name, inst.Bits, off)
   492  		}
   493  		inst.Enc = testdata
   494  	}
   495  }
   496  
   497  // Generators.
   498  //
   499  // The test cases are described as functions that invoke a callback repeatedly,
   500  // with a new input sequence each time. These helpers make writing those
   501  // a little easier.
   502  
   503  // JSONCases generates ARM64 instructions according to inst.json.
   504  func JSONCases(t *testing.T) func(func([]byte)) {
   505  	return func(try func([]byte)) {
   506  		data, err := ioutil.ReadFile("inst.json")
   507  		if err != nil {
   508  			t.Fatal(err)
   509  		}
   510  		var insts []InstJson
   511  		var instsN []InstJson
   512  		// Change N value to get more cases only when condmark=false.
   513  		N := 100
   514  		if condmark == true {
   515  			N = 16
   516  		}
   517  		if err := json.Unmarshal(data, &insts); err != nil {
   518  			t.Fatal(err)
   519  		}
   520  		// Append instructions to get more test cases.
   521  		for i := 0; i < N; {
   522  			for _, inst := range insts {
   523  				instsN = append(instsN, inst)
   524  			}
   525  			i++
   526  		}
   527  		Round = 0
   528  		for i := range instsN {
   529  			if i%len(insts) == 0 {
   530  				Round++
   531  			}
   532  			doFuzzy(&instsN[i], i)
   533  		}
   534  		for _, inst := range instsN {
   535  			if condmark == true && inst.Enc == 0xffffffff {
   536  				continue
   537  			}
   538  			enc := inst.Enc
   539  			try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)})
   540  		}
   541  	}
   542  }
   543  
   544  // condCases generates conditional instructions.
   545  func condCases(t *testing.T) func(func([]byte)) {
   546  	return func(try func([]byte)) {
   547  		condmark = true
   548  		JSONCases(t)(func(enc []byte) {
   549  			try(enc)
   550  		})
   551  	}
   552  }
   553  
   554  // hexCases generates the cases written in hexadecimal in the encoded string.
   555  // Spaces in 'encoded' separate entire test cases, not individual bytes.
   556  func hexCases(t *testing.T, encoded string) func(func([]byte)) {
   557  	return func(try func([]byte)) {
   558  		for _, x := range strings.Fields(encoded) {
   559  			src, err := hex.DecodeString(x)
   560  			if err != nil {
   561  				t.Errorf("parsing %q: %v", x, err)
   562  			}
   563  			try(src)
   564  		}
   565  	}
   566  }
   567  
   568  // testdataCases generates the test cases recorded in testdata/cases.txt.
   569  // It only uses the inputs; it ignores the answers recorded in that file.
   570  func testdataCases(t *testing.T, syntax string) func(func([]byte)) {
   571  	var codes [][]byte
   572  	input := filepath.Join("testdata", syntax+"cases.txt")
   573  	data, err := ioutil.ReadFile(input)
   574  	if err != nil {
   575  		t.Fatal(err)
   576  	}
   577  	for _, line := range strings.Split(string(data), "\n") {
   578  		line = strings.TrimSpace(line)
   579  		if line == "" || strings.HasPrefix(line, "#") {
   580  			continue
   581  		}
   582  		f := strings.Fields(line)[0]
   583  		i := strings.Index(f, "|")
   584  		if i < 0 {
   585  			t.Errorf("parsing %q: missing | separator", f)
   586  			continue
   587  		}
   588  		if i%2 != 0 {
   589  			t.Errorf("parsing %q: misaligned | separator", f)
   590  		}
   591  		code, err := hex.DecodeString(f[:i] + f[i+1:])
   592  		if err != nil {
   593  			t.Errorf("parsing %q: %v", f, err)
   594  			continue
   595  		}
   596  		codes = append(codes, code)
   597  	}
   598  
   599  	return func(try func([]byte)) {
   600  		for _, code := range codes {
   601  			try(code)
   602  		}
   603  	}
   604  }
   605  

View as plain text