...

Source file src/github.com/chenzhuoyu/iasm/x86_64/assembler.go

Documentation: github.com/chenzhuoyu/iasm/x86_64

     1  package x86_64
     2  
     3  import (
     4      `bytes`
     5      `errors`
     6      `fmt`
     7      `math`
     8      `strconv`
     9      `strings`
    10      `unicode`
    11  
    12      `github.com/chenzhuoyu/iasm/expr`
    13  )
    14  
    15  type (
    16      _TokenKind   int
    17      _Punctuation int
    18  )
    19  
    20  const (
    21      _T_end _TokenKind = iota + 1
    22      _T_int
    23      _T_name
    24      _T_punc
    25      _T_space
    26  )
    27  
    28  const (
    29      _P_plus _Punctuation = iota + 1
    30      _P_minus
    31      _P_star
    32      _P_slash
    33      _P_percent
    34      _P_amp
    35      _P_bar
    36      _P_caret
    37      _P_shl
    38      _P_shr
    39      _P_tilde
    40      _P_lbrk
    41      _P_rbrk
    42      _P_dot
    43      _P_comma
    44      _P_colon
    45      _P_dollar
    46      _P_hash
    47  )
    48  
    49  var _PUNC_NAME = map[_Punctuation]string {
    50      _P_plus    : "+",
    51      _P_minus   : "-",
    52      _P_star    : "*",
    53      _P_slash   : "/",
    54      _P_percent : "%",
    55      _P_amp     : "&",
    56      _P_bar     : "|",
    57      _P_caret   : "^",
    58      _P_shl     : "<<",
    59      _P_shr     : ">>",
    60      _P_tilde   : "~",
    61      _P_lbrk    : "(",
    62      _P_rbrk    : ")",
    63      _P_dot     : ".",
    64      _P_comma   : ",",
    65      _P_colon   : ":",
    66      _P_dollar  : "$",
    67      _P_hash    : "#",
    68  }
    69  
    70  func (self _Punctuation) String() string {
    71      if v, ok := _PUNC_NAME[self]; ok {
    72          return v
    73      } else {
    74          return fmt.Sprintf("_Punctuation(%d)", self)
    75      }
    76  }
    77  
    78  type _Token struct {
    79      pos int
    80      end int
    81      u64 uint64
    82      str string
    83      tag _TokenKind
    84  }
    85  
    86  func (self *_Token) punc() _Punctuation {
    87      return _Punctuation(self.u64)
    88  }
    89  
    90  func (self *_Token) String() string {
    91      switch self.tag {
    92          case _T_end   : return "<END>"
    93          case _T_int   : return fmt.Sprintf("<INT %d>", self.u64)
    94          case _T_punc  : return fmt.Sprintf("<PUNC %s>", _Punctuation(self.u64))
    95          case _T_name  : return fmt.Sprintf("<NAME %s>", strconv.QuoteToASCII(self.str))
    96          case _T_space : return "<SPACE>"
    97          default       : return fmt.Sprintf("<UNK:%d %d %s>", self.tag, self.u64, strconv.QuoteToASCII(self.str))
    98      }
    99  }
   100  
   101  func tokenEnd(p int, end int) _Token {
   102      return _Token {
   103          pos: p,
   104          end: end,
   105          tag: _T_end,
   106      }
   107  }
   108  
   109  func tokenInt(p int, val uint64) _Token {
   110      return _Token {
   111          pos: p,
   112          u64: val,
   113          tag: _T_int,
   114      }
   115  }
   116  
   117  func tokenName(p int, name string) _Token {
   118      return _Token {
   119          pos: p,
   120          str: name,
   121          tag: _T_name,
   122      }
   123  }
   124  
   125  func tokenPunc(p int, punc _Punctuation) _Token {
   126      return _Token {
   127          pos: p,
   128          tag: _T_punc,
   129          u64: uint64(punc),
   130      }
   131  }
   132  
   133  func tokenSpace(p int, end int) _Token {
   134      return _Token {
   135          pos: p,
   136          end: end,
   137          tag: _T_space,
   138      }
   139  }
   140  
   141  // SyntaxError represents an error in the assembly syntax.
   142  type SyntaxError struct {
   143      Pos    int
   144      Row    int
   145      Src    []rune
   146      Reason string
   147  }
   148  
   149  // Error implements the error interface.
   150  func (self *SyntaxError) Error() string {
   151      if self.Pos < 0 {
   152          return fmt.Sprintf("%s at line %d", self.Reason, self.Row)
   153      } else {
   154          return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1)
   155      }
   156  }
   157  
   158  type _Tokenizer struct {
   159      pos int
   160      row int
   161      src []rune
   162  }
   163  
   164  func (self *_Tokenizer) ch() rune {
   165      return self.src[self.pos]
   166  }
   167  
   168  func (self *_Tokenizer) eof() bool {
   169      return self.pos >= len(self.src)
   170  }
   171  
   172  func (self *_Tokenizer) rch() (ret rune) {
   173      ret, self.pos = self.src[self.pos], self.pos + 1
   174      return
   175  }
   176  
   177  func (self *_Tokenizer) err(pos int, msg string) *SyntaxError {
   178      return &SyntaxError {
   179          Pos    : pos,
   180          Row    : self.row,
   181          Src    : self.src,
   182          Reason : msg,
   183      }
   184  }
   185  
   186  type _TrimState int
   187  
   188  const (
   189      _TS_normal _TrimState = iota
   190      _TS_slcomm
   191      _TS_hscomm
   192      _TS_string
   193      _TS_escape
   194      _TS_accept
   195      _TS_nolast
   196  )
   197  
   198  func (self *_Tokenizer) init(src string) {
   199      var i int
   200      var ch rune
   201      var st _TrimState
   202  
   203      /* set the source */
   204      self.pos = 0
   205      self.src = []rune(src)
   206  
   207      /* remove commends, including "//" and "##" */
   208      loop: for i, ch = range self.src {
   209          switch {
   210              case st == _TS_normal && ch == '/'  : st = _TS_slcomm
   211              case st == _TS_normal && ch == '"'  : st = _TS_string
   212              case st == _TS_normal && ch == ';'  : st = _TS_accept; break loop
   213              case st == _TS_normal && ch == '#'  : st = _TS_hscomm
   214              case st == _TS_slcomm && ch == '/'  : st = _TS_nolast; break loop
   215              case st == _TS_slcomm               : st = _TS_normal
   216              case st == _TS_hscomm && ch == '#'  : st = _TS_nolast; break loop
   217              case st == _TS_hscomm               : st = _TS_normal
   218              case st == _TS_string && ch == '"'  : st = _TS_normal
   219              case st == _TS_string && ch == '\\' : st = _TS_escape
   220              case st == _TS_escape               : st = _TS_string
   221          }
   222      }
   223  
   224      /* check for errors */
   225      switch st {
   226          case _TS_accept: self.src = self.src[:i]
   227          case _TS_nolast: self.src = self.src[:i - 1]
   228          case _TS_string: panic(self.err(i, "string is not terminated"))
   229          case _TS_escape: panic(self.err(i, "escape sequence is not terminated"))
   230      }
   231  }
   232  
   233  func (self *_Tokenizer) skip(check func(v rune) bool) {
   234      for !self.eof() && check(self.ch()) {
   235          self.pos++
   236      }
   237  }
   238  
   239  func (self *_Tokenizer) find(pos int, check func(v rune) bool) string {
   240      self.skip(check)
   241      return string(self.src[pos:self.pos])
   242  }
   243  
   244  func (self *_Tokenizer) chrv(p int) _Token {
   245      var err error
   246      var val uint64
   247  
   248      /* starting and ending position */
   249      p0 := p + 1
   250      p1 := p0 + 1
   251  
   252      /* find the end of the literal */
   253      for p1 < len(self.src) && self.src[p1] != '\'' {
   254          if p1++; self.src[p1 - 1] == '\\' {
   255              p1++
   256          }
   257      }
   258  
   259      /* empty literal */
   260      if p1 == p0 {
   261          panic(self.err(p1, "empty character constant"))
   262      }
   263  
   264      /* check for EOF */
   265      if p1 == len(self.src) {
   266          panic(self.err(p1, "unexpected EOF when scanning literals"))
   267      }
   268  
   269      /* parse the literal */
   270      if val, err = literal64(string(self.src[p0:p1])); err != nil {
   271          panic(self.err(p0, "cannot parse literal: " + err.Error()))
   272      }
   273  
   274      /* skip the closing '\'' */
   275      self.pos = p1 + 1
   276      return tokenInt(p, val)
   277  }
   278  
   279  func (self *_Tokenizer) numv(p int) _Token {
   280      if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil {
   281          panic(self.err(p, "invalid immediate value: " + err.Error()))
   282      } else {
   283          return tokenInt(p, val)
   284      }
   285  }
   286  
   287  func (self *_Tokenizer) defv(p int, cc rune) _Token {
   288      if isdigit(cc) {
   289          return self.numv(p)
   290      } else if isident0(cc) {
   291          return tokenName(p, self.find(p, isident))
   292      } else {
   293          panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc)))
   294      }
   295  }
   296  
   297  func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token {
   298      if self.eof() {
   299          panic(self.err(self.pos, "unexpected EOF when scanning operators"))
   300      } else if c := self.rch(); c != cc {
   301          panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c)))
   302      } else {
   303          return tokenPunc(p, pp)
   304      }
   305  }
   306  
   307  func (self *_Tokenizer) read() _Token {
   308      var p int
   309      var c rune
   310      var t _Token
   311  
   312      /* check for EOF */
   313      if self.eof() {
   314          return tokenEnd(self.pos, self.pos)
   315      }
   316  
   317      /* skip spaces as needed */
   318      if p = self.pos; unicode.IsSpace(self.src[p]) {
   319          self.skip(unicode.IsSpace)
   320          return tokenSpace(p, self.pos)
   321      }
   322  
   323      /* check for line comments */
   324      if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' {
   325          self.pos = len(self.src)
   326          return tokenEnd(p, self.pos)
   327      }
   328  
   329      /* read the next character */
   330      p = self.pos
   331      c = self.rch()
   332  
   333      /* parse the next character */
   334      switch c {
   335          case '+'  : t = tokenPunc(p, _P_plus)
   336          case '-'  : t = tokenPunc(p, _P_minus)
   337          case '*'  : t = tokenPunc(p, _P_star)
   338          case '/'  : t = tokenPunc(p, _P_slash)
   339          case '%'  : t = tokenPunc(p, _P_percent)
   340          case '&'  : t = tokenPunc(p, _P_amp)
   341          case '|'  : t = tokenPunc(p, _P_bar)
   342          case '^'  : t = tokenPunc(p, _P_caret)
   343          case '<'  : t = self.rep2(p, _P_shl, '<')
   344          case '>'  : t = self.rep2(p, _P_shr, '>')
   345          case '~'  : t = tokenPunc(p, _P_tilde)
   346          case '('  : t = tokenPunc(p, _P_lbrk)
   347          case ')'  : t = tokenPunc(p, _P_rbrk)
   348          case '.'  : t = tokenPunc(p, _P_dot)
   349          case ','  : t = tokenPunc(p, _P_comma)
   350          case ':'  : t = tokenPunc(p, _P_colon)
   351          case '$'  : t = tokenPunc(p, _P_dollar)
   352          case '#'  : t = tokenPunc(p, _P_hash)
   353          case '\'' : t = self.chrv(p)
   354          default   : t = self.defv(p, c)
   355      }
   356  
   357      /* mark the end of token */
   358      t.end = self.pos
   359      return t
   360  }
   361  
   362  func (self *_Tokenizer) next() (tk _Token) {
   363      for {
   364          if tk = self.read(); tk.tag != _T_space {
   365              return
   366          }
   367      }
   368  }
   369  
   370  // LabelKind indicates the type of label reference.
   371  type LabelKind int
   372  
   373  // OperandKind indicates the type of the operand.
   374  type OperandKind int
   375  
   376  // InstructionPrefix indicates the prefix bytes prepended to the instruction.
   377  type InstructionPrefix byte
   378  
   379  const (
   380      // OpImm means the operand is an immediate value.
   381      OpImm OperandKind = 1 << iota
   382  
   383      // OpReg means the operand is a register.
   384      OpReg
   385  
   386      // OpMem means the operand is a memory address.
   387      OpMem
   388  
   389      // OpLabel means the operand is a label, specifically for
   390      // branch instructions.
   391      OpLabel
   392  )
   393  
   394  const (
   395      // Declaration means the label is a declaration.
   396      Declaration LabelKind = iota + 1
   397  
   398      // BranchTarget means the label should be treated as a branch target.
   399      BranchTarget
   400  
   401      // RelativeAddress means the label should be treated as a reference to
   402      // the code section (e.g. RIP-relative addressing).
   403      RelativeAddress
   404  )
   405  
   406  const (
   407      // PrefixLock causes the processor's LOCK# signal to be asserted during execution of
   408      // the accompanying instruction (turns the instruction into an atomic instruction).
   409      // In a multiprocessor environment, the LOCK# signal insures that the processor
   410      // has exclusive use of any shared memory while the signal is asserted.
   411      PrefixLock InstructionPrefix = iota
   412  
   413      // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment).
   414      PrefixSegmentCS
   415  
   416      // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment),
   417      // this is the default section for most instructions if not specified.
   418      PrefixSegmentDS
   419  
   420      // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment).
   421      PrefixSegmentES
   422  
   423      // PrefixSegmentFS overrides the memory operation of this instruction to FS.
   424      PrefixSegmentFS
   425  
   426      // PrefixSegmentGS overrides the memory operation of this instruction to GS.
   427      PrefixSegmentGS
   428  
   429      // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment).
   430      PrefixSegmentSS
   431  )
   432  
// ParsedLabel represents a label in the source, either a jump target or
// an RIP-relative addressing.
type ParsedLabel struct {
    // Name is the identifier of the label as written in the source.
    Name string

    // Kind tells whether the label is declared or referenced, and how.
    Kind LabelKind
}

// ParsedOperand represents an operand of an instruction in the source.
// Op selects which one of the remaining fields is meaningful, the parser
// leaves the others at their zero values.
type ParsedOperand struct {
    // Op is the kind of the operand.
    Op     OperandKind

    // Imm is the immediate value, set for OpImm operands.
    Imm    int64

    // Reg is the register, set for OpReg operands.
    Reg    Register

    // Label is the referenced label, set for OpLabel operands.
    Label  ParsedLabel

    // Memory is the memory address, set for OpMem operands.
    Memory MemoryAddress
}

// ParsedInstruction represents an instruction in the source.
type ParsedInstruction struct {
    // Mnemonic is the name of the instruction, the parser stores it in lower case.
    Mnemonic string

    // Operands are the operands in the order they appear in the source.
    Operands []ParsedOperand

    // Prefixes are the prefixes (LOCK and segment overrides) found on the line.
    Prefixes []InstructionPrefix
}
   455  
   456  func (self *ParsedInstruction) imm(v int64) {
   457      self.Operands = append(self.Operands, ParsedOperand {
   458          Op  : OpImm,
   459          Imm : v,
   460      })
   461  }
   462  
   463  func (self *ParsedInstruction) reg(v Register) {
   464      self.Operands = append(self.Operands, ParsedOperand {
   465          Op  : OpReg,
   466          Reg : v,
   467      })
   468  }
   469  
   470  func (self *ParsedInstruction) mem(v MemoryAddress) {
   471      self.Operands = append(self.Operands, ParsedOperand {
   472          Op     : OpMem,
   473          Memory : v,
   474      })
   475  }
   476  
   477  func (self *ParsedInstruction) target(v string) {
   478      self.Operands = append(self.Operands, ParsedOperand {
   479          Op    : OpLabel,
   480          Label : ParsedLabel {
   481              Name: v,
   482              Kind: BranchTarget,
   483          },
   484      })
   485  }
   486  
   487  func (self *ParsedInstruction) reference(v string) {
   488      self.Operands = append(self.Operands, ParsedOperand {
   489          Op    : OpLabel,
   490          Label : ParsedLabel {
   491              Name: v,
   492              Kind: RelativeAddress,
   493          },
   494      })
   495  }
   496  
// LineKind indicates the type of ParsedLine.
type LineKind int

const (
    // LineLabel means the ParsedLine is a label.
    LineLabel LineKind = iota + 1

    // LineInstr means the ParsedLine is an instruction.
    LineInstr

    // LineCommand means the ParsedLine is a ParsedCommand.
    LineCommand
)

// ParsedLine represents a parsed source line. Kind selects which one of
// Label, Command and Instruction is meaningful, the parser leaves the
// others at their zero values.
type ParsedLine struct {
    // Row is the line number of the line.
    Row         int

    // Src is the source of the line, after the tokenizer has removed comments.
    Src         []rune

    // Kind is the type of the line.
    Kind        LineKind

    // Label is the declared label, set for LineLabel lines.
    Label       ParsedLabel

    // Command is the directive, set for LineCommand lines.
    Command     ParsedCommand

    // Instruction is the instruction, set for LineInstr lines.
    Instruction ParsedInstruction
}

// ParsedCommand represents a parsed assembly directive command.
type ParsedCommand struct {
    // Cmd is the name of the directive, without the leading dot.
    Cmd  string

    // Args are the arguments of the directive in source order.
    Args []ParsedCommandArg
}

// ParsedCommandArg represents an argument of a ParsedCommand.
type ParsedCommandArg struct {
    // Value is the text of the argument.
    Value    string

    // IsString presumably marks arguments written as string literals;
    // the code that sets it is not part of this section -- TODO confirm.
    IsString bool
}
   532  
// Parser parses the source, and generates a sequence of ParsedInstruction's.
type Parser struct {
    lex _Tokenizer      // tokenizer holding the line being parsed
    exp expr.Parser     // parser for the parenthesized expressions of operands
}

const (
    // rip is a pseudo register that stands for %rip inside of the parser,
    // regx returns it when the register name is "rip".
    rip Register64 = 0xff
)

// _RegBranch is the set of branch mnemonics "jmp", "jmpq", "call" and "callq".
// NOTE(review): it is not referenced in this part of the file, presumably it
// is consulted for operands that are prefixed with '*' -- confirm against usage.
var _RegBranch = map[string]bool {
    "jmp"   : true,
    "jmpq"  : true,
    "call"  : true,
    "callq" : true,
}

// _SegPrefix maps lower-case segment register names to the matching
// segment override prefixes.
var _SegPrefix = map[string]InstructionPrefix {
    "cs": PrefixSegmentCS,
    "ds": PrefixSegmentDS,
    "es": PrefixSegmentES,
    "fs": PrefixSegmentFS,
    "gs": PrefixSegmentGS,
    "ss": PrefixSegmentSS,
}
   558  
   559  func (self *Parser) i32(tk _Token, v int64) int32 {
   560      if v >= math.MinInt32 && v <= math.MaxUint32 {
   561          return int32(v)
   562      } else {
   563          panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out ouf range: %d", v)))
   564      }
   565  }
   566  
   567  func (self *Parser) err(pos int, msg string) *SyntaxError {
   568      return &SyntaxError {
   569          Pos    : pos,
   570          Row    : self.lex.row,
   571          Src    : self.lex.src,
   572          Reason : msg,
   573      }
   574  }
   575  
   576  func (self *Parser) negv() int64 {
   577      tk := self.lex.read()
   578      tt := tk.tag
   579  
   580      /* must be an integer */
   581      if tt != _T_int {
   582          panic(self.err(tk.pos, "integer expected after '-'"))
   583      } else {
   584          return -int64(tk.u64)
   585      }
   586  }
   587  
   588  func (self *Parser) eval(p int) (r int64) {
   589      var e error
   590      var v *expr.Expr
   591  
   592      /* searching start */
   593      n := 1
   594      q := p + 1
   595  
   596      /* find the end of expression */
   597      for n > 0 && q < len(self.lex.src) {
   598          switch self.lex.src[q] {
   599              case '(' : q++; n++
   600              case ')' : q++; n--
   601              default  : q++
   602          }
   603      }
   604  
   605      /* check for EOF */
   606      if n != 0 {
   607          panic(self.err(q, "unexpected EOF when parsing expressions"))
   608      }
   609  
   610      /* evaluate the expression */
   611      if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil {
   612          panic(self.err(p, "cannot evaluate expression: " + e.Error()))
   613      }
   614  
   615      /* evaluate the expression */
   616      if r, e = v.Evaluate(); e != nil {
   617          panic(self.err(p, "cannot evaluate expression: " + e.Error()))
   618      }
   619  
   620      /* skip the last ')' */
   621      v.Free()
   622      self.lex.pos = q
   623      return
   624  }
   625  
   626  func (self *Parser) relx(tk _Token) {
   627      if tk.tag != _T_punc || tk.punc() != _P_lbrk {
   628          panic(self.err(tk.pos, "'(' expected for RIP-relative addressing"))
   629      } else if tk = self.lex.next(); self.regx(tk) != rip {
   630          panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register"))
   631      } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk {
   632          panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
   633      }
   634  }
   635  
   636  func (self *Parser) immx(tk _Token) int64 {
   637      if tk.tag != _T_punc || tk.punc() != _P_dollar {
   638          panic(self.err(tk.pos, "'$' expected for registers"))
   639      } else if tk = self.lex.read(); tk.tag == _T_int {
   640          return int64(tk.u64)
   641      } else if tk.tag == _T_punc && tk.punc() == _P_lbrk {
   642          return self.eval(self.lex.pos)
   643      } else if tk.tag == _T_punc && tk.punc() == _P_minus {
   644          return self.negv()
   645      } else {
   646          panic(self.err(tk.pos, "immediate value expected"))
   647      }
   648  }
   649  
   650  func (self *Parser) regx(tk _Token) Register {
   651      if tk.tag != _T_punc || tk.punc() != _P_percent {
   652          panic(self.err(tk.pos, "'%' expected for registers"))
   653      } else if tk = self.lex.read(); tk.tag != _T_name {
   654          panic(self.err(tk.pos, "register name expected"))
   655      } else if tk.str == "rip" {
   656          return rip
   657      } else if reg, ok := Registers[tk.str]; ok {
   658          return reg
   659      } else {
   660          panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str)))
   661      }
   662  }
   663  
   664  func (self *Parser) regv(tk _Token) Register {
   665      if reg := self.regx(tk); reg == rip {
   666          panic(self.err(tk.pos, "%rip is not accessable as a dedicated register"))
   667      } else {
   668          return reg
   669      }
   670  }
   671  
   672  func (self *Parser) disp(vv int32) MemoryAddress {
   673      switch tk := self.lex.next(); tk.tag {
   674          case _T_end  : return MemoryAddress { Displacement: vv }
   675          case _T_punc : return self.relm(tk, vv)
   676          default      : panic(self.err(tk.pos, "',' or '(' expected"))
   677      }
   678  }
   679  
   680  func (self *Parser) relm(tv _Token, disp int32) MemoryAddress {
   681      var tk _Token
   682      var tt _TokenKind
   683  
   684      /* check for absolute addressing */
   685      if tv.punc() == _P_comma {
   686          self.lex.pos--
   687          return MemoryAddress { Displacement: disp }
   688      }
   689  
   690      /* must be '(' now */
   691      if tv.punc() != _P_lbrk {
   692          panic(self.err(tv.pos, "',' or '(' expected"))
   693      }
   694  
   695      /* read the next token */
   696      tk = self.lex.next()
   697      tt = tk.tag
   698  
   699      /* must be a punctuation */
   700      if tt != _T_punc {
   701          panic(self.err(tk.pos, "'%' or ',' expected"))
   702      }
   703  
   704      /* check for base */
   705      switch tk.punc() {
   706          case _P_percent : return self.base(tk, disp)
   707          case _P_comma   : return self.index(nil, disp)
   708          default         : panic(self.err(tk.pos, "'%' or ',' expected"))
   709      }
   710  }
   711  
   712  func (self *Parser) base(tk _Token, disp int32) MemoryAddress {
   713      rr := self.regx(tk)
   714      nk := self.lex.next()
   715  
   716      /* check for register indirection or base-index addressing */
   717      if !isReg64(rr) {
   718          panic(self.err(tk.pos, "not a valid base register"))
   719      } else if nk.tag != _T_punc {
   720          panic(self.err(nk.pos, "',' or ')' expected"))
   721      } else if nk.punc() == _P_comma {
   722          return self.index(rr, disp)
   723      } else if nk.punc() == _P_rbrk {
   724          return MemoryAddress { Base: rr, Displacement: disp }
   725      } else {
   726          panic(self.err(nk.pos, "',' or ')' expected"))
   727      }
   728  }
   729  
   730  func (self *Parser) index(base Register, disp int32) MemoryAddress {
   731      tk := self.lex.next()
   732      rr := self.regx(tk)
   733      nk := self.lex.next()
   734  
   735      /* check for scaled indexing */
   736      if base == rip {
   737          panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
   738      } else if !isIndexable(rr) {
   739          panic(self.err(tk.pos, "not a valid index register"))
   740      } else if nk.tag != _T_punc {
   741          panic(self.err(nk.pos, "',' or ')' expected"))
   742      } else if nk.punc() == _P_comma {
   743          return self.scale(base, rr, disp)
   744      } else if nk.punc() == _P_rbrk {
   745          return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp }
   746      } else {
   747          panic(self.err(nk.pos, "',' or ')' expected"))
   748      }
   749  }
   750  
   751  func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress {
   752      tk := self.lex.next()
   753      tt := tk.tag
   754      tv := tk.u64
   755  
   756      /* must be an integer */
   757      if tt != _T_int {
   758          panic(self.err(tk.pos, "integer expected"))
   759      }
   760  
   761      /* scale can only be 1, 2, 4 or 8 */
   762      if tv == 0 || (_Scales & (1 << tv)) == 0 {
   763          panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8"))
   764      }
   765  
   766      /* read next token */
   767      tk = self.lex.next()
   768      tt = tk.tag
   769  
   770      /* check for the closing ')' */
   771      if tt != _T_punc || tk.punc() != _P_rbrk {
   772          panic(self.err(tk.pos, "')' expected"))
   773      }
   774  
   775      /* construct the memory address */
   776      return MemoryAddress {
   777          Base         : base,
   778          Index        : index,
   779          Scale        : uint8(tv),
   780          Displacement : disp,
   781      }
   782  }
   783  
   784  func (self *Parser) cmds() *ParsedLine {
   785      cmd := ""
   786      pos := self.lex.pos
   787      buf := []ParsedCommandArg(nil)
   788  
   789      /* find the end of command */
   790      for p := pos; pos < len(self.lex.src); pos++ {
   791          if unicode.IsSpace(self.lex.src[pos]) {
   792              cmd = string(self.lex.src[p:pos])
   793              break
   794          }
   795      }
   796  
   797      /* parse the arguments */
   798      loop: for {
   799          switch self.next(&pos) {
   800              case 0   : break loop
   801              case '#' : break loop
   802              case '"' : pos = self.strings(&buf, pos)
   803              default  : pos = self.expressions(&buf, pos)
   804          }
   805      }
   806  
   807      /* construct the line */
   808      return &ParsedLine {
   809          Row     : self.lex.row,
   810          Src     : self.lex.src,
   811          Kind    : LineCommand,
   812          Command : ParsedCommand {
   813              Cmd  : cmd,
   814              Args : buf,
   815          },
   816      }
   817  }
   818  
   819  func (self *Parser) feed(line string) *ParsedLine {
   820      ff := true
   821      rr := false
   822      lk := false
   823  
   824      /* reset the lexer */
   825      self.lex.row++
   826      self.lex.init(line)
   827  
   828      /* parse the first token */
   829      tk := self.lex.next()
   830      tt := tk.tag
   831  
   832      /* it is a directive if it starts with a dot */
   833      if tk.tag == _T_punc && tk.punc() == _P_dot {
   834          return self.cmds()
   835      }
   836  
   837      /* otherwise it could be labels or instructions */
   838      if tt != _T_name {
   839          panic(self.err(tk.pos, "identifier expected"))
   840      }
   841  
   842      /* peek the next token */
   843      lex := self.lex
   844      tkx := lex.next()
   845  
   846      /* check for labels */
   847      if tkx.tag == _T_punc && tkx.punc() == _P_colon {
   848          tkx = lex.next()
   849          ttx := tkx.tag
   850  
   851          /* the line must end here */
   852          if ttx != _T_end {
   853              panic(self.err(tkx.pos, "garbage after label definition"))
   854          }
   855  
   856          /* construct the label */
   857          return &ParsedLine {
   858              Row   : self.lex.row,
   859              Src   : self.lex.src,
   860              Kind  : LineLabel,
   861              Label : ParsedLabel {
   862                  Kind: Declaration,
   863                  Name: tk.str,
   864              },
   865          }
   866      }
   867  
   868      /* special case for the "lock" prefix */
   869      if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" {
   870          lk = true
   871          tk = self.lex.next()
   872  
   873          /* must be an instruction */
   874          if tk.tag != _T_name {
   875              panic(self.err(tk.pos, "identifier expected"))
   876          }
   877      }
   878  
   879      /* set the line kind and mnemonic */
   880      ret := &ParsedLine {
   881          Row         : self.lex.row,
   882          Src         : self.lex.src,
   883          Kind        : LineInstr,
   884          Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) },
   885      }
   886  
   887      /* check for LOCK prefix */
   888      if lk {
   889          ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock)
   890      }
   891  
   892      /* parse all the operands */
   893      for {
   894          tk = self.lex.next()
   895          tt = tk.tag
   896  
   897          /* check for end of line */
   898          if tt == _T_end {
   899              break
   900          }
   901  
   902          /* expect a comma if not the first operand */
   903          if !ff {
   904              if tt == _T_punc && tk.punc() == _P_comma {
   905                  tk = self.lex.next()
   906              } else {
   907                  panic(self.err(tk.pos, "',' expected"))
   908              }
   909          }
   910  
   911          /* not the first operand anymore */
   912          ff = false
   913          tt = tk.tag
   914  
   915          /* encountered an integer, must be a SIB memory address */
   916          if tt == _T_int {
   917              ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
   918              continue
   919          }
   920  
   921          /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */
   922          if tt == _T_name {
   923              ts := tk.str
   924              tp := self.lex.pos
   925  
   926              /* if the next token is EOF or a comma, it's a jumpt target */
   927              if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) {
   928                  self.lex.pos = tp
   929                  ret.Instruction.target(ts)
   930                  continue
   931              }
   932  
   933              /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */
   934              if tk.tag != _T_punc || tk.punc() != _P_colon {
   935                  self.relx(tk)
   936                  ret.Instruction.reference(ts)
   937                  continue
   938              }
   939  
   940              /* lookup segment prefixes */
   941              if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok {
   942                  panic(self.err(tk.pos, "invalid segment name"))
   943              } else {
   944                  ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p)
   945              }
   946  
   947              /* read the next token */
   948              tk = self.lex.next()
   949              tt = tk.tag
   950  
   951              /* encountered an integer, must be a SIB memory address */
   952              if tt == _T_int {
   953                  ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
   954                  continue
   955              }
   956          }
   957  
   958          /* certain instructions may have a "*" before operands */
   959          if tt == _T_punc && tk.punc() == _P_star {
   960              tk = self.lex.next()
   961              tt = tk.tag
   962              rr = true
   963          }
   964  
   965          /* ... otherwise it must be a punctuation */
   966          if tt != _T_punc {
   967              panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
   968          }
   969  
   970          /* check the operator */
   971          switch tk.punc() {
   972              case _P_lbrk    : break
   973              case _P_minus   : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue
   974              case _P_dollar  : ret.Instruction.imm(self.immx(tk))                        ; continue
   975              case _P_percent : ret.Instruction.reg(self.regv(tk))                        ; continue
   976              default         : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
   977          }
   978  
   979          /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)`
   980           * read one more token to confirm */
   981          tk = self.lex.next()
   982          tt = tk.tag
   983  
   984          /* the next token is '%', it's a memory address,
   985           * or ',' if it's a memory address without base,
   986           * otherwise it must be in `(expr)(SIB)` form */
   987          if tk.tag == _T_punc && tk.punc() == _P_percent {
   988              ret.Instruction.mem(self.base(tk, 0))
   989          } else if tk.tag == _T_punc && tk.punc() == _P_comma {
   990              ret.Instruction.mem(self.index(nil, 0))
   991          } else {
   992              ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos))))
   993          }
   994      }
   995  
   996      /* check "jmp" and "call" instructions */
   997      if !_RegBranch[ret.Instruction.Mnemonic] {
   998          return ret
   999      } else if len(ret.Instruction.Operands) != 1 {
  1000          panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exact 1 argument`, ret.Instruction.Mnemonic)))
  1001      } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel {
  1002          panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic)))
  1003      } else {
  1004          return ret
  1005      }
  1006  }
  1007  
  1008  func (self *Parser) next(p *int) rune {
  1009      for {
  1010          if *p >= len(self.lex.src) {
  1011              return 0
  1012          } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) {
  1013              return cc
  1014          } else {
  1015              *p++
  1016          }
  1017      }
  1018  }
  1019  
  1020  func (self *Parser) delim(p int) int {
  1021      if cc := self.next(&p); cc == 0 {
  1022          return p
  1023      } else if cc == ',' {
  1024          return p + 1
  1025      } else {
  1026          panic(self.err(p, "',' expected"))
  1027      }
  1028  }
  1029  
  1030  func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int {
  1031      var i int
  1032      var e error
  1033      var v string
  1034  
  1035      /* find the end of string */
  1036      for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ {
  1037          if self.lex.src[i] == '\\' {
  1038              i++
  1039          }
  1040      }
  1041  
  1042      /* check for EOF */
  1043      if i == len(self.lex.src) {
  1044          panic(self.err(i, "unexpected EOF when scanning strings"))
  1045      }
  1046  
  1047      /* unquote the string */
  1048      if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil {
  1049          panic(self.err(p, "invalid string: " + e.Error()))
  1050      }
  1051  
  1052      /* add the argument to buffer */
  1053      *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true })
  1054      return self.delim(i + 1)
  1055  }
  1056  
  1057  func (self *Parser) directives(line string) {
  1058      self.lex.row++
  1059      self.lex.init(line)
  1060  
  1061      /* parse the first token */
  1062      tk := self.lex.next()
  1063      tt := tk.tag
  1064  
  1065      /* check for EOF */
  1066      if tt == _T_end {
  1067          return
  1068      }
  1069  
  1070      /* must be a directive */
  1071      if tt != _T_punc || tk.punc() != _P_hash {
  1072          panic(self.err(tk.pos, "'#' expected"))
  1073      }
  1074  
  1075      /* parse the line number */
  1076      tk = self.lex.next()
  1077      tt = tk.tag
  1078  
  1079      /* must be a line number, if it is, set the row number, and ignore the rest of the line */
  1080      if tt != _T_int {
  1081          panic(self.err(tk.pos, "line number expected"))
  1082      } else {
  1083          self.lex.row = int(tk.u64) - 1
  1084      }
  1085  }
  1086  
  1087  func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int {
  1088      var i int
  1089      var n int
  1090      var s int
  1091  
  1092      /* scan until the first standalone ',' or EOF */
  1093      loop: for i = p; i < len(self.lex.src); i++ {
  1094          switch self.lex.src[i] {
  1095              case ','           : if s == 0 { if n == 0 { break loop } }
  1096              case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } }
  1097              case '[', '{', '<' : if s == 0 { n++ }
  1098              case '\\'          : if s != 0 { i++ }
  1099              case '\''          : if s != 2 { s ^= 1 }
  1100              case '"'           : if s != 1 { s ^= 2 }
  1101          }
  1102      }
  1103  
  1104      /* check for EOF in strings */
  1105      if s != 0 {
  1106          panic(self.err(i, "unexpected EOF when scanning strings"))
  1107      }
  1108  
  1109      /* check for bracket matching */
  1110      if n != 0 {
  1111          panic(self.err(i, "unbalanced '{' or '[' or '<'"))
  1112      }
  1113  
  1114      /* add the argument to buffer */
  1115      *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) })
  1116      return self.delim(i)
  1117  }
  1118  
  1119  // Feed feeds the parser with one more line, and the parser
  1120  // parses it into a ParsedLine.
  1121  //
  1122  // NOTE: Feed does not handle empty lines or multiple lines,
  1123  //       it panics when this happens. Use Parse to parse multiple
  1124  //       lines of assembly source.
  1125  //
  1126  func (self *Parser) Feed(src string) (ret *ParsedLine, err error) {
  1127      var ok bool
  1128      var ss string
  1129      var vv interface{}
  1130  
  1131      /* check for multiple lines */
  1132      if strings.ContainsRune(src, '\n') {
  1133          return nil, errors.New("passing multiple lines to Feed()")
  1134      }
  1135  
  1136      /* check for blank lines */
  1137      if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") {
  1138          return nil, errors.New("blank line or line with only comments or line-marks")
  1139      }
  1140  
  1141      /* setup error handler */
  1142      defer func() {
  1143          if vv = recover(); vv != nil {
  1144              if err, ok = vv.(*SyntaxError); !ok {
  1145                  panic(vv)
  1146              }
  1147          }
  1148      }()
  1149  
  1150      /* call the actual parser */
  1151      ret = self.feed(src)
  1152      return
  1153  }
  1154  
  1155  // Parse parses the entire assembly source (possibly multiple lines) into
  1156  // a sequence of *ParsedLine.
  1157  func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) {
  1158      var ok bool
  1159      var ss string
  1160      var vv interface{}
  1161  
  1162      /* setup error handler */
  1163      defer func() {
  1164          if vv = recover(); vv != nil {
  1165              if err, ok = vv.(*SyntaxError); !ok {
  1166                  panic(vv)
  1167              }
  1168          }
  1169      }()
  1170  
  1171      /* feed every line */
  1172      for _, line := range strings.Split(src, "\n") {
  1173          if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") {
  1174              self.lex.row++
  1175          } else if ss[0] == '#' {
  1176              self.directives(line)
  1177          } else {
  1178              ret = append(ret, self.feed(line))
  1179          }
  1180      }
  1181  
  1182      /* all done */
  1183      err = nil
  1184      return
  1185  }
  1186  
  1187  // Directive handles the directive.
  1188  func (self *Parser) Directive(line string) (err error) {
  1189      var ok bool
  1190      var ss string
  1191      var vv interface{}
  1192  
  1193      /* check for directives */
  1194      if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' {
  1195          return errors.New("not a directive")
  1196      }
  1197  
  1198      /* setup error handler */
  1199      defer func() {
  1200          if vv = recover(); vv != nil {
  1201              if err, ok = vv.(*SyntaxError); !ok {
  1202                  panic(vv)
  1203              }
  1204          }
  1205      }()
  1206  
  1207      /* call the directive parser */
  1208      self.directives(line)
  1209      return
  1210  }
  1211  
// _TermRepo maps names to expression terms. Entries are created either by
// label(), which registers a *Label on demand, or by define(), which binds
// a name to an arbitrary term. The map is allocated lazily by both of them,
// so the zero value is ready to use.
type _TermRepo struct {
    terms map[string]expr.Term
}
  1215  
  1216  func (self *_TermRepo) Get(name string) (expr.Term, error) {
  1217      if ret, ok := self.terms[name]; ok {
  1218          return ret, nil
  1219      } else {
  1220          return nil, errors.New("undefined name: " + name)
  1221      }
  1222  }
  1223  
  1224  func (self *_TermRepo) label(name string) (*Label, error) {
  1225      var ok bool
  1226      var lb *Label
  1227      var tr expr.Term
  1228  
  1229      /* check for existing terms */
  1230      if tr, ok = self.terms[name]; ok {
  1231          if lb, ok = tr.(*Label); ok {
  1232              return lb, nil
  1233          } else {
  1234              return nil, errors.New("name is not a label: " + name)
  1235          }
  1236      }
  1237  
  1238      /* create a new one as needed */
  1239      lb = new(Label)
  1240      lb.Name = name
  1241  
  1242      /* create the map if needed */
  1243      if self.terms == nil {
  1244          self.terms = make(map[string]expr.Term, 1)
  1245      }
  1246  
  1247      /* register the label */
  1248      self.terms[name] = lb
  1249      return lb, nil
  1250  }
  1251  
  1252  func (self *_TermRepo) define(name string, term expr.Term) {
  1253      var ok bool
  1254      var tr expr.Term
  1255  
  1256      /* create the map if needed */
  1257      if self.terms == nil {
  1258          self.terms = make(map[string]expr.Term, 1)
  1259      }
  1260  
  1261      /* check for existing terms */
  1262      if tr, ok = self.terms[name]; !ok {
  1263          self.terms[name] = term
  1264      } else if _, ok = tr.(*Label); !ok {
  1265          self.terms[name] = term
  1266      } else {
  1267          panic("conflicting term types: " + name)
  1268      }
  1269  }
  1270  
// _Command describes an assembler command.
//
// The _Command.args describes both the arity and the argument types with
// characters: every 's' or 'e' stands for one argument of that type, in order,
// while '?' is a marker rather than an argument.
//
// Possible values are:
//
//      s   This argument should be a string
//      e   This argument should be an expression
//      ?   The next argument is optional, and must be the last argument.
//
type _Command struct {
    /* the argument descriptor, as explained above */
    args    string
    /* invoked with the parsed arguments once they pass the descriptor check */
    handler func(*Assembler, *Program, []ParsedCommandArg) error
}
  1287  
// Options controls the behavior of Assembler.
type Options struct {
    // InstructionAliasing specifies whether to enable instruction aliasing.
    // Setting it to true enables instruction aliasing, and the Assembler will try harder to find instructions.
    InstructionAliasing bool

    // IgnoreUnknownDirectives specifies whether to report errors when encountering unknown directives.
    // Setting it to true ignores all unknown directives silently, useful for parsing generated assembly.
    IgnoreUnknownDirectives bool
}
  1298  
// Assembler assembles the entire assembly program and generates the corresponding
// machine code representations.
type Assembler struct {
    /* number of lines processed so far, ".org" is only accepted when this is 1 */
    cc   int
    /* the parser that turns the source into parsed lines */
    ps   Parser
    /* the origin, which is the address the program is assembled at */
    pc   uintptr
    /* machine code produced by the last successful Assemble call */
    buf  []byte
    /* name of the entry point set by ".entry", empty if not specified */
    main string
    /* assembler options, exposed through Options() */
    opts Options
    /* labels and names defined by ".set" */
    repo _TermRepo
    /* parser for the expressions in command arguments */
    expr expr.Parser
    /* the line being assembled, used to fill in error locations */
    line *ParsedLine
}
  1312  
// asmCommands is the table of supported assembler commands, keyed by the
// command name as it appears in ParsedCommand.Cmd. Every entry holds the
// argument descriptor (see _Command) and the handler that implements the
// command; "fill" and "space" share the same handler.
var asmCommands = map[string]_Command {
    "org"     : { "e"   , (*Assembler).assembleCommandOrg     },
    "set"     : { "ee"  , (*Assembler).assembleCommandSet     },
    "byte"    : { "e"   , (*Assembler).assembleCommandByte    },
    "word"    : { "e"   , (*Assembler).assembleCommandWord    },
    "long"    : { "e"   , (*Assembler).assembleCommandLong    },
    "quad"    : { "e"   , (*Assembler).assembleCommandQuad    },
    "fill"    : { "e?e" , (*Assembler).assembleCommandFill    },
    "space"   : { "e?e" , (*Assembler).assembleCommandFill    },
    "align"   : { "e?e" , (*Assembler).assembleCommandAlign   },
    "entry"   : { "e"   , (*Assembler).assembleCommandEntry   },
    "ascii"   : { "s"   , (*Assembler).assembleCommandAscii   },
    "asciz"   : { "s"   , (*Assembler).assembleCommandAsciz   },
    "p2align" : { "e?e" , (*Assembler).assembleCommandP2Align },
}
  1328  
  1329  func (self *Assembler) err(msg string) *SyntaxError {
  1330      return &SyntaxError {
  1331          Pos    : -1,
  1332          Row    : self.line.Row,
  1333          Src    : self.line.Src,
  1334          Reason : msg,
  1335      }
  1336  }
  1337  
  1338  func (self *Assembler) eval(expr string) (int64, error) {
  1339      if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil {
  1340          return 0, err
  1341      } else {
  1342          return exp.Evaluate()
  1343      }
  1344  }
  1345  
  1346  func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error {
  1347      if i >= len(v.Args) {
  1348          return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(v.Cmd), n))
  1349      } else if isString && !v.Args[i].IsString {
  1350          return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd)))
  1351      } else if !isString && v.Args[i].IsString {
  1352          return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd)))
  1353      } else {
  1354          return nil
  1355      }
  1356  }
  1357  
  1358  func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error {
  1359      if v, err := self.repo.label(lb.Name); err != nil {
  1360          return err
  1361      } else {
  1362          p.Link(v)
  1363          return nil
  1364      }
  1365  }
  1366  
  1367  func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) {
  1368      var ok  bool
  1369      var pfx []byte
  1370      var ops []interface{}
  1371      var enc _InstructionEncoder
  1372  
  1373      /* convert to lower-case */
  1374      opts := self.opts
  1375      name := strings.ToLower(line.Mnemonic)
  1376  
  1377      /* fix register-addressing branches if needed */
  1378      if opts.InstructionAliasing && len(line.Operands) == 1 {
  1379          switch {
  1380              case name == "retq"                                    : name = "ret"
  1381              case name == "movabsq"                                 : name = "movq"
  1382              case name == "jmp"   && line.Operands[0].Op != OpLabel : name = "jmpq"
  1383              case name == "jmpq"  && line.Operands[0].Op == OpLabel : name = "jmp"
  1384              case name == "call"  && line.Operands[0].Op != OpLabel : name = "callq"
  1385              case name == "callq" && line.Operands[0].Op == OpLabel : name = "call"
  1386          }
  1387      }
  1388  
  1389      /* lookup from the alias table if needed */
  1390      if opts.InstructionAliasing {
  1391          enc, ok = _InstructionAliases[name]
  1392      }
  1393  
  1394      /* lookup from the instruction table */
  1395      if !ok {
  1396          enc, ok = Instructions[name]
  1397      }
  1398  
  1399      /* remove size suffix if possible */
  1400      if !ok && opts.InstructionAliasing {
  1401          switch i := len(name) - 1; name[i] {
  1402              case 'b', 'w', 'l', 'q': {
  1403                  enc, ok = Instructions[name[:i]]
  1404              }
  1405          }
  1406      }
  1407  
  1408      /* check for instruction name */
  1409      if !ok {
  1410          return self.err("no such instruction: " + strconv.Quote(name))
  1411      }
  1412  
  1413      /* allocate memory for prefix if any */
  1414      if len(line.Prefixes) != 0 {
  1415          pfx = make([]byte, len(line.Prefixes))
  1416      }
  1417  
  1418      /* convert the prefixes */
  1419      for i, v := range line.Prefixes {
  1420          switch v {
  1421              case PrefixLock      : pfx[i] = _P_lock
  1422              case PrefixSegmentCS : pfx[i] = _P_cs
  1423              case PrefixSegmentDS : pfx[i] = _P_ds
  1424              case PrefixSegmentES : pfx[i] = _P_es
  1425              case PrefixSegmentFS : pfx[i] = _P_fs
  1426              case PrefixSegmentGS : pfx[i] = _P_gs
  1427              case PrefixSegmentSS : pfx[i] = _P_ss
  1428              default              : panic("unreachable: invalid segment prefix")
  1429          }
  1430      }
  1431  
  1432      /* convert the operands */
  1433      for _, op := range line.Operands {
  1434          switch op.Op {
  1435              case OpImm   : ops = append(ops, op.Imm)
  1436              case OpReg   : ops = append(ops, op.Reg)
  1437              case OpMem   : self.assembleInstrMem(&ops, op.Memory)  
  1438              case OpLabel : self.assembleInstrLabel(&ops, op.Label) 
  1439              default      : panic("parser yields an invalid operand kind")
  1440          }
  1441      }
  1442  
  1443      /* catch any exceptions in the encoder */
  1444      defer func() {
  1445          if v := recover(); v != nil {
  1446              err = self.err(fmt.Sprint(v))
  1447          }
  1448      }()
  1449  
  1450      /* encode the instruction */
  1451      enc(p, ops...).prefix = pfx
  1452      return nil
  1453  }
  1454  
  1455  func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) {
  1456      mem := new(MemoryOperand)
  1457      *ops = append(*ops, mem)
  1458  
  1459      /* check for RIP-relative addressing */
  1460      if addr.Base != rip {
  1461          mem.Addr.Type = Memory
  1462          mem.Addr.Memory = addr
  1463      } else {
  1464          mem.Addr.Type = Offset
  1465          mem.Addr.Offset = RelativeOffset(addr.Displacement)
  1466      }
  1467  }
  1468  
  1469  func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) {
  1470      vk := label.Kind
  1471      tr, err := self.repo.label(label.Name)
  1472  
  1473      /* check for errors */
  1474      if err != nil {
  1475          panic(err)
  1476      }
  1477  
  1478      /* check for branch target */
  1479      if vk == BranchTarget {
  1480          *ops = append(*ops, tr)
  1481          return
  1482      }
  1483  
  1484      /* add to ops */
  1485      *ops = append(*ops, &MemoryOperand {
  1486          Addr: Addressable {
  1487              Type      : Reference,
  1488              Reference : tr,
  1489          },
  1490      })
  1491  }
  1492  
  1493  func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error {
  1494      var iv int
  1495      var cc rune
  1496      var ok bool
  1497      var va bool
  1498      var fn _Command
  1499  
  1500      /* find the command */
  1501      if fn, ok = asmCommands[line.Cmd]; !ok {
  1502          if self.opts.IgnoreUnknownDirectives {
  1503              return nil
  1504          } else {
  1505              return self.err("no such command: " + strconv.Quote(line.Cmd))
  1506          }
  1507      }
  1508  
  1509      /* expected & real argument count */
  1510      argx := len(fn.args)
  1511      argc := len(line.Args)
  1512  
  1513      /* check the arguments */
  1514      loop: for iv, cc = range fn.args {
  1515          switch cc {
  1516              case '?' : va = true; break loop
  1517              case 's' : if err := self.checkArgs(iv, argx, line, true)  ; err != nil { return err }
  1518              case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err }
  1519              default  : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1520          }
  1521      }
  1522  
  1523      /* simple case: non-variadic command */
  1524      if !va {
  1525          if argc == argx {
  1526              return fn.handler(self, p, line.Args)
  1527          } else {
  1528              return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(line.Cmd), argx))
  1529          }
  1530      }
  1531  
  1532      /* check for the descriptor */
  1533      if iv != argx - 2 {
  1534          panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1535      }
  1536  
  1537      /* variadic command and the final optional argument is set */
  1538      if argc == argx - 1 {
  1539          switch fn.args[argx - 1] {
  1540              case 's' : if err := self.checkArgs(iv, -1, line, true)  ; err != nil { return err }
  1541              case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err }
  1542              default  : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1543          }
  1544      }
  1545  
  1546      /* check argument count */
  1547      if argc == argx - 1 || argc == argx - 2 {
  1548          return fn.handler(self, p, line.Args)
  1549      } else {
  1550          return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1))
  1551      }
  1552  }
  1553  
  1554  func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error {
  1555      var err error
  1556      var val *expr.Expr
  1557  
  1558      /* parse the expression */
  1559      if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil {
  1560          return err
  1561      }
  1562  
  1563      /* add to the program */
  1564      addfn(p, val)
  1565      return nil
  1566  }
  1567  
  1568  func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error {
  1569      var err error
  1570      var val int64
  1571  
  1572      /* evaluate the expression */
  1573      if val, err = self.eval(argv[0].Value); err != nil {
  1574          return err
  1575      }
  1576  
  1577      /* check for origin */
  1578      if val < 0 {
  1579          return self.err(fmt.Sprintf("negative origin: %d", val))
  1580      }
  1581  
  1582      /* ".org" must be the first command if any */
  1583      if self.cc != 1 {
  1584          return self.err(".org must be the first command if present")
  1585      }
  1586  
  1587      /* set the initial program counter */
  1588      self.pc = uintptr(val)
  1589      return nil
  1590  }
  1591  
  1592  func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error {
  1593      var err error
  1594      var val *expr.Expr
  1595  
  1596      /* parse the expression */
  1597      if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil {
  1598          return err
  1599      }
  1600  
  1601      /* define the new identifier */
  1602      self.repo.define(argv[0].Value, val)
  1603      return nil
  1604  }
  1605  
// assembleCommandByte implements the "byte" command by passing its argument
// to assembleCommandInt, with (*Program).Byte adding the expression to the program.
func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Byte)
}
  1609  
// assembleCommandWord implements the "word" command by passing its argument
// to assembleCommandInt, with (*Program).Word adding the expression to the program.
func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Word)
}
  1613  
// assembleCommandLong implements the "long" command by passing its argument
// to assembleCommandInt, with (*Program).Long adding the expression to the program.
func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Long)
}
  1617  
// assembleCommandQuad implements the "quad" command by passing its argument
// to assembleCommandInt, with (*Program).Quad adding the expression to the program.
func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Quad)
}
  1621  
  1622  func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error {
  1623      var fv byte
  1624      var nb int64
  1625      var ex error
  1626  
  1627      /* evaluate the size */
  1628      if nb, ex = self.eval(argv[0].Value); ex != nil {
  1629          return ex
  1630      }
  1631  
  1632      /* check for filling size */
  1633      if nb < 0 {
  1634          return self.err(fmt.Sprintf("negative filling size: %d", nb))
  1635      }
  1636  
  1637      /* check for optional filling value */
  1638      if len(argv) == 2 {
  1639          if val, err := self.eval(argv[1].Value); err != nil {
  1640              return err
  1641          } else if val < math.MinInt8 || val > math.MaxUint8 {
  1642              return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val))
  1643          } else {
  1644              fv = byte(val)
  1645          }
  1646      }
  1647  
  1648      /* fill with specified byte */
  1649      p.Data(bytes.Repeat([]byte { fv }, int(nb)))
  1650      return nil
  1651  }
  1652  
  1653  func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error {
  1654      var nb int64
  1655      var ex error
  1656      var fv *expr.Expr
  1657  
  1658      /* evaluate the size */
  1659      if nb, ex = self.eval(argv[0].Value); ex != nil {
  1660          return ex
  1661      }
  1662  
  1663      /* check for alignment value */
  1664      if nb <= 0 {
  1665          return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1666      }
  1667  
  1668      /* alignment must be a power of 2 */
  1669      if (nb & (nb - 1)) != 0 {
  1670          return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb))
  1671      }
  1672  
  1673      /* check for optional filling value */
  1674      if len(argv) == 2 {
  1675          if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1676              fv = v
  1677          } else {
  1678              return err
  1679          }
  1680      }
  1681  
  1682      /* fill with specified byte, default to 0 if not specified */
  1683      p.Align(uint64(nb), fv)
  1684      return nil
  1685  }
  1686  
  1687  func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error {
  1688      name := argv[0].Value
  1689      rbuf := []rune(name)
  1690  
  1691      /* check all the characters */
  1692      for i, cc := range rbuf {
  1693          if !isident0(cc) && (i == 0 || !isident(cc)) {
  1694              return self.err("entry point must be a label name")
  1695          }
  1696      }
  1697  
  1698      /* set the main entry point */
  1699      self.main = name
  1700      return nil
  1701  }
  1702  
// assembleCommandAscii implements the "ascii" command, which adds the bytes
// of its string argument to the program as data, without a terminator.
func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error {
    p.Data([]byte(argv[0].Value))
    return nil
}
  1707  
// assembleCommandAsciz implements the "asciz" command, which adds the bytes
// of its string argument to the program as data, followed by a single zero byte.
func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error {
    p.Data(append([]byte(argv[0].Value), 0))
    return nil
}
  1712  
  1713  func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error {
  1714      var nb int64
  1715      var ex error
  1716      var fv *expr.Expr
  1717  
  1718      /* evaluate the size */
  1719      if nb, ex = self.eval(argv[0].Value); ex != nil {
  1720          return ex
  1721      }
  1722  
  1723      /* check for alignment value */
  1724      if nb <= 0 {
  1725          return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1726      }
  1727  
  1728      /* check for optional filling value */
  1729      if len(argv) == 2 {
  1730          if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1731              fv = v
  1732          } else {
  1733              return err
  1734          }
  1735      }
  1736  
  1737      /* fill with specified byte, default to 0 if not specified */
  1738      p.Align(1 << nb, fv)
  1739      return nil
  1740  }
  1741  
// Base returns the origin, which is the address set by WithBase or by
// the "org" command, and the address the program is assembled at.
func (self *Assembler) Base() uintptr {
    return self.pc
}
  1746  
// Code returns the assembled machine code, which is the internal buffer
// filled by Assemble. The buffer is returned without copying.
func (self *Assembler) Code() []byte {
    return self.buf
}
  1751  
  1752  // Entry returns the address of the specified entry point, or the origin if not specified.
  1753  func (self *Assembler) Entry() uintptr {
  1754      if self.main == "" {
  1755          return self.pc
  1756      } else if tr, err := self.repo.Get(self.main); err != nil {
  1757          panic(err)
  1758      } else if val, err := tr.Evaluate(); err != nil {
  1759          panic(err)
  1760      } else {
  1761          return uintptr(val)
  1762      }
  1763  }
  1764  
// Options returns a pointer to the options stored inside this Assembler rather
// than a copy, so changing the returned value WILL affect this Assembler instance.
func (self *Assembler) Options() *Options {
    return &self.opts
}
  1769  
// WithBase resets the origin to pc, and returns the Assembler itself
// so that calls can be chained.
func (self *Assembler) WithBase(pc uintptr) *Assembler {
    self.pc = pc
    return self
}
  1775  
  1776  // Assemble assembles the assembly source and save the machine code to internal buffer.
  1777  func (self *Assembler) Assemble(src string) error {
  1778      var err error
  1779      var buf []*ParsedLine
  1780  
  1781      /* parse the source */
  1782      if buf, err = self.ps.Parse(src); err != nil {
  1783          return err
  1784      }
  1785  
  1786      /* create a new program */
  1787      p := DefaultArch.CreateProgram()
  1788      defer p.Free()
  1789  
  1790      /* process every line */
  1791      for _, self.line = range buf {
  1792          switch self.cc++; self.line.Kind {
  1793              case LineLabel   : if err = self.assembleLabel   (p, &self.line.Label)       ; err != nil { return err }
  1794              case LineInstr   : if err = self.assembleInstr   (p, &self.line.Instruction) ; err != nil { return err }
  1795              case LineCommand : if err = self.assembleCommand (p, &self.line.Command)     ; err != nil { return err }
  1796              default          : panic("parser yields an invalid line kind")
  1797          }
  1798      }
  1799  
  1800      /* assemble the program */
  1801      self.buf = p.Assemble(self.pc)
  1802      return nil
  1803  }
  1804  

View as plain text