...

Source file src/github.com/chenzhuoyu/iasm/x86_64/program.go

Documentation: github.com/chenzhuoyu/iasm/x86_64

     1  package x86_64
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"math/bits"
     7  
     8  	"github.com/chenzhuoyu/iasm/expr"
     9  )
    10  
type (
	// _PseudoType identifies the kind of a pseudo-instruction
	// (one of the _Pseudo* constants).
	_PseudoType int
	// _InstructionEncoder is the signature of a function that takes a
	// Program plus a variadic operand list and yields an Instruction.
	_InstructionEncoder func(*Program, ...interface{}) *Instruction
)
    15  
// Pseudo-instruction kinds. Numbering starts at 1, so the zero value of
// _PseudoType never names a pseudo-instruction; Instruction.encode relies
// on this to tell real instructions (kind == 0) apart from pseudo ones.
const (
	_PseudoNop _PseudoType = iota + 1
	_PseudoByte
	_PseudoWord
	_PseudoLong
	_PseudoQuad
	_PseudoData
	_PseudoAlign
)
    25  
    26  func (self _PseudoType) String() string {
    27  	switch self {
    28  	case _PseudoNop:
    29  		return ".nop"
    30  	case _PseudoByte:
    31  		return ".byte"
    32  	case _PseudoWord:
    33  		return ".word"
    34  	case _PseudoLong:
    35  		return ".long"
    36  	case _PseudoQuad:
    37  		return ".quad"
    38  	case _PseudoData:
    39  		return ".data"
    40  	case _PseudoAlign:
    41  		return ".align"
    42  	default:
    43  		panic("unreachable")
    44  	}
    45  }
    46  
// _Pseudo is the payload of a pseudo-instruction. Which fields are
// meaningful depends on kind.
type _Pseudo struct {
	// kind selects the pseudo-instruction; zero means "not a pseudo-instruction".
	kind _PseudoType
	// data holds the raw bytes emitted by a .data pseudo-instruction.
	data []byte
	// uint holds the alignment of an .align pseudo-instruction.
	uint uint64
	// expr is the value expression of .byte/.word/.long/.quad, or the
	// optional padding expression of .align; it may be nil.
	expr *expr.Expr
}
    53  
    54  func (self *_Pseudo) free() {
    55  	if self.expr != nil {
    56  		self.expr.Free()
    57  	}
    58  }
    59  
    60  func (self *_Pseudo) encode(m *[]byte, pc uintptr) int {
    61  	switch self.kind {
    62  	case _PseudoNop:
    63  		return 0
    64  	case _PseudoByte:
    65  		self.encodeByte(m)
    66  		return 1
    67  	case _PseudoWord:
    68  		self.encodeWord(m)
    69  		return 2
    70  	case _PseudoLong:
    71  		self.encodeLong(m)
    72  		return 4
    73  	case _PseudoQuad:
    74  		self.encodeQuad(m)
    75  		return 8
    76  	case _PseudoData:
    77  		self.encodeData(m)
    78  		return len(self.data)
    79  	case _PseudoAlign:
    80  		self.encodeAlign(m, pc)
    81  		return self.alignSize(pc)
    82  	default:
    83  		panic("invalid pseudo instruction")
    84  	}
    85  }
    86  
    87  func (self *_Pseudo) evalExpr(low int64, high int64) int64 {
    88  	if v, err := self.expr.Evaluate(); err != nil {
    89  		panic(err)
    90  	} else if v < low || v > high {
    91  		panic(fmt.Sprintf("expression out of range [%d, %d]: %d", low, high, v))
    92  	} else {
    93  		return v
    94  	}
    95  }
    96  
    97  func (self *_Pseudo) alignSize(pc uintptr) int {
    98  	if !ispow2(self.uint) {
    99  		panic(fmt.Sprintf("aligment should be a power of 2, not %d", self.uint))
   100  	} else {
   101  		return align(int(pc), bits.TrailingZeros64(self.uint)) - int(pc)
   102  	}
   103  }
   104  
   105  func (self *_Pseudo) encodeData(m *[]byte) {
   106  	if m != nil {
   107  		*m = append(*m, self.data...)
   108  	}
   109  }
   110  
   111  func (self *_Pseudo) encodeByte(m *[]byte) {
   112  	if m != nil {
   113  		append8(m, byte(self.evalExpr(math.MinInt8, math.MaxUint8)))
   114  	}
   115  }
   116  
   117  func (self *_Pseudo) encodeWord(m *[]byte) {
   118  	if m != nil {
   119  		append16(m, uint16(self.evalExpr(math.MinInt16, math.MaxUint16)))
   120  	}
   121  }
   122  
   123  func (self *_Pseudo) encodeLong(m *[]byte) {
   124  	if m != nil {
   125  		append32(m, uint32(self.evalExpr(math.MinInt32, math.MaxUint32)))
   126  	}
   127  }
   128  
   129  func (self *_Pseudo) encodeQuad(m *[]byte) {
   130  	if m != nil {
   131  		if v, err := self.expr.Evaluate(); err != nil {
   132  			panic(err)
   133  		} else {
   134  			append64(m, uint64(v))
   135  		}
   136  	}
   137  }
   138  
   139  func (self *_Pseudo) encodeAlign(m *[]byte, pc uintptr) {
   140  	if m != nil {
   141  		if self.expr == nil {
   142  			expandmm(m, self.alignSize(pc), 0)
   143  		} else {
   144  			expandmm(m, self.alignSize(pc), byte(self.evalExpr(math.MinInt8, math.MaxUint8)))
   145  		}
   146  	}
   147  }
   148  
// Operands represents the sequence of operands required by an instruction.
// It is a fixed-size array of _N_args slots; an Instruction records how many
// of the slots are in use separately, in its argc field.
type Operands [_N_args]interface{}
   151  
// InstructionDomain represents the domain of an instruction.
type InstructionDomain uint8

// Instruction domains, numbered consecutively from 0 (DomainGeneric).
const (
	DomainGeneric InstructionDomain = iota
	DomainMMXSSE
	DomainAVX
	DomainFMA
	DomainCrypto
	DomainMask
	DomainAMDSpecific
	DomainMisc
	// DomainPseudo is assigned to pseudo-instructions created by Program.pseudo.
	DomainPseudo
)
   166  
type (
	// _BranchType classifies an instruction as a non-branch, a conditional
	// branch or an unconditional branch.
	_BranchType uint8
)

const (
	// _B_none is the zero value: the instruction is not a branch.
	_B_none _BranchType = iota
	// _B_conditional marks a conditional branch (far form is _N_far_cond bytes).
	_B_conditional
	// _B_unconditional marks an unconditional branch (far form is _N_far_uncond bytes).
	_B_unconditional
)
   176  
// Instruction represents an unencoded instruction.
type Instruction struct {
	// next links to the following instruction in the owning Program.
	next *Instruction
	// pc is the program counter assigned to this instruction by Program.Assemble.
	pc uintptr
	// nb is the size of the instruction in bytes, as set by encode or by
	// the branch-adjusting pass of Program.Assemble.
	nb int
	// len is the number of entries of forms that are in use.
	len int
	// argc is the number of entries of argv that are in use.
	argc int
	// name is the instruction name returned by Name.
	name string
	// argv holds the operands; only the first argc entries are meaningful.
	argv Operands
	// forms holds the candidate encodings registered through add.
	forms [_N_forms]_Encoding
	// pseudo is the pseudo-instruction payload; a zero kind means this is
	// a real instruction.
	pseudo _Pseudo
	// branch tells whether, and how, this instruction branches.
	branch _BranchType
	// domain is the instruction domain returned by Domain.
	domain InstructionDomain
	// prefix holds the raw prefix bytes emitted ahead of the instruction.
	prefix []byte
}
   192  
   193  func (self *Instruction) add(flags int, encoder func(m *_Encoding, v []interface{})) {
   194  	self.forms[self.len].flags = flags
   195  	self.forms[self.len].encoder = encoder
   196  	self.len++
   197  }
   198  
// free releases the resources held by this instruction: every disposable
// operand (via clear) and the pseudo-instruction expression, if any.
// The instruction itself is not recycled; that call is currently disabled.
func (self *Instruction) free() {
	self.clear()
	self.pseudo.free()
	//freeInstruction(self)
}
   204  
   205  func (self *Instruction) clear() {
   206  	for i := 0; i < self.argc; i++ {
   207  		if v, ok := self.argv[i].(Disposable); ok {
   208  			v.Free()
   209  		}
   210  	}
   211  }
   212  
   213  func (self *Instruction) check(e *_Encoding) bool {
   214  	if (e.flags & _F_rel1) != 0 {
   215  		return isRel8(self.argv[0])
   216  	} else if (e.flags & _F_rel4) != 0 {
   217  		return isRel32(self.argv[0]) || isLabel(self.argv[0])
   218  	} else {
   219  		return true
   220  	}
   221  }
   222  
   223  func (self *Instruction) encode(m *[]byte) int {
   224  	n := math.MaxInt64
   225  	p := (*_Encoding)(nil)
   226  
   227  	/* encode prefixes if any */
   228  	if self.nb = len(self.prefix); m != nil {
   229  		*m = append(*m, self.prefix...)
   230  	}
   231  
   232  	/* check for pseudo-instructions */
   233  	if self.pseudo.kind != 0 {
   234  		self.nb += self.pseudo.encode(m, self.pc)
   235  		return self.nb
   236  	}
   237  
   238  	/* find the shortest encoding */
   239  	for i := 0; i < self.len; i++ {
   240  		if e := &self.forms[i]; self.check(e) {
   241  			if v := e.encode(self.argv[:self.argc]); v < n {
   242  				n = v
   243  				p = e
   244  			}
   245  		}
   246  	}
   247  
   248  	/* add to buffer if needed */
   249  	if m != nil {
   250  		*m = append(*m, p.bytes[:n]...)
   251  	}
   252  
   253  	/* update the instruction length */
   254  	self.nb += n
   255  	return self.nb
   256  }
   257  
   258  /** Instruction Prefixes **/
   259  
// Prefix bytes appended to Instruction.prefix by the CS, DS, ES, FS, GS,
// SS and LOCK methods.
const (
	_P_cs   = 0x2e
	_P_ds   = 0x3e
	_P_es   = 0x26
	_P_fs   = 0x64
	_P_gs   = 0x65
	_P_ss   = 0x36
	_P_lock = 0xf0
)
   269  
   270  // CS overrides the memory operation of this instruction to CS.
   271  func (self *Instruction) CS() *Instruction {
   272  	self.prefix = append(self.prefix, _P_cs)
   273  	return self
   274  }
   275  
   276  // DS overrides the memory operation of this instruction to DS,
   277  // this is the default section for most instructions if not specified.
   278  func (self *Instruction) DS() *Instruction {
   279  	self.prefix = append(self.prefix, _P_ds)
   280  	return self
   281  }
   282  
   283  // ES overrides the memory operation of this instruction to ES.
   284  func (self *Instruction) ES() *Instruction {
   285  	self.prefix = append(self.prefix, _P_es)
   286  	return self
   287  }
   288  
   289  // FS overrides the memory operation of this instruction to FS.
   290  func (self *Instruction) FS() *Instruction {
   291  	self.prefix = append(self.prefix, _P_fs)
   292  	return self
   293  }
   294  
   295  // GS overrides the memory operation of this instruction to GS.
   296  func (self *Instruction) GS() *Instruction {
   297  	self.prefix = append(self.prefix, _P_gs)
   298  	return self
   299  }
   300  
   301  // SS overrides the memory operation of this instruction to SS.
   302  func (self *Instruction) SS() *Instruction {
   303  	self.prefix = append(self.prefix, _P_ss)
   304  	return self
   305  }
   306  
   307  // LOCK causes the processor's LOCK# signal to be asserted during execution of
   308  // the accompanying instruction (turns the instruction into an atomic instruction).
   309  // In a multiprocessor environment, the LOCK# signal insures that the processor
   310  // has exclusive use of any shared memory while the signal is asserted.
   311  func (self *Instruction) LOCK() *Instruction {
   312  	self.prefix = append(self.prefix, _P_lock)
   313  	return self
   314  }
   315  
   316  /** Basic Instruction Properties **/
   317  
// Name returns the instruction name. For pseudo-instructions this is the
// directive name produced by _PseudoType.String, such as ".byte".
func (self *Instruction) Name() string {
	return self.name
}
   322  
// Domain returns the domain of this instruction; pseudo-instructions
// report DomainPseudo.
func (self *Instruction) Domain() InstructionDomain {
	return self.domain
}
   327  
// Operands returns the operands of this instruction.
//
// The returned slice is a view of the instruction's own operand array, not
// a copy: writing to its elements modifies the instruction.
func (self *Instruction) Operands() []interface{} {
	return self.argv[:self.argc]
}
   332  
// Program represents a sequence of instructions, kept as a singly-linked
// list in the order the instructions were added.
type Program struct {
	// arch is consulted by require to check that an ISA is enabled.
	arch *Arch
	// head is the first instruction, or nil when the program is empty.
	head *Instruction
	// tail is the last instruction; new instructions are attached after it.
	tail *Instruction
}
   339  
// Encoded sizes of branch instructions, used by Assemble when deciding
// whether a labeled branch can use the short form.
const (
	_N_near       = 2 // near-branch (-128 ~ +127) takes 2 bytes to encode
	_N_far_cond   = 6 // conditional far-branch takes 6 bytes to encode
	_N_far_uncond = 5 // unconditional far-branch takes 5 bytes to encode
)
   345  
   346  func (self *Program) clear() {
   347  	for p, q := self.head, self.head; p != nil; p = q {
   348  		q = p.next
   349  		p.free()
   350  	}
   351  }
   352  
   353  func (self *Program) alloc(name string, argc int, argv Operands) *Instruction {
   354  	p := self.tail
   355  	q := newInstruction(name, argc, argv)
   356  
   357  	/* attach to tail if any */
   358  	if p != nil {
   359  		p.next = q
   360  	} else {
   361  		self.head = q
   362  	}
   363  
   364  	/* set the new tail */
   365  	self.tail = q
   366  	return q
   367  }
   368  
   369  func (self *Program) pseudo(kind _PseudoType) (p *Instruction) {
   370  	p = self.alloc(kind.String(), 0, Operands{})
   371  	p.domain = DomainPseudo
   372  	p.pseudo.kind = kind
   373  	return
   374  }
   375  
   376  func (self *Program) require(isa ISA) {
   377  	if !self.arch.HasISA(isa) {
   378  		panic("ISA '" + isa.String() + "' was not enabled")
   379  	}
   380  }
   381  
   382  func (self *Program) branchSize(p *Instruction) int {
   383  	switch p.branch {
   384  	case _B_none:
   385  		panic("p is not a branch")
   386  	case _B_conditional:
   387  		return _N_far_cond
   388  	case _B_unconditional:
   389  		return _N_far_uncond
   390  	default:
   391  		panic("invalid instruction")
   392  	}
   393  }
   394  
   395  /** Pseudo-Instructions **/
   396  
   397  // Byte is a pseudo-instruction to add raw byte to the assembled code.
   398  func (self *Program) Byte(v *expr.Expr) (p *Instruction) {
   399  	p = self.pseudo(_PseudoByte)
   400  	p.pseudo.expr = v
   401  	return
   402  }
   403  
   404  // Word is a pseudo-instruction to add raw uint16 as little-endian to the assembled code.
   405  func (self *Program) Word(v *expr.Expr) (p *Instruction) {
   406  	p = self.pseudo(_PseudoWord)
   407  	p.pseudo.expr = v
   408  	return
   409  }
   410  
   411  // Long is a pseudo-instruction to add raw uint32 as little-endian to the assembled code.
   412  func (self *Program) Long(v *expr.Expr) (p *Instruction) {
   413  	p = self.pseudo(_PseudoLong)
   414  	p.pseudo.expr = v
   415  	return
   416  }
   417  
   418  // Quad is a pseudo-instruction to add raw uint64 as little-endian to the assembled code.
   419  func (self *Program) Quad(v *expr.Expr) (p *Instruction) {
   420  	p = self.pseudo(_PseudoQuad)
   421  	p.pseudo.expr = v
   422  	return
   423  }
   424  
   425  // Data is a pseudo-instruction to add raw bytes to the assembled code.
   426  func (self *Program) Data(v []byte) (p *Instruction) {
   427  	p = self.pseudo(_PseudoData)
   428  	p.pseudo.data = v
   429  	return
   430  }
   431  
   432  // Align is a pseudo-instruction to ensure the PC is aligned to a certain value.
   433  func (self *Program) Align(align uint64, padding *expr.Expr) (p *Instruction) {
   434  	p = self.pseudo(_PseudoAlign)
   435  	p.pseudo.uint = align
   436  	p.pseudo.expr = padding
   437  	return
   438  }
   439  
   440  /** Program Assembler **/
   441  
// Free releases the resources held by the Program.
// Any operation performed after Free is undefined behavior.
//
// NOTE: This frees every instruction of the program, which in turn frees
// each operand that implements Disposable and each pseudo-instruction
// expression. The Program object itself is not returned to a pool; that
// call is currently disabled.
func (self *Program) Free() {
	self.clear()
	//freeProgram(self)
}
   452  
   453  // Link pins a label at the current position.
   454  func (self *Program) Link(p *Label) {
   455  	if p.Dest != nil {
   456  		panic("lable was alreay linked")
   457  	} else {
   458  		p.Dest = self.pseudo(_PseudoNop)
   459  	}
   460  }
   461  
// Assemble assembles and links the entire program into machine code.
//
// pc is the address assigned to the first instruction; the program counter
// of every following instruction is derived from it. The returned slice
// holds the encoded bytes of all instructions in program order.
//
// Assemble modifies the program: it sets the pc and size of every
// instruction, and it replaces *Label operands and label-referencing
// memory operands with concrete offsets.
func (self *Program) Assemble(pc uintptr) (ret []byte) {
	orig := pc
	next := true
	offs := uintptr(0)

	/* Pass 0: PC-precompute, assume all labeled branches are far-branches. */
	for p := self.head; p != nil; p = p.next {
		/* everything except a labeled branch is measured by a dry-run
		 * encode; a labeled branch is charged its far-form size */
		if p.pc = pc; !isLabel(p.argv[0]) || p.branch == _B_none {
			pc += uintptr(p.encode(nil))
		} else {
			pc += uintptr(self.branchSize(p))
		}
	}

	/* allocate space for the machine code, sized for the worst
	 * case where every labeled branch stays a far-branch */
	nb := int(pc - orig)
	ret = make([]byte, 0, nb)

	/* Pass 1: adjust all the jumps, repeated until a
	 * full sweep no longer shortens any branch */
	for next {
		next = false
		/* offs accumulates how many bytes the instructions
		 * seen so far in this sweep have shrunk by */
		offs = uintptr(0)

		/* scan all the branches */
		for p := self.head; p != nil; p = p.next {
			var ok bool
			var lb *Label

			/* re-calculate the alignment here: the padding size depends
			 * on the (shifted) PC, so it is measured again and the
			 * difference to its previous size is folded into offs */
			if nb = p.nb; p.pseudo.kind == _PseudoAlign {
				p.pc -= offs
				offs += uintptr(nb - p.encode(nil))
				continue
			}

			/* adjust the program counter */
			p.pc -= offs
			lb, ok = p.argv[0].(*Label)

			/* only care about labeled far-branches: skip operands that are
			 * not labels, branches already shortened, and non-branches */
			if !ok || p.nb == _N_near || p.branch == _B_none {
				continue
			}

			/* calculate the jump offset, measured from the end of the far form */
			size := self.branchSize(p)
			diff := lb.offset(p.pc, size)

			/* too far to be a near jump */
			if diff > 127 || diff < -128 {
				p.nb = size
				continue
			}

			/* a far jump becomes a near jump, calculate
			 * the PC adjustment value and assemble again */
			next = true
			p.nb = _N_near
			offs += uintptr(size - _N_near)
		}
	}

	/* Pass 2: link all the cross-references */
	for p := self.head; p != nil; p = p.next {
		for i := 0; i < p.argc; i++ {
			var ok bool
			var lb *Label
			var op *MemoryOperand

			/* resolve labels: the operand is replaced by the
			 * offset computed from this instruction's pc and size */
			if lb, ok = p.argv[i].(*Label); ok {
				p.argv[i] = lb.offset(p.pc, p.nb)
				continue
			}

			/* check for memory operands */
			if op, ok = p.argv[i].(*MemoryOperand); !ok {
				continue
			}

			/* check for label references */
			if op.Addr.Type != Reference {
				continue
			}

			/* replace the label with the real offset */
			op.Addr.Type = Offset
			op.Addr.Offset = op.Addr.Reference.offset(p.pc, p.nb)
		}
	}

	/* Pass 3: actually encode all the instructions */
	for p := self.head; p != nil; p = p.next {
		p.encode(&ret)
	}

	/* all done */
	return ret
}
   562  
   563  // AssembleAndFree is like Assemble, but it frees the Program after assembling.
   564  func (self *Program) AssembleAndFree(pc uintptr) (ret []byte) {
   565  	ret = self.Assemble(pc)
   566  	self.Free()
   567  	return
   568  }
   569  

View as plain text