1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xeddata 6 7 import ( 8 "encoding/json" 9 "strings" 10 ) 11 12 // An Object is a single "dec/enc-instruction" XED object from datafiles. 13 // 14 // Field names and their comments are borrowed from Intel XED 15 // engineering notes (see "$XED/misc/engineering-notes.txt"). 16 // 17 // Field values are always trimmed (i.e. no leading/trailing whitespace). 18 // 19 // Missing optional members are expressed with an empty string. 20 // 21 // Object contains multiple Inst elements that represent concrete 22 // instruction with encoding pattern and operands description. 23 type Object struct { 24 // Iclass is instruction class name (opcode). 25 // Iclass alone is not enough to uniquely identify machine instructions. 26 // Example: "PSRLW". 27 Iclass string 28 29 // Disasm is substituted name when a simple conversion 30 // from iclass is inappropriate. 31 // Never combined with DisasmIntel or DisasmATTSV. 32 // Example: "syscall". 33 // 34 // Optional. 35 Disasm string 36 37 // DisasmIntel is like Disasm, but with Intel syntax. 38 // If present, usually comes with DisasmATTSV. 39 // Example: "jmp far". 40 // 41 // Optional. 42 DisasmIntel string 43 44 // DisasmATTSV is like Disasm, but with AT&T/SysV syntax. 45 // If present, usually comes with DisasmIntel. 46 // Example: "ljmp". 47 // 48 // Optional. 49 DisasmATTSV string 50 51 // Attributes describes name set for bits in the binary attributes field. 52 // Example: "NOP X87_CONTROL NOTSX". 53 // 54 // Optional. If not present, zero attribute set is implied. 55 Attributes string 56 57 // Uname is unique name used for deleting / replacing instructions. 58 // 59 // Optional. Provided for completeness, mostly useful for XED internal usage. 60 Uname string 61 62 // CPL is instruction current privilege level restriction. 63 // Can have value of "0" or "3". 64 CPL string 65 66 // Category is an ad-hoc categorization of instructions. 67 // Example: "SEMAPHORE". 68 Category string 69 70 // Extension is an ad-hoc grouping of instructions. 71 // If no ISASet is specified, this is used instead. 72 // Example: "3DNOW" 73 Extension string 74 75 // Exceptions is an exception set name. 76 // Example: "SSE_TYPE_7". 77 // 78 // Optional. Empty exception category generally means that 79 // instruction generates no exceptions. 80 Exceptions string 81 82 // ISASet is a name for the group of instructions that 83 // introduced this feature. 84 // Example: "I286PROTECTED". 85 // 86 // Older objects only defined Extension field. 87 // Newer objects may contain both Extension and ISASet fields. 88 // For some objects Extension==ISASet. 89 // Both fields are required to do precise CPUID-like decisions. 90 // 91 // Optional. 92 ISASet string 93 94 // Flags describes read/written flag bit values. 95 // Example: "MUST [ of-u sf-u af-u pf-u cf-mod ]". 96 // 97 // Optional. If not present, no flags are neither read nor written. 98 Flags string 99 100 // A hopefully useful comment. 101 // 102 // Optional. 103 Comment string 104 105 // The object revision. 106 // 107 // Optional. 108 Version string 109 110 // RealOpcode marks unstable (not in SDM yet) instructions with "N". 111 // Normally, always "Y" or not present at all. 112 // 113 // Optional. 114 RealOpcode string 115 116 // Insts are concrete instruction templates that are derived from containing Object. 117 // Inst contains fields PATTERN, OPERANDS, IFORM in enc/dec instruction. 118 Insts []*Inst 119 } 120 121 // Inst represents a single instruction template. 122 // 123 // Some templates contain expandable (macro) pattern and operands 124 // which tells that there are more than one real instructions 125 // that are expressed by the template. 126 type Inst struct { 127 // Object that contains properties that are shared with multiple 128 // Inst objects. 129 *Object 130 131 // Index is the position inside XED object. 132 // Object.Insts[Index] returns this inst. 133 Index int 134 135 // Pattern is the sequence of bits and nonterminals used to 136 // decode/encode an instruction. 137 // Example: "0x0F 0x28 no_refining_prefix MOD[0b11] MOD=3 REG[rrr] RM[nnn]". 138 Pattern string 139 140 // Operands are instruction arguments, typicall registers, 141 // memory operands and pseudo-resources. Separated by space. 142 // Example: "MEM0:rcw:b REG0=GPR8_R():r REG1=XED_REG_AL:rcw:SUPP". 143 Operands string 144 145 // Iform is a name for the pattern that starts with the 146 // iclass and bakes in the operands. If omitted, XED 147 // tries to generate one. We often add custom suffixes 148 // to these to disambiguate certain combinations. 149 // Example: "MOVAPS_XMMps_XMMps_0F28". 150 // 151 // Optional. 152 Iform string 153 } 154 155 // Opcode returns instruction name or empty string, 156 // if appropriate Object fields are not initialized. 157 func (o *Object) Opcode() string { 158 switch { 159 case o.Iclass != "": 160 return o.Iclass 161 case o.Disasm != "": 162 return o.Disasm 163 case o.DisasmIntel != "": 164 return o.DisasmIntel 165 case o.DisasmATTSV != "": 166 return o.DisasmATTSV 167 case o.Uname != "": 168 return o.Uname 169 } 170 return "" 171 } 172 173 // HasAttribute checks that o has attribute with specified name. 174 // Note that check is done at "word" level, substring names will not match. 175 func (o *Object) HasAttribute(name string) bool { 176 return containsWord(o.Attributes, name) 177 } 178 179 // String returns pretty-printed inst representation. 180 // 181 // Outputs valid JSON string. This property is 182 // not guaranteed to be preserved. 183 func (inst *Inst) String() string { 184 // Do not use direct inst marshalling to achieve 185 // flat object printed representation. 186 // Map is avoided to ensure consistent props order. 187 type flatObject struct { 188 Iclass string 189 Disasm string `json:",omitempty"` 190 DisasmIntel string `json:",omitempty"` 191 DisasmATTSV string `json:",omitempty"` 192 Attributes string `json:",omitempty"` 193 Uname string `json:",omitempty"` 194 CPL string 195 Category string 196 Extension string 197 Exceptions string `json:",omitempty"` 198 ISASet string `json:",omitempty"` 199 Flags string `json:",omitempty"` 200 Comment string `json:",omitempty"` 201 Version string `json:",omitempty"` 202 RealOpcode string `json:",omitempty"` 203 Pattern string 204 Operands string 205 Iform string `json:",omitempty"` 206 } 207 208 flat := flatObject{ 209 Iclass: inst.Iclass, 210 Disasm: inst.Disasm, 211 DisasmIntel: inst.DisasmIntel, 212 DisasmATTSV: inst.DisasmATTSV, 213 Attributes: inst.Attributes, 214 Uname: inst.Uname, 215 CPL: inst.CPL, 216 Category: inst.Category, 217 Extension: inst.Extension, 218 Exceptions: inst.Exceptions, 219 ISASet: inst.ISASet, 220 Flags: inst.Flags, 221 Comment: inst.Comment, 222 Version: inst.Version, 223 RealOpcode: inst.RealOpcode, 224 Pattern: inst.Pattern, 225 Operands: inst.Operands, 226 Iform: inst.Iform, 227 } 228 229 b, err := json.MarshalIndent(flat, "", " ") 230 if err != nil { 231 panic(err) 232 } 233 return string(b) 234 } 235 236 // ExpandStates returns a copy of s where all state macros 237 // are expanded. 238 // This requires db "states" to be loaded. 239 func ExpandStates(db *Database, s string) string { 240 substs := db.states 241 parts := strings.Fields(s) 242 for i := range parts { 243 if repl := substs[parts[i]]; repl != "" { 244 parts[i] = repl 245 } 246 } 247 return strings.Join(parts, " ") 248 } 249 250 // containsWord searches for whole word match in s. 251 func containsWord(s, word string) bool { 252 i := strings.Index(s, word) 253 if i == -1 { 254 return false 255 } 256 leftOK := i == 0 || 257 (s[i-1] == ' ') 258 rigthOK := i+len(word) == len(s) || 259 (s[i+len(word)] == ' ') 260 return leftOK && rigthOK 261 } 262