...

Source file src/golang.org/x/text/internal/triegen/data_test.go

Documentation: golang.org/x/text/internal/triegen

     1  // This file is generated with "go test -tags generate". DO NOT EDIT!
     2  //go:build !generate
     3  
     4  package triegen_test
     5  
     6  // lookup returns the trie value for the first UTF-8 encoding in s and
     7  // the width in bytes of this encoding. The size will be 0 if s does not
     8  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
     9  func (t *randTrie) lookup(s []byte) (v uint8, sz int) {
    10  	c0 := s[0]
    11  	switch {
    12  	case c0 < 0x80: // is ASCII
    13  		return randValues[c0], 1
    14  	case c0 < 0xC2:
    15  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
    16  	case c0 < 0xE0: // 2-byte UTF-8
    17  		if len(s) < 2 {
    18  			return 0, 0
    19  		}
    20  		i := randIndex[c0]
    21  		c1 := s[1]
    22  		if c1 < 0x80 || 0xC0 <= c1 {
    23  			return 0, 1 // Illegal UTF-8: not a continuation byte.
    24  		}
    25  		return t.lookupValue(uint32(i), c1), 2
    26  	case c0 < 0xF0: // 3-byte UTF-8
    27  		if len(s) < 3 {
    28  			return 0, 0
    29  		}
    30  		i := randIndex[c0]
    31  		c1 := s[1]
    32  		if c1 < 0x80 || 0xC0 <= c1 {
    33  			return 0, 1 // Illegal UTF-8: not a continuation byte.
    34  		}
    35  		o := uint32(i)<<6 + uint32(c1)
    36  		i = randIndex[o]
    37  		c2 := s[2]
    38  		if c2 < 0x80 || 0xC0 <= c2 {
    39  			return 0, 2 // Illegal UTF-8: not a continuation byte.
    40  		}
    41  		return t.lookupValue(uint32(i), c2), 3
    42  	case c0 < 0xF8: // 4-byte UTF-8
    43  		if len(s) < 4 {
    44  			return 0, 0
    45  		}
    46  		i := randIndex[c0]
    47  		c1 := s[1]
    48  		if c1 < 0x80 || 0xC0 <= c1 {
    49  			return 0, 1 // Illegal UTF-8: not a continuation byte.
    50  		}
    51  		o := uint32(i)<<6 + uint32(c1)
    52  		i = randIndex[o]
    53  		c2 := s[2]
    54  		if c2 < 0x80 || 0xC0 <= c2 {
    55  			return 0, 2 // Illegal UTF-8: not a continuation byte.
    56  		}
    57  		o = uint32(i)<<6 + uint32(c2)
    58  		i = randIndex[o]
    59  		c3 := s[3]
    60  		if c3 < 0x80 || 0xC0 <= c3 {
    61  			return 0, 3 // Illegal UTF-8: not a continuation byte.
    62  		}
    63  		return t.lookupValue(uint32(i), c3), 4
    64  	}
    65  	// Illegal rune
    66  	return 0, 1
    67  }
    68  
    69  // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
    70  // s must start with a full and valid UTF-8 encoded rune.
    71  func (t *randTrie) lookupUnsafe(s []byte) uint8 {
    72  	c0 := s[0]
    73  	if c0 < 0x80 { // is ASCII
    74  		return randValues[c0]
    75  	}
    76  	i := randIndex[c0]
    77  	if c0 < 0xE0 { // 2-byte UTF-8
    78  		return t.lookupValue(uint32(i), s[1])
    79  	}
    80  	i = randIndex[uint32(i)<<6+uint32(s[1])]
    81  	if c0 < 0xF0 { // 3-byte UTF-8
    82  		return t.lookupValue(uint32(i), s[2])
    83  	}
    84  	i = randIndex[uint32(i)<<6+uint32(s[2])]
    85  	if c0 < 0xF8 { // 4-byte UTF-8
    86  		return t.lookupValue(uint32(i), s[3])
    87  	}
    88  	return 0
    89  }
    90  
    91  // lookupString returns the trie value for the first UTF-8 encoding in s and
    92  // the width in bytes of this encoding. The size will be 0 if s does not
    93  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
    94  func (t *randTrie) lookupString(s string) (v uint8, sz int) {
    95  	c0 := s[0]
    96  	switch {
    97  	case c0 < 0x80: // is ASCII
    98  		return randValues[c0], 1
    99  	case c0 < 0xC2:
   100  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
   101  	case c0 < 0xE0: // 2-byte UTF-8
   102  		if len(s) < 2 {
   103  			return 0, 0
   104  		}
   105  		i := randIndex[c0]
   106  		c1 := s[1]
   107  		if c1 < 0x80 || 0xC0 <= c1 {
   108  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   109  		}
   110  		return t.lookupValue(uint32(i), c1), 2
   111  	case c0 < 0xF0: // 3-byte UTF-8
   112  		if len(s) < 3 {
   113  			return 0, 0
   114  		}
   115  		i := randIndex[c0]
   116  		c1 := s[1]
   117  		if c1 < 0x80 || 0xC0 <= c1 {
   118  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   119  		}
   120  		o := uint32(i)<<6 + uint32(c1)
   121  		i = randIndex[o]
   122  		c2 := s[2]
   123  		if c2 < 0x80 || 0xC0 <= c2 {
   124  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   125  		}
   126  		return t.lookupValue(uint32(i), c2), 3
   127  	case c0 < 0xF8: // 4-byte UTF-8
   128  		if len(s) < 4 {
   129  			return 0, 0
   130  		}
   131  		i := randIndex[c0]
   132  		c1 := s[1]
   133  		if c1 < 0x80 || 0xC0 <= c1 {
   134  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   135  		}
   136  		o := uint32(i)<<6 + uint32(c1)
   137  		i = randIndex[o]
   138  		c2 := s[2]
   139  		if c2 < 0x80 || 0xC0 <= c2 {
   140  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   141  		}
   142  		o = uint32(i)<<6 + uint32(c2)
   143  		i = randIndex[o]
   144  		c3 := s[3]
   145  		if c3 < 0x80 || 0xC0 <= c3 {
   146  			return 0, 3 // Illegal UTF-8: not a continuation byte.
   147  		}
   148  		return t.lookupValue(uint32(i), c3), 4
   149  	}
   150  	// Illegal rune
   151  	return 0, 1
   152  }
   153  
   154  // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
   155  // s must start with a full and valid UTF-8 encoded rune.
   156  func (t *randTrie) lookupStringUnsafe(s string) uint8 {
   157  	c0 := s[0]
   158  	if c0 < 0x80 { // is ASCII
   159  		return randValues[c0]
   160  	}
   161  	i := randIndex[c0]
   162  	if c0 < 0xE0 { // 2-byte UTF-8
   163  		return t.lookupValue(uint32(i), s[1])
   164  	}
   165  	i = randIndex[uint32(i)<<6+uint32(s[1])]
   166  	if c0 < 0xF0 { // 3-byte UTF-8
   167  		return t.lookupValue(uint32(i), s[2])
   168  	}
   169  	i = randIndex[uint32(i)<<6+uint32(s[2])]
   170  	if c0 < 0xF8 { // 4-byte UTF-8
   171  		return t.lookupValue(uint32(i), s[3])
   172  	}
   173  	return 0
   174  }
   175  
// randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f.
//
// randTrie carries no state of its own: its lookup methods read the
// package-level randValues and randIndex tables directly.
type randTrie struct{}
   178  
   179  func newRandTrie(i int) *randTrie {
   180  	return &randTrie{}
   181  }
   182  
   183  // lookupValue determines the type of block n and looks up the value for b.
   184  func (t *randTrie) lookupValue(n uint32, b byte) uint8 {
   185  	switch {
   186  	default:
   187  		return uint8(randValues[n<<6+uint32(b)])
   188  	}
   189  }
   190  
// randValues: 56 blocks, 3584 entries, 3584 bytes
// The third block is the zero block.
//
// The table is written as a sparse literal: only non-zero entries are listed
// and every other element is zero. The randTrie methods read it either
// directly by an ASCII byte or, through lookupValue, at offset n<<6 + b.
var randValues = [3584]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc9: 0x0001,
	// Block 0x4, offset 0x100
	0x100: 0x0001,
	// Block 0x5, offset 0x140
	0x155: 0x0001,
	// Block 0x6, offset 0x180
	0x196: 0x0001,
	// Block 0x7, offset 0x1c0
	0x1ef: 0x0001,
	// Block 0x8, offset 0x200
	0x206: 0x0001,
	// Block 0x9, offset 0x240
	0x258: 0x0001,
	// Block 0xa, offset 0x280
	0x288: 0x0001,
	// Block 0xb, offset 0x2c0
	0x2f2: 0x0001,
	// Block 0xc, offset 0x300
	0x304: 0x0001,
	// Block 0xd, offset 0x340
	0x34b: 0x0001,
	// Block 0xe, offset 0x380
	0x3ba: 0x0001,
	// Block 0xf, offset 0x3c0
	0x3f5: 0x0001,
	// Block 0x10, offset 0x400
	0x41d: 0x0001,
	// Block 0x11, offset 0x440
	0x442: 0x0001,
	// Block 0x12, offset 0x480
	0x4bb: 0x0001,
	// Block 0x13, offset 0x4c0
	0x4e9: 0x0001,
	// Block 0x14, offset 0x500
	0x53e: 0x0001,
	// Block 0x15, offset 0x540
	0x55f: 0x0001,
	// Block 0x16, offset 0x580
	0x5b7: 0x0001,
	// Block 0x17, offset 0x5c0
	0x5d9: 0x0001,
	// Block 0x18, offset 0x600
	0x60e: 0x0001,
	// Block 0x19, offset 0x640
	0x652: 0x0001,
	// Block 0x1a, offset 0x680
	0x68f: 0x0001,
	// Block 0x1b, offset 0x6c0
	0x6dc: 0x0001,
	// Block 0x1c, offset 0x700
	0x703: 0x0001,
	// Block 0x1d, offset 0x740
	0x741: 0x0001,
	// Block 0x1e, offset 0x780
	0x79b: 0x0001,
	// Block 0x1f, offset 0x7c0
	0x7f1: 0x0001,
	// Block 0x20, offset 0x800
	0x833: 0x0001,
	// Block 0x21, offset 0x840
	0x853: 0x0001,
	// Block 0x22, offset 0x880
	0x8a2: 0x0001,
	// Block 0x23, offset 0x8c0
	0x8f8: 0x0001,
	// Block 0x24, offset 0x900
	0x917: 0x0001,
	// Block 0x25, offset 0x940
	0x945: 0x0001,
	// Block 0x26, offset 0x980
	0x99e: 0x0001,
	// Block 0x27, offset 0x9c0
	0x9fd: 0x0001,
	// Block 0x28, offset 0xa00
	0xa0d: 0x0001,
	// Block 0x29, offset 0xa40
	0xa66: 0x0001,
	// Block 0x2a, offset 0xa80
	0xaab: 0x0001,
	// Block 0x2b, offset 0xac0
	0xaea: 0x0001,
	// Block 0x2c, offset 0xb00
	0xb2d: 0x0001,
	// Block 0x2d, offset 0xb40
	0xb54: 0x0001,
	// Block 0x2e, offset 0xb80
	0xb90: 0x0001,
	// Block 0x2f, offset 0xbc0
	0xbe5: 0x0001,
	// Block 0x30, offset 0xc00
	0xc28: 0x0001,
	// Block 0x31, offset 0xc40
	0xc7c: 0x0001,
	// Block 0x32, offset 0xc80
	0xcbf: 0x0001,
	// Block 0x33, offset 0xcc0
	0xcc7: 0x0001,
	// Block 0x34, offset 0xd00
	0xd34: 0x0001,
	// Block 0x35, offset 0xd40
	0xd61: 0x0001,
	// Block 0x36, offset 0xd80
	0xdb9: 0x0001,
	// Block 0x37, offset 0xdc0
	0xdda: 0x0001,
}
   304  
// randIndex: 89 blocks, 5696 entries, 5696 bytes
// Block 0 is the zero block.
//
// The table is written as a sparse literal: only non-zero entries are listed
// and every other element is zero. The randTrie lookup methods read it first
// by the lead byte of a multi-byte encoding and then at offset
// block<<6 + next byte, where block is the entry read in the previous step.
var randIndex = [5696]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04,
	0xea: 0x05, 0xeb: 0x06, 0xec: 0x07,
	0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56,
	// Block 0x4, offset 0x100
	0x107: 0x01,
	// Block 0x5, offset 0x140
	0x16c: 0x02,
	// Block 0x6, offset 0x180
	0x19c: 0x03,
	0x1ae: 0x04,
	// Block 0x7, offset 0x1c0
	0x1d8: 0x05,
	0x1f7: 0x06,
	// Block 0x8, offset 0x200
	0x20c: 0x07,
	// Block 0x9, offset 0x240
	0x24a: 0x08,
	// Block 0xa, offset 0x280
	0x2b6: 0x09,
	// Block 0xb, offset 0x2c0
	0x2d5: 0x0a,
	// Block 0xc, offset 0x300
	0x31a: 0x0b,
	// Block 0xd, offset 0x340
	0x373: 0x0c,
	// Block 0xe, offset 0x380
	0x38b: 0x0d,
	// Block 0xf, offset 0x3c0
	0x3f0: 0x0e,
	// Block 0x10, offset 0x400
	0x433: 0x0f,
	// Block 0x11, offset 0x440
	0x45d: 0x10,
	// Block 0x12, offset 0x480
	0x491: 0x08, 0x494: 0x09, 0x497: 0x0a,
	0x49b: 0x0b, 0x49c: 0x0c,
	0x4a1: 0x0d,
	0x4ad: 0x0e,
	0x4ba: 0x0f,
	// Block 0x13, offset 0x4c0
	0x4c1: 0x11,
	// Block 0x14, offset 0x500
	0x531: 0x12,
	// Block 0x15, offset 0x540
	0x546: 0x13,
	// Block 0x16, offset 0x580
	0x5ab: 0x14,
	// Block 0x17, offset 0x5c0
	0x5d4: 0x11,
	0x5fe: 0x11,
	// Block 0x18, offset 0x600
	0x618: 0x0a,
	// Block 0x19, offset 0x640
	0x65b: 0x15,
	// Block 0x1a, offset 0x680
	0x6a0: 0x16,
	// Block 0x1b, offset 0x6c0
	0x6d2: 0x17,
	0x6f6: 0x18,
	// Block 0x1c, offset 0x700
	0x711: 0x19,
	// Block 0x1d, offset 0x740
	0x768: 0x1a,
	// Block 0x1e, offset 0x780
	0x783: 0x1b,
	// Block 0x1f, offset 0x7c0
	0x7f9: 0x1c,
	// Block 0x20, offset 0x800
	0x831: 0x1d,
	// Block 0x21, offset 0x840
	0x85e: 0x1e,
	// Block 0x22, offset 0x880
	0x898: 0x1f,
	// Block 0x23, offset 0x8c0
	0x8c7: 0x18,
	0x8d5: 0x14,
	0x8f7: 0x20,
	0x8fe: 0x1f,
	// Block 0x24, offset 0x900
	0x905: 0x21,
	// Block 0x25, offset 0x940
	0x966: 0x03,
	// Block 0x26, offset 0x980
	0x981: 0x07, 0x983: 0x11,
	0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15,
	0x992: 0x16, 0x995: 0x17, 0x996: 0x18,
	0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c,
	0x9a3: 0x1d,
	0x9ad: 0x1e, 0x9af: 0x1f,
	0x9b0: 0x20, 0x9b1: 0x21,
	0x9b8: 0x22, 0x9bd: 0x23,
	// Block 0x27, offset 0x9c0
	0x9cd: 0x22,
	// Block 0x28, offset 0xa00
	0xa0c: 0x08,
	// Block 0x29, offset 0xa40
	0xa6f: 0x1c,
	// Block 0x2a, offset 0xa80
	0xa90: 0x1a,
	0xaaf: 0x23,
	// Block 0x2b, offset 0xac0
	0xae3: 0x19,
	0xae8: 0x24,
	0xafc: 0x25,
	// Block 0x2c, offset 0xb00
	0xb13: 0x26,
	// Block 0x2d, offset 0xb40
	0xb67: 0x1c,
	// Block 0x2e, offset 0xb80
	0xb8f: 0x0b,
	// Block 0x2f, offset 0xbc0
	0xbcb: 0x27,
	0xbe7: 0x26,
	// Block 0x30, offset 0xc00
	0xc34: 0x16,
	// Block 0x31, offset 0xc40
	0xc62: 0x03,
	// Block 0x32, offset 0xc80
	0xcbb: 0x12,
	// Block 0x33, offset 0xcc0
	0xcdf: 0x09,
	// Block 0x34, offset 0xd00
	0xd34: 0x0a,
	// Block 0x35, offset 0xd40
	0xd41: 0x1e,
	// Block 0x36, offset 0xd80
	0xd83: 0x28,
	// Block 0x37, offset 0xdc0
	0xdc0: 0x15,
	// Block 0x38, offset 0xe00
	0xe1a: 0x15,
	// Block 0x39, offset 0xe40
	0xe65: 0x29,
	// Block 0x3a, offset 0xe80
	0xe86: 0x1f,
	// Block 0x3b, offset 0xec0
	0xeec: 0x18,
	// Block 0x3c, offset 0xf00
	0xf28: 0x2a,
	// Block 0x3d, offset 0xf40
	0xf53: 0x08,
	// Block 0x3e, offset 0xf80
	0xfa2: 0x2b,
	0xfaa: 0x17,
	// Block 0x3f, offset 0xfc0
	0xfc0: 0x25, 0xfc2: 0x26,
	0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29,
	0xfd5: 0x2a,
	0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d,
	0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31,
	0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35,
	0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39,
	0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c,
	// Block 0x40, offset 0x1000
	0x102c: 0x2c,
	// Block 0x41, offset 0x1040
	0x1074: 0x2c,
	// Block 0x42, offset 0x1080
	0x108c: 0x08,
	0x10a0: 0x2d,
	// Block 0x43, offset 0x10c0
	0x10e8: 0x10,
	// Block 0x44, offset 0x1100
	0x110f: 0x13,
	// Block 0x45, offset 0x1140
	0x114b: 0x2e,
	// Block 0x46, offset 0x1180
	0x118b: 0x23,
	0x119d: 0x0c,
	// Block 0x47, offset 0x11c0
	0x11c3: 0x12,
	0x11f9: 0x0f,
	// Block 0x48, offset 0x1200
	0x121e: 0x1b,
	// Block 0x49, offset 0x1240
	0x1270: 0x2f,
	// Block 0x4a, offset 0x1280
	0x128a: 0x1b,
	0x12a7: 0x02,
	// Block 0x4b, offset 0x12c0
	0x12fb: 0x14,
	// Block 0x4c, offset 0x1300
	0x1333: 0x30,
	// Block 0x4d, offset 0x1340
	0x134d: 0x31,
	// Block 0x4e, offset 0x1380
	0x138e: 0x15,
	// Block 0x4f, offset 0x13c0
	0x13f4: 0x32,
	// Block 0x50, offset 0x1400
	0x141b: 0x33,
	// Block 0x51, offset 0x1440
	0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41,
	0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45,
	0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a,
	0x1472: 0x4b, 0x1473: 0x4c,
	0x1479: 0x4d, 0x147b: 0x4e,
	// Block 0x52, offset 0x1480
	0x1480: 0x34,
	0x1499: 0x11,
	0x14b6: 0x2c,
	// Block 0x53, offset 0x14c0
	0x14e4: 0x0d,
	// Block 0x54, offset 0x1500
	0x1527: 0x08,
	// Block 0x55, offset 0x1540
	0x1555: 0x2b,
	// Block 0x56, offset 0x1580
	0x15b2: 0x35,
	// Block 0x57, offset 0x15c0
	0x15f2: 0x1c, 0x15f4: 0x29,
	// Block 0x58, offset 0x1600
	0x1600: 0x50, 0x1603: 0x51,
	0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55,
}
   527  
   528  // lookup returns the trie value for the first UTF-8 encoding in s and
   529  // the width in bytes of this encoding. The size will be 0 if s does not
   530  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
   531  func (t *multiTrie) lookup(s []byte) (v uint64, sz int) {
   532  	c0 := s[0]
   533  	switch {
   534  	case c0 < 0x80: // is ASCII
   535  		return t.ascii[c0], 1
   536  	case c0 < 0xC2:
   537  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
   538  	case c0 < 0xE0: // 2-byte UTF-8
   539  		if len(s) < 2 {
   540  			return 0, 0
   541  		}
   542  		i := t.utf8Start[c0]
   543  		c1 := s[1]
   544  		if c1 < 0x80 || 0xC0 <= c1 {
   545  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   546  		}
   547  		return t.lookupValue(uint32(i), c1), 2
   548  	case c0 < 0xF0: // 3-byte UTF-8
   549  		if len(s) < 3 {
   550  			return 0, 0
   551  		}
   552  		i := t.utf8Start[c0]
   553  		c1 := s[1]
   554  		if c1 < 0x80 || 0xC0 <= c1 {
   555  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   556  		}
   557  		o := uint32(i)<<6 + uint32(c1)
   558  		i = multiIndex[o]
   559  		c2 := s[2]
   560  		if c2 < 0x80 || 0xC0 <= c2 {
   561  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   562  		}
   563  		return t.lookupValue(uint32(i), c2), 3
   564  	case c0 < 0xF8: // 4-byte UTF-8
   565  		if len(s) < 4 {
   566  			return 0, 0
   567  		}
   568  		i := t.utf8Start[c0]
   569  		c1 := s[1]
   570  		if c1 < 0x80 || 0xC0 <= c1 {
   571  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   572  		}
   573  		o := uint32(i)<<6 + uint32(c1)
   574  		i = multiIndex[o]
   575  		c2 := s[2]
   576  		if c2 < 0x80 || 0xC0 <= c2 {
   577  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   578  		}
   579  		o = uint32(i)<<6 + uint32(c2)
   580  		i = multiIndex[o]
   581  		c3 := s[3]
   582  		if c3 < 0x80 || 0xC0 <= c3 {
   583  			return 0, 3 // Illegal UTF-8: not a continuation byte.
   584  		}
   585  		return t.lookupValue(uint32(i), c3), 4
   586  	}
   587  	// Illegal rune
   588  	return 0, 1
   589  }
   590  
   591  // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
   592  // s must start with a full and valid UTF-8 encoded rune.
   593  func (t *multiTrie) lookupUnsafe(s []byte) uint64 {
   594  	c0 := s[0]
   595  	if c0 < 0x80 { // is ASCII
   596  		return t.ascii[c0]
   597  	}
   598  	i := t.utf8Start[c0]
   599  	if c0 < 0xE0 { // 2-byte UTF-8
   600  		return t.lookupValue(uint32(i), s[1])
   601  	}
   602  	i = multiIndex[uint32(i)<<6+uint32(s[1])]
   603  	if c0 < 0xF0 { // 3-byte UTF-8
   604  		return t.lookupValue(uint32(i), s[2])
   605  	}
   606  	i = multiIndex[uint32(i)<<6+uint32(s[2])]
   607  	if c0 < 0xF8 { // 4-byte UTF-8
   608  		return t.lookupValue(uint32(i), s[3])
   609  	}
   610  	return 0
   611  }
   612  
   613  // lookupString returns the trie value for the first UTF-8 encoding in s and
   614  // the width in bytes of this encoding. The size will be 0 if s does not
   615  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
   616  func (t *multiTrie) lookupString(s string) (v uint64, sz int) {
   617  	c0 := s[0]
   618  	switch {
   619  	case c0 < 0x80: // is ASCII
   620  		return t.ascii[c0], 1
   621  	case c0 < 0xC2:
   622  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
   623  	case c0 < 0xE0: // 2-byte UTF-8
   624  		if len(s) < 2 {
   625  			return 0, 0
   626  		}
   627  		i := t.utf8Start[c0]
   628  		c1 := s[1]
   629  		if c1 < 0x80 || 0xC0 <= c1 {
   630  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   631  		}
   632  		return t.lookupValue(uint32(i), c1), 2
   633  	case c0 < 0xF0: // 3-byte UTF-8
   634  		if len(s) < 3 {
   635  			return 0, 0
   636  		}
   637  		i := t.utf8Start[c0]
   638  		c1 := s[1]
   639  		if c1 < 0x80 || 0xC0 <= c1 {
   640  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   641  		}
   642  		o := uint32(i)<<6 + uint32(c1)
   643  		i = multiIndex[o]
   644  		c2 := s[2]
   645  		if c2 < 0x80 || 0xC0 <= c2 {
   646  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   647  		}
   648  		return t.lookupValue(uint32(i), c2), 3
   649  	case c0 < 0xF8: // 4-byte UTF-8
   650  		if len(s) < 4 {
   651  			return 0, 0
   652  		}
   653  		i := t.utf8Start[c0]
   654  		c1 := s[1]
   655  		if c1 < 0x80 || 0xC0 <= c1 {
   656  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   657  		}
   658  		o := uint32(i)<<6 + uint32(c1)
   659  		i = multiIndex[o]
   660  		c2 := s[2]
   661  		if c2 < 0x80 || 0xC0 <= c2 {
   662  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   663  		}
   664  		o = uint32(i)<<6 + uint32(c2)
   665  		i = multiIndex[o]
   666  		c3 := s[3]
   667  		if c3 < 0x80 || 0xC0 <= c3 {
   668  			return 0, 3 // Illegal UTF-8: not a continuation byte.
   669  		}
   670  		return t.lookupValue(uint32(i), c3), 4
   671  	}
   672  	// Illegal rune
   673  	return 0, 1
   674  }
   675  
   676  // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
   677  // s must start with a full and valid UTF-8 encoded rune.
   678  func (t *multiTrie) lookupStringUnsafe(s string) uint64 {
   679  	c0 := s[0]
   680  	if c0 < 0x80 { // is ASCII
   681  		return t.ascii[c0]
   682  	}
   683  	i := t.utf8Start[c0]
   684  	if c0 < 0xE0 { // 2-byte UTF-8
   685  		return t.lookupValue(uint32(i), s[1])
   686  	}
   687  	i = multiIndex[uint32(i)<<6+uint32(s[1])]
   688  	if c0 < 0xF0 { // 3-byte UTF-8
   689  		return t.lookupValue(uint32(i), s[2])
   690  	}
   691  	i = multiIndex[uint32(i)<<6+uint32(s[2])]
   692  	if c0 < 0xF8 { // 4-byte UTF-8
   693  		return t.lookupValue(uint32(i), s[3])
   694  	}
   695  	return 0
   696  }
   697  
// multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e.
//
// A multiTrie is a view onto the shared multiValues and multiIndex tables:
// newMultiTrie fills both fields by slicing those tables at the block offsets
// recorded in a multiTrieHandle.
type multiTrie struct {
	// ascii is read directly with a byte < 0x80 by the lookup methods.
	ascii     []uint64 // index for ASCII bytes
	// utf8Start is read with the lead byte of a multi-byte encoding to find
	// the first block number.
	utf8Start []uint8  // index for UTF-8 bytes >= 0xC0
}
   703  
   704  func newMultiTrie(i int) *multiTrie {
   705  	h := multiTrieHandles[i]
   706  	return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]}
   707  }
   708  
   709  type multiTrieHandle struct {
   710  	ascii, multi uint8
   711  }
   712  
   713  // multiTrieHandles: 5 handles, 10 bytes
   714  var multiTrieHandles = [5]multiTrieHandle{
   715  	{0, 0},   // 8c1e77823143d35c: all
   716  	{0, 23},  // 8fb58ff8243b45b0: ASCII only
   717  	{0, 23},  // 8fb58ff8243b45b0: ASCII only 2
   718  	{0, 24},  // 2ccc43994f11046f: BMP only
   719  	{30, 25}, // ce448591bdcb4733: No BMP
   720  }
   721  
   722  // lookupValue determines the type of block n and looks up the value for b.
   723  func (t *multiTrie) lookupValue(n uint32, b byte) uint64 {
   724  	switch {
   725  	default:
   726  		return uint64(multiValues[n<<6+uint32(b)])
   727  	}
   728  }
   729  
// multiValues: 32 blocks, 2048 entries, 16384 bytes
// The third block is the zero block.
//
// The table is written as a sparse literal: only non-zero entries are listed
// and every other element is zero. It is shared by all multiTrie instances:
// newMultiTrie slices it to obtain a trie's ascii field, and lookupValue reads
// it at offset n<<6 + b.
var multiValues = [2048]uint64{
	// Block 0x0, offset 0x0
	0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
	0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
	0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
	0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
	0x3f: 0x4fd3bcfa72bce8b0,
	// Block 0x1, offset 0x40
	0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
	0x7f: 0x782caa2d25a418a9,
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
	// Block 0x4, offset 0x100
	0x13f: 0x56f8c4c82f5962dc,
	// Block 0x5, offset 0x140
	0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
	// Block 0x6, offset 0x180
	0x1bf: 0x7bf4d0ebf302a088,
	// Block 0x7, offset 0x1c0
	0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
	// Block 0x8, offset 0x200
	0x23f: 0x5de81c1dff6bf29d,
	// Block 0x9, offset 0x240
	0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
	// Block 0xa, offset 0x280
	0x2bf: 0x6a28f01979cbf059,
	// Block 0xb, offset 0x2c0
	0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
	// Block 0xc, offset 0x300
	0x33f: 0x5a10ffa9e29184fb,
	// Block 0xd, offset 0x340
	0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
	// Block 0xe, offset 0x380
	0x3bf: 0x74071288fff39c76,
	// Block 0xf, offset 0x3c0
	0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
	// Block 0x10, offset 0x400
	0x43f: 0x5676a62fd49c6bec,
	// Block 0x11, offset 0x440
	0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
	// Block 0x12, offset 0x480
	0x4bf: 0x69d6f0fe711fafc9,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
	// Block 0x14, offset 0x500
	0x53f: 0xe03b31814c95f8b,
	// Block 0x15, offset 0x540
	0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
	// Block 0x16, offset 0x580
	0x5bf: 0x3c02ea92fb168559,
	// Block 0x17, offset 0x5c0
	0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
	// Block 0x18, offset 0x600
	0x63f: 0x3bb2ed2a72748f4b,
	// Block 0x19, offset 0x640
	0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
	// Block 0x1a, offset 0x680
	0x6bf: 0x352711cfb7236418,
	// Block 0x1b, offset 0x6c0
	0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
	// Block 0x1c, offset 0x700
	0x73f: 0x7191a77b28d23110,
	// Block 0x1d, offset 0x740
	0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
	// Block 0x1e, offset 0x780
	// Block 0x1f, offset 0x7c0
}
   800  
// multiIndex: 29 blocks, 1856 entries, 1856 bytes
// Block 0 is the zero block.
//
// The table is written as a sparse literal: only non-zero entries are listed
// and every other element is zero. It is shared by all multiTrie instances:
// newMultiTrie slices it to obtain a trie's utf8Start field, and the lookup
// methods read it at offset block<<6 + next byte while walking an encoding.
var multiIndex = [1856]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
	0xc8: 0x05, 0xcf: 0x06,
	0xd0: 0x07,
	0xdf: 0x08,
	0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
	0xe8: 0x08, 0xef: 0x09,
	0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
	// Block 0x4, offset 0x100
	0x120: 0x09,
	0x13f: 0x0a,
	// Block 0x5, offset 0x140
	0x140: 0x0b,
	0x17f: 0x0c,
	// Block 0x6, offset 0x180
	0x180: 0x0d,
	// Block 0x7, offset 0x1c0
	0x1ff: 0x0e,
	// Block 0x8, offset 0x200
	0x200: 0x0f,
	// Block 0x9, offset 0x240
	0x27f: 0x10,
	// Block 0xa, offset 0x280
	0x280: 0x11,
	// Block 0xb, offset 0x2c0
	0x2ff: 0x12,
	// Block 0xc, offset 0x300
	0x300: 0x13,
	// Block 0xd, offset 0x340
	0x37f: 0x14,
	// Block 0xe, offset 0x380
	0x380: 0x15,
	// Block 0xf, offset 0x3c0
	0x3ff: 0x16,
	// Block 0x10, offset 0x400
	0x410: 0x0a,
	0x41f: 0x0b,
	0x420: 0x0c,
	0x43f: 0x0d,
	// Block 0x11, offset 0x440
	0x440: 0x17,
	// Block 0x12, offset 0x480
	0x4bf: 0x18,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x0f,
	0x4ff: 0x10,
	// Block 0x14, offset 0x500
	0x500: 0x19,
	// Block 0x15, offset 0x540
	0x540: 0x12,
	// Block 0x16, offset 0x580
	0x5bf: 0x1a,
	// Block 0x17, offset 0x5c0
	0x5ff: 0x14,
	// Block 0x18, offset 0x600
	0x600: 0x1b,
	// Block 0x19, offset 0x640
	0x640: 0x16,
	// Block 0x1a, offset 0x680
	// Block 0x1b, offset 0x6c0
	0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
	0x6c8: 0x05, 0x6cf: 0x06,
	0x6d0: 0x07,
	0x6df: 0x08,
	0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
	0x6e8: 0x08, 0x6ef: 0x09,
	// Block 0x1c, offset 0x700
	0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
}
   876  

View as plain text