...

Source file src/golang.org/x/text/internal/colltab/trie.go

Documentation: golang.org/x/text/internal/colltab

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // The trie in this file is used to associate the first full character in an
     6  // UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
     7  // sequence are used to lookup offsets in the index table to be used for the
     8  // next byte. The last byte is used to index into a table of collation elements.
     9  // For a full description, see go.text/collate/build/trie.go.
    10  
    11  package colltab
    12  
    13  const blockSize = 64
    14  
    15  type Trie struct {
    16  	Index0  []uint16 // index for first byte (0xC0-0xFF)
    17  	Values0 []uint32 // index for first byte (0x00-0x7F)
    18  	Index   []uint16
    19  	Values  []uint32
    20  }
    21  
    22  const (
    23  	t1 = 0x00 // 0000 0000
    24  	tx = 0x80 // 1000 0000
    25  	t2 = 0xC0 // 1100 0000
    26  	t3 = 0xE0 // 1110 0000
    27  	t4 = 0xF0 // 1111 0000
    28  	t5 = 0xF8 // 1111 1000
    29  	t6 = 0xFC // 1111 1100
    30  	te = 0xFE // 1111 1110
    31  )
    32  
    33  func (t *Trie) lookupValue(n uint16, b byte) Elem {
    34  	return Elem(t.Values[int(n)<<6+int(b)])
    35  }
    36  
    37  // lookup returns the trie value for the first UTF-8 encoding in s and
    38  // the width in bytes of this encoding. The size will be 0 if s does not
    39  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
    40  func (t *Trie) lookup(s []byte) (v Elem, sz int) {
    41  	c0 := s[0]
    42  	switch {
    43  	case c0 < tx:
    44  		return Elem(t.Values0[c0]), 1
    45  	case c0 < t2:
    46  		return 0, 1
    47  	case c0 < t3:
    48  		if len(s) < 2 {
    49  			return 0, 0
    50  		}
    51  		i := t.Index0[c0]
    52  		c1 := s[1]
    53  		if c1 < tx || t2 <= c1 {
    54  			return 0, 1
    55  		}
    56  		return t.lookupValue(i, c1), 2
    57  	case c0 < t4:
    58  		if len(s) < 3 {
    59  			return 0, 0
    60  		}
    61  		i := t.Index0[c0]
    62  		c1 := s[1]
    63  		if c1 < tx || t2 <= c1 {
    64  			return 0, 1
    65  		}
    66  		o := int(i)<<6 + int(c1)
    67  		i = t.Index[o]
    68  		c2 := s[2]
    69  		if c2 < tx || t2 <= c2 {
    70  			return 0, 2
    71  		}
    72  		return t.lookupValue(i, c2), 3
    73  	case c0 < t5:
    74  		if len(s) < 4 {
    75  			return 0, 0
    76  		}
    77  		i := t.Index0[c0]
    78  		c1 := s[1]
    79  		if c1 < tx || t2 <= c1 {
    80  			return 0, 1
    81  		}
    82  		o := int(i)<<6 + int(c1)
    83  		i = t.Index[o]
    84  		c2 := s[2]
    85  		if c2 < tx || t2 <= c2 {
    86  			return 0, 2
    87  		}
    88  		o = int(i)<<6 + int(c2)
    89  		i = t.Index[o]
    90  		c3 := s[3]
    91  		if c3 < tx || t2 <= c3 {
    92  			return 0, 3
    93  		}
    94  		return t.lookupValue(i, c3), 4
    95  	}
    96  	// Illegal rune
    97  	return 0, 1
    98  }
    99  
   100  // The body of lookupString is a verbatim copy of that of lookup.
   101  func (t *Trie) lookupString(s string) (v Elem, sz int) {
   102  	c0 := s[0]
   103  	switch {
   104  	case c0 < tx:
   105  		return Elem(t.Values0[c0]), 1
   106  	case c0 < t2:
   107  		return 0, 1
   108  	case c0 < t3:
   109  		if len(s) < 2 {
   110  			return 0, 0
   111  		}
   112  		i := t.Index0[c0]
   113  		c1 := s[1]
   114  		if c1 < tx || t2 <= c1 {
   115  			return 0, 1
   116  		}
   117  		return t.lookupValue(i, c1), 2
   118  	case c0 < t4:
   119  		if len(s) < 3 {
   120  			return 0, 0
   121  		}
   122  		i := t.Index0[c0]
   123  		c1 := s[1]
   124  		if c1 < tx || t2 <= c1 {
   125  			return 0, 1
   126  		}
   127  		o := int(i)<<6 + int(c1)
   128  		i = t.Index[o]
   129  		c2 := s[2]
   130  		if c2 < tx || t2 <= c2 {
   131  			return 0, 2
   132  		}
   133  		return t.lookupValue(i, c2), 3
   134  	case c0 < t5:
   135  		if len(s) < 4 {
   136  			return 0, 0
   137  		}
   138  		i := t.Index0[c0]
   139  		c1 := s[1]
   140  		if c1 < tx || t2 <= c1 {
   141  			return 0, 1
   142  		}
   143  		o := int(i)<<6 + int(c1)
   144  		i = t.Index[o]
   145  		c2 := s[2]
   146  		if c2 < tx || t2 <= c2 {
   147  			return 0, 2
   148  		}
   149  		o = int(i)<<6 + int(c2)
   150  		i = t.Index[o]
   151  		c3 := s[3]
   152  		if c3 < tx || t2 <= c3 {
   153  			return 0, 3
   154  		}
   155  		return t.lookupValue(i, c3), 4
   156  	}
   157  	// Illegal rune
   158  	return 0, 1
   159  }
   160  

View as plain text