...

Source file src/golang.org/x/text/unicode/bidi/prop.go

Documentation: golang.org/x/text/unicode/bidi

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bidi
     6  
     7  import "unicode/utf8"
     8  
     9  // Properties provides access to BiDi properties of runes.
    10  type Properties struct {
    11  	entry uint8
    12  	last  uint8
    13  }
    14  
// trie is the package-level lookup structure that Lookup and LookupString
// query (via lookupValue) with the final byte of a multi-byte encoding.
// NOTE(review): the meaning of the 0 argument to newBidiTrie is not visible
// in this file — confirm against the generated trie code.
var trie = newBidiTrie(0)
    16  
    17  // TODO: using this for bidirule reduces the running time by about 5%. Consider
    18  // if this is worth exposing or if we can find a way to speed up the Class
    19  // method.
    20  //
    21  // // CompactClass is like Class, but maps all of the BiDi control classes
    22  // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
    23  // func (p Properties) CompactClass() Class {
    24  // 	return Class(p.entry & 0x0F)
    25  // }
    26  
    27  // Class returns the Bidi class for p.
    28  func (p Properties) Class() Class {
    29  	c := Class(p.entry & 0x0F)
    30  	if c == Control {
    31  		c = controlByteToClass[p.last&0xF]
    32  	}
    33  	return c
    34  }
    35  
    36  // IsBracket reports whether the rune is a bracket.
    37  func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
    38  
    39  // IsOpeningBracket reports whether the rune is an opening bracket.
    40  // IsBracket must return true.
    41  func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
    42  
    43  // TODO: find a better API and expose.
    44  func (p Properties) reverseBracket(r rune) rune {
    45  	return xorMasks[p.entry>>xorMaskShift] ^ r
    46  }
    47  
    48  var controlByteToClass = [16]Class{
    49  	0xD: LRO, // U+202D LeftToRightOverride,
    50  	0xE: RLO, // U+202E RightToLeftOverride,
    51  	0xA: LRE, // U+202A LeftToRightEmbedding,
    52  	0xB: RLE, // U+202B RightToLeftEmbedding,
    53  	0xC: PDF, // U+202C PopDirectionalFormat,
    54  	0x6: LRI, // U+2066 LeftToRightIsolate,
    55  	0x7: RLI, // U+2067 RightToLeftIsolate,
    56  	0x8: FSI, // U+2068 FirstStrongIsolate,
    57  	0x9: PDI, // U+2069 PopDirectionalIsolate,
    58  }
    59  
    60  // LookupRune returns properties for r.
    61  func LookupRune(r rune) (p Properties, size int) {
    62  	var buf [4]byte
    63  	n := utf8.EncodeRune(buf[:], r)
    64  	return Lookup(buf[:n])
    65  }
    66  
    67  // TODO: these lookup methods are based on the generated trie code. The returned
    68  // sizes have slightly different semantics from the generated code, in that it
    69  // always returns size==1 for an illegal UTF-8 byte (instead of the length
    70  // of the maximum invalid subsequence). Most Transformers, like unicode/norm,
    71  // leave invalid UTF-8 untouched, in which case it has performance benefits to
    72  // do so (without changing the semantics). Bidi requires the semantics used here
    73  // for the bidirule implementation to be compatible with the Go semantics.
    74  // They ultimately should perhaps be adopted by all trie implementations, for
    75  // convenience's sake.
    76  // This unrolled code also boosts performance of the secure/bidirule package by
    77  // about 30%.
    78  // So, to remove this code:
    79  //   - add option to trie generator to define return type.
    80  //   - always return 1 byte size for ill-formed UTF-8 runes.
    81  
    82  // Lookup returns properties for the first rune in s and the width in bytes of
    83  // its encoding. The size will be 0 if s does not hold enough bytes to complete
    84  // the encoding.
    85  func Lookup(s []byte) (p Properties, sz int) {
    86  	c0 := s[0]
    87  	switch {
    88  	case c0 < 0x80: // is ASCII
    89  		return Properties{entry: bidiValues[c0]}, 1
    90  	case c0 < 0xC2:
    91  		return Properties{}, 1
    92  	case c0 < 0xE0: // 2-byte UTF-8
    93  		if len(s) < 2 {
    94  			return Properties{}, 0
    95  		}
    96  		i := bidiIndex[c0]
    97  		c1 := s[1]
    98  		if c1 < 0x80 || 0xC0 <= c1 {
    99  			return Properties{}, 1
   100  		}
   101  		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
   102  	case c0 < 0xF0: // 3-byte UTF-8
   103  		if len(s) < 3 {
   104  			return Properties{}, 0
   105  		}
   106  		i := bidiIndex[c0]
   107  		c1 := s[1]
   108  		if c1 < 0x80 || 0xC0 <= c1 {
   109  			return Properties{}, 1
   110  		}
   111  		o := uint32(i)<<6 + uint32(c1)
   112  		i = bidiIndex[o]
   113  		c2 := s[2]
   114  		if c2 < 0x80 || 0xC0 <= c2 {
   115  			return Properties{}, 1
   116  		}
   117  		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
   118  	case c0 < 0xF8: // 4-byte UTF-8
   119  		if len(s) < 4 {
   120  			return Properties{}, 0
   121  		}
   122  		i := bidiIndex[c0]
   123  		c1 := s[1]
   124  		if c1 < 0x80 || 0xC0 <= c1 {
   125  			return Properties{}, 1
   126  		}
   127  		o := uint32(i)<<6 + uint32(c1)
   128  		i = bidiIndex[o]
   129  		c2 := s[2]
   130  		if c2 < 0x80 || 0xC0 <= c2 {
   131  			return Properties{}, 1
   132  		}
   133  		o = uint32(i)<<6 + uint32(c2)
   134  		i = bidiIndex[o]
   135  		c3 := s[3]
   136  		if c3 < 0x80 || 0xC0 <= c3 {
   137  			return Properties{}, 1
   138  		}
   139  		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
   140  	}
   141  	// Illegal rune
   142  	return Properties{}, 1
   143  }
   144  
   145  // LookupString returns properties for the first rune in s and the width in
   146  // bytes of its encoding. The size will be 0 if s does not hold enough bytes to
   147  // complete the encoding.
   148  func LookupString(s string) (p Properties, sz int) {
   149  	c0 := s[0]
   150  	switch {
   151  	case c0 < 0x80: // is ASCII
   152  		return Properties{entry: bidiValues[c0]}, 1
   153  	case c0 < 0xC2:
   154  		return Properties{}, 1
   155  	case c0 < 0xE0: // 2-byte UTF-8
   156  		if len(s) < 2 {
   157  			return Properties{}, 0
   158  		}
   159  		i := bidiIndex[c0]
   160  		c1 := s[1]
   161  		if c1 < 0x80 || 0xC0 <= c1 {
   162  			return Properties{}, 1
   163  		}
   164  		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
   165  	case c0 < 0xF0: // 3-byte UTF-8
   166  		if len(s) < 3 {
   167  			return Properties{}, 0
   168  		}
   169  		i := bidiIndex[c0]
   170  		c1 := s[1]
   171  		if c1 < 0x80 || 0xC0 <= c1 {
   172  			return Properties{}, 1
   173  		}
   174  		o := uint32(i)<<6 + uint32(c1)
   175  		i = bidiIndex[o]
   176  		c2 := s[2]
   177  		if c2 < 0x80 || 0xC0 <= c2 {
   178  			return Properties{}, 1
   179  		}
   180  		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
   181  	case c0 < 0xF8: // 4-byte UTF-8
   182  		if len(s) < 4 {
   183  			return Properties{}, 0
   184  		}
   185  		i := bidiIndex[c0]
   186  		c1 := s[1]
   187  		if c1 < 0x80 || 0xC0 <= c1 {
   188  			return Properties{}, 1
   189  		}
   190  		o := uint32(i)<<6 + uint32(c1)
   191  		i = bidiIndex[o]
   192  		c2 := s[2]
   193  		if c2 < 0x80 || 0xC0 <= c2 {
   194  			return Properties{}, 1
   195  		}
   196  		o = uint32(i)<<6 + uint32(c2)
   197  		i = bidiIndex[o]
   198  		c3 := s[3]
   199  		if c3 < 0x80 || 0xC0 <= c3 {
   200  			return Properties{}, 1
   201  		}
   202  		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
   203  	}
   204  	// Illegal rune
   205  	return Properties{}, 1
   206  }
   207  

View as plain text