1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate stringer -type=Kind 6 //go:generate go run gen.go gen_common.go gen_trieval.go 7 8 // Package width provides functionality for handling different widths in text. 9 // 10 // Wide characters behave like ideographs; they tend to allow line breaks after 11 // each character and remain upright in vertical text layout. Narrow characters 12 // are kept together in words or runs that are rotated sideways in vertical text 13 // layout. 14 // 15 // For more information, see https://unicode.org/reports/tr11/. 16 package width // import "golang.org/x/text/width" 17 18 import ( 19 "unicode/utf8" 20 21 "golang.org/x/text/transform" 22 ) 23 24 // TODO 25 // 1) Reduce table size by compressing blocks. 26 // 2) API proposition for computing display length 27 // (approximation, fixed pitch only). 28 // 3) Implement display length. 29 30 // Kind indicates the type of width property as defined in https://unicode.org/reports/tr11/. 31 type Kind int 32 33 const ( 34 // Neutral characters do not occur in legacy East Asian character sets. 35 Neutral Kind = iota 36 37 // EastAsianAmbiguous characters that can be sometimes wide and sometimes 38 // narrow and require additional information not contained in the character 39 // code to further resolve their width. 40 EastAsianAmbiguous 41 42 // EastAsianWide characters are wide in its usual form. They occur only in 43 // the context of East Asian typography. These runes may have explicit 44 // halfwidth counterparts. 45 EastAsianWide 46 47 // EastAsianNarrow characters are narrow in its usual form. They often have 48 // fullwidth counterparts. 49 EastAsianNarrow 50 51 // Note: there exist Narrow runes that do not have fullwidth or wide 52 // counterparts, despite what the definition says (e.g. U+27E6). 53 54 // EastAsianFullwidth characters have a compatibility decompositions of type 55 // wide that map to a narrow counterpart. 56 EastAsianFullwidth 57 58 // EastAsianHalfwidth characters have a compatibility decomposition of type 59 // narrow that map to a wide or ambiguous counterpart, plus U+20A9 ₩ WON 60 // SIGN. 61 EastAsianHalfwidth 62 63 // Note: there exist runes that have a halfwidth counterparts but that are 64 // classified as Ambiguous, rather than wide (e.g. U+2190). 65 ) 66 67 // TODO: the generated tries need to return size 1 for invalid runes for the 68 // width to be computed correctly (each byte should render width 1) 69 70 var trie = newWidthTrie(0) 71 72 // Lookup reports the Properties of the first rune in b and the number of bytes 73 // of its UTF-8 encoding. 74 func Lookup(b []byte) (p Properties, size int) { 75 v, sz := trie.lookup(b) 76 return Properties{elem(v), b[sz-1]}, sz 77 } 78 79 // LookupString reports the Properties of the first rune in s and the number of 80 // bytes of its UTF-8 encoding. 81 func LookupString(s string) (p Properties, size int) { 82 v, sz := trie.lookupString(s) 83 return Properties{elem(v), s[sz-1]}, sz 84 } 85 86 // LookupRune reports the Properties of rune r. 87 func LookupRune(r rune) Properties { 88 var buf [4]byte 89 n := utf8.EncodeRune(buf[:], r) 90 v, _ := trie.lookup(buf[:n]) 91 last := byte(r) 92 if r >= utf8.RuneSelf { 93 last = 0x80 + byte(r&0x3f) 94 } 95 return Properties{elem(v), last} 96 } 97 98 // Properties provides access to width properties of a rune. 99 type Properties struct { 100 elem elem 101 last byte 102 } 103 104 func (e elem) kind() Kind { 105 return Kind(e >> typeShift) 106 } 107 108 // Kind returns the Kind of a rune as defined in Unicode TR #11. 109 // See https://unicode.org/reports/tr11/ for more details. 110 func (p Properties) Kind() Kind { 111 return p.elem.kind() 112 } 113 114 // Folded returns the folded variant of a rune or 0 if the rune is canonical. 115 func (p Properties) Folded() rune { 116 if p.elem&tagNeedsFold != 0 { 117 buf := inverseData[byte(p.elem)] 118 buf[buf[0]] ^= p.last 119 r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) 120 return r 121 } 122 return 0 123 } 124 125 // Narrow returns the narrow variant of a rune or 0 if the rune is already 126 // narrow or doesn't have a narrow variant. 127 func (p Properties) Narrow() rune { 128 if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) { 129 buf := inverseData[byte(p.elem)] 130 buf[buf[0]] ^= p.last 131 r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) 132 return r 133 } 134 return 0 135 } 136 137 // Wide returns the wide variant of a rune or 0 if the rune is already 138 // wide or doesn't have a wide variant. 139 func (p Properties) Wide() rune { 140 if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) { 141 buf := inverseData[byte(p.elem)] 142 buf[buf[0]] ^= p.last 143 r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) 144 return r 145 } 146 return 0 147 } 148 149 // TODO for Properties: 150 // - Add Fullwidth/Halfwidth or Inverted methods for computing variants 151 // mapping. 152 // - Add width information (including information on non-spacing runes). 153 154 // Transformer implements the transform.Transformer interface. 155 type Transformer struct { 156 t transform.SpanningTransformer 157 } 158 159 // Reset implements the transform.Transformer interface. 160 func (t Transformer) Reset() { t.t.Reset() } 161 162 // Transform implements the transform.Transformer interface. 163 func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 164 return t.t.Transform(dst, src, atEOF) 165 } 166 167 // Span implements the transform.SpanningTransformer interface. 168 func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) { 169 return t.t.Span(src, atEOF) 170 } 171 172 // Bytes returns a new byte slice with the result of applying t to b. 173 func (t Transformer) Bytes(b []byte) []byte { 174 b, _, _ = transform.Bytes(t, b) 175 return b 176 } 177 178 // String returns a string with the result of applying t to s. 179 func (t Transformer) String(s string) string { 180 s, _, _ = transform.String(t, s) 181 return s 182 } 183 184 var ( 185 // Fold is a transform that maps all runes to their canonical width. 186 // 187 // Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm 188 // provide a more generic folding mechanism. 189 Fold Transformer = Transformer{foldTransform{}} 190 191 // Widen is a transform that maps runes to their wide variant, if 192 // available. 193 Widen Transformer = Transformer{wideTransform{}} 194 195 // Narrow is a transform that maps runes to their narrow variant, if 196 // available. 197 Narrow Transformer = Transformer{narrowTransform{}} 198 ) 199 200 // TODO: Consider the following options: 201 // - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some 202 // generalized variant of this. 203 // - Consider a wide Won character to be the default width (or some generalized 204 // variant of this). 205 // - Filter the set of characters that gets converted (the preferred approach is 206 // to allow applying filters to transforms). 207