1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unicode 6 7 // Bit masks for each code point under U+0100, for fast lookup. 8 const ( 9 pC = 1 << iota // a control character. 10 pP // a punctuation character. 11 pN // a numeral. 12 pS // a symbolic character. 13 pZ // a spacing character. 14 pLu // an upper-case letter. 15 pLl // a lower-case letter. 16 pp // a printable character according to Go's definition. 17 pg = pp | pZ // a graphical character according to the Unicode definition. 18 pLo = pLl | pLu // a letter that is neither upper nor lower case. 19 pLmask = pLo 20 ) 21 22 // GraphicRanges defines the set of graphic characters according to Unicode. 23 var GraphicRanges = []*RangeTable{ 24 L, M, N, P, S, Zs, 25 } 26 27 // PrintRanges defines the set of printable characters according to Go. 28 // ASCII space, U+0020, is handled separately. 29 var PrintRanges = []*RangeTable{ 30 L, M, N, P, S, 31 } 32 33 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. 34 // Such characters include letters, marks, numbers, punctuation, symbols, and 35 // spaces, from categories [L], [M], [N], [P], [S], [Zs]. 36 func IsGraphic(r rune) bool { 37 // We convert to uint32 to avoid the extra test for negative, 38 // and in the index we convert to uint8 to avoid the range check. 39 if uint32(r) <= MaxLatin1 { 40 return properties[uint8(r)]&pg != 0 41 } 42 return In(r, GraphicRanges...) 43 } 44 45 // IsPrint reports whether the rune is defined as printable by Go. Such 46 // characters include letters, marks, numbers, punctuation, symbols, and the 47 // ASCII space character, from categories [L], [M], [N], [P], [S] and the ASCII space 48 // character. This categorization is the same as [IsGraphic] except that the 49 // only spacing character is ASCII space, U+0020. 50 func IsPrint(r rune) bool { 51 if uint32(r) <= MaxLatin1 { 52 return properties[uint8(r)]&pp != 0 53 } 54 return In(r, PrintRanges...) 55 } 56 57 // IsOneOf reports whether the rune is a member of one of the ranges. 58 // The function "In" provides a nicer signature and should be used in preference to IsOneOf. 59 func IsOneOf(ranges []*RangeTable, r rune) bool { 60 for _, inside := range ranges { 61 if Is(inside, r) { 62 return true 63 } 64 } 65 return false 66 } 67 68 // In reports whether the rune is a member of one of the ranges. 69 func In(r rune, ranges ...*RangeTable) bool { 70 for _, inside := range ranges { 71 if Is(inside, r) { 72 return true 73 } 74 } 75 return false 76 } 77 78 // IsControl reports whether the rune is a control character. 79 // The [C] ([Other]) Unicode category includes more code points 80 // such as surrogates; use [Is](C, r) to test for them. 81 func IsControl(r rune) bool { 82 if uint32(r) <= MaxLatin1 { 83 return properties[uint8(r)]&pC != 0 84 } 85 // All control characters are < MaxLatin1. 86 return false 87 } 88 89 // IsLetter reports whether the rune is a letter (category [L]). 90 func IsLetter(r rune) bool { 91 if uint32(r) <= MaxLatin1 { 92 return properties[uint8(r)]&(pLmask) != 0 93 } 94 return isExcludingLatin(Letter, r) 95 } 96 97 // IsMark reports whether the rune is a mark character (category [M]). 98 func IsMark(r rune) bool { 99 // There are no mark characters in Latin-1. 100 return isExcludingLatin(Mark, r) 101 } 102 103 // IsNumber reports whether the rune is a number (category [N]). 104 func IsNumber(r rune) bool { 105 if uint32(r) <= MaxLatin1 { 106 return properties[uint8(r)]&pN != 0 107 } 108 return isExcludingLatin(Number, r) 109 } 110 111 // IsPunct reports whether the rune is a Unicode punctuation character 112 // (category [P]). 113 func IsPunct(r rune) bool { 114 if uint32(r) <= MaxLatin1 { 115 return properties[uint8(r)]&pP != 0 116 } 117 return Is(Punct, r) 118 } 119 120 // IsSpace reports whether the rune is a space character as defined 121 // by Unicode's White Space property; in the Latin-1 space 122 // this is 123 // 124 // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP). 125 // 126 // Other definitions of spacing characters are set by category 127 // Z and property [Pattern_White_Space]. 128 func IsSpace(r rune) bool { 129 // This property isn't the same as Z; special-case it. 130 if uint32(r) <= MaxLatin1 { 131 switch r { 132 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0: 133 return true 134 } 135 return false 136 } 137 return isExcludingLatin(White_Space, r) 138 } 139 140 // IsSymbol reports whether the rune is a symbolic character. 141 func IsSymbol(r rune) bool { 142 if uint32(r) <= MaxLatin1 { 143 return properties[uint8(r)]&pS != 0 144 } 145 return isExcludingLatin(Symbol, r) 146 } 147