1 package characters
2
3 import (
4 "unicode/utf8"
5 )
6
// utf8Err describes the first unacceptable byte sequence found by the
// validators in this file.
//
// Index is the offset of the sequence's first byte, counted from the start of
// the slice that was validated. Size is how many bytes of that sequence were
// examined before it was rejected; a Size of 0 means nothing was rejected.
type utf8Err struct {
	Index, Size int
}
11
12 func (u utf8Err) Zero() bool {
13 return u.Size == 0
14 }
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 func Utf8TomlValidAlreadyEscaped(p []byte) (err utf8Err) {
36
37 offset := 0
38 for len(p) >= 8 {
39
40
41
42
43 first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
44 second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24
45 if (first32|second32)&0x80808080 != 0 {
46
47 break
48 }
49
50 for i, b := range p[:8] {
51 if InvalidAscii(b) {
52 err.Index = offset + i
53 err.Size = 1
54 return
55 }
56 }
57
58 p = p[8:]
59 offset += 8
60 }
61 n := len(p)
62 for i := 0; i < n; {
63 pi := p[i]
64 if pi < utf8.RuneSelf {
65 if InvalidAscii(pi) {
66 err.Index = offset + i
67 err.Size = 1
68 return
69 }
70 i++
71 continue
72 }
73 x := first[pi]
74 if x == xx {
75
76 err.Index = offset + i
77 err.Size = 1
78 return
79 }
80 size := int(x & 7)
81 if i+size > n {
82
83 err.Index = offset + i
84 err.Size = n - i
85 return
86 }
87 accept := acceptRanges[x>>4]
88 if c := p[i+1]; c < accept.lo || accept.hi < c {
89 err.Index = offset + i
90 err.Size = 2
91 return
92 } else if size == 2 {
93 } else if c := p[i+2]; c < locb || hicb < c {
94 err.Index = offset + i
95 err.Size = 3
96 return
97 } else if size == 3 {
98 } else if c := p[i+3]; c < locb || hicb < c {
99 err.Index = offset + i
100 err.Size = 4
101 return
102 }
103 i += size
104 }
105 return
106 }
107
108
109 func Utf8ValidNext(p []byte) int {
110 c := p[0]
111
112 if c < utf8.RuneSelf {
113 if InvalidAscii(c) {
114 return 0
115 }
116 return 1
117 }
118
119 x := first[c]
120 if x == xx {
121
122 return 0
123 }
124 size := int(x & 7)
125 if size > len(p) {
126
127 return 0
128 }
129 accept := acceptRanges[x>>4]
130 if c := p[1]; c < accept.lo || accept.hi < c {
131 return 0
132 } else if size == 2 {
133 } else if c := p[2]; c < locb || hicb < c {
134 return 0
135 } else if size == 3 {
136 } else if c := p[3]; c < locb || hicb < c {
137 return 0
138 }
139
140 return size
141 }
142
143
144
// acceptRange is an inclusive byte interval. The validators in this file use
// it to bound the second byte of a multi-byte character.
type acceptRange struct {
	lo, hi uint8 // smallest and largest accepted value
}
149
150
151 var acceptRanges = [16]acceptRange{
152 0: {locb, hicb},
153 1: {0xA0, hicb},
154 2: {locb, 0x9F},
155 3: {0x90, hicb},
156 4: {locb, 0x8F},
157 }
158
159
160 var first = [256]uint8{
161
162 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
163 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
164 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
165 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
166 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
167 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
168 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
169 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as,
170
171 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
172 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
173 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
174 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
175 xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1,
176 s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1,
177 s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3,
178 s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx,
179 }
180
const (
	// Bounds used by the validators for the third and fourth bytes of a
	// character, and as the default bounds for the second byte.
	locb = 0x80 // 0b10000000
	hicb = 0xBF // 0b10111111

	// Entries of the first table. For s1..s7 the low three bits are the
	// character length in bytes and the high nibble is the acceptRanges index
	// that bounds the second byte.
	xx = 0xF1 // byte that cannot start a character
	as = 0xF0 // ASCII byte
	s1 = 0x02 // acceptRanges[0], 2 bytes
	s2 = 0x13 // acceptRanges[1], 3 bytes
	s3 = 0x03 // acceptRanges[0], 3 bytes
	s4 = 0x23 // acceptRanges[2], 3 bytes
	s5 = 0x34 // acceptRanges[3], 4 bytes
	s6 = 0x04 // acceptRanges[0], 4 bytes
	s7 = 0x44 // acceptRanges[4], 4 bytes
)
200
View as plain text