1
2
3
4
5 package html
6
7 import (
8 "bytes"
9 "strings"
10 "unicode/utf8"
11 )
12
13
14
15
// replacementTable holds the runes substituted for numeric character
// references in the range 0x80 through 0x9F: entry i is the replacement for
// code point 0x80+i. The values are the Windows-1252 characters for those
// byte values, which keeps old documents that assumed that encoding working.
// Entries equal to their own code point (0x81, 0x8D, 0x8F, 0x90, 0x9D) are
// left unchanged by the substitution.
//
// See https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
var replacementTable = [...]rune{
	'\u20AC', '\u0081', '\u201A', '\u0192', // 0x80-0x83
	'\u201E', '\u2026', '\u2020', '\u2021', // 0x84-0x87
	'\u02C6', '\u2030', '\u0160', '\u2039', // 0x88-0x8B
	'\u0152', '\u008D', '\u017D', '\u008F', // 0x8C-0x8F
	'\u0090', '\u2018', '\u2019', '\u201C', // 0x90-0x93
	'\u201D', '\u2022', '\u2013', '\u2014', // 0x94-0x97
	'\u02DC', '\u2122', '\u0161', '\u203A', // 0x98-0x9B
	'\u0153', '\u009D', '\u017E', '\u0178', // 0x9C-0x9F
}
52
53
54
55
56
57 func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
58
59
60
61 i, s := 1, b[src:]
62
63 if len(s) <= 1 {
64 b[dst] = b[src]
65 return dst + 1, src + 1
66 }
67
68 if s[i] == '#' {
69 if len(s) <= 3 {
70 b[dst] = b[src]
71 return dst + 1, src + 1
72 }
73 i++
74 c := s[i]
75 hex := false
76 if c == 'x' || c == 'X' {
77 hex = true
78 i++
79 }
80
81 x := '\x00'
82 for i < len(s) {
83 c = s[i]
84 i++
85 if hex {
86 if '0' <= c && c <= '9' {
87 x = 16*x + rune(c) - '0'
88 continue
89 } else if 'a' <= c && c <= 'f' {
90 x = 16*x + rune(c) - 'a' + 10
91 continue
92 } else if 'A' <= c && c <= 'F' {
93 x = 16*x + rune(c) - 'A' + 10
94 continue
95 }
96 } else if '0' <= c && c <= '9' {
97 x = 10*x + rune(c) - '0'
98 continue
99 }
100 if c != ';' {
101 i--
102 }
103 break
104 }
105
106 if i <= 3 {
107 b[dst] = b[src]
108 return dst + 1, src + 1
109 }
110
111 if 0x80 <= x && x <= 0x9F {
112
113 x = replacementTable[x-0x80]
114 } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
115
116 x = '\uFFFD'
117 }
118
119 return dst + utf8.EncodeRune(b[dst:], x), src + i
120 }
121
122
123
124
125 for i < len(s) {
126 c := s[i]
127 i++
128
129 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
130 continue
131 }
132 if c != ';' {
133 i--
134 }
135 break
136 }
137
138 entityName := string(s[1:i])
139 if entityName == "" {
140
141 } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
142
143 } else if x := entity[entityName]; x != 0 {
144 return dst + utf8.EncodeRune(b[dst:], x), src + i
145 } else if x := entity2[entityName]; x[0] != 0 {
146 dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
147 return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
148 } else if !attribute {
149 maxLen := len(entityName) - 1
150 if maxLen > longestEntityWithoutSemicolon {
151 maxLen = longestEntityWithoutSemicolon
152 }
153 for j := maxLen; j > 1; j-- {
154 if x := entity[entityName[:j]]; x != 0 {
155 return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
156 }
157 }
158 }
159
160 dst1, src1 = dst+i, src+i
161 copy(b[dst:dst1], b[src:src1])
162 return dst1, src1
163 }
164
165
166
167 func unescape(b []byte, attribute bool) []byte {
168 for i, c := range b {
169 if c == '&' {
170 dst, src := unescapeEntity(b, i, i, attribute)
171 for src < len(b) {
172 c := b[src]
173 if c == '&' {
174 dst, src = unescapeEntity(b, dst, src, attribute)
175 } else {
176 b[dst] = c
177 dst, src = dst+1, src+1
178 }
179 }
180 return b[0:dst]
181 }
182 }
183 return b
184 }
185
186
// lower converts the ASCII upper-case letters of b to lower case in place and
// returns b. All other bytes are left as they are.
func lower(b []byte) []byte {
	const caseOffset = 'a' - 'A'
	for k := 0; k < len(b); k++ {
		if ch := b[k]; ch >= 'A' && ch <= 'Z' {
			b[k] = ch + caseOffset
		}
	}
	return b
}
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215 func escapeComment(w writer, s string) error {
216
217
218
219
220
221 if len(s) == 0 {
222 return nil
223 }
224
225
226
227
228
229 i := 0
230 for j := 0; j < len(s); j++ {
231 escaped := ""
232 switch s[j] {
233 case '&':
234 escaped = "&"
235
236 case '>':
237 if j > 0 {
238 if prev := s[j-1]; (prev != '!') && (prev != '-') {
239 continue
240 }
241 }
242 escaped = ">"
243
244 default:
245 continue
246 }
247
248 if i < j {
249 if _, err := w.WriteString(s[i:j]); err != nil {
250 return err
251 }
252 }
253 if _, err := w.WriteString(escaped); err != nil {
254 return err
255 }
256 i = j + 1
257 }
258
259 if i < len(s) {
260 if _, err := w.WriteString(s[i:]); err != nil {
261 return err
262 }
263 }
264 return nil
265 }
266
267
268 func escapeCommentString(s string) string {
269 if strings.IndexAny(s, "&>") == -1 {
270 return s
271 }
272 var buf bytes.Buffer
273 escapeComment(&buf, s)
274 return buf.String()
275 }
276
277 const escapedChars = "&'<>\"\r"
278
279 func escape(w writer, s string) error {
280 i := strings.IndexAny(s, escapedChars)
281 for i != -1 {
282 if _, err := w.WriteString(s[:i]); err != nil {
283 return err
284 }
285 var esc string
286 switch s[i] {
287 case '&':
288 esc = "&"
289 case '\'':
290
291 esc = "'"
292 case '<':
293 esc = "<"
294 case '>':
295 esc = ">"
296 case '"':
297
298 esc = """
299 case '\r':
300 esc = " "
301 default:
302 panic("unrecognized escape character")
303 }
304 s = s[i+1:]
305 if _, err := w.WriteString(esc); err != nil {
306 return err
307 }
308 i = strings.IndexAny(s, escapedChars)
309 }
310 _, err := w.WriteString(s)
311 return err
312 }
313
314
315
316
317
318 func EscapeString(s string) string {
319 if strings.IndexAny(s, escapedChars) == -1 {
320 return s
321 }
322 var buf bytes.Buffer
323 escape(&buf, s)
324 return buf.String()
325 }
326
327
328
329
330
331
332 func UnescapeString(s string) string {
333 for _, c := range s {
334 if c == '&' {
335 return string(unescape([]byte(s), false))
336 }
337 }
338 return s
339 }
340
View as plain text