1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package csv
53
54 import (
55 "bufio"
56 "bytes"
57 "errors"
58 "fmt"
59 "io"
60 "unicode"
61 "unicode/utf8"
62 )
63
64
65
// A ParseError describes a failure encountered while parsing a record.
// Line and column numbers start at 1; columns are counted in bytes.
type ParseError struct {
	// StartLine is the line on which the offending record begins.
	StartLine int
	// Line is the line on which the error was detected. It differs from
	// StartLine when a quoted field spans several lines.
	Line int
	// Column is the 1-based byte column at which the error was detected.
	Column int
	// Err is the underlying cause, e.g. ErrBareQuote, ErrQuote or ErrFieldCount.
	Err error
}
72
73 func (e *ParseError) Error() string {
74 if e.Err == ErrFieldCount {
75 return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
76 }
77 if e.StartLine != e.Line {
78 return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
79 }
80 return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
81 }
82
83 func (e *ParseError) Unwrap() error { return e.Err }
84
85
// Sentinel errors that may appear in ParseError.Err.
var (
	// ErrBareQuote is reported when a double quote appears inside a field
	// that did not start with a quote (and LazyQuotes is off).
	ErrBareQuote = errors.New("bare \" in non-quoted-field")

	// ErrQuote is reported when a quoted field contains a quote that is
	// neither doubled nor followed by the delimiter or end of line, or when
	// the closing quote is missing (and LazyQuotes is off).
	ErrQuote = errors.New("extraneous or missing \" in quoted-field")

	// ErrFieldCount is reported when a record's field count does not match
	// a positive FieldsPerRecord.
	ErrFieldCount = errors.New("wrong number of fields")

	// ErrTrailingComma is not returned by any code visible in this file.
	// NOTE(review): presumably kept only for backward compatibility — confirm
	// before removing.
	ErrTrailingComma = errors.New("extra delimiter at end of line")
)
94
// errInvalidDelim is returned by readRecord when Comma or Comment is not a
// usable delimiter, or when the two are equal.
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
96
// validDelim reports whether r may be used as a field or comment delimiter.
// NUL, the double quote, carriage return, line feed, utf8.RuneError and any
// value that is not a valid Unicode code point are rejected.
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
100
101
102
103
104
105
106
107
108
109
110 type Reader struct {
111
112
113
114
115 Comma rune
116
117
118
119
120
121
122
123
124 Comment rune
125
126
127
128
129
130
131
132 FieldsPerRecord int
133
134
135
136 LazyQuotes bool
137
138
139
140 TrimLeadingSpace bool
141
142
143
144
145 ReuseRecord bool
146
147
148 TrailingComma bool
149
150 r *bufio.Reader
151
152
153 numLine int
154
155
156 offset int64
157
158
159 rawBuffer []byte
160
161
162
163
164
165 recordBuffer []byte
166
167
168
169 fieldIndexes []int
170
171
172
173 fieldPositions []position
174
175
176 lastRecord []string
177 }
178
179
180 func NewReader(r io.Reader) *Reader {
181 return &Reader{
182 Comma: ',',
183 r: bufio.NewReader(r),
184 }
185 }
186
187
188
189
190
191
192
193
194
195
196 func (r *Reader) Read() (record []string, err error) {
197 if r.ReuseRecord {
198 record, err = r.readRecord(r.lastRecord)
199 r.lastRecord = record
200 } else {
201 record, err = r.readRecord(nil)
202 }
203 return record, err
204 }
205
206
207
208
209
210
211
212 func (r *Reader) FieldPos(field int) (line, column int) {
213 if field < 0 || field >= len(r.fieldPositions) {
214 panic("out of range index passed to FieldPos")
215 }
216 p := &r.fieldPositions[field]
217 return p.line, p.col
218 }
219
220
221
222
223 func (r *Reader) InputOffset() int64 {
224 return r.offset
225 }
226
227
// position is a location in the input: a 1-based line number and a
// 1-based byte column within that line.
type position struct {
	line int
	col  int
}
231
232
233
234
235
236
237 func (r *Reader) ReadAll() (records [][]string, err error) {
238 for {
239 record, err := r.readRecord(nil)
240 if err == io.EOF {
241 return records, nil
242 }
243 if err != nil {
244 return nil, err
245 }
246 records = append(records, record)
247 }
248 }
249
250
251
252
253
254 func (r *Reader) readLine() ([]byte, error) {
255 line, err := r.r.ReadSlice('\n')
256 if err == bufio.ErrBufferFull {
257 r.rawBuffer = append(r.rawBuffer[:0], line...)
258 for err == bufio.ErrBufferFull {
259 line, err = r.r.ReadSlice('\n')
260 r.rawBuffer = append(r.rawBuffer, line...)
261 }
262 line = r.rawBuffer
263 }
264 readSize := len(line)
265 if readSize > 0 && err == io.EOF {
266 err = nil
267
268 if line[readSize-1] == '\r' {
269 line = line[:readSize-1]
270 }
271 }
272 r.numLine++
273 r.offset += int64(readSize)
274
275 if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
276 line[n-2] = '\n'
277 line = line[:n-1]
278 }
279 return line, err
280 }
281
282
// lengthNL reports the length of the trailing newline of b:
// 1 if b ends in '\n', 0 otherwise (including when b is empty).
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
289
290
// nextRune decodes and returns the first UTF-8 encoded rune in b.
// For empty or invalidly encoded input it yields utf8.RuneError, which is
// what utf8.DecodeRune reports in those cases.
func nextRune(b []byte) rune {
	first, _ := utf8.DecodeRune(b)
	return first
}
295
296 func (r *Reader) readRecord(dst []string) ([]string, error) {
297 if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
298 return nil, errInvalidDelim
299 }
300
301
302 var line []byte
303 var errRead error
304 for errRead == nil {
305 line, errRead = r.readLine()
306 if r.Comment != 0 && nextRune(line) == r.Comment {
307 line = nil
308 continue
309 }
310 if errRead == nil && len(line) == lengthNL(line) {
311 line = nil
312 continue
313 }
314 break
315 }
316 if errRead == io.EOF {
317 return nil, errRead
318 }
319
320
321 var err error
322 const quoteLen = len(`"`)
323 commaLen := utf8.RuneLen(r.Comma)
324 recLine := r.numLine
325 r.recordBuffer = r.recordBuffer[:0]
326 r.fieldIndexes = r.fieldIndexes[:0]
327 r.fieldPositions = r.fieldPositions[:0]
328 pos := position{line: r.numLine, col: 1}
329 parseField:
330 for {
331 if r.TrimLeadingSpace {
332 i := bytes.IndexFunc(line, func(r rune) bool {
333 return !unicode.IsSpace(r)
334 })
335 if i < 0 {
336 i = len(line)
337 pos.col -= lengthNL(line)
338 }
339 line = line[i:]
340 pos.col += i
341 }
342 if len(line) == 0 || line[0] != '"' {
343
344 i := bytes.IndexRune(line, r.Comma)
345 field := line
346 if i >= 0 {
347 field = field[:i]
348 } else {
349 field = field[:len(field)-lengthNL(field)]
350 }
351
352 if !r.LazyQuotes {
353 if j := bytes.IndexByte(field, '"'); j >= 0 {
354 col := pos.col + j
355 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
356 break parseField
357 }
358 }
359 r.recordBuffer = append(r.recordBuffer, field...)
360 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
361 r.fieldPositions = append(r.fieldPositions, pos)
362 if i >= 0 {
363 line = line[i+commaLen:]
364 pos.col += i + commaLen
365 continue parseField
366 }
367 break parseField
368 } else {
369
370 fieldPos := pos
371 line = line[quoteLen:]
372 pos.col += quoteLen
373 for {
374 i := bytes.IndexByte(line, '"')
375 if i >= 0 {
376
377 r.recordBuffer = append(r.recordBuffer, line[:i]...)
378 line = line[i+quoteLen:]
379 pos.col += i + quoteLen
380 switch rn := nextRune(line); {
381 case rn == '"':
382
383 r.recordBuffer = append(r.recordBuffer, '"')
384 line = line[quoteLen:]
385 pos.col += quoteLen
386 case rn == r.Comma:
387
388 line = line[commaLen:]
389 pos.col += commaLen
390 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
391 r.fieldPositions = append(r.fieldPositions, fieldPos)
392 continue parseField
393 case lengthNL(line) == len(line):
394
395 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
396 r.fieldPositions = append(r.fieldPositions, fieldPos)
397 break parseField
398 case r.LazyQuotes:
399
400 r.recordBuffer = append(r.recordBuffer, '"')
401 default:
402
403 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
404 break parseField
405 }
406 } else if len(line) > 0 {
407
408 r.recordBuffer = append(r.recordBuffer, line...)
409 if errRead != nil {
410 break parseField
411 }
412 pos.col += len(line)
413 line, errRead = r.readLine()
414 if len(line) > 0 {
415 pos.line++
416 pos.col = 1
417 }
418 if errRead == io.EOF {
419 errRead = nil
420 }
421 } else {
422
423 if !r.LazyQuotes && errRead == nil {
424 err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
425 break parseField
426 }
427 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
428 r.fieldPositions = append(r.fieldPositions, fieldPos)
429 break parseField
430 }
431 }
432 }
433 }
434 if err == nil {
435 err = errRead
436 }
437
438
439
440 str := string(r.recordBuffer)
441 dst = dst[:0]
442 if cap(dst) < len(r.fieldIndexes) {
443 dst = make([]string, len(r.fieldIndexes))
444 }
445 dst = dst[:len(r.fieldIndexes)]
446 var preIdx int
447 for i, idx := range r.fieldIndexes {
448 dst[i] = str[preIdx:idx]
449 preIdx = idx
450 }
451
452
453 if r.FieldsPerRecord > 0 {
454 if len(dst) != r.FieldsPerRecord && err == nil {
455 err = &ParseError{
456 StartLine: recLine,
457 Line: recLine,
458 Column: 1,
459 Err: ErrFieldCount,
460 }
461 }
462 } else if r.FieldsPerRecord == 0 {
463 r.FieldsPerRecord = len(dst)
464 }
465 return dst, err
466 }
467
View as plain text