...

Source file src/github.com/pelletier/go-toml/v2/unstable/parser.go

Documentation: github.com/pelletier/go-toml/v2/unstable

     1  package unstable
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"unicode"
     7  
     8  	"github.com/pelletier/go-toml/v2/internal/characters"
     9  	"github.com/pelletier/go-toml/v2/internal/danger"
    10  )
    11  
// ParserError describes an error relative to the content of the document.
//
// It cannot outlive the instance of Parser it refers to, and may cause panics
// if the parser is reset.
type ParserError struct {
	Highlight []byte   // part of the parsed document the error points at
	Message   string   // text returned by Error
	Key       []string // optional
}
    21  
// Error is the implementation of the error interface. It returns the Message
// field unchanged; Highlight and Key are not part of the returned text.
func (e *ParserError) Error() string {
	return e.Message
}
    26  
    27  // NewParserError is a convenience function to create a ParserError
    28  //
    29  // Warning: Highlight needs to be a subslice of Parser.data, so only slices
    30  // returned by Parser.Raw are valid candidates.
    31  func NewParserError(highlight []byte, format string, args ...interface{}) error {
    32  	return &ParserError{
    33  		Highlight: highlight,
    34  		Message:   fmt.Errorf(format, args...).Error(),
    35  	}
    36  }
    37  
// Parser scans over a TOML-encoded document and generates an iterative AST.
//
// To prime the Parser, first reset it with the contents of a TOML document.
// Then, process all top-level expressions sequentially. See Example.
//
// Don't forget to check Error() after you're done parsing.
//
// Each top-level expression needs to be fully processed before calling
// NextExpression() again. Otherwise, calls to various Node methods may panic if
// the parser has moved on to the next expression.
//
// For performance reasons, go-toml doesn't make a copy of the input bytes to
// the parser. Make sure to copy all the bytes you need to outlive the slice
// given to the parser.
type Parser struct {
	data    []byte    // input given to the last call to Reset
	builder builder   // storage for the nodes of the current expression
	ref     reference // reference to the last parsed expression
	left    []byte    // part of the input that has not been consumed yet
	err     error     // error reported by Error
	first   bool      // true until one expression has been scanned after Reset

	KeepComments bool // when true, comments are pushed as Comment nodes
}
    62  
// Data returns the slice provided to the last call to Reset. The slice is
// returned as is, not copied.
func (p *Parser) Data() []byte {
	return p.data
}
    67  
    68  // Range returns a range description that corresponds to a given slice of the
    69  // input. If the argument is not a subslice of the parser input, this function
    70  // panics.
    71  func (p *Parser) Range(b []byte) Range {
    72  	return Range{
    73  		Offset: uint32(danger.SubsliceOffset(p.data, b)),
    74  		Length: uint32(len(b)),
    75  	}
    76  }
    77  
    78  // Raw returns the slice corresponding to the bytes in the given range.
    79  func (p *Parser) Raw(raw Range) []byte {
    80  	return p.data[raw.Offset : raw.Offset+raw.Length]
    81  }
    82  
    83  // Reset brings the parser to its initial state for a given input. It wipes an
    84  // reuses internal storage to reduce allocation.
    85  func (p *Parser) Reset(b []byte) {
    86  	p.builder.Reset()
    87  	p.ref = invalidReference
    88  	p.data = b
    89  	p.left = b
    90  	p.err = nil
    91  	p.first = true
    92  }
    93  
    94  // NextExpression parses the next top-level expression. If an expression was
    95  // successfully parsed, it returns true. If the parser is at the end of the
    96  // document or an error occurred, it returns false.
    97  //
    98  // Retrieve the parsed expression with Expression().
    99  func (p *Parser) NextExpression() bool {
   100  	if len(p.left) == 0 || p.err != nil {
   101  		return false
   102  	}
   103  
   104  	p.builder.Reset()
   105  	p.ref = invalidReference
   106  
   107  	for {
   108  		if len(p.left) == 0 || p.err != nil {
   109  			return false
   110  		}
   111  
   112  		if !p.first {
   113  			p.left, p.err = p.parseNewline(p.left)
   114  		}
   115  
   116  		if len(p.left) == 0 || p.err != nil {
   117  			return false
   118  		}
   119  
   120  		p.ref, p.left, p.err = p.parseExpression(p.left)
   121  
   122  		if p.err != nil {
   123  			return false
   124  		}
   125  
   126  		p.first = false
   127  
   128  		if p.ref.Valid() {
   129  			return true
   130  		}
   131  	}
   132  }
   133  
// Expression returns a pointer to the node representing the last successfully
// parsed expression. The node is looked up in the parser's builder, which
// NextExpression resets on each call.
func (p *Parser) Expression() *Node {
	return p.builder.NodeAt(p.ref)
}
   139  
// Error returns any error that has occurred during parsing. It returns nil
// right after Reset, which clears the stored error.
func (p *Parser) Error() error {
	return p.err
}
   144  
// Position describes a position in the input.
type Position struct {
	// Number of bytes from the beginning of the input.
	Offset int
	// Line number, starting at 1. Lines are delimited by '\n'.
	Line int
	// Column number, starting at 1. It is counted in bytes from the start
	// of the line.
	Column int
}
   154  
// Shape describes the position of a range in the input.
type Shape struct {
	Start Position // position of the first byte of the range
	End   Position // position immediately after the last byte of the range
}
   160  
   161  func (p *Parser) position(b []byte) Position {
   162  	offset := danger.SubsliceOffset(p.data, b)
   163  
   164  	lead := p.data[:offset]
   165  
   166  	return Position{
   167  		Offset: offset,
   168  		Line:   bytes.Count(lead, []byte{'\n'}) + 1,
   169  		Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
   170  	}
   171  }
   172  
   173  // Shape returns the shape of the given range in the input.  Will
   174  // panic if the range is not a subslice of the input.
   175  func (p *Parser) Shape(r Range) Shape {
   176  	raw := p.Raw(r)
   177  	return Shape{
   178  		Start: p.position(raw),
   179  		End:   p.position(raw[r.Length:]),
   180  	}
   181  }
   182  
   183  func (p *Parser) parseNewline(b []byte) ([]byte, error) {
   184  	if b[0] == '\n' {
   185  		return b[1:], nil
   186  	}
   187  
   188  	if b[0] == '\r' {
   189  		_, rest, err := scanWindowsNewline(b)
   190  		return rest, err
   191  	}
   192  
   193  	return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
   194  }
   195  
   196  func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
   197  	ref := invalidReference
   198  	data, rest, err := scanComment(b)
   199  	if p.KeepComments && err == nil {
   200  		ref = p.builder.Push(Node{
   201  			Kind: Comment,
   202  			Raw:  p.Range(data),
   203  			Data: data,
   204  		})
   205  	}
   206  	return ref, rest, err
   207  }
   208  
   209  func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
   210  	// expression =  ws [ comment ]
   211  	// expression =/ ws keyval ws [ comment ]
   212  	// expression =/ ws table ws [ comment ]
   213  	ref := invalidReference
   214  
   215  	b = p.parseWhitespace(b)
   216  
   217  	if len(b) == 0 {
   218  		return ref, b, nil
   219  	}
   220  
   221  	if b[0] == '#' {
   222  		ref, rest, err := p.parseComment(b)
   223  		return ref, rest, err
   224  	}
   225  
   226  	if b[0] == '\n' || b[0] == '\r' {
   227  		return ref, b, nil
   228  	}
   229  
   230  	var err error
   231  	if b[0] == '[' {
   232  		ref, b, err = p.parseTable(b)
   233  	} else {
   234  		ref, b, err = p.parseKeyval(b)
   235  	}
   236  
   237  	if err != nil {
   238  		return ref, nil, err
   239  	}
   240  
   241  	b = p.parseWhitespace(b)
   242  
   243  	if len(b) > 0 && b[0] == '#' {
   244  		cref, rest, err := p.parseComment(b)
   245  		if cref != invalidReference {
   246  			p.builder.Chain(ref, cref)
   247  		}
   248  		return ref, rest, err
   249  	}
   250  
   251  	return ref, b, nil
   252  }
   253  
   254  func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
   255  	// table = std-table / array-table
   256  	if len(b) > 1 && b[1] == '[' {
   257  		return p.parseArrayTable(b)
   258  	}
   259  
   260  	return p.parseStdTable(b)
   261  }
   262  
   263  func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
   264  	// array-table = array-table-open key array-table-close
   265  	// array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
   266  	// array-table-close = ws %x5D.5D  ; ]] Double right square bracket
   267  	ref := p.builder.Push(Node{
   268  		Kind: ArrayTable,
   269  	})
   270  
   271  	b = b[2:]
   272  	b = p.parseWhitespace(b)
   273  
   274  	k, b, err := p.parseKey(b)
   275  	if err != nil {
   276  		return ref, nil, err
   277  	}
   278  
   279  	p.builder.AttachChild(ref, k)
   280  	b = p.parseWhitespace(b)
   281  
   282  	b, err = expect(']', b)
   283  	if err != nil {
   284  		return ref, nil, err
   285  	}
   286  
   287  	b, err = expect(']', b)
   288  
   289  	return ref, b, err
   290  }
   291  
   292  func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
   293  	// std-table = std-table-open key std-table-close
   294  	// std-table-open  = %x5B ws     ; [ Left square bracket
   295  	// std-table-close = ws %x5D     ; ] Right square bracket
   296  	ref := p.builder.Push(Node{
   297  		Kind: Table,
   298  	})
   299  
   300  	b = b[1:]
   301  	b = p.parseWhitespace(b)
   302  
   303  	key, b, err := p.parseKey(b)
   304  	if err != nil {
   305  		return ref, nil, err
   306  	}
   307  
   308  	p.builder.AttachChild(ref, key)
   309  
   310  	b = p.parseWhitespace(b)
   311  
   312  	b, err = expect(']', b)
   313  
   314  	return ref, b, err
   315  }
   316  
   317  func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
   318  	// keyval = key keyval-sep val
   319  	ref := p.builder.Push(Node{
   320  		Kind: KeyValue,
   321  	})
   322  
   323  	key, b, err := p.parseKey(b)
   324  	if err != nil {
   325  		return invalidReference, nil, err
   326  	}
   327  
   328  	// keyval-sep = ws %x3D ws ; =
   329  
   330  	b = p.parseWhitespace(b)
   331  
   332  	if len(b) == 0 {
   333  		return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
   334  	}
   335  
   336  	b, err = expect('=', b)
   337  	if err != nil {
   338  		return invalidReference, nil, err
   339  	}
   340  
   341  	b = p.parseWhitespace(b)
   342  
   343  	valRef, b, err := p.parseVal(b)
   344  	if err != nil {
   345  		return ref, b, err
   346  	}
   347  
   348  	p.builder.Chain(valRef, key)
   349  	p.builder.AttachChild(ref, valRef)
   350  
   351  	return ref, b, err
   352  }
   353  
   354  //nolint:cyclop,funlen
   355  func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
   356  	// val = string / boolean / array / inline-table / date-time / float / integer
   357  	ref := invalidReference
   358  
   359  	if len(b) == 0 {
   360  		return ref, nil, NewParserError(b, "expected value, not eof")
   361  	}
   362  
   363  	var err error
   364  	c := b[0]
   365  
   366  	switch c {
   367  	case '"':
   368  		var raw []byte
   369  		var v []byte
   370  		if scanFollowsMultilineBasicStringDelimiter(b) {
   371  			raw, v, b, err = p.parseMultilineBasicString(b)
   372  		} else {
   373  			raw, v, b, err = p.parseBasicString(b)
   374  		}
   375  
   376  		if err == nil {
   377  			ref = p.builder.Push(Node{
   378  				Kind: String,
   379  				Raw:  p.Range(raw),
   380  				Data: v,
   381  			})
   382  		}
   383  
   384  		return ref, b, err
   385  	case '\'':
   386  		var raw []byte
   387  		var v []byte
   388  		if scanFollowsMultilineLiteralStringDelimiter(b) {
   389  			raw, v, b, err = p.parseMultilineLiteralString(b)
   390  		} else {
   391  			raw, v, b, err = p.parseLiteralString(b)
   392  		}
   393  
   394  		if err == nil {
   395  			ref = p.builder.Push(Node{
   396  				Kind: String,
   397  				Raw:  p.Range(raw),
   398  				Data: v,
   399  			})
   400  		}
   401  
   402  		return ref, b, err
   403  	case 't':
   404  		if !scanFollowsTrue(b) {
   405  			return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
   406  		}
   407  
   408  		ref = p.builder.Push(Node{
   409  			Kind: Bool,
   410  			Data: b[:4],
   411  		})
   412  
   413  		return ref, b[4:], nil
   414  	case 'f':
   415  		if !scanFollowsFalse(b) {
   416  			return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
   417  		}
   418  
   419  		ref = p.builder.Push(Node{
   420  			Kind: Bool,
   421  			Data: b[:5],
   422  		})
   423  
   424  		return ref, b[5:], nil
   425  	case '[':
   426  		return p.parseValArray(b)
   427  	case '{':
   428  		return p.parseInlineTable(b)
   429  	default:
   430  		return p.parseIntOrFloatOrDateTime(b)
   431  	}
   432  }
   433  
   434  func atmost(b []byte, n int) []byte {
   435  	if n >= len(b) {
   436  		return b
   437  	}
   438  
   439  	return b[:n]
   440  }
   441  
   442  func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
   443  	v, rest, err := scanLiteralString(b)
   444  	if err != nil {
   445  		return nil, nil, nil, err
   446  	}
   447  
   448  	return v, v[1 : len(v)-1], rest, nil
   449  }
   450  
   451  func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
   452  	// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
   453  	// inline-table-open  = %x7B ws     ; {
   454  	// inline-table-close = ws %x7D     ; }
   455  	// inline-table-sep   = ws %x2C ws  ; , Comma
   456  	// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
   457  	parent := p.builder.Push(Node{
   458  		Kind: InlineTable,
   459  		Raw:  p.Range(b[:1]),
   460  	})
   461  
   462  	first := true
   463  
   464  	var child reference
   465  
   466  	b = b[1:]
   467  
   468  	var err error
   469  
   470  	for len(b) > 0 {
   471  		previousB := b
   472  		b = p.parseWhitespace(b)
   473  
   474  		if len(b) == 0 {
   475  			return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
   476  		}
   477  
   478  		if b[0] == '}' {
   479  			break
   480  		}
   481  
   482  		if !first {
   483  			b, err = expect(',', b)
   484  			if err != nil {
   485  				return parent, nil, err
   486  			}
   487  			b = p.parseWhitespace(b)
   488  		}
   489  
   490  		var kv reference
   491  
   492  		kv, b, err = p.parseKeyval(b)
   493  		if err != nil {
   494  			return parent, nil, err
   495  		}
   496  
   497  		if first {
   498  			p.builder.AttachChild(parent, kv)
   499  		} else {
   500  			p.builder.Chain(child, kv)
   501  		}
   502  		child = kv
   503  
   504  		first = false
   505  	}
   506  
   507  	rest, err := expect('}', b)
   508  
   509  	return parent, rest, err
   510  }
   511  
   512  //nolint:funlen,cyclop
   513  func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
   514  	// array = array-open [ array-values ] ws-comment-newline array-close
   515  	// array-open =  %x5B ; [
   516  	// array-close = %x5D ; ]
   517  	// array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
   518  	// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
   519  	// array-sep = %x2C  ; , Comma
   520  	// ws-comment-newline = *( wschar / [ comment ] newline )
   521  	arrayStart := b
   522  	b = b[1:]
   523  
   524  	parent := p.builder.Push(Node{
   525  		Kind: Array,
   526  	})
   527  
   528  	// First indicates whether the parser is looking for the first element
   529  	// (non-comment) of the array.
   530  	first := true
   531  
   532  	lastChild := invalidReference
   533  
   534  	addChild := func(valueRef reference) {
   535  		if lastChild == invalidReference {
   536  			p.builder.AttachChild(parent, valueRef)
   537  		} else {
   538  			p.builder.Chain(lastChild, valueRef)
   539  		}
   540  		lastChild = valueRef
   541  	}
   542  
   543  	var err error
   544  	for len(b) > 0 {
   545  		cref := invalidReference
   546  		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
   547  		if err != nil {
   548  			return parent, nil, err
   549  		}
   550  
   551  		if cref != invalidReference {
   552  			addChild(cref)
   553  		}
   554  
   555  		if len(b) == 0 {
   556  			return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
   557  		}
   558  
   559  		if b[0] == ']' {
   560  			break
   561  		}
   562  
   563  		if b[0] == ',' {
   564  			if first {
   565  				return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
   566  			}
   567  			b = b[1:]
   568  
   569  			cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
   570  			if err != nil {
   571  				return parent, nil, err
   572  			}
   573  			if cref != invalidReference {
   574  				addChild(cref)
   575  			}
   576  		} else if !first {
   577  			return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
   578  		}
   579  
   580  		// TOML allows trailing commas in arrays.
   581  		if len(b) > 0 && b[0] == ']' {
   582  			break
   583  		}
   584  
   585  		var valueRef reference
   586  		valueRef, b, err = p.parseVal(b)
   587  		if err != nil {
   588  			return parent, nil, err
   589  		}
   590  
   591  		addChild(valueRef)
   592  
   593  		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
   594  		if err != nil {
   595  			return parent, nil, err
   596  		}
   597  		if cref != invalidReference {
   598  			addChild(cref)
   599  		}
   600  
   601  		first = false
   602  	}
   603  
   604  	rest, err := expect(']', b)
   605  
   606  	return parent, rest, err
   607  }
   608  
   609  func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
   610  	rootCommentRef := invalidReference
   611  	latestCommentRef := invalidReference
   612  
   613  	addComment := func(ref reference) {
   614  		if rootCommentRef == invalidReference {
   615  			rootCommentRef = ref
   616  		} else if latestCommentRef == invalidReference {
   617  			p.builder.AttachChild(rootCommentRef, ref)
   618  			latestCommentRef = ref
   619  		} else {
   620  			p.builder.Chain(latestCommentRef, ref)
   621  			latestCommentRef = ref
   622  		}
   623  	}
   624  
   625  	for len(b) > 0 {
   626  		var err error
   627  		b = p.parseWhitespace(b)
   628  
   629  		if len(b) > 0 && b[0] == '#' {
   630  			var ref reference
   631  			ref, b, err = p.parseComment(b)
   632  			if err != nil {
   633  				return invalidReference, nil, err
   634  			}
   635  			if ref != invalidReference {
   636  				addComment(ref)
   637  			}
   638  		}
   639  
   640  		if len(b) == 0 {
   641  			break
   642  		}
   643  
   644  		if b[0] == '\n' || b[0] == '\r' {
   645  			b, err = p.parseNewline(b)
   646  			if err != nil {
   647  				return invalidReference, nil, err
   648  			}
   649  		} else {
   650  			break
   651  		}
   652  	}
   653  
   654  	return rootCommentRef, b, nil
   655  }
   656  
   657  func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
   658  	token, rest, err := scanMultilineLiteralString(b)
   659  	if err != nil {
   660  		return nil, nil, nil, err
   661  	}
   662  
   663  	i := 3
   664  
   665  	// skip the immediate new line
   666  	if token[i] == '\n' {
   667  		i++
   668  	} else if token[i] == '\r' && token[i+1] == '\n' {
   669  		i += 2
   670  	}
   671  
   672  	return token, token[i : len(token)-3], rest, err
   673  }
   674  
// parseMultilineBasicString parses a multi-line basic string at the start of
// b. It returns the whole token as scanned (delimiters included), the decoded
// content, and the remaining input.
//
// When the scanner reports no escape sequence, the content is a subslice of
// the token; otherwise it is a newly built buffer with escapes resolved and
// line-ending backslashes trimmed.
//
//nolint:funlen,gocognit,cyclop
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
	// ml-basic-string-delim
	// ml-basic-string-delim = 3quotation-mark
	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
	//
	// mlb-content = mlb-char / newline / mlb-escaped-nl
	// mlb-char = mlb-unescaped / escaped
	// mlb-quotes = 1*2quotation-mark
	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// mlb-escaped-nl = escape ws newline *( wschar / newline )
	token, escaped, rest, err := scanMultilineBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Start right after the three-byte opening delimiter.
	i := 3

	// skip the immediate new line
	if token[i] == '\n' {
		i++
	} else if token[i] == '\r' && token[i+1] == '\n' {
		i += 2
	}

	// fast path
	startIdx := i
	endIdx := len(token) - len(`"""`)

	if !escaped {
		// No escape sequence: validate and return a direct view of the token.
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-3 {
		c := token[i]

		//nolint:nestif
		if c == '\\' {
			// When the last non-whitespace character on a line is an unescaped \,
			// it will be trimmed along with all whitespace (including newlines) up
			// to the next non-whitespace character or closing delimiter.

			isLastNonWhitespaceOnLine := false
			// j is the distance from the backslash to the byte being examined.
			j := 1
		findEOLLoop:
			for ; j < len(token)-3-i; j++ {
				switch token[i+j] {
				case ' ', '\t':
					continue
				case '\r':
					// Only a '\r' followed by '\n' counts as part of a line ending.
					if token[i+j+1] == '\n' {
						continue
					}
				case '\n':
					isLastNonWhitespaceOnLine = true
				}
				break findEOLLoop
			}
			if isLastNonWhitespaceOnLine {
				// Jump to the newline, then skip all following whitespace and
				// newlines. The i-- / i++ pair leaves i on the first byte that
				// is not whitespace, or on the closing delimiter.
				i += j
				for ; i < len(token)-3; i++ {
					c := token[i]
					if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
						i--
						break
					}
				}
				i++
				continue
			}

			// handle escaping
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				// \e is the escape character, U+001B.
				builder.WriteByte(0x1B)
			case 'u':
				// \uXXXX: four hex digits follow the 'u'.
				x, err := hexToRune(atmost(token[i+1:], 4), 4)
				if err != nil {
					return nil, nil, nil, err
				}
				builder.WriteRune(x)
				i += 4
			case 'U':
				// \UXXXXXXXX: eight hex digits follow the 'U'.
				x, err := hexToRune(atmost(token[i+1:], 8), 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			// Step past the escape character itself.
			i++
		} else {
			// Copy one character verbatim; a size of 0 is reported as an
			// invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
   806  
   807  func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
   808  	// key = simple-key / dotted-key
   809  	// simple-key = quoted-key / unquoted-key
   810  	//
   811  	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
   812  	// quoted-key = basic-string / literal-string
   813  	// dotted-key = simple-key 1*( dot-sep simple-key )
   814  	//
   815  	// dot-sep   = ws %x2E ws  ; . Period
   816  	raw, key, b, err := p.parseSimpleKey(b)
   817  	if err != nil {
   818  		return invalidReference, nil, err
   819  	}
   820  
   821  	ref := p.builder.Push(Node{
   822  		Kind: Key,
   823  		Raw:  p.Range(raw),
   824  		Data: key,
   825  	})
   826  
   827  	for {
   828  		b = p.parseWhitespace(b)
   829  		if len(b) > 0 && b[0] == '.' {
   830  			b = p.parseWhitespace(b[1:])
   831  
   832  			raw, key, b, err = p.parseSimpleKey(b)
   833  			if err != nil {
   834  				return ref, nil, err
   835  			}
   836  
   837  			p.builder.PushAndChain(Node{
   838  				Kind: Key,
   839  				Raw:  p.Range(raw),
   840  				Data: key,
   841  			})
   842  		} else {
   843  			break
   844  		}
   845  	}
   846  
   847  	return ref, b, nil
   848  }
   849  
   850  func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
   851  	if len(b) == 0 {
   852  		return nil, nil, nil, NewParserError(b, "expected key but found none")
   853  	}
   854  
   855  	// simple-key = quoted-key / unquoted-key
   856  	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
   857  	// quoted-key = basic-string / literal-string
   858  	switch {
   859  	case b[0] == '\'':
   860  		return p.parseLiteralString(b)
   861  	case b[0] == '"':
   862  		return p.parseBasicString(b)
   863  	case isUnquotedKeyChar(b[0]):
   864  		key, rest = scanUnquotedKey(b)
   865  		return key, key, rest, nil
   866  	default:
   867  		return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
   868  	}
   869  }
   870  
// parseBasicString parses a single-line basic string at the start of b. It
// returns the whole token as scanned (quotes included), the decoded content,
// and the remaining input.
//
// When the scanner reports no escape sequence, the content is a subslice of
// the token; otherwise it is a newly built buffer with escapes resolved.
//
//nolint:funlen,cyclop
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// basic-string = quotation-mark *basic-char quotation-mark
	// quotation-mark = %x22            ; "
	// basic-char = basic-unescaped / escaped
	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// escaped = escape escape-seq-char
	// escape-seq-char =  %x22         ; "    quotation mark  U+0022
	// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
	// escape-seq-char =/ %x62         ; b    backspace       U+0008
	// escape-seq-char =/ %x66         ; f    form feed       U+000C
	// escape-seq-char =/ %x6E         ; n    line feed       U+000A
	// escape-seq-char =/ %x72         ; r    carriage return U+000D
	// escape-seq-char =/ %x74         ; t    tab             U+0009
	// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
	// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
	token, escaped, rest, err := scanBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// The content sits between the opening and closing quotes.
	startIdx := len(`"`)
	endIdx := len(token) - len(`"`)

	// Fast path. If there is no escape sequence, the string should just be
	// an UTF-8 encoded string, which is the same as Go. In that case,
	// validate the string and return a direct reference to the buffer.
	if !escaped {
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	i := startIdx

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-1 {
		c := token[i]
		if c == '\\' {
			// Move to the character that selects the escape.
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				// \e is the escape character, U+001B.
				builder.WriteByte(0x1B)
			case 'u':
				// \uXXXX: the hex digits are taken from what remains before
				// the closing quote.
				x, err := hexToRune(token[i+1:len(token)-1], 4)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 4
			case 'U':
				// \UXXXXXXXX: same as above with eight hex digits.
				x, err := hexToRune(token[i+1:len(token)-1], 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			// Step past the escape character itself.
			i++
		} else {
			// Copy one character verbatim; a size of 0 is reported as an
			// invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
   966  
   967  func hexToRune(b []byte, length int) (rune, error) {
   968  	if len(b) < length {
   969  		return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
   970  	}
   971  	b = b[:length]
   972  
   973  	var r uint32
   974  	for i, c := range b {
   975  		d := uint32(0)
   976  		switch {
   977  		case '0' <= c && c <= '9':
   978  			d = uint32(c - '0')
   979  		case 'a' <= c && c <= 'f':
   980  			d = uint32(c - 'a' + 10)
   981  		case 'A' <= c && c <= 'F':
   982  			d = uint32(c - 'A' + 10)
   983  		default:
   984  			return -1, NewParserError(b[i:i+1], "non-hex character")
   985  		}
   986  		r = r*16 + d
   987  	}
   988  
   989  	if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
   990  		return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
   991  	}
   992  
   993  	return rune(r), nil
   994  }
   995  
// parseWhitespace skips the whitespace at the start of b and returns the rest
// of the input. The scanning is delegated to scanWhitespace, whose first
// result (the whitespace itself) is discarded.
func (p *Parser) parseWhitespace(b []byte) []byte {
	// ws = *wschar
	// wschar =  %x20  ; Space
	// wschar =/ %x09  ; Horizontal tab
	_, rest := scanWhitespace(b)

	return rest
}
  1004  
  1005  //nolint:cyclop
  1006  func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
  1007  	switch b[0] {
  1008  	case 'i':
  1009  		if !scanFollowsInf(b) {
  1010  			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
  1011  		}
  1012  
  1013  		return p.builder.Push(Node{
  1014  			Kind: Float,
  1015  			Data: b[:3],
  1016  			Raw:  p.Range(b[:3]),
  1017  		}), b[3:], nil
  1018  	case 'n':
  1019  		if !scanFollowsNan(b) {
  1020  			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
  1021  		}
  1022  
  1023  		return p.builder.Push(Node{
  1024  			Kind: Float,
  1025  			Data: b[:3],
  1026  			Raw:  p.Range(b[:3]),
  1027  		}), b[3:], nil
  1028  	case '+', '-':
  1029  		return p.scanIntOrFloat(b)
  1030  	}
  1031  
  1032  	if len(b) < 3 {
  1033  		return p.scanIntOrFloat(b)
  1034  	}
  1035  
  1036  	s := 5
  1037  	if len(b) < s {
  1038  		s = len(b)
  1039  	}
  1040  
  1041  	for idx, c := range b[:s] {
  1042  		if isDigit(c) {
  1043  			continue
  1044  		}
  1045  
  1046  		if idx == 2 && c == ':' || (idx == 4 && c == '-') {
  1047  			return p.scanDateTime(b)
  1048  		}
  1049  
  1050  		break
  1051  	}
  1052  
  1053  	return p.scanIntOrFloat(b)
  1054  }
  1055  
  1056  func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
  1057  	// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
  1058  	// followed by a digit.
  1059  	hasDate := false
  1060  	hasTime := false
  1061  	hasTz := false
  1062  	seenSpace := false
  1063  
  1064  	i := 0
  1065  byteLoop:
  1066  	for ; i < len(b); i++ {
  1067  		c := b[i]
  1068  
  1069  		switch {
  1070  		case isDigit(c):
  1071  		case c == '-':
  1072  			hasDate = true
  1073  			const minOffsetOfTz = 8
  1074  			if i >= minOffsetOfTz {
  1075  				hasTz = true
  1076  			}
  1077  		case c == 'T' || c == 't' || c == ':' || c == '.':
  1078  			hasTime = true
  1079  		case c == '+' || c == '-' || c == 'Z' || c == 'z':
  1080  			hasTz = true
  1081  		case c == ' ':
  1082  			if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
  1083  				i += 2
  1084  				// Avoid reaching past the end of the document in case the time
  1085  				// is malformed. See TestIssue585.
  1086  				if i >= len(b) {
  1087  					i--
  1088  				}
  1089  				seenSpace = true
  1090  				hasTime = true
  1091  			} else {
  1092  				break byteLoop
  1093  			}
  1094  		default:
  1095  			break byteLoop
  1096  		}
  1097  	}
  1098  
  1099  	var kind Kind
  1100  
  1101  	if hasTime {
  1102  		if hasDate {
  1103  			if hasTz {
  1104  				kind = DateTime
  1105  			} else {
  1106  				kind = LocalDateTime
  1107  			}
  1108  		} else {
  1109  			kind = LocalTime
  1110  		}
  1111  	} else {
  1112  		kind = LocalDate
  1113  	}
  1114  
  1115  	return p.builder.Push(Node{
  1116  		Kind: kind,
  1117  		Data: b[:i],
  1118  	}), b[i:], nil
  1119  }
  1120  
  1121  //nolint:funlen,gocognit,cyclop
  1122  func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
  1123  	i := 0
  1124  
  1125  	if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
  1126  		var isValidRune validRuneFn
  1127  
  1128  		switch b[1] {
  1129  		case 'x':
  1130  			isValidRune = isValidHexRune
  1131  		case 'o':
  1132  			isValidRune = isValidOctalRune
  1133  		case 'b':
  1134  			isValidRune = isValidBinaryRune
  1135  		default:
  1136  			i++
  1137  		}
  1138  
  1139  		if isValidRune != nil {
  1140  			i += 2
  1141  			for ; i < len(b); i++ {
  1142  				if !isValidRune(b[i]) {
  1143  					break
  1144  				}
  1145  			}
  1146  		}
  1147  
  1148  		return p.builder.Push(Node{
  1149  			Kind: Integer,
  1150  			Data: b[:i],
  1151  			Raw:  p.Range(b[:i]),
  1152  		}), b[i:], nil
  1153  	}
  1154  
  1155  	isFloat := false
  1156  
  1157  	for ; i < len(b); i++ {
  1158  		c := b[i]
  1159  
  1160  		if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
  1161  			continue
  1162  		}
  1163  
  1164  		if c == '.' || c == 'e' || c == 'E' {
  1165  			isFloat = true
  1166  
  1167  			continue
  1168  		}
  1169  
  1170  		if c == 'i' {
  1171  			if scanFollowsInf(b[i:]) {
  1172  				return p.builder.Push(Node{
  1173  					Kind: Float,
  1174  					Data: b[:i+3],
  1175  					Raw:  p.Range(b[:i+3]),
  1176  				}), b[i+3:], nil
  1177  			}
  1178  
  1179  			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
  1180  		}
  1181  
  1182  		if c == 'n' {
  1183  			if scanFollowsNan(b[i:]) {
  1184  				return p.builder.Push(Node{
  1185  					Kind: Float,
  1186  					Data: b[:i+3],
  1187  					Raw:  p.Range(b[:i+3]),
  1188  				}), b[i+3:], nil
  1189  			}
  1190  
  1191  			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
  1192  		}
  1193  
  1194  		break
  1195  	}
  1196  
  1197  	if i == 0 {
  1198  		return invalidReference, b, NewParserError(b, "incomplete number")
  1199  	}
  1200  
  1201  	kind := Integer
  1202  
  1203  	if isFloat {
  1204  		kind = Float
  1205  	}
  1206  
  1207  	return p.builder.Push(Node{
  1208  		Kind: kind,
  1209  		Data: b[:i],
  1210  		Raw:  p.Range(b[:i]),
  1211  	}), b[i:], nil
  1212  }
  1213  
  1214  func isDigit(r byte) bool {
  1215  	return r >= '0' && r <= '9'
  1216  }
  1217  
// validRuneFn is a predicate over a single input byte. scanIntOrFloat selects
// one of the isValid*Rune functions through it to decide which bytes belong to
// a prefixed (0x, 0o, 0b) integer.
type validRuneFn func(r byte) bool
  1219  
  1220  func isValidHexRune(r byte) bool {
  1221  	return r >= 'a' && r <= 'f' ||
  1222  		r >= 'A' && r <= 'F' ||
  1223  		r >= '0' && r <= '9' ||
  1224  		r == '_'
  1225  }
  1226  
  1227  func isValidOctalRune(r byte) bool {
  1228  	return r >= '0' && r <= '7' || r == '_'
  1229  }
  1230  
  1231  func isValidBinaryRune(r byte) bool {
  1232  	return r == '0' || r == '1' || r == '_'
  1233  }
  1234  
  1235  func expect(x byte, b []byte) ([]byte, error) {
  1236  	if len(b) == 0 {
  1237  		return nil, NewParserError(b, "expected character %c but the document ended here", x)
  1238  	}
  1239  
  1240  	if b[0] != x {
  1241  		return nil, NewParserError(b[0:1], "expected character %c", x)
  1242  	}
  1243  
  1244  	return b[1:], nil
  1245  }
  1246  

View as plain text