scanner.go

Documentation: github.com/pelletier/go-toml/v2/unstable

     1  package unstable
     2  
     3  import "github.com/pelletier/go-toml/v2/internal/characters"
     4  
     5  func scanFollows(b []byte, pattern string) bool {
     6  	n := len(pattern)
     7  
     8  	return len(b) >= n && string(b[:n]) == pattern
     9  }
    10  
    11  func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
    12  	return scanFollows(b, `"""`)
    13  }
    14  
    15  func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
    16  	return scanFollows(b, `'''`)
    17  }
    18  
    19  func scanFollowsTrue(b []byte) bool {
    20  	return scanFollows(b, `true`)
    21  }
    22  
    23  func scanFollowsFalse(b []byte) bool {
    24  	return scanFollows(b, `false`)
    25  }
    26  
    27  func scanFollowsInf(b []byte) bool {
    28  	return scanFollows(b, `inf`)
    29  }
    30  
    31  func scanFollowsNan(b []byte) bool {
    32  	return scanFollows(b, `nan`)
    33  }
    34  
    35  func scanUnquotedKey(b []byte) ([]byte, []byte) {
    36  	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
    37  	for i := 0; i < len(b); i++ {
    38  		if !isUnquotedKeyChar(b[i]) {
    39  			return b[:i], b[i:]
    40  		}
    41  	}
    42  
    43  	return b, b[len(b):]
    44  }
    45  
    46  func isUnquotedKeyChar(r byte) bool {
    47  	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'
    48  }
    49  
    50  func scanLiteralString(b []byte) ([]byte, []byte, error) {
    51  	// literal-string = apostrophe *literal-char apostrophe
    52  	// apostrophe = %x27 ; ' apostrophe
    53  	// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
    54  	for i := 1; i < len(b); {
    55  		switch b[i] {
    56  		case '\'':
    57  			return b[:i+1], b[i+1:], nil
    58  		case '\n', '\r':
    59  			return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines")
    60  		}
    61  		size := characters.Utf8ValidNext(b[i:])
    62  		if size == 0 {
    63  			return nil, nil, NewParserError(b[i:i+1], "invalid character")
    64  		}
    65  		i += size
    66  	}
    67  
    68  	return nil, nil, NewParserError(b[len(b):], "unterminated literal string")
    69  }
    70  
    71  func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
    72  	// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
    73  	// ml-literal-string-delim
    74  	// ml-literal-string-delim = 3apostrophe
    75  	// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
    76  	//
    77  	// mll-content = mll-char / newline
    78  	// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
    79  	// mll-quotes = 1*2apostrophe
    80  	for i := 3; i < len(b); {
    81  		switch b[i] {
    82  		case '\'':
    83  			if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
    84  				i += 3
    85  
    86  				// At that point we found 3 apostrophe, and i is the
    87  				// index of the byte after the third one. The scanner
    88  				// needs to be eager, because there can be an extra 2
    89  				// apostrophe that can be accepted at the end of the
    90  				// string.
    91  
    92  				if i >= len(b) || b[i] != '\'' {
    93  					return b[:i], b[i:], nil
    94  				}
    95  				i++
    96  
    97  				if i >= len(b) || b[i] != '\'' {
    98  					return b[:i], b[i:], nil
    99  				}
   100  				i++
   101  
   102  				if i < len(b) && b[i] == '\'' {
   103  					return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string")
   104  				}
   105  
   106  				return b[:i], b[i:], nil
   107  			}
   108  		case '\r':
   109  			if len(b) < i+2 {
   110  				return nil, nil, NewParserError(b[len(b):], `need a \n after \r`)
   111  			}
   112  			if b[i+1] != '\n' {
   113  				return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`)
   114  			}
   115  			i += 2 // skip the \n
   116  			continue
   117  		}
   118  		size := characters.Utf8ValidNext(b[i:])
   119  		if size == 0 {
   120  			return nil, nil, NewParserError(b[i:i+1], "invalid character")
   121  		}
   122  		i += size
   123  	}
   124  
   125  	return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`)
   126  }
   127  
   128  func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
   129  	const lenCRLF = 2
   130  	if len(b) < lenCRLF {
   131  		return nil, nil, NewParserError(b, "windows new line expected")
   132  	}
   133  
   134  	if b[1] != '\n' {
   135  		return nil, nil, NewParserError(b, `windows new line should be \r\n`)
   136  	}
   137  
   138  	return b[:lenCRLF], b[lenCRLF:], nil
   139  }
   140  
   141  func scanWhitespace(b []byte) ([]byte, []byte) {
   142  	for i := 0; i < len(b); i++ {
   143  		switch b[i] {
   144  		case ' ', '\t':
   145  			continue
   146  		default:
   147  			return b[:i], b[i:]
   148  		}
   149  	}
   150  
   151  	return b, b[len(b):]
   152  }
   153  
   154  func scanComment(b []byte) ([]byte, []byte, error) {
   155  	// comment-start-symbol = %x23 ; #
   156  	// non-ascii = %x80-D7FF / %xE000-10FFFF
   157  	// non-eol = %x09 / %x20-7F / non-ascii
   158  	//
   159  	// comment = comment-start-symbol *non-eol
   160  
   161  	for i := 1; i < len(b); {
   162  		if b[i] == '\n' {
   163  			return b[:i], b[i:], nil
   164  		}
   165  		if b[i] == '\r' {
   166  			if i+1 < len(b) && b[i+1] == '\n' {
   167  				return b[:i+1], b[i+1:], nil
   168  			}
   169  			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
   170  		}
   171  		size := characters.Utf8ValidNext(b[i:])
   172  		if size == 0 {
   173  			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
   174  		}
   175  
   176  		i += size
   177  	}
   178  
   179  	return b, b[len(b):], nil
   180  }
   181  
   182  func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
   183  	// basic-string = quotation-mark *basic-char quotation-mark
   184  	// quotation-mark = %x22            ; "
   185  	// basic-char = basic-unescaped / escaped
   186  	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
   187  	// escaped = escape escape-seq-char
   188  	escaped := false
   189  	i := 1
   190  
   191  	for ; i < len(b); i++ {
   192  		switch b[i] {
   193  		case '"':
   194  			return b[:i+1], escaped, b[i+1:], nil
   195  		case '\n', '\r':
   196  			return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines")
   197  		case '\\':
   198  			if len(b) < i+2 {
   199  				return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\")
   200  			}
   201  			escaped = true
   202  			i++ // skip the next character
   203  		}
   204  	}
   205  
   206  	return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`)
   207  }
   208  
   209  func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
   210  	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
   211  	// ml-basic-string-delim
   212  	// ml-basic-string-delim = 3quotation-mark
   213  	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
   214  	//
   215  	// mlb-content = mlb-char / newline / mlb-escaped-nl
   216  	// mlb-char = mlb-unescaped / escaped
   217  	// mlb-quotes = 1*2quotation-mark
   218  	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
   219  	// mlb-escaped-nl = escape ws newline *( wschar / newline )
   220  
   221  	escaped := false
   222  	i := 3
   223  
   224  	for ; i < len(b); i++ {
   225  		switch b[i] {
   226  		case '"':
   227  			if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
   228  				i += 3
   229  
   230  				// At that point we found 3 apostrophe, and i is the
   231  				// index of the byte after the third one. The scanner
   232  				// needs to be eager, because there can be an extra 2
   233  				// apostrophe that can be accepted at the end of the
   234  				// string.
   235  
   236  				if i >= len(b) || b[i] != '"' {
   237  					return b[:i], escaped, b[i:], nil
   238  				}
   239  				i++
   240  
   241  				if i >= len(b) || b[i] != '"' {
   242  					return b[:i], escaped, b[i:], nil
   243  				}
   244  				i++
   245  
   246  				if i < len(b) && b[i] == '"' {
   247  					return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`)
   248  				}
   249  
   250  				return b[:i], escaped, b[i:], nil
   251  			}
   252  		case '\\':
   253  			if len(b) < i+2 {
   254  				return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\")
   255  			}
   256  			escaped = true
   257  			i++ // skip the next character
   258  		case '\r':
   259  			if len(b) < i+2 {
   260  				return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`)
   261  			}
   262  			if b[i+1] != '\n' {
   263  				return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`)
   264  			}
   265  			i++ // skip the \n
   266  		}
   267  	}
   268  
   269  	return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`)
   270  }
   271
View as plain text