decode.go

Documentation: google.golang.org/protobuf/internal/encoding/json

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"regexp"
    12  	"unicode/utf8"
    13  
    14  	"google.golang.org/protobuf/internal/errors"
    15  )
    16  
    17  // call specifies which Decoder method was invoked.
    18  type call uint8
    19  
    20  const (
    21  	readCall call = iota
    22  	peekCall
    23  )
    24  
    25  const unexpectedFmt = "unexpected token %s"
    26  
    27  // ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
    28  var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
    29  
    30  // Decoder is a token-based JSON decoder.
    31  type Decoder struct {
    32  	// lastCall is last method called, either readCall or peekCall.
    33  	// Initial value is readCall.
    34  	lastCall call
    35  
    36  	// lastToken contains the last read token.
    37  	lastToken Token
    38  
    39  	// lastErr contains the last read error.
    40  	lastErr error
    41  
    42  	// openStack is a stack containing ObjectOpen and ArrayOpen values. The
    43  	// top of stack represents the object or the array the current value is
    44  	// directly located in.
    45  	openStack []Kind
    46  
    47  	// orig is used in reporting line and column.
    48  	orig []byte
    49  	// in contains the unconsumed input.
    50  	in []byte
    51  }
    52  
    53  // NewDecoder returns a Decoder to read the given []byte.
    54  func NewDecoder(b []byte) *Decoder {
    55  	return &Decoder{orig: b, in: b}
    56  }
    57  
    58  // Peek looks ahead and returns the next token kind without advancing a read.
    59  func (d *Decoder) Peek() (Token, error) {
    60  	defer func() { d.lastCall = peekCall }()
    61  	if d.lastCall == readCall {
    62  		d.lastToken, d.lastErr = d.Read()
    63  	}
    64  	return d.lastToken, d.lastErr
    65  }
    66  
    67  // Read returns the next JSON token.
    68  // It will return an error if there is no valid token.
    69  func (d *Decoder) Read() (Token, error) {
    70  	const scalar = Null | Bool | Number | String
    71  
    72  	defer func() { d.lastCall = readCall }()
    73  	if d.lastCall == peekCall {
    74  		return d.lastToken, d.lastErr
    75  	}
    76  
    77  	tok, err := d.parseNext()
    78  	if err != nil {
    79  		return Token{}, err
    80  	}
    81  
    82  	switch tok.kind {
    83  	case EOF:
    84  		if len(d.openStack) != 0 ||
    85  			d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 {
    86  			return Token{}, ErrUnexpectedEOF
    87  		}
    88  
    89  	case Null:
    90  		if !d.isValueNext() {
    91  			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
    92  		}
    93  
    94  	case Bool, Number:
    95  		if !d.isValueNext() {
    96  			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
    97  		}
    98  
    99  	case String:
   100  		if d.isValueNext() {
   101  			break
   102  		}
   103  		// This string token should only be for a field name.
   104  		if d.lastToken.kind&(ObjectOpen|comma) == 0 {
   105  			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
   106  		}
   107  		if len(d.in) == 0 {
   108  			return Token{}, ErrUnexpectedEOF
   109  		}
   110  		if c := d.in[0]; c != ':' {
   111  			return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c))
   112  		}
   113  		tok.kind = Name
   114  		d.consume(1)
   115  
   116  	case ObjectOpen, ArrayOpen:
   117  		if !d.isValueNext() {
   118  			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
   119  		}
   120  		d.openStack = append(d.openStack, tok.kind)
   121  
   122  	case ObjectClose:
   123  		if len(d.openStack) == 0 ||
   124  			d.lastToken.kind == comma ||
   125  			d.openStack[len(d.openStack)-1] != ObjectOpen {
   126  			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
   127  		}
   128  		d.openStack = d.openStack[:len(d.openStack)-1]
   129  
   130  	case ArrayClose:
   131  		if len(d.openStack) == 0 ||
   132  			d.lastToken.kind == comma ||
   133  			d.openStack[len(d.openStack)-1] != ArrayOpen {
   134  			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
   135  		}
   136  		d.openStack = d.openStack[:len(d.openStack)-1]
   137  
   138  	case comma:
   139  		if len(d.openStack) == 0 ||
   140  			d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 {
   141  			return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
   142  		}
   143  	}
   144  
   145  	// Update d.lastToken only after validating token to be in the right sequence.
   146  	d.lastToken = tok
   147  
   148  	if d.lastToken.kind == comma {
   149  		return d.Read()
   150  	}
   151  	return tok, nil
   152  }
   153  
   154  // Any sequence that looks like a non-delimiter (for error reporting).
   155  var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
   156  
   157  // parseNext parses for the next JSON token. It returns a Token object for
   158  // different types, except for Name. It does not handle whether the next token
   159  // is in a valid sequence or not.
   160  func (d *Decoder) parseNext() (Token, error) {
   161  	// Trim leading spaces.
   162  	d.consume(0)
   163  
   164  	in := d.in
   165  	if len(in) == 0 {
   166  		return d.consumeToken(EOF, 0), nil
   167  	}
   168  
   169  	switch in[0] {
   170  	case 'n':
   171  		if n := matchWithDelim("null", in); n != 0 {
   172  			return d.consumeToken(Null, n), nil
   173  		}
   174  
   175  	case 't':
   176  		if n := matchWithDelim("true", in); n != 0 {
   177  			return d.consumeBoolToken(true, n), nil
   178  		}
   179  
   180  	case 'f':
   181  		if n := matchWithDelim("false", in); n != 0 {
   182  			return d.consumeBoolToken(false, n), nil
   183  		}
   184  
   185  	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   186  		if n, ok := parseNumber(in); ok {
   187  			return d.consumeToken(Number, n), nil
   188  		}
   189  
   190  	case '"':
   191  		s, n, err := d.parseString(in)
   192  		if err != nil {
   193  			return Token{}, err
   194  		}
   195  		return d.consumeStringToken(s, n), nil
   196  
   197  	case '{':
   198  		return d.consumeToken(ObjectOpen, 1), nil
   199  
   200  	case '}':
   201  		return d.consumeToken(ObjectClose, 1), nil
   202  
   203  	case '[':
   204  		return d.consumeToken(ArrayOpen, 1), nil
   205  
   206  	case ']':
   207  		return d.consumeToken(ArrayClose, 1), nil
   208  
   209  	case ',':
   210  		return d.consumeToken(comma, 1), nil
   211  	}
   212  	return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in))
   213  }
   214  
   215  // newSyntaxError returns an error with line and column information useful for
   216  // syntax errors.
   217  func (d *Decoder) newSyntaxError(pos int, f string, x ...interface{}) error {
   218  	e := errors.New(f, x...)
   219  	line, column := d.Position(pos)
   220  	return errors.New("syntax error (line %d:%d): %v", line, column, e)
   221  }
   222  
   223  // Position returns line and column number of given index of the original input.
   224  // It will panic if index is out of range.
   225  func (d *Decoder) Position(idx int) (line int, column int) {
   226  	b := d.orig[:idx]
   227  	line = bytes.Count(b, []byte("\n")) + 1
   228  	if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
   229  		b = b[i+1:]
   230  	}
   231  	column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
   232  	return line, column
   233  }
   234  
   235  // currPos returns the current index position of d.in from d.orig.
   236  func (d *Decoder) currPos() int {
   237  	return len(d.orig) - len(d.in)
   238  }
   239  
   240  // matchWithDelim matches s with the input b and verifies that the match
   241  // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
   242  // As a special case, EOF is considered a delimiter. It returns the length of s
   243  // if there is a match, else 0.
   244  func matchWithDelim(s string, b []byte) int {
   245  	if !bytes.HasPrefix(b, []byte(s)) {
   246  		return 0
   247  	}
   248  
   249  	n := len(s)
   250  	if n < len(b) && isNotDelim(b[n]) {
   251  		return 0
   252  	}
   253  	return n
   254  }
   255  
   256  // isNotDelim returns true if given byte is a not delimiter character.
   257  func isNotDelim(c byte) bool {
   258  	return (c == '-' || c == '+' || c == '.' || c == '_' ||
   259  		('a' <= c && c <= 'z') ||
   260  		('A' <= c && c <= 'Z') ||
   261  		('0' <= c && c <= '9'))
   262  }
   263  
   264  // consume consumes n bytes of input and any subsequent whitespace.
   265  func (d *Decoder) consume(n int) {
   266  	d.in = d.in[n:]
   267  	for len(d.in) > 0 {
   268  		switch d.in[0] {
   269  		case ' ', '\n', '\r', '\t':
   270  			d.in = d.in[1:]
   271  		default:
   272  			return
   273  		}
   274  	}
   275  }
   276  
   277  // isValueNext returns true if next type should be a JSON value: Null,
   278  // Number, String or Bool.
   279  func (d *Decoder) isValueNext() bool {
   280  	if len(d.openStack) == 0 {
   281  		return d.lastToken.kind == 0
   282  	}
   283  
   284  	start := d.openStack[len(d.openStack)-1]
   285  	switch start {
   286  	case ObjectOpen:
   287  		return d.lastToken.kind&Name != 0
   288  	case ArrayOpen:
   289  		return d.lastToken.kind&(ArrayOpen|comma) != 0
   290  	}
   291  	panic(fmt.Sprintf(
   292  		"unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v",
   293  		d.lastToken.kind, start))
   294  }
   295  
   296  // consumeToken constructs a Token for given Kind with raw value derived from
   297  // current d.in and given size, and consumes the given size-length of it.
   298  func (d *Decoder) consumeToken(kind Kind, size int) Token {
   299  	tok := Token{
   300  		kind: kind,
   301  		raw:  d.in[:size],
   302  		pos:  len(d.orig) - len(d.in),
   303  	}
   304  	d.consume(size)
   305  	return tok
   306  }
   307  
   308  // consumeBoolToken constructs a Token for a Bool kind with raw value derived from
   309  // current d.in and given size.
   310  func (d *Decoder) consumeBoolToken(b bool, size int) Token {
   311  	tok := Token{
   312  		kind: Bool,
   313  		raw:  d.in[:size],
   314  		pos:  len(d.orig) - len(d.in),
   315  		boo:  b,
   316  	}
   317  	d.consume(size)
   318  	return tok
   319  }
   320  
   321  // consumeStringToken constructs a Token for a String kind with raw value derived
   322  // from current d.in and given size.
   323  func (d *Decoder) consumeStringToken(s string, size int) Token {
   324  	tok := Token{
   325  		kind: String,
   326  		raw:  d.in[:size],
   327  		pos:  len(d.orig) - len(d.in),
   328  		str:  s,
   329  	}
   330  	d.consume(size)
   331  	return tok
   332  }
   333  
   334  // Clone returns a copy of the Decoder for use in reading ahead the next JSON
   335  // object, array or other values without affecting current Decoder.
   336  func (d *Decoder) Clone() *Decoder {
   337  	ret := *d
   338  	ret.openStack = append([]Kind(nil), ret.openStack...)
   339  	return &ret
   340  }
   341
View as plain text