encode.go

Documentation: google.golang.org/protobuf/internal/encoding/text

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package text
     6  
     7  import (
     8  	"math"
     9  	"math/bits"
    10  	"strconv"
    11  	"strings"
    12  	"unicode/utf8"
    13  
    14  	"google.golang.org/protobuf/internal/detrand"
    15  	"google.golang.org/protobuf/internal/errors"
    16  )
    17  
    18  // encType represents an encoding type.
    19  type encType uint8
    20  
    21  const (
    22  	_ encType = (1 << iota) / 2
    23  	name
    24  	scalar
    25  	messageOpen
    26  	messageClose
    27  )
    28  
// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
//
// The encoder does no validation of its own: each Write/Start/End call simply
// appends to the output buffer, preceded by whatever whitespace the configured
// layout requires.
type Encoder struct {
	encoderState // mutable state; embedded so Snapshot and Reset can copy it as a unit

	indent      string  // one level of indentation; empty selects single-line output
	delims      [2]byte // opening and closing message delimiters
	outputASCII bool    // passed to appendString to escape non-ASCII runes in strings
}
    38  
// encoderState is the part of an Encoder that changes as output is written.
// It is a separate struct so that it can be copied out by Snapshot and put
// back by Reset independently of the encoder's fixed configuration.
type encoderState struct {
	lastType encType // kind of construct written last; consulted by prepareNext
	indents  []byte  // indentation prefix for the current nesting depth (multi-line mode)
	out      []byte  // output accumulated so far
}
    44  
    45  // NewEncoder returns an Encoder.
    46  //
    47  // If indent is a non-empty string, it causes every entry in a List or Message
    48  // to be preceded by the indent and trailed by a newline.
    49  //
    50  // If delims is not the zero value, it controls the delimiter characters used
    51  // for messages (e.g., "{}" vs "<>").
    52  //
    53  // If outputASCII is true, strings will be serialized in such a way that
    54  // multi-byte UTF-8 sequences are escaped. This property ensures that the
    55  // overall output is ASCII (as opposed to UTF-8).
    56  func NewEncoder(buf []byte, indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
    57  	e := &Encoder{
    58  		encoderState: encoderState{out: buf},
    59  	}
    60  	if len(indent) > 0 {
    61  		if strings.Trim(indent, " \t") != "" {
    62  			return nil, errors.New("indent may only be composed of space and tab characters")
    63  		}
    64  		e.indent = indent
    65  	}
    66  	switch delims {
    67  	case [2]byte{0, 0}:
    68  		e.delims = [2]byte{'{', '}'}
    69  	case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
    70  		e.delims = delims
    71  	default:
    72  		return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
    73  	}
    74  	e.outputASCII = outputASCII
    75  
    76  	return e, nil
    77  }
    78  
    79  // Bytes returns the content of the written bytes.
    80  func (e *Encoder) Bytes() []byte {
    81  	return e.out
    82  }
    83  
    84  // StartMessage writes out the '{' or '<' symbol.
    85  func (e *Encoder) StartMessage() {
    86  	e.prepareNext(messageOpen)
    87  	e.out = append(e.out, e.delims[0])
    88  }
    89  
    90  // EndMessage writes out the '}' or '>' symbol.
    91  func (e *Encoder) EndMessage() {
    92  	e.prepareNext(messageClose)
    93  	e.out = append(e.out, e.delims[1])
    94  }
    95  
    96  // WriteName writes out the field name and the separator ':'.
    97  func (e *Encoder) WriteName(s string) {
    98  	e.prepareNext(name)
    99  	e.out = append(e.out, s...)
   100  	e.out = append(e.out, ':')
   101  }
   102  
   103  // WriteBool writes out the given boolean value.
   104  func (e *Encoder) WriteBool(b bool) {
   105  	if b {
   106  		e.WriteLiteral("true")
   107  	} else {
   108  		e.WriteLiteral("false")
   109  	}
   110  }
   111  
   112  // WriteString writes out the given string value.
   113  func (e *Encoder) WriteString(s string) {
   114  	e.prepareNext(scalar)
   115  	e.out = appendString(e.out, s, e.outputASCII)
   116  }
   117  
   118  func appendString(out []byte, in string, outputASCII bool) []byte {
   119  	out = append(out, '"')
   120  	i := indexNeedEscapeInString(in)
   121  	in, out = in[i:], append(out, in[:i]...)
   122  	for len(in) > 0 {
   123  		switch r, n := utf8.DecodeRuneInString(in); {
   124  		case r == utf8.RuneError && n == 1:
   125  			// We do not report invalid UTF-8 because strings in the text format
   126  			// are used to represent both the proto string and bytes type.
   127  			r = rune(in[0])
   128  			fallthrough
   129  		case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
   130  			out = append(out, '\\')
   131  			switch r {
   132  			case '"', '\\':
   133  				out = append(out, byte(r))
   134  			case '\n':
   135  				out = append(out, 'n')
   136  			case '\r':
   137  				out = append(out, 'r')
   138  			case '\t':
   139  				out = append(out, 't')
   140  			default:
   141  				out = append(out, 'x')
   142  				out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
   143  				out = strconv.AppendUint(out, uint64(r), 16)
   144  			}
   145  			in = in[n:]
   146  		case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
   147  			out = append(out, '\\')
   148  			if r <= math.MaxUint16 {
   149  				out = append(out, 'u')
   150  				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
   151  				out = strconv.AppendUint(out, uint64(r), 16)
   152  			} else {
   153  				out = append(out, 'U')
   154  				out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
   155  				out = strconv.AppendUint(out, uint64(r), 16)
   156  			}
   157  			in = in[n:]
   158  		default:
   159  			i := indexNeedEscapeInString(in[n:])
   160  			in, out = in[n+i:], append(out, in[:n+i]...)
   161  		}
   162  	}
   163  	out = append(out, '"')
   164  	return out
   165  }
   166  
   167  // indexNeedEscapeInString returns the index of the character that needs
   168  // escaping. If no characters need escaping, this returns the input length.
   169  func indexNeedEscapeInString(s string) int {
   170  	for i := 0; i < len(s); i++ {
   171  		if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f {
   172  			return i
   173  		}
   174  	}
   175  	return len(s)
   176  }
   177  
   178  // WriteFloat writes out the given float value for given bitSize.
   179  func (e *Encoder) WriteFloat(n float64, bitSize int) {
   180  	e.prepareNext(scalar)
   181  	e.out = appendFloat(e.out, n, bitSize)
   182  }
   183  
   184  func appendFloat(out []byte, n float64, bitSize int) []byte {
   185  	switch {
   186  	case math.IsNaN(n):
   187  		return append(out, "nan"...)
   188  	case math.IsInf(n, +1):
   189  		return append(out, "inf"...)
   190  	case math.IsInf(n, -1):
   191  		return append(out, "-inf"...)
   192  	default:
   193  		return strconv.AppendFloat(out, n, 'g', -1, bitSize)
   194  	}
   195  }
   196  
   197  // WriteInt writes out the given signed integer value.
   198  func (e *Encoder) WriteInt(n int64) {
   199  	e.prepareNext(scalar)
   200  	e.out = strconv.AppendInt(e.out, n, 10)
   201  }
   202  
   203  // WriteUint writes out the given unsigned integer value.
   204  func (e *Encoder) WriteUint(n uint64) {
   205  	e.prepareNext(scalar)
   206  	e.out = strconv.AppendUint(e.out, n, 10)
   207  }
   208  
   209  // WriteLiteral writes out the given string as a literal value without quotes.
   210  // This is used for writing enum literal strings.
   211  func (e *Encoder) WriteLiteral(s string) {
   212  	e.prepareNext(scalar)
   213  	e.out = append(e.out, s...)
   214  }
   215  
   216  // prepareNext adds possible space and indentation for the next value based
   217  // on last encType and indent option. It also updates e.lastType to next.
   218  func (e *Encoder) prepareNext(next encType) {
   219  	defer func() {
   220  		e.lastType = next
   221  	}()
   222  
   223  	// Single line.
   224  	if len(e.indent) == 0 {
   225  		// Add space after each field before the next one.
   226  		if e.lastType&(scalar|messageClose) != 0 && next == name {
   227  			e.out = append(e.out, ' ')
   228  			// Add a random extra space to make output unstable.
   229  			if detrand.Bool() {
   230  				e.out = append(e.out, ' ')
   231  			}
   232  		}
   233  		return
   234  	}
   235  
   236  	// Multi-line.
   237  	switch {
   238  	case e.lastType == name:
   239  		e.out = append(e.out, ' ')
   240  		// Add a random extra space after name: to make output unstable.
   241  		if detrand.Bool() {
   242  			e.out = append(e.out, ' ')
   243  		}
   244  
   245  	case e.lastType == messageOpen && next != messageClose:
   246  		e.indents = append(e.indents, e.indent...)
   247  		e.out = append(e.out, '\n')
   248  		e.out = append(e.out, e.indents...)
   249  
   250  	case e.lastType&(scalar|messageClose) != 0:
   251  		if next == messageClose {
   252  			e.indents = e.indents[:len(e.indents)-len(e.indent)]
   253  		}
   254  		e.out = append(e.out, '\n')
   255  		e.out = append(e.out, e.indents...)
   256  	}
   257  }
   258  
   259  // Snapshot returns the current snapshot for use in Reset.
   260  func (e *Encoder) Snapshot() encoderState {
   261  	return e.encoderState
   262  }
   263  
   264  // Reset resets the Encoder to the given encoderState from a Snapshot.
   265  func (e *Encoder) Reset(es encoderState) {
   266  	e.encoderState = es
   267  }
   268  
   269  // AppendString appends the escaped form of the input string to b.
   270  func AppendString(b []byte, s string) []byte {
   271  	return appendString(b, s, false)
   272  }
   273
View as plain text