...

Source file src/github.com/json-iterator/go/iter_str.go

Documentation: github.com/json-iterator/go

     1  package jsoniter
     2  
     3  import (
     4  	"fmt"
     5  	"unicode/utf16"
     6  )
     7  
     8  // ReadString read string from iterator
     9  func (iter *Iterator) ReadString() (ret string) {
    10  	c := iter.nextToken()
    11  	if c == '"' {
    12  		for i := iter.head; i < iter.tail; i++ {
    13  			c := iter.buf[i]
    14  			if c == '"' {
    15  				ret = string(iter.buf[iter.head:i])
    16  				iter.head = i + 1
    17  				return ret
    18  			} else if c == '\\' {
    19  				break
    20  			} else if c < ' ' {
    21  				iter.ReportError("ReadString",
    22  					fmt.Sprintf(`invalid control character found: %d`, c))
    23  				return
    24  			}
    25  		}
    26  		return iter.readStringSlowPath()
    27  	} else if c == 'n' {
    28  		iter.skipThreeBytes('u', 'l', 'l')
    29  		return ""
    30  	}
    31  	iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c}))
    32  	return
    33  }
    34  
    35  func (iter *Iterator) readStringSlowPath() (ret string) {
    36  	var str []byte
    37  	var c byte
    38  	for iter.Error == nil {
    39  		c = iter.readByte()
    40  		if c == '"' {
    41  			return string(str)
    42  		}
    43  		if c == '\\' {
    44  			c = iter.readByte()
    45  			str = iter.readEscapedChar(c, str)
    46  		} else {
    47  			str = append(str, c)
    48  		}
    49  	}
    50  	iter.ReportError("readStringSlowPath", "unexpected end of input")
    51  	return
    52  }
    53  
    54  func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
    55  	switch c {
    56  	case 'u':
    57  		r := iter.readU4()
    58  		if utf16.IsSurrogate(r) {
    59  			c = iter.readByte()
    60  			if iter.Error != nil {
    61  				return nil
    62  			}
    63  			if c != '\\' {
    64  				iter.unreadByte()
    65  				str = appendRune(str, r)
    66  				return str
    67  			}
    68  			c = iter.readByte()
    69  			if iter.Error != nil {
    70  				return nil
    71  			}
    72  			if c != 'u' {
    73  				str = appendRune(str, r)
    74  				return iter.readEscapedChar(c, str)
    75  			}
    76  			r2 := iter.readU4()
    77  			if iter.Error != nil {
    78  				return nil
    79  			}
    80  			combined := utf16.DecodeRune(r, r2)
    81  			if combined == '\uFFFD' {
    82  				str = appendRune(str, r)
    83  				str = appendRune(str, r2)
    84  			} else {
    85  				str = appendRune(str, combined)
    86  			}
    87  		} else {
    88  			str = appendRune(str, r)
    89  		}
    90  	case '"':
    91  		str = append(str, '"')
    92  	case '\\':
    93  		str = append(str, '\\')
    94  	case '/':
    95  		str = append(str, '/')
    96  	case 'b':
    97  		str = append(str, '\b')
    98  	case 'f':
    99  		str = append(str, '\f')
   100  	case 'n':
   101  		str = append(str, '\n')
   102  	case 'r':
   103  		str = append(str, '\r')
   104  	case 't':
   105  		str = append(str, '\t')
   106  	default:
   107  		iter.ReportError("readEscapedChar",
   108  			`invalid escape char after \`)
   109  		return nil
   110  	}
   111  	return str
   112  }
   113  
   114  // ReadStringAsSlice read string from iterator without copying into string form.
   115  // The []byte can not be kept, as it will change after next iterator call.
   116  func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
   117  	c := iter.nextToken()
   118  	if c == '"' {
   119  		for i := iter.head; i < iter.tail; i++ {
   120  			// require ascii string and no escape
   121  			// for: field name, base64, number
   122  			if iter.buf[i] == '"' {
   123  				// fast path: reuse the underlying buffer
   124  				ret = iter.buf[iter.head:i]
   125  				iter.head = i + 1
   126  				return ret
   127  			}
   128  		}
   129  		readLen := iter.tail - iter.head
   130  		copied := make([]byte, readLen, readLen*2)
   131  		copy(copied, iter.buf[iter.head:iter.tail])
   132  		iter.head = iter.tail
   133  		for iter.Error == nil {
   134  			c := iter.readByte()
   135  			if c == '"' {
   136  				return copied
   137  			}
   138  			copied = append(copied, c)
   139  		}
   140  		return copied
   141  	}
   142  	iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c}))
   143  	return
   144  }
   145  
   146  func (iter *Iterator) readU4() (ret rune) {
   147  	for i := 0; i < 4; i++ {
   148  		c := iter.readByte()
   149  		if iter.Error != nil {
   150  			return
   151  		}
   152  		if c >= '0' && c <= '9' {
   153  			ret = ret*16 + rune(c-'0')
   154  		} else if c >= 'a' && c <= 'f' {
   155  			ret = ret*16 + rune(c-'a'+10)
   156  		} else if c >= 'A' && c <= 'F' {
   157  			ret = ret*16 + rune(c-'A'+10)
   158  		} else {
   159  			iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c}))
   160  			return
   161  		}
   162  	}
   163  	return ret
   164  }
   165  
   166  const (
   167  	t1 = 0x00 // 0000 0000
   168  	tx = 0x80 // 1000 0000
   169  	t2 = 0xC0 // 1100 0000
   170  	t3 = 0xE0 // 1110 0000
   171  	t4 = 0xF0 // 1111 0000
   172  	t5 = 0xF8 // 1111 1000
   173  
   174  	maskx = 0x3F // 0011 1111
   175  	mask2 = 0x1F // 0001 1111
   176  	mask3 = 0x0F // 0000 1111
   177  	mask4 = 0x07 // 0000 0111
   178  
   179  	rune1Max = 1<<7 - 1
   180  	rune2Max = 1<<11 - 1
   181  	rune3Max = 1<<16 - 1
   182  
   183  	surrogateMin = 0xD800
   184  	surrogateMax = 0xDFFF
   185  
   186  	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
   187  	runeError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
   188  )
   189  
   190  func appendRune(p []byte, r rune) []byte {
   191  	// Negative values are erroneous. Making it unsigned addresses the problem.
   192  	switch i := uint32(r); {
   193  	case i <= rune1Max:
   194  		p = append(p, byte(r))
   195  		return p
   196  	case i <= rune2Max:
   197  		p = append(p, t2|byte(r>>6))
   198  		p = append(p, tx|byte(r)&maskx)
   199  		return p
   200  	case i > maxRune, surrogateMin <= i && i <= surrogateMax:
   201  		r = runeError
   202  		fallthrough
   203  	case i <= rune3Max:
   204  		p = append(p, t3|byte(r>>12))
   205  		p = append(p, tx|byte(r>>6)&maskx)
   206  		p = append(p, tx|byte(r)&maskx)
   207  		return p
   208  	default:
   209  		p = append(p, t4|byte(r>>18))
   210  		p = append(p, tx|byte(r>>12)&maskx)
   211  		p = append(p, tx|byte(r>>6)&maskx)
   212  		p = append(p, tx|byte(r)&maskx)
   213  		return p
   214  	}
   215  }
   216  

View as plain text