...

Source file src/github.com/ugorji/go/codec/decimal.go

Documentation: github.com/ugorji/go/codec

     1  // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
     2  // Use of this source code is governed by a MIT license found in the LICENSE file.
     3  
     4  package codec
     5  
     6  import (
     7  	"math"
     8  	"strconv"
     9  )
    10  
    11  // Per go spec, floats are represented in memory as
    12  // IEEE single or double precision floating point values.
    13  //
    14  // We also looked at the source for stdlib math/modf.go,
    15  // reviewed https://github.com/chewxy/math32
    16  // and read wikipedia documents describing the formats.
    17  //
    18  // It became clear that we could easily look at the bits to determine
    19  // whether any fraction exists.
    20  
    21  func parseFloat32(b []byte) (f float32, err error) {
    22  	return parseFloat32_custom(b)
    23  }
    24  
    25  func parseFloat64(b []byte) (f float64, err error) {
    26  	return parseFloat64_custom(b)
    27  }
    28  
    29  func parseFloat32_strconv(b []byte) (f float32, err error) {
    30  	f64, err := strconv.ParseFloat(stringView(b), 32)
    31  	f = float32(f64)
    32  	return
    33  }
    34  
    35  func parseFloat64_strconv(b []byte) (f float64, err error) {
    36  	return strconv.ParseFloat(stringView(b), 64)
    37  }
    38  
    39  // ------ parseFloat custom below --------
    40  
    41  // JSON really supports decimal numbers in base 10 notation, with exponent support.
    42  //
    43  // We assume the following:
    44  //   - a lot of floating point numbers in json files will have defined precision
    45  //     (in terms of number of digits after decimal point), etc.
    46  //   - these (referenced above) can be written in exact format.
    47  //
    48  // strconv.ParseFloat has some unnecessary overhead which we can do without
    49  // for the common case:
    50  //
    51  //    - expensive char-by-char check to see if underscores are in right place
    52  //    - testing for and skipping underscores
    53  //    - check if the string matches ignorecase +/- inf, +/- infinity, nan
    54  //    - support for base 16 (0xFFFF...)
    55  //
    56  // The functions below will try a fast-path for floats which can be decoded
    57  // without any loss of precision, meaning they:
    58  //
    59  //    - fits within the significand bits of the 32-bits or 64-bits
    60  //    - exponent fits within the exponent value
    61  //    - there is no truncation (any extra numbers are all trailing zeros)
    62  //
    63  // To figure out what the values are for maxMantDigits, use this idea below:
    64  //
    65  // 2^23 =                 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32)
    66  // 2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
    67  // 2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64)
    68  // 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
    69  //
    70  // Note: we only allow for up to what can comfortably fit into the significand
    71  // ignoring the exponent, and we only try to parse iff significand fits.
    72  
    73  const (
    74  	fMaxMultiplierForExactPow10_64 = 1e15
    75  	fMaxMultiplierForExactPow10_32 = 1e7
    76  
    77  	fUint64Cutoff = (1<<64-1)/10 + 1
    78  	// fUint32Cutoff = (1<<32-1)/10 + 1
    79  
    80  	fBase = 10
    81  )
    82  
    83  const (
    84  	thousand    = 1000
    85  	million     = thousand * thousand
    86  	billion     = thousand * million
    87  	trillion    = thousand * billion
    88  	quadrillion = thousand * trillion
    89  	quintillion = thousand * quadrillion
    90  )
    91  
    92  // Exact powers of 10.
    93  var uint64pow10 = [...]uint64{
    94  	1, 10, 100,
    95  	1 * thousand, 10 * thousand, 100 * thousand,
    96  	1 * million, 10 * million, 100 * million,
    97  	1 * billion, 10 * billion, 100 * billion,
    98  	1 * trillion, 10 * trillion, 100 * trillion,
    99  	1 * quadrillion, 10 * quadrillion, 100 * quadrillion,
   100  	1 * quintillion, 10 * quintillion,
   101  }
   102  var float64pow10 = [...]float64{
   103  	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
   104  	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
   105  	1e20, 1e21, 1e22,
   106  }
   107  var float32pow10 = [...]float32{
   108  	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
   109  }
   110  
   111  type floatinfo struct {
   112  	mantbits uint8
   113  
   114  	// expbits uint8 // (unused)
   115  	// bias    int16 // (unused)
   116  	// is32bit bool // (unused)
   117  
   118  	exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)
   119  
   120  	exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0)
   121  
   122  	// maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32
   123  
   124  	mantCutoffIsUint64Cutoff bool
   125  
   126  	mantCutoff uint64
   127  }
   128  
   129  var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1}
   130  var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1}
   131  
   132  var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff}
   133  
   134  func noFrac64(fbits uint64) bool {
   135  	if fbits == 0 {
   136  		return true
   137  	}
   138  
   139  	exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias
   140  	// clear top 12+e bits, the integer part; if the rest is 0, then no fraction.
   141  	return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part
   142  }
   143  
   144  func noFrac32(fbits uint32) bool {
   145  	if fbits == 0 {
   146  		return true
   147  	}
   148  
   149  	exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias
   150  	// clear top 9+e bits, the integer part; if the rest is 0, then no fraction.
   151  	return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part
   152  }
   153  
   154  func strconvParseErr(b []byte, fn string) error {
   155  	return &strconv.NumError{
   156  		Func: fn,
   157  		Err:  strconv.ErrSyntax,
   158  		Num:  string(b),
   159  	}
   160  }
   161  
   162  func parseFloat32_reader(r readFloatResult) (f float32, fail bool) {
   163  	f = float32(r.mantissa)
   164  	if r.exp == 0 {
   165  	} else if r.exp < 0 { // int / 10^k
   166  		f /= float32pow10[uint8(-r.exp)]
   167  	} else { // exp > 0
   168  		if r.exp > fi32.exactPow10 {
   169  			f *= float32pow10[r.exp-fi32.exactPow10]
   170  			if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range
   171  				fail = true
   172  				return // ok = false
   173  			}
   174  			f *= float32pow10[fi32.exactPow10]
   175  		} else {
   176  			f *= float32pow10[uint8(r.exp)]
   177  		}
   178  	}
   179  	if r.neg {
   180  		f = -f
   181  	}
   182  	return
   183  }
   184  
   185  func parseFloat32_custom(b []byte) (f float32, err error) {
   186  	r := readFloat(b, fi32)
   187  	if r.bad {
   188  		return 0, strconvParseErr(b, "ParseFloat")
   189  	}
   190  	if r.ok {
   191  		f, r.bad = parseFloat32_reader(r)
   192  		if !r.bad {
   193  			return
   194  		}
   195  	}
   196  	return parseFloat32_strconv(b)
   197  }
   198  
   199  func parseFloat64_reader(r readFloatResult) (f float64, fail bool) {
   200  	f = float64(r.mantissa)
   201  	if r.exp == 0 {
   202  	} else if r.exp < 0 { // int / 10^k
   203  		f /= float64pow10[-uint8(r.exp)]
   204  	} else { // exp > 0
   205  		if r.exp > fi64.exactPow10 {
   206  			f *= float64pow10[r.exp-fi64.exactPow10]
   207  			if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range
   208  				fail = true
   209  				return
   210  			}
   211  			f *= float64pow10[fi64.exactPow10]
   212  		} else {
   213  			f *= float64pow10[uint8(r.exp)]
   214  		}
   215  	}
   216  	if r.neg {
   217  		f = -f
   218  	}
   219  	return
   220  }
   221  
   222  func parseFloat64_custom(b []byte) (f float64, err error) {
   223  	r := readFloat(b, fi64)
   224  	if r.bad {
   225  		return 0, strconvParseErr(b, "ParseFloat")
   226  	}
   227  	if r.ok {
   228  		f, r.bad = parseFloat64_reader(r)
   229  		if !r.bad {
   230  			return
   231  		}
   232  	}
   233  	return parseFloat64_strconv(b)
   234  }
   235  
   236  func parseUint64_simple(b []byte) (n uint64, ok bool) {
   237  	var i int
   238  	var n1 uint64
   239  	var c uint8
   240  LOOP:
   241  	if i < len(b) {
   242  		c = b[i]
   243  		// unsigned integers don't overflow well on multiplication, so check cutoff here
   244  		// e.g. (maxUint64-5)*10 doesn't overflow well ...
   245  		// if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' {
   246  		if n >= fUint64Cutoff || c < '0' || c > '9' {
   247  			return
   248  		} else if c == '0' {
   249  			n *= fBase
   250  		} else {
   251  			n1 = n
   252  			n = n*fBase + uint64(c-'0')
   253  			if n < n1 {
   254  				return
   255  			}
   256  		}
   257  		i++
   258  		goto LOOP
   259  	}
   260  	ok = true
   261  	return
   262  }
   263  
   264  func parseUint64_reader(r readFloatResult) (f uint64, fail bool) {
   265  	f = r.mantissa
   266  	if r.exp == 0 {
   267  	} else if r.exp < 0 { // int / 10^k
   268  		if f%uint64pow10[uint8(-r.exp)] != 0 {
   269  			fail = true
   270  		} else {
   271  			f /= uint64pow10[uint8(-r.exp)]
   272  		}
   273  	} else { // exp > 0
   274  		f *= uint64pow10[uint8(r.exp)]
   275  	}
   276  	return
   277  }
   278  
   279  func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) {
   280  	if len(b) == 0 {
   281  		ok = true
   282  		return
   283  	}
   284  	if b[0] == '-' {
   285  		if len(b) == 1 {
   286  			return
   287  		}
   288  		neg = true
   289  		b = b[1:]
   290  	}
   291  
   292  	u, ok = parseUint64_simple(b)
   293  	if ok {
   294  		return
   295  	}
   296  
   297  	r := readFloat(b, fi64u)
   298  	if r.ok {
   299  		var fail bool
   300  		u, fail = parseUint64_reader(r)
   301  		if fail {
   302  			f, err := parseFloat64(b)
   303  			if err != nil {
   304  				return
   305  			}
   306  			if !noFrac64(math.Float64bits(f)) {
   307  				return
   308  			}
   309  			u = uint64(f)
   310  		}
   311  		ok = true
   312  		return
   313  	}
   314  	return
   315  }
   316  
   317  // parseNumber will return an integer if only composed of [-]?[0-9]+
   318  // Else it will return a float.
   319  func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) {
   320  	var ok, neg bool
   321  	var f uint64
   322  
   323  	if len(b) == 0 {
   324  		return
   325  	}
   326  
   327  	if b[0] == '-' {
   328  		neg = true
   329  		f, ok = parseUint64_simple(b[1:])
   330  	} else {
   331  		f, ok = parseUint64_simple(b)
   332  	}
   333  
   334  	if ok {
   335  		if neg {
   336  			z.v = valueTypeInt
   337  			if chkOvf.Uint2Int(f, neg) {
   338  				return strconvParseErr(b, "ParseInt")
   339  			}
   340  			z.i = -int64(f)
   341  		} else if preferSignedInt {
   342  			z.v = valueTypeInt
   343  			if chkOvf.Uint2Int(f, neg) {
   344  				return strconvParseErr(b, "ParseInt")
   345  			}
   346  			z.i = int64(f)
   347  		} else {
   348  			z.v = valueTypeUint
   349  			z.u = f
   350  		}
   351  		return
   352  	}
   353  
   354  	z.v = valueTypeFloat
   355  	z.f, err = parseFloat64_custom(b)
   356  	return
   357  }
   358  
   359  type readFloatResult struct {
   360  	mantissa uint64
   361  	exp      int8
   362  	neg      bool
   363  	trunc    bool
   364  	bad      bool // bad decimal string
   365  	hardexp  bool // exponent is hard to handle (> 2 digits, etc)
   366  	ok       bool
   367  	// sawdot   bool
   368  	// sawexp   bool
   369  	//_ [2]bool // padding
   370  }
   371  
   372  func readFloat(s []byte, y floatinfo) (r readFloatResult) {
   373  	var i uint // uint, so that we eliminate bounds checking
   374  	var slen = uint(len(s))
   375  	if slen == 0 {
   376  		// read an empty string as the zero value
   377  		// r.bad = true
   378  		r.ok = true
   379  		return
   380  	}
   381  
   382  	if s[0] == '-' {
   383  		r.neg = true
   384  		i++
   385  	}
   386  
   387  	// we considered punting early if string has length > maxMantDigits, but this doesn't account
   388  	// for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
   389  
   390  	var nd, ndMant, dp int8
   391  	var sawdot, sawexp bool
   392  	var xu uint64
   393  
   394  LOOP:
   395  	for ; i < slen; i++ {
   396  		switch s[i] {
   397  		case '.':
   398  			if sawdot {
   399  				r.bad = true
   400  				return
   401  			}
   402  			sawdot = true
   403  			dp = nd
   404  		case 'e', 'E':
   405  			sawexp = true
   406  			break LOOP
   407  		case '0':
   408  			if nd == 0 {
   409  				dp--
   410  				continue LOOP
   411  			}
   412  			nd++
   413  			if r.mantissa < y.mantCutoff {
   414  				r.mantissa *= fBase
   415  				ndMant++
   416  			}
   417  		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
   418  			nd++
   419  			if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff {
   420  				r.mantissa *= fBase
   421  				xu = r.mantissa + uint64(s[i]-'0')
   422  				if xu < r.mantissa {
   423  					r.trunc = true
   424  					return
   425  				}
   426  				r.mantissa = xu
   427  			} else if r.mantissa < y.mantCutoff {
   428  				// mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0')
   429  				r.mantissa = r.mantissa*fBase + uint64(s[i]-'0')
   430  			} else {
   431  				r.trunc = true
   432  				return
   433  			}
   434  			ndMant++
   435  		default:
   436  			r.bad = true
   437  			return
   438  		}
   439  	}
   440  
   441  	if !sawdot {
   442  		dp = nd
   443  	}
   444  
   445  	if sawexp {
   446  		i++
   447  		if i < slen {
   448  			var eneg bool
   449  			if s[i] == '+' {
   450  				i++
   451  			} else if s[i] == '-' {
   452  				i++
   453  				eneg = true
   454  			}
   455  			if i < slen {
   456  				// for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
   457  				// exit quick if exponent is more than 2 digits.
   458  				if i+2 < slen {
   459  					r.hardexp = true
   460  					return
   461  				}
   462  				var e int8
   463  				if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
   464  					r.bad = true
   465  					return
   466  				}
   467  				e = int8(s[i] - '0')
   468  				i++
   469  				if i < slen {
   470  					if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
   471  						r.bad = true
   472  						return
   473  					}
   474  					e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
   475  					i++
   476  				}
   477  				if eneg {
   478  					dp -= e
   479  				} else {
   480  					dp += e
   481  				}
   482  			}
   483  		}
   484  	}
   485  
   486  	if r.mantissa != 0 {
   487  		r.exp = dp - ndMant
   488  		// do not set ok=true for cases we cannot handle
   489  		if r.exp < -y.exactPow10 ||
   490  			r.exp > y.exactInts+y.exactPow10 ||
   491  			(y.mantbits != 0 && r.mantissa>>y.mantbits != 0) {
   492  			r.hardexp = true
   493  			return
   494  		}
   495  	}
   496  
   497  	r.ok = true
   498  	return
   499  }
   500  

View as plain text