...

Source file src/golang.org/x/text/internal/number/pattern.go

Documentation: golang.org/x/text/internal/number

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package number
     6  
     7  import (
     8  	"errors"
     9  	"unicode/utf8"
    10  )
    11  
    12  // This file contains a parser for the CLDR number patterns as described in
    13  // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
    14  //
    15  // The following BNF is derived from this standard.
    16  //
    17  // pattern    := subpattern (';' subpattern)?
    18  // subpattern := affix? number exponent? affix?
    19  // number     := decimal | sigDigits
    20  // decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
    21  // fraction   := '0'* '#'*
    22  // sigDigits  := '#'* '@' '@'* '#'*
    23  // exponent   := 'E' '+'? '0'* '0'
    24  // padSpec    := '*' \L
    25  //
    26  // Notes:
    27  // - An affix pattern may contain any runes, but runes with special meaning
    28  //   should be escaped.
    29  // - Sequences of digits, '#', and '@' in decimal and sigDigits may have
    30  //   interstitial commas.
    31  
    32  // TODO: replace special characters in affixes (-, +, ¤) with control codes.
    33  
    34  // Pattern holds information for formatting numbers. It is designed to hold
    35  // information from CLDR number patterns.
    36  //
    37  // This pattern is precompiled  for all patterns for all languages. Even though
    38  // the number of patterns is not very large, we want to keep this small.
    39  //
    40  // This type is only intended for internal use.
    41  type Pattern struct {
    42  	RoundingContext
    43  
    44  	Affix       string // includes prefix and suffix. First byte is prefix length.
    45  	Offset      uint16 // Offset into Affix for prefix and suffix
    46  	NegOffset   uint16 // Offset into Affix for negative prefix and suffix or 0.
    47  	PadRune     rune
    48  	FormatWidth uint16
    49  
    50  	GroupingSize [2]uint8
    51  	Flags        PatternFlag
    52  }
    53  
    54  // A RoundingContext indicates how a number should be converted to digits.
    55  // It contains all information needed to determine the "visible digits" as
    56  // required by the pluralization rules.
    57  type RoundingContext struct {
    58  	// TODO: unify these two fields so that there is a more unambiguous meaning
    59  	// of how precision is handled.
    60  	MaxSignificantDigits int16 // -1 is unlimited
    61  	MaxFractionDigits    int16 // -1 is unlimited
    62  
    63  	Increment      uint32
    64  	IncrementScale uint8 // May differ from printed scale.
    65  
    66  	Mode RoundingMode
    67  
    68  	DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
    69  
    70  	// Number of digits.
    71  	MinIntegerDigits uint8
    72  
    73  	MaxIntegerDigits     uint8
    74  	MinFractionDigits    uint8
    75  	MinSignificantDigits uint8
    76  
    77  	MinExponentDigits uint8
    78  }
    79  
    80  // RoundSignificantDigits returns the number of significant digits an
    81  // implementation of Convert may round to or n < 0 if there is no maximum or
    82  // a maximum is not recommended.
    83  func (r *RoundingContext) RoundSignificantDigits() (n int) {
    84  	if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 {
    85  		return int(r.MaxSignificantDigits)
    86  	} else if r.isScientific() && r.MaxIntegerDigits == 1 {
    87  		if r.MaxSignificantDigits == 0 ||
    88  			int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) {
    89  			// Note: don't add DigitShift: it is only used for decimals.
    90  			return int(r.MaxFractionDigits) + 1
    91  		}
    92  	}
    93  	return -1
    94  }
    95  
    96  // RoundFractionDigits returns the number of fraction digits an implementation
    97  // of Convert may round to or n < 0 if there is no maximum or a maximum is not
    98  // recommended.
    99  func (r *RoundingContext) RoundFractionDigits() (n int) {
   100  	if r.MinExponentDigits == 0 &&
   101  		r.MaxSignificantDigits == 0 &&
   102  		r.MaxFractionDigits >= 0 {
   103  		return int(r.MaxFractionDigits) + int(r.DigitShift)
   104  	}
   105  	return -1
   106  }
   107  
   108  // SetScale fixes the RoundingContext to a fixed number of fraction digits.
   109  func (r *RoundingContext) SetScale(scale int) {
   110  	r.MinFractionDigits = uint8(scale)
   111  	r.MaxFractionDigits = int16(scale)
   112  }
   113  
   114  func (r *RoundingContext) SetPrecision(prec int) {
   115  	r.MaxSignificantDigits = int16(prec)
   116  }
   117  
   118  func (r *RoundingContext) isScientific() bool {
   119  	return r.MinExponentDigits > 0
   120  }
   121  
   122  func (f *Pattern) needsSep(pos int) bool {
   123  	p := pos - 1
   124  	size := int(f.GroupingSize[0])
   125  	if size == 0 || p == 0 {
   126  		return false
   127  	}
   128  	if p == size {
   129  		return true
   130  	}
   131  	if p -= size; p < 0 {
   132  		return false
   133  	}
   134  	// TODO: make second groupingsize the same as first if 0 so that we can
   135  	// avoid this check.
   136  	if x := int(f.GroupingSize[1]); x != 0 {
   137  		size = x
   138  	}
   139  	return p%size == 0
   140  }
   141  
   142  // A PatternFlag is a bit mask for the flag field of a Pattern.
   143  type PatternFlag uint8
   144  
   145  const (
   146  	AlwaysSign PatternFlag = 1 << iota
   147  	ElideSign              // Use space instead of plus sign. AlwaysSign must be true.
   148  	AlwaysExpSign
   149  	AlwaysDecimalSeparator
   150  	ParenthesisForNegative // Common pattern. Saves space.
   151  
   152  	PadAfterNumber
   153  	PadAfterAffix
   154  
   155  	PadBeforePrefix = 0 // Default
   156  	PadAfterPrefix  = PadAfterAffix
   157  	PadBeforeSuffix = PadAfterNumber
   158  	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
   159  	PadMask         = PadAfterNumber | PadAfterAffix
   160  )
   161  
   162  type parser struct {
   163  	*Pattern
   164  
   165  	leadingSharps int
   166  
   167  	pos            int
   168  	err            error
   169  	doNotTerminate bool
   170  	groupingCount  uint
   171  	hasGroup       bool
   172  	buf            []byte
   173  }
   174  
   175  func (p *parser) setError(err error) {
   176  	if p.err == nil {
   177  		p.err = err
   178  	}
   179  }
   180  
   181  func (p *parser) updateGrouping() {
   182  	if p.hasGroup &&
   183  		0 < p.groupingCount && p.groupingCount < 255 {
   184  		p.GroupingSize[1] = p.GroupingSize[0]
   185  		p.GroupingSize[0] = uint8(p.groupingCount)
   186  	}
   187  	p.groupingCount = 0
   188  	p.hasGroup = true
   189  }
   190  
   191  var (
   192  	// TODO: more sensible and localizeable error messages.
   193  	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
   194  	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
   195  	errInvalidQuote          = errors.New("format: invalid quote")
   196  	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
   197  	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
   198  	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
   199  	errUnexpectedEnd         = errors.New("format: unexpected end of pattern")
   200  )
   201  
   202  // ParsePattern extracts formatting information from a CLDR number pattern.
   203  //
   204  // See https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
   205  func ParsePattern(s string) (f *Pattern, err error) {
   206  	p := parser{Pattern: &Pattern{}}
   207  
   208  	s = p.parseSubPattern(s)
   209  
   210  	if s != "" {
   211  		// Parse negative sub pattern.
   212  		if s[0] != ';' {
   213  			p.setError(errors.New("format: error parsing first sub pattern"))
   214  			return nil, p.err
   215  		}
   216  		neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
   217  		s = neg.parseSubPattern(s[len(";"):])
   218  		p.NegOffset = uint16(len(p.buf))
   219  		p.buf = append(p.buf, neg.buf...)
   220  	}
   221  	if s != "" {
   222  		p.setError(errors.New("format: spurious characters at end of pattern"))
   223  	}
   224  	if p.err != nil {
   225  		return nil, p.err
   226  	}
   227  	if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
   228  		// No prefix or suffixes.
   229  		p.NegOffset = 0
   230  	} else {
   231  		p.Affix = affix
   232  	}
   233  	if p.Increment == 0 {
   234  		p.IncrementScale = 0
   235  	}
   236  	return p.Pattern, nil
   237  }
   238  
   239  func (p *parser) parseSubPattern(s string) string {
   240  	s = p.parsePad(s, PadBeforePrefix)
   241  	s = p.parseAffix(s)
   242  	s = p.parsePad(s, PadAfterPrefix)
   243  
   244  	s = p.parse(p.number, s)
   245  	p.updateGrouping()
   246  
   247  	s = p.parsePad(s, PadBeforeSuffix)
   248  	s = p.parseAffix(s)
   249  	s = p.parsePad(s, PadAfterSuffix)
   250  	return s
   251  }
   252  
   253  func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
   254  	if len(s) >= 2 && s[0] == '*' {
   255  		r, sz := utf8.DecodeRuneInString(s[1:])
   256  		if p.PadRune != 0 {
   257  			p.err = errMultiplePadSpecifiers
   258  		} else {
   259  			p.Flags |= f
   260  			p.PadRune = r
   261  		}
   262  		return s[1+sz:]
   263  	}
   264  	return s
   265  }
   266  
   267  func (p *parser) parseAffix(s string) string {
   268  	x := len(p.buf)
   269  	p.buf = append(p.buf, 0) // placeholder for affix length
   270  
   271  	s = p.parse(p.affix, s)
   272  
   273  	n := len(p.buf) - x - 1
   274  	if n > 0xFF {
   275  		p.setError(errAffixTooLarge)
   276  	}
   277  	p.buf[x] = uint8(n)
   278  	return s
   279  }
   280  
   281  // state implements a state transition. It returns the new state. A state
   282  // function may set an error on the parser or may simply return on an incorrect
   283  // token and let the next phase fail.
   284  type state func(r rune) state
   285  
   286  // parse repeatedly applies a state function on the given string until a
   287  // termination condition is reached.
   288  func (p *parser) parse(fn state, s string) (tail string) {
   289  	for i, r := range s {
   290  		p.doNotTerminate = false
   291  		if fn = fn(r); fn == nil || p.err != nil {
   292  			return s[i:]
   293  		}
   294  		p.FormatWidth++
   295  	}
   296  	if p.doNotTerminate {
   297  		p.setError(errUnexpectedEnd)
   298  	}
   299  	return ""
   300  }
   301  
   302  func (p *parser) affix(r rune) state {
   303  	switch r {
   304  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
   305  		'#', '@', '.', '*', ',', ';':
   306  		return nil
   307  	case '\'':
   308  		p.FormatWidth--
   309  		return p.escapeFirst
   310  	case '%':
   311  		if p.DigitShift != 0 {
   312  			p.setError(errDuplicatePercentSign)
   313  		}
   314  		p.DigitShift = 2
   315  	case '\u2030': // ‰ Per mille
   316  		if p.DigitShift != 0 {
   317  			p.setError(errDuplicatePermilleSign)
   318  		}
   319  		p.DigitShift = 3
   320  		// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
   321  	}
   322  	p.buf = append(p.buf, string(r)...)
   323  	return p.affix
   324  }
   325  
   326  func (p *parser) escapeFirst(r rune) state {
   327  	switch r {
   328  	case '\'':
   329  		p.buf = append(p.buf, "\\'"...)
   330  		return p.affix
   331  	default:
   332  		p.buf = append(p.buf, '\'')
   333  		p.buf = append(p.buf, string(r)...)
   334  	}
   335  	return p.escape
   336  }
   337  
   338  func (p *parser) escape(r rune) state {
   339  	switch r {
   340  	case '\'':
   341  		p.FormatWidth--
   342  		p.buf = append(p.buf, '\'')
   343  		return p.affix
   344  	default:
   345  		p.buf = append(p.buf, string(r)...)
   346  	}
   347  	return p.escape
   348  }
   349  
   350  // number parses a number. The BNF says the integer part should always have
   351  // a '0', but that does not appear to be the case according to the rest of the
   352  // documentation. We will allow having only '#' numbers.
   353  func (p *parser) number(r rune) state {
   354  	switch r {
   355  	case '#':
   356  		p.groupingCount++
   357  		p.leadingSharps++
   358  	case '@':
   359  		p.groupingCount++
   360  		p.leadingSharps = 0
   361  		p.MaxFractionDigits = -1
   362  		return p.sigDigits(r)
   363  	case ',':
   364  		if p.leadingSharps == 0 { // no leading commas
   365  			return nil
   366  		}
   367  		p.updateGrouping()
   368  	case 'E':
   369  		p.MaxIntegerDigits = uint8(p.leadingSharps)
   370  		return p.exponent
   371  	case '.': // allow ".##" etc.
   372  		p.updateGrouping()
   373  		return p.fraction
   374  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   375  		return p.integer(r)
   376  	default:
   377  		return nil
   378  	}
   379  	return p.number
   380  }
   381  
   382  func (p *parser) integer(r rune) state {
   383  	if !('0' <= r && r <= '9') {
   384  		var next state
   385  		switch r {
   386  		case 'E':
   387  			if p.leadingSharps > 0 {
   388  				p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
   389  			}
   390  			next = p.exponent
   391  		case '.':
   392  			next = p.fraction
   393  		case ',':
   394  			next = p.integer
   395  		}
   396  		p.updateGrouping()
   397  		return next
   398  	}
   399  	p.Increment = p.Increment*10 + uint32(r-'0')
   400  	p.groupingCount++
   401  	p.MinIntegerDigits++
   402  	return p.integer
   403  }
   404  
   405  func (p *parser) sigDigits(r rune) state {
   406  	switch r {
   407  	case '@':
   408  		p.groupingCount++
   409  		p.MaxSignificantDigits++
   410  		p.MinSignificantDigits++
   411  	case '#':
   412  		return p.sigDigitsFinal(r)
   413  	case 'E':
   414  		p.updateGrouping()
   415  		return p.normalizeSigDigitsWithExponent()
   416  	default:
   417  		p.updateGrouping()
   418  		return nil
   419  	}
   420  	return p.sigDigits
   421  }
   422  
   423  func (p *parser) sigDigitsFinal(r rune) state {
   424  	switch r {
   425  	case '#':
   426  		p.groupingCount++
   427  		p.MaxSignificantDigits++
   428  	case 'E':
   429  		p.updateGrouping()
   430  		return p.normalizeSigDigitsWithExponent()
   431  	default:
   432  		p.updateGrouping()
   433  		return nil
   434  	}
   435  	return p.sigDigitsFinal
   436  }
   437  
   438  func (p *parser) normalizeSigDigitsWithExponent() state {
   439  	p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
   440  	p.MinFractionDigits = p.MinSignificantDigits - 1
   441  	p.MaxFractionDigits = p.MaxSignificantDigits - 1
   442  	p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
   443  	return p.exponent
   444  }
   445  
   446  func (p *parser) fraction(r rune) state {
   447  	switch r {
   448  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   449  		p.Increment = p.Increment*10 + uint32(r-'0')
   450  		p.IncrementScale++
   451  		p.MinFractionDigits++
   452  		p.MaxFractionDigits++
   453  	case '#':
   454  		p.MaxFractionDigits++
   455  	case 'E':
   456  		if p.leadingSharps > 0 {
   457  			p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
   458  		}
   459  		return p.exponent
   460  	default:
   461  		return nil
   462  	}
   463  	return p.fraction
   464  }
   465  
   466  func (p *parser) exponent(r rune) state {
   467  	switch r {
   468  	case '+':
   469  		// Set mode and check it wasn't already set.
   470  		if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
   471  			break
   472  		}
   473  		p.Flags |= AlwaysExpSign
   474  		p.doNotTerminate = true
   475  		return p.exponent
   476  	case '0':
   477  		p.MinExponentDigits++
   478  		return p.exponent
   479  	}
   480  	// termination condition
   481  	if p.MinExponentDigits == 0 {
   482  		p.setError(errors.New("format: need at least one digit"))
   483  	}
   484  	return nil
   485  }
   486  

View as plain text