...

Source file src/golang.org/x/text/internal/format/parser.go

Documentation: golang.org/x/text/internal/format

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package format
     6  
     7  import (
     8  	"reflect"
     9  	"unicode/utf8"
    10  )
    11  
    12  // A Parser parses a format string. The result from the parse are set in the
    13  // struct fields.
    14  type Parser struct {
    15  	Verb rune
    16  
    17  	WidthPresent bool
    18  	PrecPresent  bool
    19  	Minus        bool
    20  	Plus         bool
    21  	Sharp        bool
    22  	Space        bool
    23  	Zero         bool
    24  
    25  	// For the formats %+v %#v, we set the plusV/sharpV flags
    26  	// and clear the plus/sharp flags since %+v and %#v are in effect
    27  	// different, flagless formats set at the top level.
    28  	PlusV  bool
    29  	SharpV bool
    30  
    31  	HasIndex bool
    32  
    33  	Width int
    34  	Prec  int // precision
    35  
    36  	// retain arguments across calls.
    37  	Args []interface{}
    38  	// retain current argument number across calls
    39  	ArgNum int
    40  
    41  	// reordered records whether the format string used argument reordering.
    42  	Reordered bool
    43  	// goodArgNum records whether the most recent reordering directive was valid.
    44  	goodArgNum bool
    45  
    46  	// position info
    47  	format   string
    48  	startPos int
    49  	endPos   int
    50  	Status   Status
    51  }
    52  
    53  // Reset initializes a parser to scan format strings for the given args.
    54  func (p *Parser) Reset(args []interface{}) {
    55  	p.Args = args
    56  	p.ArgNum = 0
    57  	p.startPos = 0
    58  	p.Reordered = false
    59  }
    60  
    61  // Text returns the part of the format string that was parsed by the last call
    62  // to Scan. It returns the original substitution clause if the current scan
    63  // parsed a substitution.
    64  func (p *Parser) Text() string { return p.format[p.startPos:p.endPos] }
    65  
    66  // SetFormat sets a new format string to parse. It does not reset the argument
    67  // count.
    68  func (p *Parser) SetFormat(format string) {
    69  	p.format = format
    70  	p.startPos = 0
    71  	p.endPos = 0
    72  }
    73  
    74  // Status indicates the result type of a call to Scan.
    75  type Status int
    76  
    77  const (
    78  	StatusText Status = iota
    79  	StatusSubstitution
    80  	StatusBadWidthSubstitution
    81  	StatusBadPrecSubstitution
    82  	StatusNoVerb
    83  	StatusBadArgNum
    84  	StatusMissingArg
    85  )
    86  
    87  // ClearFlags reset the parser to default behavior.
    88  func (p *Parser) ClearFlags() {
    89  	p.WidthPresent = false
    90  	p.PrecPresent = false
    91  	p.Minus = false
    92  	p.Plus = false
    93  	p.Sharp = false
    94  	p.Space = false
    95  	p.Zero = false
    96  
    97  	p.PlusV = false
    98  	p.SharpV = false
    99  
   100  	p.HasIndex = false
   101  }
   102  
   103  // Scan scans the next part of the format string and sets the status to
   104  // indicate whether it scanned a string literal, substitution or error.
   105  func (p *Parser) Scan() bool {
   106  	p.Status = StatusText
   107  	format := p.format
   108  	end := len(format)
   109  	if p.endPos >= end {
   110  		return false
   111  	}
   112  	afterIndex := false // previous item in format was an index like [3].
   113  
   114  	p.startPos = p.endPos
   115  	p.goodArgNum = true
   116  	i := p.startPos
   117  	for i < end && format[i] != '%' {
   118  		i++
   119  	}
   120  	if i > p.startPos {
   121  		p.endPos = i
   122  		return true
   123  	}
   124  	// Process one verb
   125  	i++
   126  
   127  	p.Status = StatusSubstitution
   128  
   129  	// Do we have flags?
   130  	p.ClearFlags()
   131  
   132  simpleFormat:
   133  	for ; i < end; i++ {
   134  		c := p.format[i]
   135  		switch c {
   136  		case '#':
   137  			p.Sharp = true
   138  		case '0':
   139  			p.Zero = !p.Minus // Only allow zero padding to the left.
   140  		case '+':
   141  			p.Plus = true
   142  		case '-':
   143  			p.Minus = true
   144  			p.Zero = false // Do not pad with zeros to the right.
   145  		case ' ':
   146  			p.Space = true
   147  		default:
   148  			// Fast path for common case of ascii lower case simple verbs
   149  			// without precision or width or argument indices.
   150  			if 'a' <= c && c <= 'z' && p.ArgNum < len(p.Args) {
   151  				if c == 'v' {
   152  					// Go syntax
   153  					p.SharpV = p.Sharp
   154  					p.Sharp = false
   155  					// Struct-field syntax
   156  					p.PlusV = p.Plus
   157  					p.Plus = false
   158  				}
   159  				p.Verb = rune(c)
   160  				p.ArgNum++
   161  				p.endPos = i + 1
   162  				return true
   163  			}
   164  			// Format is more complex than simple flags and a verb or is malformed.
   165  			break simpleFormat
   166  		}
   167  	}
   168  
   169  	// Do we have an explicit argument index?
   170  	i, afterIndex = p.updateArgNumber(format, i)
   171  
   172  	// Do we have width?
   173  	if i < end && format[i] == '*' {
   174  		i++
   175  		p.Width, p.WidthPresent = p.intFromArg()
   176  
   177  		if !p.WidthPresent {
   178  			p.Status = StatusBadWidthSubstitution
   179  		}
   180  
   181  		// We have a negative width, so take its value and ensure
   182  		// that the minus flag is set
   183  		if p.Width < 0 {
   184  			p.Width = -p.Width
   185  			p.Minus = true
   186  			p.Zero = false // Do not pad with zeros to the right.
   187  		}
   188  		afterIndex = false
   189  	} else {
   190  		p.Width, p.WidthPresent, i = parsenum(format, i, end)
   191  		if afterIndex && p.WidthPresent { // "%[3]2d"
   192  			p.goodArgNum = false
   193  		}
   194  	}
   195  
   196  	// Do we have precision?
   197  	if i+1 < end && format[i] == '.' {
   198  		i++
   199  		if afterIndex { // "%[3].2d"
   200  			p.goodArgNum = false
   201  		}
   202  		i, afterIndex = p.updateArgNumber(format, i)
   203  		if i < end && format[i] == '*' {
   204  			i++
   205  			p.Prec, p.PrecPresent = p.intFromArg()
   206  			// Negative precision arguments don't make sense
   207  			if p.Prec < 0 {
   208  				p.Prec = 0
   209  				p.PrecPresent = false
   210  			}
   211  			if !p.PrecPresent {
   212  				p.Status = StatusBadPrecSubstitution
   213  			}
   214  			afterIndex = false
   215  		} else {
   216  			p.Prec, p.PrecPresent, i = parsenum(format, i, end)
   217  			if !p.PrecPresent {
   218  				p.Prec = 0
   219  				p.PrecPresent = true
   220  			}
   221  		}
   222  	}
   223  
   224  	if !afterIndex {
   225  		i, afterIndex = p.updateArgNumber(format, i)
   226  	}
   227  	p.HasIndex = afterIndex
   228  
   229  	if i >= end {
   230  		p.endPos = i
   231  		p.Status = StatusNoVerb
   232  		return true
   233  	}
   234  
   235  	verb, w := utf8.DecodeRuneInString(format[i:])
   236  	p.endPos = i + w
   237  	p.Verb = verb
   238  
   239  	switch {
   240  	case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
   241  		p.startPos = p.endPos - 1
   242  		p.Status = StatusText
   243  	case !p.goodArgNum:
   244  		p.Status = StatusBadArgNum
   245  	case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb.
   246  		p.Status = StatusMissingArg
   247  		p.ArgNum++
   248  	case verb == 'v':
   249  		// Go syntax
   250  		p.SharpV = p.Sharp
   251  		p.Sharp = false
   252  		// Struct-field syntax
   253  		p.PlusV = p.Plus
   254  		p.Plus = false
   255  		fallthrough
   256  	default:
   257  		p.ArgNum++
   258  	}
   259  	return true
   260  }
   261  
   262  // intFromArg gets the ArgNumth element of Args. On return, isInt reports
   263  // whether the argument has integer type.
   264  func (p *Parser) intFromArg() (num int, isInt bool) {
   265  	if p.ArgNum < len(p.Args) {
   266  		arg := p.Args[p.ArgNum]
   267  		num, isInt = arg.(int) // Almost always OK.
   268  		if !isInt {
   269  			// Work harder.
   270  			switch v := reflect.ValueOf(arg); v.Kind() {
   271  			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
   272  				n := v.Int()
   273  				if int64(int(n)) == n {
   274  					num = int(n)
   275  					isInt = true
   276  				}
   277  			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
   278  				n := v.Uint()
   279  				if int64(n) >= 0 && uint64(int(n)) == n {
   280  					num = int(n)
   281  					isInt = true
   282  				}
   283  			default:
   284  				// Already 0, false.
   285  			}
   286  		}
   287  		p.ArgNum++
   288  		if tooLarge(num) {
   289  			num = 0
   290  			isInt = false
   291  		}
   292  	}
   293  	return
   294  }
   295  
   296  // parseArgNumber returns the value of the bracketed number, minus 1
   297  // (explicit argument numbers are one-indexed but we want zero-indexed).
   298  // The opening bracket is known to be present at format[0].
   299  // The returned values are the index, the number of bytes to consume
   300  // up to the closing paren, if present, and whether the number parsed
   301  // ok. The bytes to consume will be 1 if no closing paren is present.
   302  func parseArgNumber(format string) (index int, wid int, ok bool) {
   303  	// There must be at least 3 bytes: [n].
   304  	if len(format) < 3 {
   305  		return 0, 1, false
   306  	}
   307  
   308  	// Find closing bracket.
   309  	for i := 1; i < len(format); i++ {
   310  		if format[i] == ']' {
   311  			width, ok, newi := parsenum(format, 1, i)
   312  			if !ok || newi != i {
   313  				return 0, i + 1, false
   314  			}
   315  			return width - 1, i + 1, true // arg numbers are one-indexed and skip paren.
   316  		}
   317  	}
   318  	return 0, 1, false
   319  }
   320  
   321  // updateArgNumber returns the next argument to evaluate, which is either the value of the passed-in
   322  // argNum or the value of the bracketed integer that begins format[i:]. It also returns
   323  // the new value of i, that is, the index of the next byte of the format to process.
   324  func (p *Parser) updateArgNumber(format string, i int) (newi int, found bool) {
   325  	if len(format) <= i || format[i] != '[' {
   326  		return i, false
   327  	}
   328  	p.Reordered = true
   329  	index, wid, ok := parseArgNumber(format[i:])
   330  	if ok && 0 <= index && index < len(p.Args) {
   331  		p.ArgNum = index
   332  		return i + wid, true
   333  	}
   334  	p.goodArgNum = false
   335  	return i + wid, ok
   336  }
   337  
   338  // tooLarge reports whether the magnitude of the integer is
   339  // too large to be used as a formatting width or precision.
   340  func tooLarge(x int) bool {
   341  	const max int = 1e6
   342  	return x > max || x < -max
   343  }
   344  
   345  // parsenum converts ASCII to integer.  num is 0 (and isnum is false) if no number present.
   346  func parsenum(s string, start, end int) (num int, isnum bool, newi int) {
   347  	if start >= end {
   348  		return 0, false, end
   349  	}
   350  	for newi = start; newi < end && '0' <= s[newi] && s[newi] <= '9'; newi++ {
   351  		if tooLarge(num) {
   352  			return 0, false, end // Overflow; crazy long number most likely.
   353  		}
   354  		num = num*10 + int(s[newi]-'0')
   355  		isnum = true
   356  	}
   357  	return
   358  }
   359  

View as plain text