...

Source file src/golang.org/x/text/language/parse.go

Documentation: golang.org/x/text/language

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
import (
	"errors"
	"math"
	"sort"
	"strconv"
	"strings"

	"golang.org/x/text/internal/language"
)
    15  
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
//
// It embeds error, so a ValueError can be used wherever an error is
// expected; Subtag additionally identifies the offending part of the input.
type ValueError interface {
	error

	// Subtag returns the subtag for which the error occurred.
	Subtag() string
}
    25  
    26  // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
    27  // failed it returns an error and any part of the tag that could be parsed.
    28  // If parsing succeeded but an unknown value was found, it returns
    29  // ValueError. The Tag returned in this case is just stripped of the unknown
    30  // value. All other values are preserved. It accepts tags in the BCP 47 format
    31  // and extensions to this standard defined in
    32  // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
    33  // The resulting tag is canonicalized using the default canonicalization type.
    34  func Parse(s string) (t Tag, err error) {
    35  	return Default.Parse(s)
    36  }
    37  
    38  // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
    39  // failed it returns an error and any part of the tag that could be parsed.
    40  // If parsing succeeded but an unknown value was found, it returns
    41  // ValueError. The Tag returned in this case is just stripped of the unknown
    42  // value. All other values are preserved. It accepts tags in the BCP 47 format
    43  // and extensions to this standard defined in
    44  // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
    45  // The resulting tag is canonicalized using the canonicalization type c.
    46  func (c CanonType) Parse(s string) (t Tag, err error) {
    47  	defer func() {
    48  		if recover() != nil {
    49  			t = Tag{}
    50  			err = language.ErrSyntax
    51  		}
    52  	}()
    53  
    54  	tt, err := language.Parse(s)
    55  	if err != nil {
    56  		return makeTag(tt), err
    57  	}
    58  	tt, changed := canonicalize(c, tt)
    59  	if changed {
    60  		tt.RemakeString()
    61  	}
    62  	return makeTag(tt), err
    63  }
    64  
    65  // Compose creates a Tag from individual parts, which may be of type Tag, Base,
    66  // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
    67  // Base, Script or Region or slice of type Variant or Extension is passed more
    68  // than once, the latter will overwrite the former. Variants and Extensions are
    69  // accumulated, but if two extensions of the same type are passed, the latter
    70  // will replace the former. For -u extensions, though, the key-type pairs are
    71  // added, where later values overwrite older ones. A Tag overwrites all former
    72  // values and typically only makes sense as the first argument. The resulting
    73  // tag is returned after canonicalizing using the Default CanonType. If one or
    74  // more errors are encountered, one of the errors is returned.
    75  func Compose(part ...interface{}) (t Tag, err error) {
    76  	return Default.Compose(part...)
    77  }
    78  
    79  // Compose creates a Tag from individual parts, which may be of type Tag, Base,
    80  // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
    81  // Base, Script or Region or slice of type Variant or Extension is passed more
    82  // than once, the latter will overwrite the former. Variants and Extensions are
    83  // accumulated, but if two extensions of the same type are passed, the latter
    84  // will replace the former. For -u extensions, though, the key-type pairs are
    85  // added, where later values overwrite older ones. A Tag overwrites all former
    86  // values and typically only makes sense as the first argument. The resulting
    87  // tag is returned after canonicalizing using CanonType c. If one or more errors
    88  // are encountered, one of the errors is returned.
    89  func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
    90  	defer func() {
    91  		if recover() != nil {
    92  			t = Tag{}
    93  			err = language.ErrSyntax
    94  		}
    95  	}()
    96  
    97  	var b language.Builder
    98  	if err = update(&b, part...); err != nil {
    99  		return und, err
   100  	}
   101  	b.Tag, _ = canonicalize(c, b.Tag)
   102  	return makeTag(b.Make()), err
   103  }
   104  
   105  var errInvalidArgument = errors.New("invalid Extension or Variant")
   106  
   107  func update(b *language.Builder, part ...interface{}) (err error) {
   108  	for _, x := range part {
   109  		switch v := x.(type) {
   110  		case Tag:
   111  			b.SetTag(v.tag())
   112  		case Base:
   113  			b.Tag.LangID = v.langID
   114  		case Script:
   115  			b.Tag.ScriptID = v.scriptID
   116  		case Region:
   117  			b.Tag.RegionID = v.regionID
   118  		case Variant:
   119  			if v.variant == "" {
   120  				err = errInvalidArgument
   121  				break
   122  			}
   123  			b.AddVariant(v.variant)
   124  		case Extension:
   125  			if v.s == "" {
   126  				err = errInvalidArgument
   127  				break
   128  			}
   129  			b.SetExt(v.s)
   130  		case []Variant:
   131  			b.ClearVariants()
   132  			for _, v := range v {
   133  				b.AddVariant(v.variant)
   134  			}
   135  		case []Extension:
   136  			b.ClearExtensions()
   137  			for _, e := range v {
   138  				b.SetExt(e.s)
   139  			}
   140  		// TODO: support parsing of raw strings based on morphology or just extensions?
   141  		case error:
   142  			if v != nil {
   143  				err = v
   144  			}
   145  		}
   146  	}
   147  	return
   148  }
   149  
   150  var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
   151  var errTagListTooLarge = errors.New("tag list exceeds max length")
   152  
   153  // ParseAcceptLanguage parses the contents of an Accept-Language header as
   154  // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
   155  // a list of corresponding quality weights. It is more permissive than RFC 2616
   156  // and may return non-nil slices even if the input is not valid.
   157  // The Tags will be sorted by highest weight first and then by first occurrence.
   158  // Tags with a weight of zero will be dropped. An error will be returned if the
   159  // input could not be parsed.
   160  func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
   161  	defer func() {
   162  		if recover() != nil {
   163  			tag = nil
   164  			q = nil
   165  			err = language.ErrSyntax
   166  		}
   167  	}()
   168  
   169  	if strings.Count(s, "-") > 1000 {
   170  		return nil, nil, errTagListTooLarge
   171  	}
   172  
   173  	var entry string
   174  	for s != "" {
   175  		if entry, s = split(s, ','); entry == "" {
   176  			continue
   177  		}
   178  
   179  		entry, weight := split(entry, ';')
   180  
   181  		// Scan the language.
   182  		t, err := Parse(entry)
   183  		if err != nil {
   184  			id, ok := acceptFallback[entry]
   185  			if !ok {
   186  				return nil, nil, err
   187  			}
   188  			t = makeTag(language.Tag{LangID: id})
   189  		}
   190  
   191  		// Scan the optional weight.
   192  		w := 1.0
   193  		if weight != "" {
   194  			weight = consume(weight, 'q')
   195  			weight = consume(weight, '=')
   196  			// consume returns the empty string when a token could not be
   197  			// consumed, resulting in an error for ParseFloat.
   198  			if w, err = strconv.ParseFloat(weight, 32); err != nil {
   199  				return nil, nil, errInvalidWeight
   200  			}
   201  			// Drop tags with a quality weight of 0.
   202  			if w <= 0 {
   203  				continue
   204  			}
   205  		}
   206  
   207  		tag = append(tag, t)
   208  		q = append(q, float32(w))
   209  	}
   210  	sort.Stable(&tagSort{tag, q})
   211  	return tag, q, nil
   212  }
   213  
   214  // consume removes a leading token c from s and returns the result or the empty
   215  // string if there is no such token.
   216  func consume(s string, c byte) string {
   217  	if s == "" || s[0] != c {
   218  		return ""
   219  	}
   220  	return strings.TrimSpace(s[1:])
   221  }
   222  
   223  func split(s string, c byte) (head, tail string) {
   224  	if i := strings.IndexByte(s, c); i >= 0 {
   225  		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
   226  	}
   227  	return strings.TrimSpace(s), ""
   228  }
   229  
// acceptFallback is a hack mapping to deal with a small number of values that
// are not valid language tags but occur in Accept-Language headers (with
// reasonable frequency). ParseAcceptLanguage consults it, keyed by the exact
// entry text, only after the entry has failed to parse as a tag.
var acceptFallback = map[string]language.Language{
	"english": _en,
	"deutsch": _de,
	"italian": _it,
	"french":  _fr,
	"*":       _mul, // defined in the spec to match all languages.
}
   239  
   240  type tagSort struct {
   241  	tag []Tag
   242  	q   []float32
   243  }
   244  
   245  func (s *tagSort) Len() int {
   246  	return len(s.q)
   247  }
   248  
   249  func (s *tagSort) Less(i, j int) bool {
   250  	return s.q[i] > s.q[j]
   251  }
   252  
   253  func (s *tagSort) Swap(i, j int) {
   254  	s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
   255  	s.q[i], s.q[j] = s.q[j], s.q[i]
   256  }
   257  

View as plain text