...

Source file src/golang.org/x/text/collate/option.go

Documentation: golang.org/x/text/collate

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package collate
     6  
     7  import (
     8  	"sort"
     9  
    10  	"golang.org/x/text/internal/colltab"
    11  	"golang.org/x/text/language"
    12  	"golang.org/x/text/unicode/norm"
    13  )
    14  
    15  // newCollator creates a new collator with default options configured.
    16  func newCollator(t colltab.Weighter) *Collator {
    17  	// Initialize a collator with default options.
    18  	c := &Collator{
    19  		options: options{
    20  			ignore: [colltab.NumLevels]bool{
    21  				colltab.Quaternary: true,
    22  				colltab.Identity:   true,
    23  			},
    24  			f: norm.NFD,
    25  			t: t,
    26  		},
    27  	}
    28  
    29  	// TODO: store vt in tags or remove.
    30  	c.variableTop = t.Top()
    31  
    32  	return c
    33  }
    34  
    35  // An Option is used to change the behavior of a Collator. Options override the
    36  // settings passed through the locale identifier.
    37  type Option struct {
    38  	priority int // ordering key: setOptions applies options from lowest to highest priority
    39  	f        func(o *options) // applies this option's change to the options being configured
    40  }
    41  
    42  type prioritizedOptions []Option
    43  
    44  func (p prioritizedOptions) Len() int {
    45  	return len(p)
    46  }
    47  
    48  func (p prioritizedOptions) Swap(i, j int) {
    49  	p[i], p[j] = p[j], p[i]
    50  }
    51  
    52  func (p prioritizedOptions) Less(i, j int) bool {
    53  	return p[i].priority < p[j].priority
    54  }
    55  
    56  type options struct { // collation settings; modified by the Option functions and by setFromTag
    57  	// ignore specifies which levels to ignore.
    58  	ignore [colltab.NumLevels]bool
    59  
    60  	// caseLevel is true if there is an additional level of case matching
    61  	// between the secondary and tertiary levels.
    62  	caseLevel bool
    63  
    64  	// backwards specifies the order of sorting at the secondary level.
    65  	// This option exists predominantly to support reverse sorting of accents in French.
    66  	backwards bool
    67  
    68  	// numeric specifies whether any sequence of decimal digits (category is Nd)
    69  	// is sorted at a primary level with its numeric value.
    70  	// For example, "A-21" < "A-123".
    71  	// This option is set by wrapping the main Weighter with NewNumericWeighter.
    72  	numeric bool
    73  
    74  	// alternate specifies an alternative handling of variables.
    75  	alternate alternateHandling
    76  
    77  	// variableTop is the largest primary value that is considered to be
    78  	// variable.
    79  	variableTop uint32
    80  	// t is the Weighter handed to newCollator; its Top() seeds variableTop.
    81  	t colltab.Weighter
    82  	// f is the normalization form; newCollator initializes it to norm.NFD.
    83  	f norm.Form
    84  }
    85  
    86  func (o *options) setOptions(opts []Option) {
    87  	sort.Sort(prioritizedOptions(opts))
    88  	for _, x := range opts {
    89  		x.f(o)
    90  	}
    91  }
    92  
    93  // OptionsFromTag extracts the BCP47 collation options from the tag and
    94  // configures a collator accordingly. These options are set before any other
    95  // option.
    96  func OptionsFromTag(t language.Tag) Option {
    97  	return Option{0, func(o *options) {
    98  		o.setFromTag(t)
    99  	}}
   100  }
   101  
   102  func (o *options) setFromTag(t language.Tag) {
   103  	o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
   104  	o.backwards = ldmlBool(t, o.backwards, "kb")
   105  	o.numeric = ldmlBool(t, o.numeric, "kn")
   106  
   107  	// Extract settings from the BCP47 u extension.
   108  	switch t.TypeForKey("ks") { // strength
   109  	case "level1":
   110  		o.ignore[colltab.Secondary] = true
   111  		o.ignore[colltab.Tertiary] = true
   112  	case "level2":
   113  		o.ignore[colltab.Tertiary] = true
   114  	case "level3", "":
   115  		// The default.
   116  	case "level4":
   117  		o.ignore[colltab.Quaternary] = false
   118  	case "identic":
   119  		o.ignore[colltab.Quaternary] = false
   120  		o.ignore[colltab.Identity] = false
   121  	}
   122  
   123  	switch t.TypeForKey("ka") {
   124  	case "shifted":
   125  		o.alternate = altShifted
   126  	// The following two types are not official BCP47, but we support them to
   127  	// give access to this otherwise hidden functionality. The name blanked is
   128  	// derived from the LDML name blanked and posix reflects the main use of
   129  	// the shift-trimmed option.
   130  	case "blanked":
   131  		o.alternate = altBlanked
   132  	case "posix":
   133  		o.alternate = altShiftTrimmed
   134  	}
   135  
   136  	// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").
   137  
   138  	// Not used:
   139  	// - normalization ("kk", not necessary for this implementation)
   140  	// - hiraganaQuatenary ("kh", obsolete)
   141  }
   142  
   143  func ldmlBool(t language.Tag, old bool, key string) bool {
   144  	switch t.TypeForKey(key) {
   145  	case "true":
   146  		return true
   147  	case "false":
   148  		return false
   149  	default:
   150  		return old
   151  	}
   152  }
   153  
   154  var ( // Each Option literal below is {priority, function}; setOptions applies lower priorities first.
   155  	// IgnoreCase sets case-insensitive comparison.
   156  	IgnoreCase Option = ignoreCase
   157  	ignoreCase        = Option{3, ignoreCaseF}
   158  
   159  	// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
   160  	IgnoreDiacritics Option = ignoreDiacritics
   161  	ignoreDiacritics        = Option{3, ignoreDiacriticsF}
   162  
   163  	// IgnoreWidth causes full-width characters to match their half-width
   164  	// equivalents.
   165  	IgnoreWidth Option = ignoreWidth
   166  	ignoreWidth        = Option{2, ignoreWidthF} // lower priority than ignoreCase, so ignoreCaseF runs later and its caseLevel = false wins
   167  
   168  	// Loose sets the collator to ignore diacritics, case and width.
   169  	Loose Option = loose
   170  	loose        = Option{4, looseF}
   171  
   172  	// Force forces an ordering of strings that are equivalent but not equal.
   173  	Force Option = force
   174  	force        = Option{5, forceF}
   175  
   176  	// Numeric specifies that numbers should sort numerically ("2" < "12").
   177  	Numeric Option = numeric
   178  	numeric        = Option{5, numericF}
   179  )
   180  
   181  func ignoreWidthF(o *options) {
   182  	o.ignore[colltab.Tertiary] = true
   183  	o.caseLevel = true
   184  }
   185  
   186  func ignoreDiacriticsF(o *options) {
   187  	o.ignore[colltab.Secondary] = true
   188  }
   189  
   190  func ignoreCaseF(o *options) {
   191  	o.ignore[colltab.Tertiary] = true
   192  	o.caseLevel = false
   193  }
   194  
   195  func looseF(o *options) {
   196  	ignoreWidthF(o)
   197  	ignoreDiacriticsF(o)
   198  	ignoreCaseF(o)
   199  }
   200  
   201  func forceF(o *options) {
   202  	o.ignore[colltab.Identity] = false
   203  }
   204  
   205  func numericF(o *options) { o.numeric = true }
   206  
   207  // Reorder overrides the pre-defined ordering of scripts and character sets. It is not implemented yet and panics when called.
   208  func Reorder(s ...string) Option {
   209  	// TODO: need fractional weights to implement this.
   210  	panic("TODO: implement") // unconditional: s is never inspected and no Option is ever returned
   211  }
   212  
   213  // TODO: consider making these public again. These options cannot be fully
   214  // specified in BCP47, so an API interface seems warranted. Still a higher-level
   215  // interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)
   216  
   217  // alternateHandling identifies the various ways in which variables are handled.
   218  // A rune with a primary weight lower than the variable top is considered a
   219  // variable.
   220  // See https://www.unicode.org/reports/tr10/#Variable_Weighting for details.
   221  type alternateHandling int
   222  
   223  const (
   224  	// altNonIgnorable turns off special handling of variables.
   225  	altNonIgnorable alternateHandling = iota // zero value, so it is in effect unless another mode is selected
   226  
   227  	// altBlanked sets variables and all subsequent primary ignorables to be
   228  	// ignorable at all levels. This is identical to removing all variables
   229  	// and subsequent primary ignorables from the input.
   230  	altBlanked // selected by the unofficial "ka" type "blanked" in setFromTag
   231  
   232  	// altShifted sets variables to be ignorable for levels one through three and
   233  	// adds a fourth level based on the values of the ignored levels.
   234  	altShifted // selected by the "ka" type "shifted" in setFromTag
   235  
   236  	// altShiftTrimmed is a slight variant of altShifted that is used to
   237  	// emulate POSIX.
   238  	altShiftTrimmed // selected by the unofficial "ka" type "posix" in setFromTag
   239  )
   240  

View as plain text