...

Source file src/golang.org/x/text/collate/collate_test.go

Documentation: golang.org/x/text/collate

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package collate
     6  
     7  import (
     8  	"bytes"
     9  	"testing"
    10  
    11  	"golang.org/x/text/internal/colltab"
    12  	"golang.org/x/text/language"
    13  )
    14  
    15  type weightsTest struct {
    16  	opt     opts
    17  	in, out ColElems
    18  }
    19  
    20  type opts struct {
    21  	lev int
    22  	alt alternateHandling
    23  	top int
    24  
    25  	backwards bool
    26  	caseLevel bool
    27  }
    28  
    29  // ignore returns an initialized boolean array based on the given Level.
    30  // A negative value means using the default setting of quaternary.
    31  func ignore(level colltab.Level) (ignore [colltab.NumLevels]bool) {
    32  	if level < 0 {
    33  		level = colltab.Quaternary
    34  	}
    35  	for i := range ignore {
    36  		ignore[i] = level < colltab.Level(i)
    37  	}
    38  	return ignore
    39  }
    40  
    41  func makeCE(w []int) colltab.Elem {
    42  	ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
    43  	if err != nil {
    44  		panic(err)
    45  	}
    46  	return ce
    47  }
    48  
    49  func (o opts) collator() *Collator {
    50  	c := &Collator{
    51  		options: options{
    52  			ignore:      ignore(colltab.Level(o.lev - 1)),
    53  			alternate:   o.alt,
    54  			backwards:   o.backwards,
    55  			caseLevel:   o.caseLevel,
    56  			variableTop: uint32(o.top),
    57  		},
    58  	}
    59  	return c
    60  }
    61  
    62  const (
    63  	maxQ = 0x1FFFFF
    64  )
    65  
    66  func wpq(p, q int) Weights {
    67  	return W(p, defaults.Secondary, defaults.Tertiary, q)
    68  }
    69  
    70  func wsq(s, q int) Weights {
    71  	return W(0, s, defaults.Tertiary, q)
    72  }
    73  
    74  func wq(q int) Weights {
    75  	return W(0, 0, 0, q)
    76  }
    77  
    78  var zero = W(0, 0, 0, 0)
    79  
    80  var processTests = []weightsTest{
    81  	// Shifted
    82  	{ // simple sequence of non-variables
    83  		opt: opts{alt: altShifted, top: 100},
    84  		in:  ColElems{W(200), W(300), W(400)},
    85  		out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
    86  	},
    87  	{ // first is a variable
    88  		opt: opts{alt: altShifted, top: 250},
    89  		in:  ColElems{W(200), W(300), W(400)},
    90  		out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
    91  	},
    92  	{ // all but first are variable
    93  		opt: opts{alt: altShifted, top: 999},
    94  		in:  ColElems{W(1000), W(200), W(300), W(400)},
    95  		out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
    96  	},
    97  	{ // first is a modifier
    98  		opt: opts{alt: altShifted, top: 999},
    99  		in:  ColElems{W(0, 10), W(1000)},
   100  		out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
   101  	},
   102  	{ // primary ignorables
   103  		opt: opts{alt: altShifted, top: 250},
   104  		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
   105  		out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
   106  	},
   107  	{ // secondary ignorables
   108  		opt: opts{alt: altShifted, top: 250},
   109  		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
   110  		out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
   111  	},
   112  	{ // tertiary ignorables, no change
   113  		opt: opts{alt: altShifted, top: 250},
   114  		in:  ColElems{W(200), zero, W(300), zero, W(400)},
   115  		out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
   116  	},
   117  
   118  	// ShiftTrimmed (same as Shifted)
   119  	{ // simple sequence of non-variables
   120  		opt: opts{alt: altShiftTrimmed, top: 100},
   121  		in:  ColElems{W(200), W(300), W(400)},
   122  		out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
   123  	},
   124  	{ // first is a variable
   125  		opt: opts{alt: altShiftTrimmed, top: 250},
   126  		in:  ColElems{W(200), W(300), W(400)},
   127  		out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
   128  	},
   129  	{ // all but first are variable
   130  		opt: opts{alt: altShiftTrimmed, top: 999},
   131  		in:  ColElems{W(1000), W(200), W(300), W(400)},
   132  		out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
   133  	},
   134  	{ // first is a modifier
   135  		opt: opts{alt: altShiftTrimmed, top: 999},
   136  		in:  ColElems{W(0, 10), W(1000)},
   137  		out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
   138  	},
   139  	{ // primary ignorables
   140  		opt: opts{alt: altShiftTrimmed, top: 250},
   141  		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
   142  		out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
   143  	},
   144  	{ // secondary ignorables
   145  		opt: opts{alt: altShiftTrimmed, top: 250},
   146  		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
   147  		out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
   148  	},
   149  	{ // tertiary ignorables, no change
   150  		opt: opts{alt: altShiftTrimmed, top: 250},
   151  		in:  ColElems{W(200), zero, W(300), zero, W(400)},
   152  		out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
   153  	},
   154  
   155  	// Blanked
   156  	{ // simple sequence of non-variables
   157  		opt: opts{alt: altBlanked, top: 100},
   158  		in:  ColElems{W(200), W(300), W(400)},
   159  		out: ColElems{W(200), W(300), W(400)},
   160  	},
   161  	{ // first is a variable
   162  		opt: opts{alt: altBlanked, top: 250},
   163  		in:  ColElems{W(200), W(300), W(400)},
   164  		out: ColElems{zero, W(300), W(400)},
   165  	},
   166  	{ // all but first are variable
   167  		opt: opts{alt: altBlanked, top: 999},
   168  		in:  ColElems{W(1000), W(200), W(300), W(400)},
   169  		out: ColElems{W(1000), zero, zero, zero},
   170  	},
   171  	{ // first is a modifier
   172  		opt: opts{alt: altBlanked, top: 999},
   173  		in:  ColElems{W(0, 10), W(1000)},
   174  		out: ColElems{W(0, 10), W(1000)},
   175  	},
   176  	{ // primary ignorables
   177  		opt: opts{alt: altBlanked, top: 250},
   178  		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
   179  		out: ColElems{zero, zero, W(300), W(0, 15), W(400)},
   180  	},
   181  	{ // secondary ignorables
   182  		opt: opts{alt: altBlanked, top: 250},
   183  		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
   184  		out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)},
   185  	},
   186  	{ // tertiary ignorables, no change
   187  		opt: opts{alt: altBlanked, top: 250},
   188  		in:  ColElems{W(200), zero, W(300), zero, W(400)},
   189  		out: ColElems{zero, zero, W(300), zero, W(400)},
   190  	},
   191  
   192  	// Non-ignorable: input is always equal to output.
   193  	{ // all but first are variable
   194  		opt: opts{alt: altNonIgnorable, top: 999},
   195  		in:  ColElems{W(1000), W(200), W(300), W(400)},
   196  		out: ColElems{W(1000), W(200), W(300), W(400)},
   197  	},
   198  	{ // primary ignorables
   199  		opt: opts{alt: altNonIgnorable, top: 250},
   200  		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
   201  		out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
   202  	},
   203  	{ // secondary ignorables
   204  		opt: opts{alt: altNonIgnorable, top: 250},
   205  		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
   206  		out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
   207  	},
   208  	{ // tertiary ignorables, no change
   209  		opt: opts{alt: altNonIgnorable, top: 250},
   210  		in:  ColElems{W(200), zero, W(300), zero, W(400)},
   211  		out: ColElems{W(200), zero, W(300), zero, W(400)},
   212  	},
   213  }
   214  
   215  func TestProcessWeights(t *testing.T) {
   216  	for i, tt := range processTests {
   217  		in := convertFromWeights(tt.in)
   218  		out := convertFromWeights(tt.out)
   219  		processWeights(tt.opt.alt, uint32(tt.opt.top), in)
   220  		for j, w := range in {
   221  			if w != out[j] {
   222  				t.Errorf("%d: Weights %d was %v; want %v", i, j, w, out[j])
   223  			}
   224  		}
   225  	}
   226  }
   227  
   228  type keyFromElemTest struct {
   229  	opt opts
   230  	in  ColElems
   231  	out []byte
   232  }
   233  
   234  var defS = byte(defaults.Secondary)
   235  var defT = byte(defaults.Tertiary)
   236  
   237  const sep = 0 // separator byte
   238  
   239  var keyFromElemTests = []keyFromElemTest{
   240  	{ // simple primary and secondary weights.
   241  		opts{alt: altShifted},
   242  		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
   243  		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
   244  			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
   245  			sep, sep, defT, defT, defT, defT, // tertiary
   246  			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
   247  		},
   248  	},
   249  	{ // same as first, but with zero element that need to be removed
   250  		opts{alt: altShifted},
   251  		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
   252  		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
   253  			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
   254  			sep, sep, defT, defT, defT, defT, // tertiary
   255  			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
   256  		},
   257  	},
   258  	{ // same as first, with large primary values
   259  		opts{alt: altShifted},
   260  		ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)},
   261  		[]byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
   262  			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
   263  			sep, sep, defT, defT, defT, defT, // tertiary
   264  			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
   265  		},
   266  	},
   267  	{ // same as first, but with the secondary level backwards
   268  		opts{alt: altShifted, backwards: true},
   269  		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
   270  		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
   271  			sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
   272  			sep, sep, defT, defT, defT, defT, // tertiary
   273  			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
   274  		},
   275  	},
   276  	{ // same as first, ignoring quaternary level
   277  		opts{alt: altShifted, lev: 3},
   278  		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
   279  		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
   280  			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
   281  			sep, sep, defT, defT, defT, defT, // tertiary
   282  		},
   283  	},
   284  	{ // same as first, ignoring tertiary level
   285  		opts{alt: altShifted, lev: 2},
   286  		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
   287  		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
   288  			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
   289  		},
   290  	},
   291  	{ // same as first, ignoring secondary level
   292  		opts{alt: altShifted, lev: 1},
   293  		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
   294  		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
   295  	},
   296  	{ // simple primary and secondary weights.
   297  		opts{alt: altShiftTrimmed, top: 0x250},
   298  		ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)},
   299  		[]byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
   300  			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
   301  			sep, sep, defT, defT, defT, defT, // tertiary
   302  			sep, 0xFF, 0x2, 0, // quaternary
   303  		},
   304  	},
   305  	{ // as first, primary with case level enabled
   306  		opts{alt: altShifted, lev: 1, caseLevel: true},
   307  		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
   308  		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
   309  			sep, sep, // secondary
   310  			sep, sep, defT, defT, defT, defT, // tertiary
   311  		},
   312  	},
   313  }
   314  
   315  func TestKeyFromElems(t *testing.T) {
   316  	buf := Buffer{}
   317  	for i, tt := range keyFromElemTests {
   318  		buf.Reset()
   319  		in := convertFromWeights(tt.in)
   320  		processWeights(tt.opt.alt, uint32(tt.opt.top), in)
   321  		tt.opt.collator().keyFromElems(&buf, in)
   322  		res := buf.key
   323  		if len(res) != len(tt.out) {
   324  			t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out)
   325  		}
   326  		n := len(res)
   327  		if len(tt.out) < n {
   328  			n = len(tt.out)
   329  		}
   330  		for j, c := range res[:n] {
   331  			if c != tt.out[j] {
   332  				t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j])
   333  			}
   334  		}
   335  	}
   336  }
   337  
   338  func TestGetColElems(t *testing.T) {
   339  	for i, tt := range appendNextTests {
   340  		c, err := makeTable(tt.in)
   341  		if err != nil {
   342  			// error is reported in TestAppendNext
   343  			continue
   344  		}
   345  		// Create one large test per table
   346  		str := make([]byte, 0, 4000)
   347  		out := ColElems{}
   348  		for len(str) < 3000 {
   349  			for _, chk := range tt.chk {
   350  				str = append(str, chk.in[:chk.n]...)
   351  				out = append(out, chk.out...)
   352  			}
   353  		}
   354  		for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
   355  			out := convertFromWeights(chk.out)
   356  			ce := c.getColElems([]byte(chk.in)[:chk.n])
   357  			if len(ce) != len(out) {
   358  				t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out))
   359  				continue
   360  			}
   361  			cnt := 0
   362  			for k, w := range ce {
   363  				w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
   364  				if w != out[k] {
   365  					t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
   366  					cnt++
   367  				}
   368  				if cnt > 10 {
   369  					break
   370  				}
   371  			}
   372  		}
   373  	}
   374  }
   375  
   376  type keyTest struct {
   377  	in  string
   378  	out []byte
   379  }
   380  
   381  var keyTests = []keyTest{
   382  	{"abc",
   383  		[]byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255},
   384  	},
   385  	{"a\u0301",
   386  		[]byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255},
   387  	},
   388  	{"aaaaa",
   389  		[]byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0,
   390  			0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0,
   391  			2, 2, 2, 2, 2, 0,
   392  			255, 255, 255, 255, 255,
   393  		},
   394  	},
   395  	// Issue 16391: incomplete rune at end of UTF-8 sequence.
   396  	{"\xc2", []byte{133, 255, 253, 0, 0, 0, 32, 0, 0, 2, 0, 255}},
   397  	{"\xc2a", []byte{133, 255, 253, 0, 100, 0, 0, 0, 32, 0, 32, 0, 0, 2, 2, 0, 255, 255}},
   398  }
   399  
   400  func TestKey(t *testing.T) {
   401  	c, _ := makeTable(appendNextTests[4].in)
   402  	c.alternate = altShifted
   403  	c.ignore = ignore(colltab.Quaternary)
   404  	buf := Buffer{}
   405  	keys1 := [][]byte{}
   406  	keys2 := [][]byte{}
   407  	for _, tt := range keyTests {
   408  		keys1 = append(keys1, c.Key(&buf, []byte(tt.in)))
   409  		keys2 = append(keys2, c.KeyFromString(&buf, tt.in))
   410  	}
   411  	// Separate generation from testing to ensure buffers are not overwritten.
   412  	for i, tt := range keyTests {
   413  		if !bytes.Equal(keys1[i], tt.out) {
   414  			t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, keys1[i], tt.out)
   415  		}
   416  		if !bytes.Equal(keys2[i], tt.out) {
   417  			t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, keys2[i], tt.out)
   418  		}
   419  	}
   420  }
   421  
   422  type compareTest struct {
   423  	a, b string
   424  	res  int // comparison result
   425  }
   426  
   427  var compareTests = []compareTest{
   428  	{"a\u0301", "a", 1},
   429  	{"a\u0301b", "ab", 1},
   430  	{"a", "a\u0301", -1},
   431  	{"ab", "a\u0301b", -1},
   432  	{"bc", "a\u0301c", 1},
   433  	{"ab", "aB", -1},
   434  	{"a\u0301", "a\u0301", 0},
   435  	{"a", "a", 0},
   436  	// Only clip prefixes of whole runes.
   437  	{"\u302E", "\u302F", 1},
   438  	// Don't clip prefixes when last rune of prefix may be part of contraction.
   439  	{"a\u035E", "a\u0301\u035F", -1},
   440  	{"a\u0301\u035Fb", "a\u0301\u035F", -1},
   441  }
   442  
   443  func TestCompare(t *testing.T) {
   444  	c, _ := makeTable(appendNextTests[4].in)
   445  	for i, tt := range compareTests {
   446  		if res := c.Compare([]byte(tt.a), []byte(tt.b)); res != tt.res {
   447  			t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
   448  		}
   449  		if res := c.CompareString(tt.a, tt.b); res != tt.res {
   450  			t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
   451  		}
   452  	}
   453  }
   454  
   455  func TestNumeric(t *testing.T) {
   456  	c := New(language.English, Loose, Numeric)
   457  
   458  	for i, tt := range []struct {
   459  		a, b string
   460  		want int
   461  	}{
   462  		{"1", "2", -1},
   463  		{"2", "12", -1},
   464  		{"2", "12", -1}, // Fullwidth is sorted as usual.
   465  		{"₂", "₁₂", 1},  // Subscript is not sorted as numbers.
   466  		{"②", "①②", 1},  // Circled is not sorted as numbers.
   467  		{ // Imperial Aramaic, is not sorted as number.
   468  			"\U00010859",
   469  			"\U00010858\U00010859",
   470  			1,
   471  		},
   472  		{"12", "2", 1},
   473  		{"A-1", "A-2", -1},
   474  		{"A-2", "A-12", -1},
   475  		{"A-12", "A-2", 1},
   476  		{"A-0001", "A-1", 0},
   477  	} {
   478  		if got := c.CompareString(tt.a, tt.b); got != tt.want {
   479  			t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
   480  		}
   481  	}
   482  }
   483  

View as plain text