...

Source file src/golang.org/x/text/collate/build/builder_test.go

Documentation: golang.org/x/text/collate/build

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package build
     6  
     7  import "testing"
     8  
     9  // cjk returns an implicit collation element for a CJK rune.
    10  func cjk(r rune) []rawCE {
    11  	// A CJK character C is represented in the DUCET as
    12  	//   [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
    13  	// Where AAAA is the most significant 15 bits plus a base value.
    14  	// Any base value will work for the test, so we pick the common value of FB40.
    15  	const base = 0xFB40
    16  	return []rawCE{
    17  		{w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
    18  		{w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
    19  	}
    20  }
    21  
    22  func pCE(p int) []rawCE {
    23  	return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
    24  }
    25  
    26  func pqCE(p, q int) []rawCE {
    27  	return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
    28  }
    29  
    30  func ptCE(p, t int) []rawCE {
    31  	return mkCE([]int{p, defaultSecondary, t, 0}, 0)
    32  }
    33  
    34  func ptcCE(p, t int, ccc uint8) []rawCE {
    35  	return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
    36  }
    37  
    38  func sCE(s int) []rawCE {
    39  	return mkCE([]int{0, s, defaultTertiary, 0}, 0)
    40  }
    41  
    42  func stCE(s, t int) []rawCE {
    43  	return mkCE([]int{0, s, t, 0}, 0)
    44  }
    45  
    46  func scCE(s int, ccc uint8) []rawCE {
    47  	return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
    48  }
    49  
    50  func mkCE(w []int, ccc uint8) []rawCE {
    51  	return []rawCE{rawCE{w, ccc}}
    52  }
    53  
// ducetElem is used to define test data that is used to generate a table.
// newBuilder adds each element to a Builder, using the runes of str and the
// weights (w) of every element of ces.
type ducetElem struct {
	str string  // string the collation elements are defined for
	ces []rawCE // collation elements associated with str
}
    59  
    60  func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
    61  	b := NewBuilder()
    62  	for _, e := range ducet {
    63  		ces := [][]int{}
    64  		for _, ce := range e.ces {
    65  			ces = append(ces, ce.w)
    66  		}
    67  		if err := b.Add([]rune(e.str), ces, nil); err != nil {
    68  			t.Errorf(err.Error())
    69  		}
    70  	}
    71  	b.t = &table{}
    72  	b.root.sort()
    73  	return b
    74  }
    75  
// convertTest describes one case for TestConvertLarge.
type convertTest struct {
	in, out []rawCE // input elements and the expected result of the conversion
	err     bool    // whether convertLargeWeights is expected to return an error
}
    80  
// convLargeTests are the cases run by TestConvertLarge. For entries that have
// err set, only the presence of an error is checked and out is ignored.
var convLargeTests = []convertTest{
	{pCE(0xFB39), pCE(0xFB39), false},
	{cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false},
	{pCE(0xFB40), pCE(0), true},
	{append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
	{pCE(0xFFFE), pCE(illegalOffset), false},
	{pCE(0xFFFF), pCE(illegalOffset + 1), false},
}
    89  
    90  func TestConvertLarge(t *testing.T) {
    91  	for i, tt := range convLargeTests {
    92  		e := new(entry)
    93  		for _, ce := range tt.in {
    94  			e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
    95  		}
    96  		elems, err := convertLargeWeights(e.elems)
    97  		if tt.err {
    98  			if err == nil {
    99  				t.Errorf("%d: expected error; none found", i)
   100  			}
   101  			continue
   102  		} else if err != nil {
   103  			t.Errorf("%d: unexpected error: %v", i, err)
   104  		}
   105  		if !equalCEArrays(elems, tt.out) {
   106  			t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
   107  		}
   108  	}
   109  }
   110  
// Collation element table for simplify tests.
// The order of the entries matters: TestGenColElems builds its table from the
// first five entries only (simplifyTest[:5]).
var simplifyTest = []ducetElem{
	{"\u0300", sCE(30)}, // grave
	{"\u030C", sCE(40)}, // caron
	{"A", ptCE(100, 8)},
	{"D", ptCE(104, 8)},
	{"E", ptCE(105, 8)},
	{"I", ptCE(110, 8)},
	{"z", ptCE(130, 8)},
	{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
	{"\u05B7", sCE(80)},
	{"\u00C0", append(ptCE(100, 8), sCE(30)...)},                                // A with grave, can be removed
	{"\u00C8", append(ptCE(105, 8), sCE(30)...)},                                // E with grave
	{"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])},               // eliminated by NFD
	{"\u00C8\u0302", ptCE(106, 8)},                                              // block previous from simplifying
	{"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
	// no removal: tertiary value of third element is not maxTertiary
	{"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
}
   130  
// genColTests pairs input strings with the collation elements that
// TestGenColElems expects genColElems to return for them. The builder used by
// that test holds only the first five entries of simplifyTest.
var genColTests = []ducetElem{
	{"\uFA70", pqCE(0x1FA70, 0xFA70)},
	{"A\u0300", append(ptCE(100, 8), sCE(30)...)},
	{"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
	{"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
}
   137  
   138  func TestGenColElems(t *testing.T) {
   139  	b := newBuilder(t, simplifyTest[:5])
   140  
   141  	for i, tt := range genColTests {
   142  		res := b.root.genColElems(tt.str)
   143  		if !equalCEArrays(tt.ces, res) {
   144  			t.Errorf("%d: result %X; want %X", i, res, tt.ces)
   145  		}
   146  	}
   147  }
   148  
   149  type strArray []string
   150  
   151  func (sa strArray) contains(s string) bool {
   152  	for _, e := range sa {
   153  		if e == s {
   154  			return true
   155  		}
   156  	}
   157  	return false
   158  }
   159  
// simplifyRemoved lists the strings of simplifyTest that TestSimplify does not
// look up after simplify has run.
var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}

// simplifyMarked lists the strings whose entries TestSimplify expects to have
// decompose set after simplify has run; all other entries must have it unset.
var simplifyMarked = strArray{"\u01C5"}
   162  
   163  func TestSimplify(t *testing.T) {
   164  	b := newBuilder(t, simplifyTest)
   165  	o := &b.root
   166  	simplify(o)
   167  
   168  	for i, tt := range simplifyTest {
   169  		if simplifyRemoved.contains(tt.str) {
   170  			continue
   171  		}
   172  		e := o.find(tt.str)
   173  		if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
   174  			t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
   175  			break
   176  		}
   177  	}
   178  	var i, k int
   179  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   180  		gold := simplifyMarked.contains(e.str)
   181  		if gold {
   182  			k++
   183  		}
   184  		if gold != e.decompose {
   185  			t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
   186  		}
   187  		i++
   188  	}
   189  	if k != len(simplifyMarked) {
   190  		t.Errorf(" an entry that should be marked as decompose was deleted")
   191  	}
   192  }
   193  
// expandTest is the table used by TestExpand. Every entry has more than one
// collation element. TestExpand walks the builder's entries in step with this
// table, so it relies on the order of the entries here.
var expandTest = []ducetElem{
	{"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
	{"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
	{"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
	{"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
	{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
	{"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
}
   202  
   203  func TestExpand(t *testing.T) {
   204  	const (
   205  		totalExpansions = 5
   206  		totalElements   = 2 + 2 + 2 + 3 + 3 + totalExpansions
   207  	)
   208  	b := newBuilder(t, expandTest)
   209  	o := &b.root
   210  	b.processExpansions(o)
   211  
   212  	e := o.front()
   213  	for _, tt := range expandTest {
   214  		exp := b.t.ExpandElem[e.expansionIndex:]
   215  		if int(exp[0]) != len(tt.ces) {
   216  			t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
   217  		}
   218  		exp = exp[1:]
   219  		for j, w := range tt.ces {
   220  			if ce, _ := makeCE(w); exp[j] != ce {
   221  				t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
   222  			}
   223  		}
   224  		e, _ = e.nextIndexed()
   225  	}
   226  	// Verify uniquing.
   227  	if len(b.t.ExpandElem) != totalElements {
   228  		t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements)
   229  	}
   230  }
   231  
// contractTest is the table used by TestContract. It holds three groups of
// strings, starting with "a", "b" and "A" respectively, plus the single entry
// "z".
var contractTest = []ducetElem{
	{"abc", pCE(102)},
	{"abd", pCE(103)},
	{"a", pCE(100)},
	{"ab", pCE(101)},
	{"ac", pCE(104)},
	{"bcd", pCE(202)},
	{"b", pCE(200)},
	{"bc", pCE(201)},
	{"bd", pCE(203)},
	// shares suffixes with a*
	{"Ab", pCE(301)},
	{"A", pCE(300)},
	{"Ac", pCE(304)},
	{"Abc", pCE(302)},
	{"Abd", pCE(303)},
	// starter to be ignored
	{"z", pCE(1000)},
}
   251  
   252  func TestContract(t *testing.T) {
   253  	const (
   254  		totalElements = 5 + 5 + 4
   255  	)
   256  	b := newBuilder(t, contractTest)
   257  	o := &b.root
   258  	b.processContractions(o)
   259  
   260  	indexMap := make(map[int]bool)
   261  	handleMap := make(map[rune]*entry)
   262  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   263  		if e.contractionHandle.n > 0 {
   264  			handleMap[e.runes[0]] = e
   265  			indexMap[e.contractionHandle.index] = true
   266  		}
   267  	}
   268  	// Verify uniquing.
   269  	if len(indexMap) != 2 {
   270  		t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
   271  	}
   272  	for _, tt := range contractTest {
   273  		e, ok := handleMap[[]rune(tt.str)[0]]
   274  		if !ok {
   275  			continue
   276  		}
   277  		str := tt.str[1:]
   278  		offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
   279  		if len(str) != n {
   280  			t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
   281  		}
   282  		ce := b.t.ContractElem[offset+e.contractionIndex]
   283  		if want, _ := makeCE(tt.ces[0]); want != ce {
   284  			t.Errorf("%s: element %X; want %X", tt.str, ce, want)
   285  		}
   286  	}
   287  	if len(b.t.ContractElem) != totalElements {
   288  		t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements)
   289  	}
   290  }
   291  

View as plain text