...

Source file src/golang.org/x/text/collate/table_test.go

Documentation: golang.org/x/text/collate

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package collate
     6  
     7  import (
     8  	"testing"
     9  
    10  	"golang.org/x/text/collate/build"
    11  	"golang.org/x/text/internal/colltab"
    12  	"golang.org/x/text/unicode/norm"
    13  )
    14  
    15  type ColElems []Weights
    16  
    17  type input struct {
    18  	str string
    19  	ces [][]int
    20  }
    21  
    22  type check struct {
    23  	in  string
    24  	n   int
    25  	out ColElems
    26  }
    27  
    28  type tableTest struct {
    29  	in  []input
    30  	chk []check
    31  }
    32  
    33  func w(ce ...int) Weights {
    34  	return W(ce...)
    35  }
    36  
    37  var defaults = w(0)
    38  
    39  func pt(p, t int) []int {
    40  	return []int{p, defaults.Secondary, t}
    41  }
    42  
    43  func makeTable(in []input) (*Collator, error) {
    44  	b := build.NewBuilder()
    45  	for _, r := range in {
    46  		if e := b.Add([]rune(r.str), r.ces, nil); e != nil {
    47  			panic(e)
    48  		}
    49  	}
    50  	t, err := b.Build()
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  	return NewFromTable(t), nil
    55  }
    56  
    57  // modSeq holds a sequence of modifiers in increasing order of CCC long enough
    58  // to cause a segment overflow if not handled correctly. The last rune in this
    59  // list has a CCC of 214.
        // Prefixes of this list are used by the segment-overflow checks in
        // appendNextTests, so the order of the runes must not be changed.
    60  var modSeq = []rune{
    61  	0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BB,
    62  	0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0xFB1E, 0x064B, 0x064C, 0x064D, 0x064E,
    63  	0x064F, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711, 0x0C55, 0x0C56, 0x0E38, 0x0E48,
    64  	0x0EB8, 0x0EC8, 0x0F71, 0x0F72, 0x0F74, 0x0321, 0x1DCE,
    65  }
    66  
    67  var mods []input
    68  var modW = func() ColElems {
    69  	ws := ColElems{}
    70  	for _, r := range modSeq {
    71  		rune := norm.NFC.PropertiesString(string(r))
    72  		ws = append(ws, w(0, int(rune.CCC())))
    73  		mods = append(mods, input{string(r), [][]int{{0, int(rune.CCC())}}})
    74  	}
    75  	return ws
    76  }()
    77  
        // appendNextTests drives TestAppendNext. Each entry lists the inputs used to
        // build a collation table and the checks run against that table: for every
        // check, AppendNext is called on in, and the test compares the number of
        // bytes consumed with n and the returned collation elements with out.
    78  var appendNextTests = []tableTest{
    79  	{ // test getWeights
    80  		[]input{
    81  			{"a", [][]int{{100}}},
    82  			{"b", [][]int{{105}}},
    83  			{"c", [][]int{{110}}},
    84  			{"ß", [][]int{{120}}},
    85  		},
    86  		[]check{
    87  			{"a", 1, ColElems{w(100)}},
    88  			{"b", 1, ColElems{w(105)}},
    89  			{"c", 1, ColElems{w(110)}},
    90  			{"d", 1, ColElems{w(0x50064)}},
    91  			{"ab", 1, ColElems{w(100)}},
    92  			{"bc", 1, ColElems{w(105)}},
    93  			{"dd", 1, ColElems{w(0x50064)}},
    94  			{"ß", 2, ColElems{w(120)}},
    95  		},
    96  	},
    97  	{ // test expansion
    98  		[]input{
    99  			{"u", [][]int{{100}}},
   100  			{"U", [][]int{{100}, {0, 25}}},
   101  			{"w", [][]int{{100}, {100}}},
   102  			{"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
   103  		},
   104  		[]check{
   105  			{"u", 1, ColElems{w(100)}},
   106  			{"U", 1, ColElems{w(100), w(0, 25)}},
   107  			{"w", 1, ColElems{w(100), w(100)}},
   108  			{"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
   109  		},
   110  	},
   111  	{ // test decompose
   112  		[]input{
   113  			{"D", [][]int{pt(104, 8)}},
   114  			{"z", [][]int{pt(130, 8)}},
   115  			{"\u030C", [][]int{{0, 40}}},                               // Caron
   116  			{"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
   117  		},
   118  		[]check{
   119  			{"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
   120  		},
   121  	},
   122  	{ // test basic contraction
   123  		[]input{
   124  			{"a", [][]int{{100}}},
   125  			{"ab", [][]int{{101}}},
   126  			{"aab", [][]int{{101}, {101}}},
   127  			{"abc", [][]int{{102}}},
   128  			{"b", [][]int{{200}}},
   129  			{"c", [][]int{{300}}},
   130  			{"d", [][]int{{400}}},
   131  		},
   132  		[]check{
   133  			{"a", 1, ColElems{w(100)}},
   134  			{"aa", 1, ColElems{w(100)}},
   135  			{"aac", 1, ColElems{w(100)}},
   136  			{"d", 1, ColElems{w(400)}},
   137  			{"ab", 2, ColElems{w(101)}},
   138  			{"abb", 2, ColElems{w(101)}},
   139  			{"aab", 3, ColElems{w(101), w(101)}},
   140  			{"aaba", 3, ColElems{w(101), w(101)}},
   141  			{"abc", 3, ColElems{w(102)}},
   142  			{"abcd", 3, ColElems{w(102)}},
   143  		},
   144  	},
   145  	{ // test discontinuous contraction
   146  		append(mods, []input{
   147  			// modifiers; secondary weight equals ccc
   148  			{"\u0316", [][]int{{0, 220}}},
   149  			{"\u0317", [][]int{{0, 220}, {0, 220}}},
   150  			{"\u302D", [][]int{{0, 222}}},
   151  			{"\u302E", [][]int{{0, 225}}}, // used as starter
   152  			{"\u302F", [][]int{{0, 224}}}, // used as starter
   153  			{"\u18A9", [][]int{{0, 228}}},
   154  			{"\u0300", [][]int{{0, 230}}},
   155  			{"\u0301", [][]int{{0, 230}}},
   156  			{"\u0315", [][]int{{0, 232}}},
   157  			{"\u031A", [][]int{{0, 232}}},
   158  			{"\u035C", [][]int{{0, 233}}},
   159  			{"\u035F", [][]int{{0, 233}}},
   160  			{"\u035D", [][]int{{0, 234}}},
   161  			{"\u035E", [][]int{{0, 234}}},
   162  			{"\u0345", [][]int{{0, 240}}},
   163  
   164  			// starters
   165  			{"a", [][]int{{100}}},
   166  			{"b", [][]int{{200}}},
   167  			{"c", [][]int{{300}}},
   168  			{"\u03B1", [][]int{{900}}},
   169  			{"\x01", [][]int{{0, 0, 0, 0}}},
   170  
   171  			// contractions
   172  			{"a\u0300", [][]int{{101}}},
   173  			{"a\u0301", [][]int{{102}}},
   174  			{"a\u035E", [][]int{{110}}},
   175  			{"a\u035Eb\u035E", [][]int{{115}}},
   176  			{"ac\u035Eaca\u035E", [][]int{{116}}},
   177  			{"a\u035Db\u035D", [][]int{{117}}},
   178  			{"a\u0301\u035Db", [][]int{{120}}},
   179  			{"a\u0301\u035F", [][]int{{121}}},
   180  			{"a\u0301\u035Fb", [][]int{{119}}},
   181  			{"\u03B1\u0345", [][]int{{901}, {902}}},
   182  			{"\u302E\u302F", [][]int{{0, 131}, {0, 131}}},
   183  			{"\u302F\u18A9", [][]int{{0, 130}}},
   184  		}...),
   185  		[]check{
   186  			{"a\x01\u0300", 1, ColElems{w(100)}},
   187  			{"ab", 1, ColElems{w(100)}},                              // closing segment
   188  			{"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}},       // closing segment
   189  			{"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}},        // no closing segment
   190  			{"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
   191  			{"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}},  // completes before segment end
   192  
   193  			{"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}},       // closing segment
   194  			{"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}},        // no closing segment
   195  			{"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
   196  			{"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}},  // completes before segment end
   197  
   198  			// match blocked by modifier with same ccc
   199  			{"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},
   200  
   201  			// multiple gaps
   202  			{"a\u0301\u035Db", 6, ColElems{w(120)}},
   203  			{"a\u0301\u035F", 5, ColElems{w(121)}},
   204  			{"a\u0301\u035Fb", 6, ColElems{w(119)}},
   205  			{"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
   206  			{"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
   207  			{"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
   208  			{"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
   209  			{"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
   210  			{"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
   211  			{"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
   212  
   213  			// handling of segment overflow
   214  			{ // just fits within segment
   215  				"a" + string(modSeq[:30]) + "\u0301",
   216  				3 + len(string(modSeq[:30])),
   217  				append(ColElems{w(102)}, modW[:30]...),
   218  			},
   219  			{"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
   220  			{"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
   221  			{ // just fits within segment with two interstitial runes
   222  				"a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
   223  				7 + len(string(modSeq[:28])),
   224  				append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
   225  			},
   226  			{ // second half does not fit within segment
   227  				"a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
   228  				3 + len(string(modSeq[:29])),
   229  				append(ColElems{w(102)}, modW[:29]...),
   230  			},
   231  
   232  			// discontinuity can only occur in last normalization segment
   233  			{"a\u035Eb\u035E", 6, ColElems{w(115)}},
   234  			{"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
   235  			{"a\u035Db\u035D", 6, ColElems{w(117)}},
   236  			{"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
   237  			{"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
   238  			{"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
   239  			{"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
   240  			{"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
   241  			{"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},
   242  
   243  			// expanding contraction
   244  			{"\u03B1\u0345", 4, ColElems{w(901), w(902)}},
   245  
   246  			// Theoretical possibilities
   247  			// contraction within a gap
   248  			{"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
   249  			// expansion within a gap
   250  			{"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
   251  			// repeating CCC blocks last modifier
   252  			{"a\u302E\u302F\u0301", 1, ColElems{w(100)}},
   253  			// The trailing combining characters (with lower CCC) should block the first one.
   254  			// TODO: make the following pass.
   255  			// {"a\u035E\u0316\u0316", 1, ColElems{w(100)}},
   256  			{"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}},
   257  			// Last combiner should match after normalization.
   258  			// TODO: make the following pass.
   259  			// {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}},
   260  			// The first combiner is blocking the second one as they have the same CCC.
   261  			{"a\u035D\u035Eb", 1, ColElems{w(100)}},
   262  		},
   263  	},
   264  }
   265  
   266  func TestAppendNext(t *testing.T) {
   267  	for i, tt := range appendNextTests {
   268  		c, err := makeTable(tt.in)
   269  		if err != nil {
   270  			t.Errorf("%d: error creating table: %v", i, err)
   271  			continue
   272  		}
   273  		for j, chk := range tt.chk {
   274  			ws, n := c.t.AppendNext(nil, []byte(chk.in))
   275  			if n != chk.n {
   276  				t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n)
   277  			}
   278  			out := convertFromWeights(chk.out)
   279  			if len(ws) != len(out) {
   280  				t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(out), ws, out, chk.in)
   281  				continue
   282  			}
   283  			for k, w := range ws {
   284  				w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
   285  				if w != out[k] {
   286  					t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
   287  				}
   288  			}
   289  		}
   290  	}
   291  }
   292  

View as plain text