...

Source file src/golang.org/x/text/unicode/norm/iter_test.go

Documentation: golang.org/x/text/unicode/norm

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package norm
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  )
    11  
    12  func doIterNormString(f Form, s string) []byte {
    13  	acc := []byte{}
    14  	i := Iter{}
    15  	i.InitString(f, s)
    16  	for !i.Done() {
    17  		acc = append(acc, i.Next()...)
    18  	}
    19  	return acc
    20  }
    21  
    22  func doIterNorm(f Form, s string) []byte {
    23  	acc := []byte{}
    24  	i := Iter{}
    25  	i.Init(f, []byte(s))
    26  	for !i.Done() {
    27  		acc = append(acc, i.Next()...)
    28  	}
    29  	return acc
    30  }
    31  
    32  func TestIterNext(t *testing.T) {
    33  	runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
    34  		return doIterNormString(f, string(append(out, s...)))
    35  	})
    36  	runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
    37  		return doIterNorm(f, string(append(out, s...)))
    38  	})
    39  }
    40  
        // SegmentTest pairs an input string with the sequence of segments that
        // iterating over it is expected to produce. It is consumed by segmentTest.
    41  type SegmentTest struct {
    42  	in  string   // input passed to Iter.InitString
    43  	out []string // expected segments, in order; "" means Done() must be true at that position
    44  }
    45  
        // segmentTests is the segmentation table that TestIterSegmentation runs for
        // NFD and NFC. Expected segments are compared after segmentTest applies
        // f.String to them, so they need not be written in the form under test. An
        // empty expected segment asserts that the iterator is done at that point.
        //
        // NOTE(review): rep, grave, cgj and segSize are defined elsewhere in the
        // package; presumably grave(n) is n combining grave accents and cgj is the
        // combining grapheme joiner -- confirm against their definitions.
    46  var segmentTests = []SegmentTest{
    47  	{"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
    48  	{rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
    49  	{rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
        	// A combining mark is expected in the same segment as the 'a' before it.
    50  	{rep('a', segSize) + "\u0300aa",
    51  		append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
    52  
    53  	// U+0f73 is NOT treated as a starter as it is a modifier
    54  	{"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}},
    55  	{"a\u0f73", []string{"a\u0f73"}},
    56  
    57  	// U+ff9e is treated as a non-starter.
    58  	// TODO: should we? Note that this will only affect iteration, as whether
    59  	// or not we do so does not affect the normalization output and will either
    60  	// way result in consistent iteration output.
    61  	{"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}},
    62  	{"a\uff9e", []string{"a\uff9e"}},
    63  }
    64  
        // segmentTestsK is the segmentation table that TestIterSegmentation runs for
        // the compatibility forms NFKD and NFKC. As with segmentTests, expected
        // segments are passed through f.String by segmentTest before comparison, and
        // an empty expected segment asserts that the iterator is done at that point.
    65  var segmentTestsK = []SegmentTest{
    66  	{"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
    67  	// The last segment of a multi-segment decomposition needs normalization.
    68  	{"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
    69  	{"\u320E", []string{"\x28", "\uAC00", "\x29"}},
    70  
    71  	// The last segment should be copied to the start of the buffer.
        	// In the grave(64) case the expected output splits the trailing marks into
        	// runs of 30, 30 and 4, with cgj in front of the second and third run.
    72  	{"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}},
    73  	{"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}},
    74  	{"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}},
    75  
    76  	// Hangul and Jamo are grouped together.
    77  	{"\uAC00", []string{"\u1100\u1161", ""}},
    78  	{"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
    79  	{"\u1100\u1161", []string{"\u1100\u1161", ""}},
    80  }
    81  
    82  // Note that, by design, segmentation is equal for composing and decomposing forms.
        // Each table is therefore run for both forms of its kind: segmentTests for
        // NFD and NFC, and segmentTestsK for NFKD and NFKC. The name passed to
        // segmentTest prefixes every failure message it reports.
    83  func TestIterSegmentation(t *testing.T) {
    84  	segmentTest(t, "SegmentTestD", NFD, segmentTests)
    85  	segmentTest(t, "SegmentTestC", NFC, segmentTests)
    86  	segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK)
    87  	segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK)
    88  }
    89  
    90  func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
    91  	iter := Iter{}
    92  	for i, tt := range tests {
    93  		iter.InitString(f, tt.in)
    94  		for j, seg := range tt.out {
    95  			if seg == "" {
    96  				if !iter.Done() {
    97  					res := string(iter.Next())
    98  					t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res)
    99  				}
   100  				continue
   101  			}
   102  			if iter.Done() {
   103  				t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
   104  			}
   105  			seg = f.String(seg)
   106  			if res := string(iter.Next()); res != seg {
   107  				t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg))
   108  			}
   109  		}
   110  	}
   111  }
   112  

View as plain text