...

Source file src/golang.org/x/text/unicode/norm/ucd_test.go

Documentation: golang.org/x/text/unicode/norm

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package norm
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"fmt"
    11  	"regexp"
    12  	"runtime"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"testing"
    17  	"time"
    18  	"unicode/utf8"
    19  
    20  	"golang.org/x/text/internal/gen"
    21  	"golang.org/x/text/internal/testtext"
    22  )
    23  
    24  var once sync.Once
    25  
    26  func skipShort(t *testing.T) {
    27  	testtext.SkipIfNotLong(t)
    28  
    29  	once.Do(func() { loadTestData(t) })
    30  }
    31  
    32  // This regression test runs the test set in NormalizationTest.txt
    33  // (taken from https://www.unicode.org/Public/<unicode.Version>/ucd/).
    34  //
    35  // NormalizationTest.txt has form:
    36  // @Part0 # Specific cases
    37  // #
    38  // 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
    39  // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
    40  //
    41  // Each test has 5 columns (c1, c2, c3, c4, c5), where
    42  // (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
    43  //
    44  // CONFORMANCE:
    45  // 1. The following invariants must be true for all conformant implementations
    46  //
    47  //    NFC
    48  //      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
    49  //      c4 ==  NFC(c4) ==  NFC(c5)
    50  //
    51  //    NFD
    52  //      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
    53  //      c5 ==  NFD(c4) ==  NFD(c5)
    54  //
    55  //    NFKC
    56  //      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
    57  //
    58  //    NFKD
    59  //      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
    60  //
    61  // 2. For every code point X assigned in this version of Unicode that is not
    62  //    specifically listed in Part 1, the following invariants must be true
    63  //    for all conformant implementations:
    64  //
    65  //      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
    66  //
    67  
    68  // Column types.
    69  const (
    70  	cRaw = iota
    71  	cNFC
    72  	cNFD
    73  	cNFKC
    74  	cNFKD
    75  	cMaxColumns
    76  )
    77  
    78  // Holds data from NormalizationTest.txt
    79  var part []Part
    80  
    81  type Part struct {
    82  	name   string
    83  	number int
    84  	tests  []Test
    85  }
    86  
    87  type Test struct {
    88  	name   string
    89  	partnr int
    90  	number int
    91  	r      rune                // used for character by character test
    92  	cols   [cMaxColumns]string // Each has 5 entries, see below.
    93  }
    94  
    95  func (t Test) Name() string {
    96  	if t.number < 0 {
    97  		return part[t.partnr].name
    98  	}
    99  	return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
   100  }
   101  
   102  var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
   103  var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
   104  
   105  var counter int
   106  
   107  // Load the data form NormalizationTest.txt
   108  func loadTestData(t *testing.T) {
   109  	f := gen.OpenUCDFile("NormalizationTest.txt")
   110  	defer f.Close()
   111  	scanner := bufio.NewScanner(f)
   112  	for scanner.Scan() {
   113  		line := scanner.Text()
   114  		if len(line) == 0 || line[0] == '#' {
   115  			continue
   116  		}
   117  		m := partRe.FindStringSubmatch(line)
   118  		if m != nil {
   119  			if len(m) < 3 {
   120  				t.Fatal("Failed to parse Part: ", line)
   121  			}
   122  			i, err := strconv.Atoi(m[1])
   123  			if err != nil {
   124  				t.Fatal(err)
   125  			}
   126  			name := m[2]
   127  			part = append(part, Part{name: name[:len(name)-1], number: i})
   128  			continue
   129  		}
   130  		m = testRe.FindStringSubmatch(line)
   131  		if m == nil || len(m) < 7 {
   132  			t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
   133  		}
   134  		test := Test{name: m[6], partnr: len(part) - 1, number: counter}
   135  		counter++
   136  		for j := 1; j < len(m)-1; j++ {
   137  			for _, split := range strings.Split(m[j], " ") {
   138  				r, err := strconv.ParseUint(split, 16, 64)
   139  				if err != nil {
   140  					t.Fatal(err)
   141  				}
   142  				if test.r == 0 {
   143  					// save for CharacterByCharacterTests
   144  					test.r = rune(r)
   145  				}
   146  				var buf [utf8.UTFMax]byte
   147  				sz := utf8.EncodeRune(buf[:], rune(r))
   148  				test.cols[j-1] += string(buf[:sz])
   149  			}
   150  		}
   151  		part := &part[len(part)-1]
   152  		part.tests = append(part.tests, test)
   153  	}
   154  	if scanner.Err() != nil {
   155  		t.Fatal(scanner.Err())
   156  	}
   157  }
   158  
   159  func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) {
   160  	if gold != result {
   161  		t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s",
   162  			tc.Name(), name, fstr[f], test, result, gold, tc.name)
   163  	}
   164  }
   165  
   166  func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) {
   167  	if result != want {
   168  		t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want)
   169  	}
   170  }
   171  
   172  func doTest(t *testing.T, tc *Test, f Form, gold, test string) {
   173  	testb := []byte(test)
   174  	result := f.Bytes(testb)
   175  	cmpResult(t, tc, "Bytes", f, gold, test, string(result))
   176  
   177  	sresult := f.String(test)
   178  	cmpResult(t, tc, "String", f, gold, test, sresult)
   179  
   180  	acc := []byte{}
   181  	i := Iter{}
   182  	i.InitString(f, test)
   183  	for !i.Done() {
   184  		acc = append(acc, i.Next()...)
   185  	}
   186  	cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc))
   187  
   188  	buf := make([]byte, 128)
   189  	acc = nil
   190  	for p := 0; p < len(testb); {
   191  		nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
   192  		acc = append(acc, buf[:nDst]...)
   193  		p += nSrc
   194  	}
   195  	cmpResult(t, tc, "Transform", f, gold, test, string(acc))
   196  
   197  	for i := range test {
   198  		out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
   199  		cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
   200  	}
   201  	cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
   202  	cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
   203  }
   204  
   205  func doConformanceTests(t *testing.T, tc *Test, partn int) {
   206  	for i := 0; i <= 2; i++ {
   207  		doTest(t, tc, NFC, tc.cols[1], tc.cols[i])
   208  		doTest(t, tc, NFD, tc.cols[2], tc.cols[i])
   209  		doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
   210  		doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
   211  	}
   212  	for i := 3; i <= 4; i++ {
   213  		doTest(t, tc, NFC, tc.cols[3], tc.cols[i])
   214  		doTest(t, tc, NFD, tc.cols[4], tc.cols[i])
   215  		doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
   216  		doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
   217  	}
   218  }
   219  
   220  func TestCharacterByCharacter(t *testing.T) {
   221  	skipShort(t)
   222  	tests := part[1].tests
   223  	var last rune = 0
   224  	for i := 0; i <= len(tests); i++ { // last one is special case
   225  		var r rune
   226  		if i == len(tests) {
   227  			r = 0x2FA1E // Don't have to go to 0x10FFFF
   228  		} else {
   229  			r = tests[i].r
   230  		}
   231  		for last++; last < r; last++ {
   232  			// Check all characters that were not explicitly listed in the test.
   233  			tc := &Test{partnr: 1, number: -1}
   234  			char := string(last)
   235  			doTest(t, tc, NFC, char, char)
   236  			doTest(t, tc, NFD, char, char)
   237  			doTest(t, tc, NFKC, char, char)
   238  			doTest(t, tc, NFKD, char, char)
   239  		}
   240  		if i < len(tests) {
   241  			doConformanceTests(t, &tests[i], 1)
   242  		}
   243  	}
   244  }
   245  
   246  func TestStandardTests(t *testing.T) {
   247  	skipShort(t)
   248  	for _, j := range []int{0, 2, 3} {
   249  		for _, test := range part[j].tests {
   250  			doConformanceTests(t, &test, j)
   251  		}
   252  	}
   253  }
   254  
   255  // TestPerformance verifies that normalization is O(n). If any of the
   256  // code does not properly check for maxCombiningChars, normalization
   257  // may exhibit O(n**2) behavior.
   258  func TestPerformance(t *testing.T) {
   259  	skipShort(t)
   260  	runtime.GOMAXPROCS(2)
   261  	success := make(chan bool, 1)
   262  	go func() {
   263  		buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
   264  		buf = append(buf, "\u035B"...)
   265  		NFC.Append(nil, buf...)
   266  		success <- true
   267  	}()
   268  	timeout := time.After(1 * time.Second)
   269  	select {
   270  	case <-success:
   271  		// test completed before the timeout
   272  	case <-timeout:
   273  		t.Errorf(`unexpectedly long time to complete PerformanceTest`)
   274  	}
   275  }
   276  

View as plain text