...

Source file src/golang.org/x/text/unicode/rangetable/merge_test.go

Documentation: golang.org/x/text/unicode/rangetable

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rangetable
     6  
     7  import (
     8  	"testing"
     9  	"unicode"
    10  )
    11  
    12  var (
    13  	maxRuneTable = &unicode.RangeTable{
    14  		R32: []unicode.Range32{
    15  			{unicode.MaxRune, unicode.MaxRune, 1},
    16  		},
    17  	}
    18  
    19  	overlap1 = &unicode.RangeTable{
    20  		R16: []unicode.Range16{
    21  			{0x100, 0xfffc, 4},
    22  		},
    23  		R32: []unicode.Range32{
    24  			{0x100000, 0x10fffc, 4},
    25  		},
    26  	}
    27  
    28  	overlap2 = &unicode.RangeTable{
    29  		R16: []unicode.Range16{
    30  			{0x101, 0xfffd, 4},
    31  		},
    32  		R32: []unicode.Range32{
    33  			{0x100001, 0x10fffd, 3},
    34  		},
    35  	}
    36  
    37  	// The following table should be compacted into two entries for R16 and R32.
    38  	optimize = &unicode.RangeTable{
    39  		R16: []unicode.Range16{
    40  			{0x1, 0x1, 1},
    41  			{0x2, 0x2, 1},
    42  			{0x3, 0x3, 1},
    43  			{0x5, 0x5, 1},
    44  			{0x7, 0x7, 1},
    45  			{0x9, 0x9, 1},
    46  			{0xb, 0xf, 2},
    47  		},
    48  		R32: []unicode.Range32{
    49  			{0x10001, 0x10001, 1},
    50  			{0x10002, 0x10002, 1},
    51  			{0x10003, 0x10003, 1},
    52  			{0x10005, 0x10005, 1},
    53  			{0x10007, 0x10007, 1},
    54  			{0x10009, 0x10009, 1},
    55  			{0x1000b, 0x1000f, 2},
    56  		},
    57  	}
    58  )
    59  
    60  func TestMerge(t *testing.T) {
    61  	for i, tt := range [][]*unicode.RangeTable{
    62  		{unicode.Cc, unicode.Cf},
    63  		{unicode.L, unicode.Ll},
    64  		{unicode.L, unicode.Ll, unicode.Lu},
    65  		{unicode.Ll, unicode.Lu},
    66  		{unicode.M},
    67  		unicode.GraphicRanges,
    68  		cased,
    69  
    70  		// Merge R16 only and R32 only and vice versa.
    71  		{unicode.Khmer, unicode.Khudawadi},
    72  		{unicode.Imperial_Aramaic, unicode.Radical},
    73  
    74  		// Merge with empty.
    75  		{&unicode.RangeTable{}},
    76  		{&unicode.RangeTable{}, &unicode.RangeTable{}},
    77  		{&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
    78  		{&unicode.RangeTable{}, unicode.Hiragana},
    79  		{unicode.Inherited, &unicode.RangeTable{}},
    80  		{&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},
    81  
    82  		// Hypothetical tables.
    83  		{maxRuneTable},
    84  		{overlap1, overlap2},
    85  
    86  		// Optimization
    87  		{optimize},
    88  	} {
    89  		rt := Merge(tt...)
    90  		for r := rune(0); r <= unicode.MaxRune; r++ {
    91  			if got, want := unicode.Is(rt, r), unicode.In(r, tt...); got != want {
    92  				t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
    93  			}
    94  		}
    95  		// Test optimization and correctness for R16.
    96  		for k := 0; k < len(rt.R16)-1; k++ {
    97  			if lo, hi := rt.R16[k].Lo, rt.R16[k].Hi; lo > hi {
    98  				t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
    99  			}
   100  			if hi, lo := rt.R16[k].Hi, rt.R16[k+1].Lo; hi >= lo {
   101  				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
   102  			}
   103  			if rt.R16[k].Hi+rt.R16[k].Stride == rt.R16[k+1].Lo {
   104  				t.Errorf("%d: missed optimization for R16 at %d between %X and %x",
   105  					i, k, rt.R16[k], rt.R16[k+1])
   106  			}
   107  		}
   108  		// Test optimization and correctness for R32.
   109  		for k := 0; k < len(rt.R32)-1; k++ {
   110  			if lo, hi := rt.R32[k].Lo, rt.R32[k].Hi; lo > hi {
   111  				t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
   112  			}
   113  			if hi, lo := rt.R32[k].Hi, rt.R32[k+1].Lo; hi >= lo {
   114  				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
   115  			}
   116  			if rt.R32[k].Hi+rt.R32[k].Stride == rt.R32[k+1].Lo {
   117  				t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
   118  					i, k, rt.R32[k], rt.R32[k+1])
   119  			}
   120  		}
   121  	}
   122  }
   123  
   124  const runes = "Hello World in 2015!,\U0010fffd"
   125  
   126  func BenchmarkNotMerged(t *testing.B) {
   127  	for i := 0; i < t.N; i++ {
   128  		for _, r := range runes {
   129  			unicode.In(r, unicode.GraphicRanges...)
   130  		}
   131  	}
   132  }
   133  
   134  func BenchmarkMerged(t *testing.B) {
   135  	rt := Merge(unicode.GraphicRanges...)
   136  
   137  	for i := 0; i < t.N; i++ {
   138  		for _, r := range runes {
   139  			unicode.Is(rt, r)
   140  		}
   141  	}
   142  }
   143  
   144  var cased = []*unicode.RangeTable{
   145  	unicode.Lower,
   146  	unicode.Upper,
   147  	unicode.Title,
   148  	unicode.Other_Lowercase,
   149  	unicode.Other_Uppercase,
   150  }
   151  
   152  func BenchmarkNotMergedCased(t *testing.B) {
   153  	for i := 0; i < t.N; i++ {
   154  		for _, r := range runes {
   155  			unicode.In(r, cased...)
   156  		}
   157  	}
   158  }
   159  
   160  func BenchmarkMergedCased(t *testing.B) {
   161  	// This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
   162  	// Unicode 7.0.0.
   163  	rt := Merge(cased...)
   164  
   165  	for i := 0; i < t.N; i++ {
   166  		for _, r := range runes {
   167  			unicode.Is(rt, r)
   168  		}
   169  	}
   170  }
   171  
   172  func BenchmarkInit(t *testing.B) {
   173  	for i := 0; i < t.N; i++ {
   174  		Merge(cased...)
   175  		Merge(unicode.GraphicRanges...)
   176  	}
   177  }
   178  
   179  func BenchmarkInit2(t *testing.B) {
   180  	// Hypothetical near-worst-case performance.
   181  	for i := 0; i < t.N; i++ {
   182  		Merge(overlap1, overlap2)
   183  	}
   184  }
   185  

View as plain text