...

Source file src/golang.org/x/text/language/match_test.go

Documentation: golang.org/x/text/language

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
     7  import (
     8  	"bytes"
     9  	"flag"
    10  	"fmt"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"strings"
    15  	"testing"
    16  	"unicode/utf8"
    17  
    18  	"golang.org/x/text/internal/testtext"
    19  	"golang.org/x/text/internal/ucd"
    20  )
    21  
    22  var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
    23  
    24  func TestCompliance(t *testing.T) {
    25  	filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
    26  		if info.IsDir() {
    27  			return nil
    28  		}
    29  		r, err := os.Open(file)
    30  		if err != nil {
    31  			t.Fatal(err)
    32  		}
    33  		ucd.Parse(r, func(p *ucd.Parser) {
    34  			name := strings.ReplaceAll(path.Join(p.String(0), p.String(1)), " ", "")
    35  			if skip[name] {
    36  				return
    37  			}
    38  			t.Run(info.Name()+"/"+short(name), func(t *testing.T) {
    39  				supported := makeTagList(p.String(0))
    40  				desired := makeTagList(p.String(1))
    41  				gotCombined, index, conf := NewMatcher(supported).Match(desired...)
    42  
    43  				gotMatch := supported[index]
    44  				wantMatch := Raw.Make(p.String(2)) // wantMatch may be null
    45  				if gotMatch != wantMatch {
    46  					t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
    47  				}
    48  				if tag := strings.TrimSpace(p.String(3)); tag != "" {
    49  					wantCombined := Raw.MustParse(tag)
    50  					if err == nil && gotCombined != wantCombined {
    51  						t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
    52  					}
    53  				}
    54  			})
    55  		})
    56  		return nil
    57  	})
    58  }
    59  
    60  func short(s string) string {
    61  	if len(s) <= 50 {
    62  		return s
    63  	}
    64  	var i int
    65  	for i = 1; i < utf8.UTFMax && !utf8.RuneStart(s[50-i]); i++ {
    66  	}
    67  	return s[:50-i] + "…"
    68  }
    69  
    70  var skip = map[string]bool{
    71  	// TODO: bugs
    72  	// Honor the wildcard match. This may only be useful to select non-exact
    73  	// stuff.
    74  	"mul,af/nl": true, // match: got "af"; want "mul"
    75  
    76  	// TODO: include other extensions.
    77  	// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
    78  	"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,
    79  
    80  	// Inconsistencies with Mark Davis' implementation where it is not clear
    81  	// which is better.
    82  
    83  	// Inconsistencies in combined. I think the Go approach is more appropriate.
    84  	// We could use -u-rg- as alternative.
    85  	"und,fr/fr-BE-fonipa":              true, // combined: got "fr"; want "fr-BE-fonipa"
    86  	"und,fr-CA/fr-BE-fonipa":           true, // combined: got "fr-CA"; want "fr-BE-fonipa"
    87  	"und,fr-fonupa/fr-BE-fonipa":       true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
    88  	"und,no/nn-BE-fonipa":              true, // combined: got "no"; want "no-BE-fonipa"
    89  	"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
    90  
    91  	// The initial number is a threshold. As we don't use scoring, we will not
    92  	// implement this.
    93  	"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
    94  	// match: got "und"; want "fr-Cyrl-CA-fonupa"
    95  	// combined: got "und"; want "fr-Cyrl-BE-fonipa"
    96  
    97  	// Other interesting cases to test:
    98  	// - Should same language or same script have the preference if there is
    99  	//   usually no understanding of the other script?
   100  	// - More specific region in desired may replace enclosing supported.
   101  }
   102  
   103  func makeTagList(s string) (tags []Tag) {
   104  	for _, s := range strings.Split(s, ",") {
   105  		tags = append(tags, mk(strings.TrimSpace(s)))
   106  	}
   107  	return tags
   108  }
   109  
   110  func TestMatchStrings(t *testing.T) {
   111  	testCases := []struct {
   112  		supported string
   113  		desired   string // strings separated by |
   114  		tag       string
   115  		index     int
   116  	}{{
   117  		supported: "en",
   118  		desired:   "",
   119  		tag:       "en",
   120  		index:     0,
   121  	}, {
   122  		supported: "en",
   123  		desired:   "nl",
   124  		tag:       "en",
   125  		index:     0,
   126  	}, {
   127  		supported: "en,nl",
   128  		desired:   "nl",
   129  		tag:       "nl",
   130  		index:     1,
   131  	}, {
   132  		supported: "en,nl",
   133  		desired:   "nl|en",
   134  		tag:       "nl",
   135  		index:     1,
   136  	}, {
   137  		supported: "en-GB,nl",
   138  		desired:   "en ; q=0.1,nl",
   139  		tag:       "nl",
   140  		index:     1,
   141  	}, {
   142  		supported: "en-GB,nl",
   143  		desired:   "en;q=0.005 | dk; q=0.1,nl ",
   144  		tag:       "en-GB",
   145  		index:     0,
   146  	}, {
   147  		// do not match faulty tags with und
   148  		supported: "en,und",
   149  		desired:   "|en",
   150  		tag:       "en",
   151  		index:     0,
   152  	}}
   153  	for _, tc := range testCases {
   154  		t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
   155  			m := NewMatcher(makeTagList(tc.supported))
   156  			tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
   157  			if tag.String() != tc.tag || index != tc.index {
   158  				t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
   159  			}
   160  		})
   161  	}
   162  }
   163  
   164  func TestRegionGroups(t *testing.T) {
   165  	testCases := []struct {
   166  		a, b     string
   167  		distance uint8
   168  	}{
   169  		{"zh-TW", "zh-HK", 5},
   170  		{"zh-MO", "zh-HK", 4},
   171  		{"es-ES", "es-AR", 5},
   172  		{"es-ES", "es", 4},
   173  		{"es-419", "es-MX", 4},
   174  		{"es-AR", "es-MX", 4},
   175  		{"es-ES", "es-MX", 5},
   176  		{"es-PT", "es-MX", 5},
   177  	}
   178  	for _, tc := range testCases {
   179  		a := MustParse(tc.a)
   180  		aScript, _ := a.Script()
   181  		b := MustParse(tc.b)
   182  		bScript, _ := b.Script()
   183  
   184  		if aScript != bScript {
   185  			t.Errorf("scripts differ: %q vs %q", aScript, bScript)
   186  			continue
   187  		}
   188  		d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang())
   189  		if d != tc.distance {
   190  			t.Errorf("got %q; want %q", d, tc.distance)
   191  		}
   192  	}
   193  }
   194  
   195  func TestIsParadigmLocale(t *testing.T) {
   196  	testCases := map[string]bool{
   197  		"en-US":  true,
   198  		"en-GB":  true,
   199  		"en-VI":  false,
   200  		"es-GB":  false,
   201  		"es-ES":  true,
   202  		"es-419": true,
   203  	}
   204  	for str, want := range testCases {
   205  		tt := Make(str)
   206  		tag := tt.tag()
   207  		got := isParadigmLocale(tag.LangID, tag.RegionID)
   208  		if got != want {
   209  			t.Errorf("isPL(%q) = %v; want %v", str, got, want)
   210  		}
   211  	}
   212  }
   213  
   214  // Implementation of String methods for various types for debugging purposes.
   215  
   216  func (m *matcher) String() string {
   217  	w := &bytes.Buffer{}
   218  	fmt.Fprintln(w, "Default:", m.default_)
   219  	for tag, h := range m.index {
   220  		fmt.Fprintf(w, "  %s: %v\n", tag, h)
   221  	}
   222  	return w.String()
   223  }
   224  
   225  func (h *matchHeader) String() string {
   226  	w := &bytes.Buffer{}
   227  	fmt.Fprint(w, "haveTag: ")
   231  	return w.String()
   232  }
   233  
   234  func (t haveTag) String() string {
   235  	return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
   236  }
   237  
   238  func TestIssue43834(t *testing.T) {
   239  	matcher := NewMatcher([]Tag{English})
   240  
   241  	// ZZ is the largest region code and should not cause overflow.
   242  	desired, _, err := ParseAcceptLanguage("en-ZZ")
   243  	if err != nil {
   244  		t.Error(err)
   245  	}
   246  	_, i, _ := matcher.Match(desired...)
   247  	if i != 0 {
   248  		t.Errorf("got %v; want 0", i)
   249  	}
   250  }
   251  
   252  func TestBestMatchAlloc(t *testing.T) {
   253  	m := NewMatcher(makeTagList("en sr nl"))
   254  	// Go allocates when creating a list of tags from a single tag!
   255  	list := []Tag{English}
   256  	avg := testtext.AllocsPerRun(100, func() {
   257  		m.Match(list...)
   258  	})
   259  	if avg > 0 {
   260  		t.Errorf("got %f; want 0", avg)
   261  	}
   262  }
   263  
   264  var benchHave = []Tag{
   265  	mk("en"),
   266  	mk("en-GB"),
   267  	mk("za"),
   268  	mk("zh-Hant"),
   269  	mk("zh-Hans-CN"),
   270  	mk("zh"),
   271  	mk("zh-HK"),
   272  	mk("ar-MK"),
   273  	mk("en-CA"),
   274  	mk("fr-CA"),
   275  	mk("fr-US"),
   276  	mk("fr-CH"),
   277  	mk("fr"),
   278  	mk("lt"),
   279  	mk("lv"),
   280  	mk("iw"),
   281  	mk("iw-NL"),
   282  	mk("he"),
   283  	mk("he-IT"),
   284  	mk("tlh"),
   285  	mk("ja"),
   286  	mk("ja-Jpan"),
   287  	mk("ja-Jpan-JP"),
   288  	mk("de"),
   289  	mk("de-CH"),
   290  	mk("de-AT"),
   291  	mk("de-DE"),
   292  	mk("sr"),
   293  	mk("sr-Latn"),
   294  	mk("sr-Cyrl"),
   295  	mk("sr-ME"),
   296  }
   297  
   298  var benchWant = [][]Tag{
   299  	[]Tag{
   300  		mk("en"),
   301  	},
   302  	[]Tag{
   303  		mk("en-AU"),
   304  		mk("de-HK"),
   305  		mk("nl"),
   306  		mk("fy"),
   307  		mk("lv"),
   308  	},
   309  	[]Tag{
   310  		mk("en-AU"),
   311  		mk("de-HK"),
   312  		mk("nl"),
   313  		mk("fy"),
   314  	},
   315  	[]Tag{
   316  		mk("ja-Hant"),
   317  		mk("da-HK"),
   318  		mk("nl"),
   319  		mk("zh-TW"),
   320  	},
   321  	[]Tag{
   322  		mk("ja-Hant"),
   323  		mk("da-HK"),
   324  		mk("nl"),
   325  		mk("hr"),
   326  	},
   327  }
   328  
   329  func BenchmarkMatch(b *testing.B) {
   330  	m := newMatcher(benchHave, nil)
   331  	for i := 0; i < b.N; i++ {
   332  		for _, want := range benchWant {
   333  			m.getBest(want...)
   334  		}
   335  	}
   336  }
   337  
   338  func BenchmarkMatchExact(b *testing.B) {
   339  	want := mk("en")
   340  	m := newMatcher(benchHave, nil)
   341  	for i := 0; i < b.N; i++ {
   342  		m.getBest(want)
   343  	}
   344  }
   345  
   346  func BenchmarkMatchAltLanguagePresent(b *testing.B) {
   347  	want := mk("hr")
   348  	m := newMatcher(benchHave, nil)
   349  	for i := 0; i < b.N; i++ {
   350  		m.getBest(want)
   351  	}
   352  }
   353  
   354  func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
   355  	want := mk("nn")
   356  	m := newMatcher(benchHave, nil)
   357  	for i := 0; i < b.N; i++ {
   358  		m.getBest(want)
   359  	}
   360  }
   361  
   362  func BenchmarkMatchAltScriptPresent(b *testing.B) {
   363  	want := mk("zh-Hant-CN")
   364  	m := newMatcher(benchHave, nil)
   365  	for i := 0; i < b.N; i++ {
   366  		m.getBest(want)
   367  	}
   368  }
   369  
   370  func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
   371  	want := mk("fr-Cyrl")
   372  	m := newMatcher(benchHave, nil)
   373  	for i := 0; i < b.N; i++ {
   374  		m.getBest(want)
   375  	}
   376  }
   377  
   378  func BenchmarkMatchLimitedExact(b *testing.B) {
   379  	want := []Tag{mk("he-NL"), mk("iw-NL")}
   380  	m := newMatcher(benchHave, nil)
   381  	for i := 0; i < b.N; i++ {
   382  		m.getBest(want...)
   383  	}
   384  }
   385  

View as plain text