...

Source file src/golang.org/x/net/publicsuffix/list_test.go

Documentation: golang.org/x/net/publicsuffix

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package publicsuffix
     6  
     7  import (
     8  	"sort"
     9  	"strings"
    10  	"testing"
    11  )
    12  
    13  func TestNodeLabel(t *testing.T) {
    14  	for i, want := range nodeLabels {
    15  		got := nodeLabel(uint32(i))
    16  		if got != want {
    17  			t.Errorf("%d: got %q, want %q", i, got, want)
    18  		}
    19  	}
    20  }
    21  
    22  func TestFind(t *testing.T) {
    23  	testCases := []string{
    24  		"",
    25  		"a",
    26  		"a0",
    27  		"aaaa",
    28  		"ao",
    29  		"ap",
    30  		"ar",
    31  		"aro",
    32  		"arp",
    33  		"arpa",
    34  		"arpaa",
    35  		"arpb",
    36  		"az",
    37  		"b",
    38  		"b0",
    39  		"ba",
    40  		"z",
    41  		"zu",
    42  		"zv",
    43  		"zw",
    44  		"zx",
    45  		"zy",
    46  		"zz",
    47  		"zzzz",
    48  	}
    49  	for _, tc := range testCases {
    50  		got := find(tc, 0, numTLD)
    51  		want := notFound
    52  		for i := uint32(0); i < numTLD; i++ {
    53  			if tc == nodeLabel(i) {
    54  				want = i
    55  				break
    56  			}
    57  		}
    58  		if got != want {
    59  			t.Errorf("%q: got %d, want %d", tc, got, want)
    60  		}
    61  	}
    62  }
    63  
    64  func TestICANN(t *testing.T) {
    65  	testCases := map[string]bool{
    66  		"foo.org":            true,
    67  		"foo.co.uk":          true,
    68  		"foo.dyndns.org":     false,
    69  		"foo.go.dyndns.org":  false,
    70  		"foo.blogspot.co.uk": false,
    71  		"foo.intranet":       false,
    72  	}
    73  	for domain, want := range testCases {
    74  		_, got := PublicSuffix(domain)
    75  		if got != want {
    76  			t.Errorf("%q: got %v, want %v", domain, got, want)
    77  		}
    78  	}
    79  }
    80  
// publicSuffixTestCases is the golden table shared by TestPublicSuffix,
// TestSlowPublicSuffix and BenchmarkPublicSuffix. Each entry gives an input
// domain, the public suffix expected for it (wantPS) and the expected value
// of the boolean ICANN result (wantICANN). The comments ahead of each group
// quote the public suffix list rules that the group exercises.
var publicSuffixTestCases = []struct {
	domain    string
	wantPS    string
	wantICANN bool
}{
	// Empty string.
	{"", "", false},

	// The .ao rules are:
	// ao
	// ed.ao
	// gv.ao
	// og.ao
	// co.ao
	// pb.ao
	// it.ao
	{"ao", "ao", true},
	{"www.ao", "ao", true},
	{"pb.ao", "pb.ao", true},
	{"www.pb.ao", "pb.ao", true},
	{"www.xxx.yyy.zzz.pb.ao", "pb.ao", true},

	// The .ar rules are:
	// ar
	// com.ar
	// edu.ar
	// gob.ar
	// gov.ar
	// int.ar
	// mil.ar
	// net.ar
	// org.ar
	// tur.ar
	// blogspot.com.ar (in the PRIVATE DOMAIN section).
	{"ar", "ar", true},
	{"www.ar", "ar", true},
	{"nic.ar", "ar", true},
	{"www.nic.ar", "ar", true},
	{"com.ar", "com.ar", true},
	{"www.com.ar", "com.ar", true},
	{"blogspot.com.ar", "blogspot.com.ar", false},                 // PRIVATE DOMAIN.
	{"www.blogspot.com.ar", "blogspot.com.ar", false},             // PRIVATE DOMAIN.
	{"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN.
	{"logspot.com.ar", "com.ar", true},
	{"zlogspot.com.ar", "com.ar", true},
	{"zblogspot.com.ar", "com.ar", true},

	// The .arpa rules are:
	// arpa
	// e164.arpa
	// in-addr.arpa
	// ip6.arpa
	// iris.arpa
	// uri.arpa
	// urn.arpa
	{"arpa", "arpa", true},
	{"www.arpa", "arpa", true},
	{"urn.arpa", "urn.arpa", true},
	{"www.urn.arpa", "urn.arpa", true},
	{"www.xxx.yyy.zzz.urn.arpa", "urn.arpa", true},

	// The relevant {kobe,kyoto}.jp rules are:
	// jp
	// *.kobe.jp
	// !city.kobe.jp
	// kyoto.jp
	// ide.kyoto.jp
	{"jp", "jp", true},
	{"kobe.jp", "jp", true},
	{"c.kobe.jp", "c.kobe.jp", true},
	{"b.c.kobe.jp", "c.kobe.jp", true},
	{"a.b.c.kobe.jp", "c.kobe.jp", true},
	{"city.kobe.jp", "kobe.jp", true},
	{"www.city.kobe.jp", "kobe.jp", true},
	{"kyoto.jp", "kyoto.jp", true},
	{"test.kyoto.jp", "kyoto.jp", true},
	{"ide.kyoto.jp", "ide.kyoto.jp", true},
	{"b.ide.kyoto.jp", "ide.kyoto.jp", true},
	{"a.b.ide.kyoto.jp", "ide.kyoto.jp", true},

	// The .tw rules are:
	// tw
	// edu.tw
	// gov.tw
	// mil.tw
	// com.tw
	// net.tw
	// org.tw
	// idv.tw
	// game.tw
	// ebiz.tw
	// club.tw
	// 網路.tw (xn--zf0ao64a.tw)
	// 組織.tw (xn--uc0atv.tw)
	// 商業.tw (xn--czrw28b.tw)
	// blogspot.tw
	{"tw", "tw", true},
	{"aaa.tw", "tw", true},
	{"www.aaa.tw", "tw", true},
	{"xn--czrw28b.aaa.tw", "tw", true},
	{"edu.tw", "edu.tw", true},
	{"www.edu.tw", "edu.tw", true},
	{"xn--czrw28b.edu.tw", "edu.tw", true},
	{"xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"www.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--kpry57d.tw", "tw", true},

	// The .uk rules are:
	// uk
	// ac.uk
	// co.uk
	// gov.uk
	// ltd.uk
	// me.uk
	// net.uk
	// nhs.uk
	// org.uk
	// plc.uk
	// police.uk
	// *.sch.uk
	// blogspot.co.uk (in the PRIVATE DOMAIN section).
	{"uk", "uk", true},
	{"aaa.uk", "uk", true},
	{"www.aaa.uk", "uk", true},
	{"mod.uk", "uk", true},
	{"www.mod.uk", "uk", true},
	{"sch.uk", "uk", true},
	{"mod.sch.uk", "mod.sch.uk", true},
	{"www.sch.uk", "www.sch.uk", true},
	{"co.uk", "co.uk", true},
	{"www.co.uk", "co.uk", true},
	{"blogspot.co.uk", "blogspot.co.uk", false}, // PRIVATE DOMAIN.
	{"blogspot.nic.uk", "uk", true},
	{"blogspot.sch.uk", "blogspot.sch.uk", true},

	// The .рф rules are:
	// рф (xn--p1ai)
	{"xn--p1ai", "xn--p1ai", true},
	{"aaa.xn--p1ai", "xn--p1ai", true},
	{"www.xxx.yyy.xn--p1ai", "xn--p1ai", true},

	// The .bd rules are:
	// *.bd
	{"bd", "bd", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.bd", "www.bd", true},
	{"xxx.www.bd", "www.bd", true},
	{"zzz.bd", "zzz.bd", true},
	{"www.zzz.bd", "zzz.bd", true},
	{"www.xxx.yyy.zzz.bd", "zzz.bd", true},

	// The .ck rules are:
	// *.ck
	// !www.ck
	{"ck", "ck", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.ck", "ck", true},
	{"xxx.www.ck", "ck", true},
	{"zzz.ck", "zzz.ck", true},
	{"www.zzz.ck", "zzz.ck", true},
	{"www.xxx.yyy.zzz.ck", "zzz.ck", true},

	// The .myjino.ru rules (in the PRIVATE DOMAIN section) are:
	// myjino.ru
	// *.hosting.myjino.ru
	// *.landing.myjino.ru
	// *.spectrum.myjino.ru
	// *.vps.myjino.ru
	{"myjino.ru", "myjino.ru", false},
	{"aaa.myjino.ru", "myjino.ru", false},
	{"bbb.ccc.myjino.ru", "myjino.ru", false},
	{"hosting.ddd.myjino.ru", "myjino.ru", false},
	{"landing.myjino.ru", "myjino.ru", false},
	{"www.landing.myjino.ru", "www.landing.myjino.ru", false},
	{"spectrum.vps.myjino.ru", "spectrum.vps.myjino.ru", false},

	// The .uberspace.de rules (in the PRIVATE DOMAIN section) are:
	// *.uberspace.de
	{"uberspace.de", "de", true}, // "de" is in the ICANN DOMAIN section. See footnote (†).
	{"aaa.uberspace.de", "aaa.uberspace.de", false},
	{"bbb.ccc.uberspace.de", "ccc.uberspace.de", false},

	// There are no .nosuchtld rules.
	{"nosuchtld", "nosuchtld", false},
	{"foo.nosuchtld", "nosuchtld", false},
	{"bar.foo.nosuchtld", "nosuchtld", false},

	// (†) There is some disagreement on how wildcards behave: what should the
	// public suffix of "platform.sh" be when both "*.platform.sh" and "sh" are
	// in the PSL, but "platform.sh" is not? Two possible answers are
	// "platform.sh" and "sh", there are valid arguments for either behavior,
	// and different browsers have implemented different behaviors.
	//
	// This implementation, Go's golang.org/x/net/publicsuffix, returns "sh",
	// the same as a literal interpretation of the "Formal Algorithm" section
	// of https://publicsuffix.org/list/
	//
	// Together, the TestPublicSuffix and TestSlowPublicSuffix tests check that
	// the Go implementation (func PublicSuffix in list.go) and the literal
	// interpretation (func slowPublicSuffix in list_test.go) produce the same
	// (golden) results on every test case in this publicSuffixTestCases slice,
	// including some "platform.sh" style cases.
	//
	// More discussion of "the platform.sh problem" is at:
	//  - https://github.com/publicsuffix/list/issues/694
	//  - https://bugzilla.mozilla.org/show_bug.cgi?id=1124625#c6
	//  - https://wiki.mozilla.org/Public_Suffix_List/platform.sh_Problem
}
   288  
   289  func BenchmarkPublicSuffix(b *testing.B) {
   290  	for i := 0; i < b.N; i++ {
   291  		for _, tc := range publicSuffixTestCases {
   292  			List.PublicSuffix(tc.domain)
   293  		}
   294  	}
   295  }
   296  
   297  func TestPublicSuffix(t *testing.T) {
   298  	for _, tc := range publicSuffixTestCases {
   299  		gotPS, gotICANN := PublicSuffix(tc.domain)
   300  		if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
   301  			t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
   302  		}
   303  	}
   304  }
   305  
   306  func TestSlowPublicSuffix(t *testing.T) {
   307  	for _, tc := range publicSuffixTestCases {
   308  		gotPS, gotICANN := slowPublicSuffix(tc.domain)
   309  		if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
   310  			t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
   311  		}
   312  	}
   313  }
   314  
   315  func TestNumICANNRules(t *testing.T) {
   316  	if numICANNRules <= 0 {
   317  		t.Fatal("no ICANN rules")
   318  	}
   319  	if numICANNRules >= len(rules) {
   320  		t.Fatal("no Private rules")
   321  	}
   322  	// Check the last ICANN and first Private rules. If the underlying public
   323  	// suffix list changes, we may need to update these hard-coded checks.
   324  	if got, want := rules[numICANNRules-1], "zuerich"; got != want {
   325  		t.Errorf("last ICANN rule: got %q, wawnt %q", got, want)
   326  	}
   327  	if got, want := rules[numICANNRules], "cc.ua"; got != want {
   328  		t.Errorf("first Private rule: got %q, wawnt %q", got, want)
   329  	}
   330  }
   331  
// slowPublicSuffixRule is one candidate rule as collected by
// slowPublicSuffix and ordered by byPriority.
type slowPublicSuffixRule struct {
	// ruleParts holds the rule's labels, i.e. the rule string split on ".",
	// leftmost label first. The first label may start with '*' or '!'.
	ruleParts []string
	// icann is set by slowPublicSuffix to true when the rule's index in the
	// rules table is below numICANNRules, and to false for the fallback "*"
	// rule.
	icann     bool
}
   336  
   337  // slowPublicSuffix implements the canonical (but O(number of rules)) public
   338  // suffix algorithm described at http://publicsuffix.org/list/.
   339  //
   340  // 1. Match domain against all rules and take note of the matching ones.
   341  // 2. If no rules match, the prevailing rule is "*".
   342  // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
   343  // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
   344  // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
   345  // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
   346  // 7. The registered or registrable domain is the public suffix plus one additional label.
   347  //
   348  // This function returns the public suffix, not the registrable domain, and so
   349  // it stops after step 6.
   350  func slowPublicSuffix(domain string) (string, bool) {
   351  	match := func(rulePart, domainPart string) bool {
   352  		switch rulePart[0] {
   353  		case '*':
   354  			return true
   355  		case '!':
   356  			return rulePart[1:] == domainPart
   357  		}
   358  		return rulePart == domainPart
   359  	}
   360  
   361  	domainParts := strings.Split(domain, ".")
   362  	var matchingRules []slowPublicSuffixRule
   363  
   364  loop:
   365  	for i, rule := range rules {
   366  		ruleParts := strings.Split(rule, ".")
   367  		if len(domainParts) < len(ruleParts) {
   368  			continue
   369  		}
   370  		for i := range ruleParts {
   371  			rulePart := ruleParts[len(ruleParts)-1-i]
   372  			domainPart := domainParts[len(domainParts)-1-i]
   373  			if !match(rulePart, domainPart) {
   374  				continue loop
   375  			}
   376  		}
   377  		matchingRules = append(matchingRules, slowPublicSuffixRule{
   378  			ruleParts: ruleParts,
   379  			icann:     i < numICANNRules,
   380  		})
   381  	}
   382  	if len(matchingRules) == 0 {
   383  		matchingRules = append(matchingRules, slowPublicSuffixRule{
   384  			ruleParts: []string{"*"},
   385  			icann:     false,
   386  		})
   387  	} else {
   388  		sort.Sort(byPriority(matchingRules))
   389  	}
   390  
   391  	prevailing := matchingRules[0]
   392  	if prevailing.ruleParts[0][0] == '!' {
   393  		prevailing.ruleParts = prevailing.ruleParts[1:]
   394  	}
   395  	if prevailing.ruleParts[0][0] == '*' {
   396  		replaced := domainParts[len(domainParts)-len(prevailing.ruleParts)]
   397  		prevailing.ruleParts = append([]string{replaced}, prevailing.ruleParts[1:]...)
   398  	}
   399  	return strings.Join(prevailing.ruleParts, "."), prevailing.icann
   400  }
   401  
   402  type byPriority []slowPublicSuffixRule
   403  
   404  func (b byPriority) Len() int      { return len(b) }
   405  func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
   406  func (b byPriority) Less(i, j int) bool {
   407  	if b[i].ruleParts[0][0] == '!' {
   408  		return true
   409  	}
   410  	if b[j].ruleParts[0][0] == '!' {
   411  		return false
   412  	}
   413  	return len(b[i].ruleParts) > len(b[j].ruleParts)
   414  }
   415  
// eTLDPlusOneTestCases come from
// https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
//
// Each entry pairs an input domain with the string that
// TestEffectiveTLDPlusOne expects as the first result of EffectiveTLDPlusOne.
// An empty want means the first result is expected to be "".
var eTLDPlusOneTestCases = []struct {
	domain, want string
}{
	// Empty input.
	{"", ""},
	// Unlisted TLD.
	{"example", ""},
	{"example.example", "example.example"},
	{"b.example.example", "example.example"},
	{"a.b.example.example", "example.example"},
	// TLD with only 1 rule.
	{"biz", ""},
	{"domain.biz", "domain.biz"},
	{"b.domain.biz", "domain.biz"},
	{"a.b.domain.biz", "domain.biz"},
	// TLD with some 2-level rules.
	{"com", ""},
	{"example.com", "example.com"},
	{"b.example.com", "example.com"},
	{"a.b.example.com", "example.com"},
	{"uk.com", ""},
	{"example.uk.com", "example.uk.com"},
	{"b.example.uk.com", "example.uk.com"},
	{"a.b.example.uk.com", "example.uk.com"},
	{"test.ac", "test.ac"},
	// TLD with only 1 (wildcard) rule.
	{"mm", ""},
	{"c.mm", ""},
	{"b.c.mm", "b.c.mm"},
	{"a.b.c.mm", "b.c.mm"},
	// More complex TLD.
	{"jp", ""},
	{"test.jp", "test.jp"},
	{"www.test.jp", "test.jp"},
	{"ac.jp", ""},
	{"test.ac.jp", "test.ac.jp"},
	{"www.test.ac.jp", "test.ac.jp"},
	{"kyoto.jp", ""},
	{"test.kyoto.jp", "test.kyoto.jp"},
	{"ide.kyoto.jp", ""},
	{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"c.kobe.jp", ""},
	{"b.c.kobe.jp", "b.c.kobe.jp"},
	{"a.b.c.kobe.jp", "b.c.kobe.jp"},
	{"city.kobe.jp", "city.kobe.jp"},
	{"www.city.kobe.jp", "city.kobe.jp"},
	// TLD with a wildcard rule and exceptions.
	{"ck", ""},
	{"test.ck", ""},
	{"b.test.ck", "b.test.ck"},
	{"a.b.test.ck", "b.test.ck"},
	{"www.ck", "www.ck"},
	{"www.www.ck", "www.ck"},
	// US K12.
	{"us", ""},
	{"test.us", "test.us"},
	{"www.test.us", "test.us"},
	{"ak.us", ""},
	{"test.ak.us", "test.ak.us"},
	{"www.test.ak.us", "test.ak.us"},
	{"k12.ak.us", ""},
	{"test.k12.ak.us", "test.k12.ak.us"},
	{"www.test.k12.ak.us", "test.k12.ak.us"},
	// Punycoded IDN labels.
	{"xn--85x722f.com.cn", "xn--85x722f.com.cn"},
	{"xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
	{"www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
	{"shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"},
	{"xn--55qx5d.cn", ""},
	{"xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
	{"www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
	{"shishi.xn--fiqs8s", "shishi.xn--fiqs8s"},
	{"xn--fiqs8s", ""},

	// Invalid input: leading, trailing or doubled dots.
	{".", ""},
	{"de.", ""},
	{".de", ""},
	{".com.au", ""},
	{"com.au.", ""},
	{"com..au", ""},
}
   501  
   502  func TestEffectiveTLDPlusOne(t *testing.T) {
   503  	for _, tc := range eTLDPlusOneTestCases {
   504  		got, _ := EffectiveTLDPlusOne(tc.domain)
   505  		if got != tc.want {
   506  			t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
   507  		}
   508  	}
   509  }
   510  

View as plain text