...

Source file src/golang.org/x/text/internal/export/idna/idna10.0.0.go

Documentation: golang.org/x/text/internal/export/idna

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build go1.10
     6  
     7  //go:generate go run gen.go gen_trieval.go gen_common.go
     8  
     9  // Package idna implements IDNA2008 using the compatibility processing
    10  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
    11  // deal with the transition from IDNA2003.
    12  //
// IDNA2008 (Internationalized Domain Names for Applications) is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    15  // UTS #46 is defined in https://www.unicode.org/reports/tr46.
    16  // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
    17  // differences between these two standards.
    18  package idna // import "golang.org/x/text/internal/export/idna"
    19  
    20  import (
    21  	"fmt"
    22  	"strings"
    23  	"unicode/utf8"
    24  
    25  	"golang.org/x/text/secure/bidirule"
    26  	"golang.org/x/text/unicode/bidi"
    27  	"golang.org/x/text/unicode/norm"
    28  )
    29  
    30  // NOTE: Unlike common practice in Go APIs, the functions will return a
    31  // sanitized domain name in case of errors. Browsers sometimes use a partially
    32  // evaluated string as lookup.
    33  // TODO: the current error handling is, in my opinion, the least opinionated.
    34  // Other strategies are also viable, though:
    35  // Option 1) Return an empty string in case of error, but allow the user to
    36  //    specify explicitly which errors to ignore.
    37  // Option 2) Return the partially evaluated string if it is itself a valid
    38  //    string, otherwise return the empty string in case of error.
    39  // Option 3) Option 1 and 2.
    40  // Option 4) Always return an empty string for now and implement Option 1 as
    41  //    needed, and document that the return string may not be empty in case of
    42  //    error in the future.
    43  // I think Option 1 is best, but it is quite opinionated.
    44  
    45  // ToASCII is a wrapper for Punycode.ToASCII.
    46  func ToASCII(s string) (string, error) {
    47  	return Punycode.process(s, true)
    48  }
    49  
    50  // ToUnicode is a wrapper for Punycode.ToUnicode.
    51  func ToUnicode(s string) (string, error) {
    52  	return Punycode.process(s, false)
    53  }
    54  
// An Option configures a Profile at creation time.
//
// An Option is a function that mutates the options being assembled for a
// Profile. New applies the supplied options in the order given, so a later
// option overrides the fields written by an earlier one.
type Option func(*options)
    57  
    58  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    59  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    60  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    61  // compatibility. It is used by some browsers when resolving domain names. This
    62  // option is only meaningful if combined with MapForLookup.
    63  func Transitional(transitional bool) Option {
    64  	return func(o *options) { o.transitional = transitional }
    65  }
    66  
    67  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    68  // are longer than allowed by the RFC.
    69  //
    70  // This option corresponds to the VerifyDnsLength flag in UTS #46.
    71  func VerifyDNSLength(verify bool) Option {
    72  	return func(o *options) { o.verifyDNSLength = verify }
    73  }
    74  
    75  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    76  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    77  func RemoveLeadingDots(remove bool) Option {
    78  	return func(o *options) { o.removeLeadingDots = remove }
    79  }
    80  
    81  // ValidateLabels sets whether to check the mandatory label validation criteria
    82  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    83  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    84  // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
    85  // in UTS #46.
    86  func ValidateLabels(enable bool) Option {
    87  	return func(o *options) {
    88  		// Don't override existing mappings, but set one that at least checks
    89  		// normalization if it is not set.
    90  		if o.mapping == nil && enable {
    91  			o.mapping = normalize
    92  		}
    93  		o.trie = trie
    94  		o.checkJoiners = enable
    95  		o.checkHyphens = enable
    96  		if enable {
    97  			o.fromPuny = validateFromPunycode
    98  		} else {
    99  			o.fromPuny = nil
   100  		}
   101  	}
   102  }
   103  
   104  // CheckHyphens sets whether to check for correct use of hyphens ('-') in
   105  // labels. Most web browsers do not have this option set, since labels such as
   106  // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
   107  //
   108  // This option corresponds to the CheckHyphens flag in UTS #46.
   109  func CheckHyphens(enable bool) Option {
   110  	return func(o *options) { o.checkHyphens = enable }
   111  }
   112  
   113  // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
   114  // A of RFC 5892, concerning the use of joiner runes.
   115  //
   116  // This option corresponds to the CheckJoiners flag in UTS #46.
   117  func CheckJoiners(enable bool) Option {
   118  	return func(o *options) {
   119  		o.trie = trie
   120  		o.checkJoiners = enable
   121  	}
   122  }
   123  
   124  // StrictDomainName limits the set of permissible ASCII characters to those
   125  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
   126  // hyphen). This is set by default for MapForLookup and ValidateForRegistration,
   127  // but is only useful if ValidateLabels is set.
   128  //
   129  // This option is useful, for instance, for browsers that allow characters
   130  // outside this range, for example a '_' (U+005F LOW LINE). See
   131  // http://www.rfc-editor.org/std/std3.txt for more details.
   132  //
   133  // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
   134  func StrictDomainName(use bool) Option {
   135  	return func(o *options) { o.useSTD3Rules = use }
   136  }
   137  
   138  // NOTE: the following options pull in tables. The tables should not be linked
   139  // in as long as the options are not used.
   140  
   141  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   142  // that relies on proper validation of labels should include this rule.
   143  //
   144  // This option corresponds to the CheckBidi flag in UTS #46.
   145  func BidiRule() Option {
   146  	return func(o *options) { o.bidirule = bidirule.ValidString }
   147  }
   148  
   149  // ValidateForRegistration sets validation options to verify that a given IDN is
   150  // properly formatted for registration as defined by Section 4 of RFC 5891.
   151  func ValidateForRegistration() Option {
   152  	return func(o *options) {
   153  		o.mapping = validateRegistration
   154  		StrictDomainName(true)(o)
   155  		ValidateLabels(true)(o)
   156  		VerifyDNSLength(true)(o)
   157  		BidiRule()(o)
   158  	}
   159  }
   160  
   161  // MapForLookup sets validation and mapping options such that a given IDN is
   162  // transformed for domain name lookup according to the requirements set out in
   163  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   164  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   165  // to add this check.
   166  //
   167  // The mappings include normalization and mapping case, width and other
   168  // compatibility mappings.
   169  func MapForLookup() Option {
   170  	return func(o *options) {
   171  		o.mapping = validateAndMap
   172  		StrictDomainName(true)(o)
   173  		ValidateLabels(true)(o)
   174  	}
   175  }
   176  
// options holds the configuration assembled by the Option functions. It is
// embedded in Profile, so its fields are read directly as p.<field>.
type options struct {
	// transitional is consulted by simplify: when false, deviation
	// characters are treated as valid. ToUnicode always clears it on its
	// working copy.
	transitional bool
	// useSTD3Rules makes simplify treat the disallowedSTD3* categories as
	// disallowed instead of mapped/valid.
	useSTD3Rules bool
	// checkHyphens enables the hyphen position checks in validateLabel.
	checkHyphens bool
	// checkJoiners enables the modifier and joiner checks in validateLabel;
	// those checks look runes up in trie.
	checkJoiners bool
	// verifyDNSLength makes process and validateLabel report empty or
	// over-long labels and over-long names.
	verifyDNSLength bool
	// removeLeadingDots makes process strip leading '.' bytes after mapping.
	removeLeadingDots bool

	// trie is the lookup table used by validateLabel. The options that enable
	// checkJoiners also set it.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   198  
// A Profile defines the configuration of an IDNA mapper.
//
// It consists solely of the embedded options; the zero value performs no
// mapping and no label validation.
type Profile struct {
	options
}
   203  
   204  func apply(o *options, opts []Option) {
   205  	for _, f := range opts {
   206  		f(o)
   207  	}
   208  }
   209  
   210  // New creates a new Profile.
   211  //
   212  // With no options, the returned Profile is the most permissive and equals the
   213  // Punycode Profile. Options can be passed to further restrict the Profile. The
   214  // MapForLookup and ValidateForRegistration options set a collection of options,
   215  // for lookup and registration purposes respectively, which can be tailored by
   216  // adding more fine-grained options, where later options override earlier
   217  // options.
   218  func New(o ...Option) *Profile {
   219  	p := &Profile{}
   220  	apply(&p.options, o)
   221  	return p
   222  }
   223  
   224  // ToASCII converts a domain or domain label to its ASCII form. For example,
   225  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   226  // ToASCII("golang") is "golang". If an error is encountered it will return
   227  // an error and a (partially) processed result.
   228  func (p *Profile) ToASCII(s string) (string, error) {
   229  	return p.process(s, true)
   230  }
   231  
   232  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   233  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   234  // ToUnicode("golang") is "golang". If an error is encountered it will return
   235  // an error and a (partially) processed result.
   236  func (p *Profile) ToUnicode(s string) (string, error) {
   237  	pp := *p
   238  	pp.transitional = false
   239  	return pp.process(s, false)
   240  }
   241  
   242  // String reports a string with a description of the profile for debugging
   243  // purposes. The string format may change with different versions.
   244  func (p *Profile) String() string {
   245  	s := ""
   246  	if p.transitional {
   247  		s = "Transitional"
   248  	} else {
   249  		s = "NonTransitional"
   250  	}
   251  	if p.useSTD3Rules {
   252  		s += ":UseSTD3Rules"
   253  	}
   254  	if p.checkHyphens {
   255  		s += ":CheckHyphens"
   256  	}
   257  	if p.checkJoiners {
   258  		s += ":CheckJoiners"
   259  	}
   260  	if p.verifyDNSLength {
   261  		s += ":VerifyDNSLength"
   262  	}
   263  	return s
   264  }
   265  
// Predefined profiles. Each exported variable is initialised from the
// unexported profile value declared further down in this block.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode is the zero Profile: no mapping, no label validation.
	punycode = &Profile{}
	// lookup maps and validates labels; whether it is transitional is decided
	// by transitionalLookup, which is defined outside this file.
	lookup = &Profile{options{
		transitional: transitionalLookup,
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	// display uses the same settings as lookup except that transitional is
	// left at its zero value (false).
	display = &Profile{options{
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	// registration validates without mapping (validateRegistration) and
	// additionally verifies DNS length limits.
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   319  
   320  type labelError struct{ label, code_ string }
   321  
   322  func (e labelError) code() string { return e.code_ }
   323  func (e labelError) Error() string {
   324  	return fmt.Sprintf("idna: invalid label %q", e.label)
   325  }
   326  
   327  type runeError rune
   328  
   329  func (e runeError) code() string { return "P1" }
   330  func (e runeError) Error() string {
   331  	return fmt.Sprintf("idna: disallowed rune %U", e)
   332  }
   333  
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// It runs the profile's mapping step (if any) on s, then walks the labels of
// the result: labels carrying the ACE prefix are decoded and validated, other
// labels are validated as they are. When toASCII is true, every non-ASCII
// label is encoded again afterwards and, if verifyDNSLength is set, label and
// name lengths are checked.
//
// Only the first error encountered is kept; processing continues after an
// error, so the returned string is the (partially) processed result even when
// the returned error is non-nil.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	var isBidi bool
	if p.mapping != nil {
		s, isBidi, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// TODO: allow for a quick check of the tables data.
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggest we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// A decoded label that is not left-to-right makes the whole name
			// subject to the Bidi rule check below.
			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
			// The decoded form replaces the label even if validation of it
			// fails afterwards.
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// The Bidi rule is applied to every label, but only when some label was
	// found to be bidirectional and no earlier error has been recorded.
	if isBidi && p.bidirule != nil && err == nil {
		for labels.reset(); !labels.done(); labels.next() {
			if !p.bidirule(labels.label()) {
				err = &labelError{s, "B"}
				break
			}
		}
	}
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				// The encoded label is stored even when encode reports an
				// error; the error is kept only if it is the first one.
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				label = a
				labels.set(a)
			}
			// n is the byte length of the (possibly encoded) label.
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}
   426  
   427  func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
   428  	// TODO: consider first doing a quick check to see if any of these checks
   429  	// need to be done. This will make it slower in the general case, but
   430  	// faster in the common case.
   431  	mapped = norm.NFC.String(s)
   432  	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
   433  	return mapped, isBidi, nil
   434  }
   435  
   436  func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
   437  	// TODO: filter need for normalization in loop below.
   438  	if !norm.NFC.IsNormalString(s) {
   439  		return s, false, &labelError{s, "V1"}
   440  	}
   441  	for i := 0; i < len(s); {
   442  		v, sz := trie.lookupString(s[i:])
   443  		if sz == 0 {
   444  			return s, bidi, runeError(utf8.RuneError)
   445  		}
   446  		bidi = bidi || info(v).isBidi(s[i:])
   447  		// Copy bytes not copied so far.
   448  		switch p.simplify(info(v).category()) {
   449  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   450  		// for strict conformance to IDNA2008.
   451  		case valid, deviation:
   452  		case disallowed, mapped, unknown, ignored:
   453  			r, _ := utf8.DecodeRuneInString(s[i:])
   454  			return s, bidi, runeError(r)
   455  		}
   456  		i += sz
   457  	}
   458  	return s, bidi, nil
   459  }
   460  
   461  func (c info) isBidi(s string) bool {
   462  	if !c.isMapped() {
   463  		return c&attributesMask == rtl
   464  	}
   465  	// TODO: also store bidi info for mapped data. This is possible, but a bit
   466  	// cumbersome and not for the common case.
   467  	p, _ := bidi.LookupString(s)
   468  	switch p.Class() {
   469  	case bidi.R, bidi.AL, bidi.AN:
   470  		return true
   471  	}
   472  	return false
   473  }
   474  
// validateAndMap is the mapping step used for lookup. It walks s rune by
// rune, applying the mapping each rune's (simplified) category calls for, and
// returns the mapped string in NFC form. The bidi result is true once any
// rune reports isBidi. err holds the first disallowed or undecodable rune
// encountered; mapping continues past disallowed runes, which are kept in the
// output.
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
	var (
		// b accumulates the output once the first change is needed; until
		// then it stays nil and s itself is the result.
		b []byte
		// k is the index in s up to which input has been accounted for in b.
		k int
	)
	// combinedInfoBits contains the or-ed bits of all runes. We use this
	// to derive the mayNeedNorm bit later. This may trigger normalization
	// overeagerly, but it will not do so in the common case. The end result
	// is another 10% saving on BenchmarkProfile for the common case.
	var combinedInfoBits info
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			// The input cannot be decoded at i: emit what precedes it,
			// substitute U+FFFD for the remainder and stop.
			b = append(b, s[k:i]...)
			b = append(b, "\ufffd"...)
			k = len(s)
			if err == nil {
				err = runeError(utf8.RuneError)
			}
			break
		}
		combinedInfoBits |= info(v)
		bidi = bidi || info(v).isBidi(s[i:])
		start := i
		i += sz
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		case valid:
			// Nothing to emit yet; the rune is copied later as part of s[k:...].
			continue
		case disallowed:
			// Record the first disallowed rune but leave it in the output.
			if err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError(r)
			}
			continue
		case mapped, deviation:
			b = append(b, s[k:start]...)
			b = info(v).appendMapping(b, s[start:i])
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		// Everything up to i is now reflected in b.
		k = i
	}
	if k == 0 {
		// No changes so far.
		if combinedInfoBits&mayNeedNorm != 0 {
			s = norm.NFC.String(s)
		}
	} else {
		// Flush the unmodified tail, then normalize only if the quick check
		// does not already cover the whole buffer.
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters require strings as input.
		s = string(b)
	}
	return s, bidi, err
}
   537  
   538  // A labelIter allows iterating over domain name labels.
   539  type labelIter struct {
   540  	orig     string
   541  	slice    []string
   542  	curStart int
   543  	curEnd   int
   544  	i        int
   545  }
   546  
   547  func (l *labelIter) reset() {
   548  	l.curStart = 0
   549  	l.curEnd = 0
   550  	l.i = 0
   551  }
   552  
   553  func (l *labelIter) done() bool {
   554  	return l.curStart >= len(l.orig)
   555  }
   556  
   557  func (l *labelIter) result() string {
   558  	if l.slice != nil {
   559  		return strings.Join(l.slice, ".")
   560  	}
   561  	return l.orig
   562  }
   563  
   564  func (l *labelIter) label() string {
   565  	if l.slice != nil {
   566  		return l.slice[l.i]
   567  	}
   568  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   569  	l.curEnd = l.curStart + p
   570  	if p == -1 {
   571  		l.curEnd = len(l.orig)
   572  	}
   573  	return l.orig[l.curStart:l.curEnd]
   574  }
   575  
   576  // next sets the value to the next label. It skips the last label if it is empty.
   577  func (l *labelIter) next() {
   578  	l.i++
   579  	if l.slice != nil {
   580  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   581  			l.curStart = len(l.orig)
   582  		}
   583  	} else {
   584  		l.curStart = l.curEnd + 1
   585  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   586  			l.curStart = len(l.orig)
   587  		}
   588  	}
   589  }
   590  
   591  func (l *labelIter) set(s string) {
   592  	if l.slice == nil {
   593  		l.slice = strings.Split(l.orig, ".")
   594  	}
   595  	l.slice[l.i] = s
   596  }
   597  
// acePrefix is the ASCII Compatible Encoding prefix. process treats any label
// starting with it as punycode to decode, and passes it to encode when
// converting non-ASCII labels back to ASCII.
const acePrefix = "xn--"
   600  
   601  func (p *Profile) simplify(cat category) category {
   602  	switch cat {
   603  	case disallowedSTD3Mapped:
   604  		if p.useSTD3Rules {
   605  			cat = disallowed
   606  		} else {
   607  			cat = mapped
   608  		}
   609  	case disallowedSTD3Valid:
   610  		if p.useSTD3Rules {
   611  			cat = disallowed
   612  		} else {
   613  			cat = valid
   614  		}
   615  	case deviation:
   616  		if !p.transitional {
   617  			cat = valid
   618  		}
   619  	case validNV8, validXV8:
   620  		// TODO: handle V2008
   621  		cat = valid
   622  	}
   623  	return cat
   624  }
   625  
   626  func validateFromPunycode(p *Profile, s string) error {
   627  	if !norm.NFC.IsNormalString(s) {
   628  		return &labelError{s, "V1"}
   629  	}
   630  	// TODO: detect whether string may have to be normalized in the following
   631  	// loop.
   632  	for i := 0; i < len(s); {
   633  		v, sz := trie.lookupString(s[i:])
   634  		if sz == 0 {
   635  			return runeError(utf8.RuneError)
   636  		}
   637  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   638  			return &labelError{s, "V6"}
   639  		}
   640  		i += sz
   641  	}
   642  	return nil
   643  }
   644  
// UTF-8 encoded joiner runes that validateLabel searches for and compares
// against when applying the joiner context rules.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   649  
// joinState is a state of the state machine that validateLabel runs over the
// runes of a label to check the joiner context rules. The values double as
// row indices into joinStates.
type joinState int8

const (
	// stateStart is the initial state. Being the zero value, it is also the
	// target of every transition that joinStates leaves unspecified.
	stateStart joinState = iota
	// stateVirama is entered from stateStart on a virama.
	stateVirama
	// stateBefore is entered on a joiningL or joiningD rune; a ZWNJ is
	// accepted only from here.
	stateBefore
	// stateBeforeVirama is entered from stateBefore on a virama.
	stateBeforeVirama
	// stateAfter is entered from stateBefore on a ZWNJ. A label that ends in
	// this state is rejected.
	stateAfter
	// stateFAIL is absorbing: every transition out of it leads back to it.
	stateFAIL
)
   660  
// joinStates is the transition table of the joiner state machine, indexed
// first by the current joinState and then by the join type of the next rune.
// validateLabel steps through it once per rune, and once more with joinVirama
// for runes that are virama modifiers. Entries that are not listed are zero,
// that is, they lead back to stateStart.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	// Joiners directly after a virama are not listed and so reset to
	// stateStart instead of failing.
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	// Once failed, always failed: every input keeps the machine in stateFAIL.
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   706  
   707  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   708  // already implicitly satisfied by the overall implementation.
   709  func (p *Profile) validateLabel(s string) (err error) {
   710  	if s == "" {
   711  		if p.verifyDNSLength {
   712  			return &labelError{s, "A4"}
   713  		}
   714  		return nil
   715  	}
   716  	if p.checkHyphens {
   717  		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   718  			return &labelError{s, "V2"}
   719  		}
   720  		if s[0] == '-' || s[len(s)-1] == '-' {
   721  			return &labelError{s, "V3"}
   722  		}
   723  	}
   724  	if !p.checkJoiners {
   725  		return nil
   726  	}
   727  	trie := p.trie // p.checkJoiners is only set if trie is set.
   728  	// TODO: merge the use of this in the trie.
   729  	v, sz := trie.lookupString(s)
   730  	x := info(v)
   731  	if x.isModifier() {
   732  		return &labelError{s, "V5"}
   733  	}
   734  	// Quickly return in the absence of zero-width (non) joiners.
   735  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   736  		return nil
   737  	}
   738  	st := stateStart
   739  	for i := 0; ; {
   740  		jt := x.joinType()
   741  		if s[i:i+sz] == zwj {
   742  			jt = joinZWJ
   743  		} else if s[i:i+sz] == zwnj {
   744  			jt = joinZWNJ
   745  		}
   746  		st = joinStates[st][jt]
   747  		if x.isViramaModifier() {
   748  			st = joinStates[st][joinVirama]
   749  		}
   750  		if i += sz; i == len(s) {
   751  			break
   752  		}
   753  		v, sz = trie.lookupString(s[i:])
   754  		x = info(v)
   755  	}
   756  	if st == stateFAIL || st == stateAfter {
   757  		return &labelError{s, "C"}
   758  	}
   759  	return nil
   760  }
   761  
   762  func ascii(s string) bool {
   763  	for i := 0; i < len(s); i++ {
   764  		if s[i] >= utf8.RuneSelf {
   765  			return false
   766  		}
   767  	}
   768  	return true
   769  }
   770  

View as plain text