...

Source file src/golang.org/x/text/internal/export/idna/idna9.0.0.go

Documentation: golang.org/x/text/internal/export/idna

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !go1.10
     6  
     7  //go:generate go run gen.go gen_trieval.go gen_common.go
     8  
     9  // Package idna implements IDNA2008 using the compatibility processing
    10  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
    11  // deal with the transition from IDNA2003.
    12  //
    13  // IDNA2008 (Internationalized Domain Names for Applications) is defined in RFC
    14  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    15  // UTS #46 is defined in https://www.unicode.org/reports/tr46.
    16  // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
    17  // differences between these two standards.
    18  package idna // import "golang.org/x/text/internal/export/idna"
    19  
    20  import (
    21  	"fmt"
    22  	"strings"
    23  	"unicode/utf8"
    24  
    25  	"golang.org/x/text/secure/bidirule"
    26  	"golang.org/x/text/unicode/norm"
    27  )
    28  
    29  // NOTE: Unlike common practice in Go APIs, the functions will return a
    30  // sanitized domain name in case of errors. Browsers sometimes use a partially
    31  // evaluated string as lookup.
    32  // TODO: the current error handling is, in my opinion, the least opinionated.
    33  // Other strategies are also viable, though:
    34  // Option 1) Return an empty string in case of error, but allow the user to
    35  //    specify explicitly which errors to ignore.
    36  // Option 2) Return the partially evaluated string if it is itself a valid
    37  //    string, otherwise return the empty string in case of error.
    38  // Option 3) Option 1 and 2.
    39  // Option 4) Always return an empty string for now and implement Option 1 as
    40  //    needed, and document that the return string may not be empty in case of
    41  //    error in the future.
    42  // I think Option 1 is best, but it is quite opinionated.
    43  
    44  // ToASCII is a wrapper for Punycode.ToASCII.
    45  func ToASCII(s string) (string, error) {
    46  	return Punycode.process(s, true)
    47  }
    48  
    49  // ToUnicode is a wrapper for Punycode.ToUnicode.
    50  func ToUnicode(s string) (string, error) {
    51  	return Punycode.process(s, false)
    52  }
    53  
// An Option configures a Profile at creation time. Each Option is a function
// that mutates the options being assembled; New applies them in the order
// given, so a later Option can override an earlier one.
type Option func(*options)
    56  
    57  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    58  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    59  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    60  // compatibility. It is used by some browsers when resolving domain names. This
    61  // option is only meaningful if combined with MapForLookup.
    62  func Transitional(transitional bool) Option {
    63  	return func(o *options) { o.transitional = transitional }
    64  }
    65  
    66  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    67  // are longer than allowed by the RFC.
    68  //
    69  // This option corresponds to the VerifyDnsLength flag in UTS #46.
    70  func VerifyDNSLength(verify bool) Option {
    71  	return func(o *options) { o.verifyDNSLength = verify }
    72  }
    73  
    74  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    75  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    76  func RemoveLeadingDots(remove bool) Option {
    77  	return func(o *options) { o.removeLeadingDots = remove }
    78  }
    79  
    80  // ValidateLabels sets whether to check the mandatory label validation criteria
    81  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    82  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    83  // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
    84  // in UTS #46.
    85  func ValidateLabels(enable bool) Option {
    86  	return func(o *options) {
    87  		// Don't override existing mappings, but set one that at least checks
    88  		// normalization if it is not set.
    89  		if o.mapping == nil && enable {
    90  			o.mapping = normalize
    91  		}
    92  		o.trie = trie
    93  		o.checkJoiners = enable
    94  		o.checkHyphens = enable
    95  		if enable {
    96  			o.fromPuny = validateFromPunycode
    97  		} else {
    98  			o.fromPuny = nil
    99  		}
   100  	}
   101  }
   102  
   103  // CheckHyphens sets whether to check for correct use of hyphens ('-') in
   104  // labels. Most web browsers do not have this option set, since labels such as
   105  // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
   106  //
   107  // This option corresponds to the CheckHyphens flag in UTS #46.
   108  func CheckHyphens(enable bool) Option {
   109  	return func(o *options) { o.checkHyphens = enable }
   110  }
   111  
   112  // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
   113  // A of RFC 5892, concerning the use of joiner runes.
   114  //
   115  // This option corresponds to the CheckJoiners flag in UTS #46.
   116  func CheckJoiners(enable bool) Option {
   117  	return func(o *options) {
   118  		o.trie = trie
   119  		o.checkJoiners = enable
   120  	}
   121  }
   122  
   123  // StrictDomainName limits the set of permissible ASCII characters to those
   124  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
   125  // hyphen). This is set by default for MapForLookup and ValidateForRegistration,
   126  // but is only useful if ValidateLabels is set.
   127  //
   128  // This option is useful, for instance, for browsers that allow characters
   129  // outside this range, for example a '_' (U+005F LOW LINE). See
   130  // http://www.rfc-editor.org/std/std3.txt for more details.
   131  //
   132  // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
   133  func StrictDomainName(use bool) Option {
   134  	return func(o *options) { o.useSTD3Rules = use }
   135  }
   136  
   137  // NOTE: the following options pull in tables. The tables should not be linked
   138  // in as long as the options are not used.
   139  
   140  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   141  // that relies on proper validation of labels should include this rule.
   142  //
   143  // This option corresponds to the CheckBidi flag in UTS #46.
   144  func BidiRule() Option {
   145  	return func(o *options) { o.bidirule = bidirule.ValidString }
   146  }
   147  
   148  // ValidateForRegistration sets validation options to verify that a given IDN is
   149  // properly formatted for registration as defined by Section 4 of RFC 5891.
   150  func ValidateForRegistration() Option {
   151  	return func(o *options) {
   152  		o.mapping = validateRegistration
   153  		StrictDomainName(true)(o)
   154  		ValidateLabels(true)(o)
   155  		VerifyDNSLength(true)(o)
   156  		BidiRule()(o)
   157  	}
   158  }
   159  
   160  // MapForLookup sets validation and mapping options such that a given IDN is
   161  // transformed for domain name lookup according to the requirements set out in
   162  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   163  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   164  // to add this check.
   165  //
   166  // The mappings include normalization and mapping case, width and other
   167  // compatibility mappings.
   168  func MapForLookup() Option {
   169  	return func(o *options) {
   170  		o.mapping = validateAndMap
   171  		StrictDomainName(true)(o)
   172  		ValidateLabels(true)(o)
   173  		RemoveLeadingDots(true)(o)
   174  	}
   175  }
   176  
// options holds the configuration of a Profile. The zero value enables no
// checks and no mapping.
type options struct {
	// transitional keeps deviation characters classified as deviations
	// instead of treating them as valid.
	transitional bool
	// useSTD3Rules makes the disallowedSTD3* categories count as disallowed.
	useSTD3Rules bool
	// checkHyphens enables the hyphen placement checks on each label.
	checkHyphens bool
	// checkJoiners enables the modifier and ZWJ/ZWNJ context checks on each
	// label; it requires trie to be set.
	checkJoiners bool
	// verifyDNSLength reports empty input, empty labels and over-long
	// labels or names as errors.
	verifyDNSLength bool
	// removeLeadingDots strips leading '.' bytes after the mapping step.
	removeLeadingDots bool

	// trie is the lookup table used by the joiner checks.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (string, error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   198  
// A Profile defines the configuration of an IDNA mapper. It embeds the
// options assembled by New or by one of the predefined profiles.
type Profile struct {
	options
}
   203  
   204  func apply(o *options, opts []Option) {
   205  	for _, f := range opts {
   206  		f(o)
   207  	}
   208  }
   209  
   210  // New creates a new Profile.
   211  //
   212  // With no options, the returned Profile is the most permissive and equals the
   213  // Punycode Profile. Options can be passed to further restrict the Profile. The
   214  // MapForLookup and ValidateForRegistration options set a collection of options,
   215  // for lookup and registration purposes respectively, which can be tailored by
   216  // adding more fine-grained options, where later options override earlier
   217  // options.
   218  func New(o ...Option) *Profile {
   219  	p := &Profile{}
   220  	apply(&p.options, o)
   221  	return p
   222  }
   223  
   224  // ToASCII converts a domain or domain label to its ASCII form. For example,
   225  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   226  // ToASCII("golang") is "golang". If an error is encountered it will return
   227  // an error and a (partially) processed result.
   228  func (p *Profile) ToASCII(s string) (string, error) {
   229  	return p.process(s, true)
   230  }
   231  
   232  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   233  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   234  // ToUnicode("golang") is "golang". If an error is encountered it will return
   235  // an error and a (partially) processed result.
   236  func (p *Profile) ToUnicode(s string) (string, error) {
   237  	pp := *p
   238  	pp.transitional = false
   239  	return pp.process(s, false)
   240  }
   241  
   242  // String reports a string with a description of the profile for debugging
   243  // purposes. The string format may change with different versions.
   244  func (p *Profile) String() string {
   245  	s := ""
   246  	if p.transitional {
   247  		s = "Transitional"
   248  	} else {
   249  		s = "NonTransitional"
   250  	}
   251  	if p.useSTD3Rules {
   252  		s += ":UseSTD3Rules"
   253  	}
   254  	if p.checkHyphens {
   255  		s += ":CheckHyphens"
   256  	}
   257  	if p.checkJoiners {
   258  		s += ":CheckJoiners"
   259  	}
   260  	if p.verifyDNSLength {
   261  		s += ":VerifyDNSLength"
   262  	}
   263  	return s
   264  }
   265  
// Predefined profiles. The exported variables alias the unexported ones
// declared further down in this block.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode has no options set: no mapping and no validation.
	punycode = &Profile{}
	// lookup maps and validates with the transitional mapping enabled and
	// strips leading dots.
	lookup = &Profile{options{
		transitional:      true,
		removeLeadingDots: true,
		useSTD3Rules:      true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// display is configured like lookup except that the transitional
	// mapping is left off.
	display = &Profile{options{
		useSTD3Rules:      true,
		removeLeadingDots: true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// registration validates without mapping and additionally verifies DNS
	// length limits; leading dots are not removed.
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   321  
   322  type labelError struct{ label, code_ string }
   323  
   324  func (e labelError) code() string { return e.code_ }
   325  func (e labelError) Error() string {
   326  	return fmt.Sprintf("idna: invalid label %q", e.label)
   327  }
   328  
   329  type runeError rune
   330  
   331  func (e runeError) code() string { return "P1" }
   332  func (e runeError) Error() string {
   333  	return fmt.Sprintf("idna: disallowed rune %U", e)
   334  }
   335  
   336  // process implements the algorithm described in section 4 of UTS #46,
   337  // see https://www.unicode.org/reports/tr46.
   338  func (p *Profile) process(s string, toASCII bool) (string, error) {
   339  	var err error
   340  	if p.mapping != nil {
   341  		s, err = p.mapping(p, s)
   342  	}
   343  	// Remove leading empty labels.
   344  	if p.removeLeadingDots {
   345  		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
   346  		}
   347  	}
   348  	// It seems like we should only create this error on ToASCII, but the
   349  	// UTS 46 conformance tests suggests we should always check this.
   350  	if err == nil && p.verifyDNSLength && s == "" {
   351  		err = &labelError{s, "A4"}
   352  	}
   353  	labels := labelIter{orig: s}
   354  	for ; !labels.done(); labels.next() {
   355  		label := labels.label()
   356  		if label == "" {
   357  			// Empty labels are not okay. The label iterator skips the last
   358  			// label if it is empty.
   359  			if err == nil && p.verifyDNSLength {
   360  				err = &labelError{s, "A4"}
   361  			}
   362  			continue
   363  		}
   364  		if strings.HasPrefix(label, acePrefix) {
   365  			u, err2 := decode(label[len(acePrefix):])
   366  			if err2 != nil {
   367  				if err == nil {
   368  					err = err2
   369  				}
   370  				// Spec says keep the old label.
   371  				continue
   372  			}
   373  			labels.set(u)
   374  			if err == nil && p.fromPuny != nil {
   375  				err = p.fromPuny(p, u)
   376  			}
   377  			if err == nil {
   378  				// This should be called on NonTransitional, according to the
   379  				// spec, but that currently does not have any effect. Use the
   380  				// original profile to preserve options.
   381  				err = p.validateLabel(u)
   382  			}
   383  		} else if err == nil {
   384  			err = p.validateLabel(label)
   385  		}
   386  	}
   387  	if toASCII {
   388  		for labels.reset(); !labels.done(); labels.next() {
   389  			label := labels.label()
   390  			if !ascii(label) {
   391  				a, err2 := encode(acePrefix, label)
   392  				if err == nil {
   393  					err = err2
   394  				}
   395  				label = a
   396  				labels.set(a)
   397  			}
   398  			n := len(label)
   399  			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
   400  				err = &labelError{label, "A4"}
   401  			}
   402  		}
   403  	}
   404  	s = labels.result()
   405  	if toASCII && p.verifyDNSLength && err == nil {
   406  		// Compute the length of the domain name minus the root label and its dot.
   407  		n := len(s)
   408  		if n > 0 && s[n-1] == '.' {
   409  			n--
   410  		}
   411  		if len(s) < 1 || n > 253 {
   412  			err = &labelError{s, "A4"}
   413  		}
   414  	}
   415  	return s, err
   416  }
   417  
   418  func normalize(p *Profile, s string) (string, error) {
   419  	return norm.NFC.String(s), nil
   420  }
   421  
   422  func validateRegistration(p *Profile, s string) (string, error) {
   423  	if !norm.NFC.IsNormalString(s) {
   424  		return s, &labelError{s, "V1"}
   425  	}
   426  	for i := 0; i < len(s); {
   427  		v, sz := trie.lookupString(s[i:])
   428  		if sz == 0 {
   429  			return s, runeError(utf8.RuneError)
   430  		}
   431  		// Copy bytes not copied so far.
   432  		switch p.simplify(info(v).category()) {
   433  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   434  		// for strict conformance to IDNA2008.
   435  		case valid, deviation:
   436  		case disallowed, mapped, unknown, ignored:
   437  			r, _ := utf8.DecodeRuneInString(s[i:])
   438  			return s, runeError(r)
   439  		}
   440  		i += sz
   441  	}
   442  	return s, nil
   443  }
   444  
   445  func validateAndMap(p *Profile, s string) (string, error) {
   446  	var (
   447  		err error
   448  		b   []byte
   449  		k   int
   450  	)
   451  	for i := 0; i < len(s); {
   452  		v, sz := trie.lookupString(s[i:])
   453  		if sz == 0 {
   454  			b = append(b, s[k:i]...)
   455  			b = append(b, "\ufffd"...)
   456  			k = len(s)
   457  			if err == nil {
   458  				err = runeError(utf8.RuneError)
   459  			}
   460  			break
   461  		}
   462  		start := i
   463  		i += sz
   464  		// Copy bytes not copied so far.
   465  		switch p.simplify(info(v).category()) {
   466  		case valid:
   467  			continue
   468  		case disallowed:
   469  			if err == nil {
   470  				r, _ := utf8.DecodeRuneInString(s[start:])
   471  				err = runeError(r)
   472  			}
   473  			continue
   474  		case mapped, deviation:
   475  			b = append(b, s[k:start]...)
   476  			b = info(v).appendMapping(b, s[start:i])
   477  		case ignored:
   478  			b = append(b, s[k:start]...)
   479  			// drop the rune
   480  		case unknown:
   481  			b = append(b, s[k:start]...)
   482  			b = append(b, "\ufffd"...)
   483  		}
   484  		k = i
   485  	}
   486  	if k == 0 {
   487  		// No changes so far.
   488  		s = norm.NFC.String(s)
   489  	} else {
   490  		b = append(b, s[k:]...)
   491  		if norm.NFC.QuickSpan(b) != len(b) {
   492  			b = norm.NFC.Bytes(b)
   493  		}
   494  		// TODO: the punycode converters require strings as input.
   495  		s = string(b)
   496  	}
   497  	return s, err
   498  }
   499  
   500  // A labelIter allows iterating over domain name labels.
   501  type labelIter struct {
   502  	orig     string
   503  	slice    []string
   504  	curStart int
   505  	curEnd   int
   506  	i        int
   507  }
   508  
   509  func (l *labelIter) reset() {
   510  	l.curStart = 0
   511  	l.curEnd = 0
   512  	l.i = 0
   513  }
   514  
   515  func (l *labelIter) done() bool {
   516  	return l.curStart >= len(l.orig)
   517  }
   518  
   519  func (l *labelIter) result() string {
   520  	if l.slice != nil {
   521  		return strings.Join(l.slice, ".")
   522  	}
   523  	return l.orig
   524  }
   525  
   526  func (l *labelIter) label() string {
   527  	if l.slice != nil {
   528  		return l.slice[l.i]
   529  	}
   530  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   531  	l.curEnd = l.curStart + p
   532  	if p == -1 {
   533  		l.curEnd = len(l.orig)
   534  	}
   535  	return l.orig[l.curStart:l.curEnd]
   536  }
   537  
   538  // next sets the value to the next label. It skips the last label if it is empty.
   539  func (l *labelIter) next() {
   540  	l.i++
   541  	if l.slice != nil {
   542  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   543  			l.curStart = len(l.orig)
   544  		}
   545  	} else {
   546  		l.curStart = l.curEnd + 1
   547  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   548  			l.curStart = len(l.orig)
   549  		}
   550  	}
   551  }
   552  
   553  func (l *labelIter) set(s string) {
   554  	if l.slice == nil {
   555  		l.slice = strings.Split(l.orig, ".")
   556  	}
   557  	l.slice[l.i] = s
   558  }
   559  
// acePrefix is the ASCII Compatible Encoding prefix. Labels starting with it
// are punycode-decoded, and it is prepended to labels that are encoded.
const acePrefix = "xn--"
   562  
   563  func (p *Profile) simplify(cat category) category {
   564  	switch cat {
   565  	case disallowedSTD3Mapped:
   566  		if p.useSTD3Rules {
   567  			cat = disallowed
   568  		} else {
   569  			cat = mapped
   570  		}
   571  	case disallowedSTD3Valid:
   572  		if p.useSTD3Rules {
   573  			cat = disallowed
   574  		} else {
   575  			cat = valid
   576  		}
   577  	case deviation:
   578  		if !p.transitional {
   579  			cat = valid
   580  		}
   581  	case validNV8, validXV8:
   582  		// TODO: handle V2008
   583  		cat = valid
   584  	}
   585  	return cat
   586  }
   587  
   588  func validateFromPunycode(p *Profile, s string) error {
   589  	if !norm.NFC.IsNormalString(s) {
   590  		return &labelError{s, "V1"}
   591  	}
   592  	for i := 0; i < len(s); {
   593  		v, sz := trie.lookupString(s[i:])
   594  		if sz == 0 {
   595  			return runeError(utf8.RuneError)
   596  		}
   597  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   598  			return &labelError{s, "V6"}
   599  		}
   600  		i += sz
   601  	}
   602  	return nil
   603  }
   604  
// UTF-8 encodings of the two joiner runes that the ContextJ check looks for.
const (
	zwnj = "\u200c" // ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // ZERO WIDTH JOINER
)
   609  
// joinState is a state of the machine that validateLabel runs over a label
// containing joiners. The values index the rows of joinStates, so their order
// must match that table.
type joinState int8

const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL // absorbing: every transition out of it leads back to it
)
   620  
// joinStates is the transition table of the joiner state machine, indexed
// first by the current joinState and then by the join type of the next rune.
// Entries that are not listed are the zero value, stateStart. validateLabel
// rejects a label whose final state is stateFAIL or stateAfter.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   666  
   667  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   668  // already implicitly satisfied by the overall implementation.
   669  func (p *Profile) validateLabel(s string) error {
   670  	if s == "" {
   671  		if p.verifyDNSLength {
   672  			return &labelError{s, "A4"}
   673  		}
   674  		return nil
   675  	}
   676  	if p.bidirule != nil && !p.bidirule(s) {
   677  		return &labelError{s, "B"}
   678  	}
   679  	if p.checkHyphens {
   680  		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   681  			return &labelError{s, "V2"}
   682  		}
   683  		if s[0] == '-' || s[len(s)-1] == '-' {
   684  			return &labelError{s, "V3"}
   685  		}
   686  	}
   687  	if !p.checkJoiners {
   688  		return nil
   689  	}
   690  	trie := p.trie // p.checkJoiners is only set if trie is set.
   691  	// TODO: merge the use of this in the trie.
   692  	v, sz := trie.lookupString(s)
   693  	x := info(v)
   694  	if x.isModifier() {
   695  		return &labelError{s, "V5"}
   696  	}
   697  	// Quickly return in the absence of zero-width (non) joiners.
   698  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   699  		return nil
   700  	}
   701  	st := stateStart
   702  	for i := 0; ; {
   703  		jt := x.joinType()
   704  		if s[i:i+sz] == zwj {
   705  			jt = joinZWJ
   706  		} else if s[i:i+sz] == zwnj {
   707  			jt = joinZWNJ
   708  		}
   709  		st = joinStates[st][jt]
   710  		if x.isViramaModifier() {
   711  			st = joinStates[st][joinVirama]
   712  		}
   713  		if i += sz; i == len(s) {
   714  			break
   715  		}
   716  		v, sz = trie.lookupString(s[i:])
   717  		x = info(v)
   718  	}
   719  	if st == stateFAIL || st == stateAfter {
   720  		return &labelError{s, "C"}
   721  	}
   722  	return nil
   723  }
   724  
   725  func ascii(s string) bool {
   726  	for i := 0; i < len(s); i++ {
   727  		if s[i] >= utf8.RuneSelf {
   728  			return false
   729  		}
   730  	}
   731  	return true
   732  }
   733  

View as plain text