...

Source file src/golang.org/x/net/idna/idna9.0.0.go

Documentation: golang.org/x/net/idna

     1  // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  //go:build !go1.10
     8  
     9  // Package idna implements IDNA2008 using the compatibility processing
    10  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
    11  // deal with the transition from IDNA2003.
    12  //
    13  // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
    14  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    15  // UTS #46 is defined in https://www.unicode.org/reports/tr46.
    16  // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
    17  // differences between these two standards.
    18  package idna // import "golang.org/x/net/idna"
    19  
    20  import (
    21  	"fmt"
    22  	"strings"
    23  	"unicode/utf8"
    24  
    25  	"golang.org/x/text/secure/bidirule"
    26  	"golang.org/x/text/unicode/norm"
    27  )
    28  
    29  // NOTE: Unlike common practice in Go APIs, the functions will return a
    30  // sanitized domain name in case of errors. Browsers sometimes use a partially
    31  // evaluated string as lookup.
    32  // TODO: the current error handling is, in my opinion, the least opinionated.
    33  // Other strategies are also viable, though:
    34  // Option 1) Return an empty string in case of error, but allow the user to
    35  //    specify explicitly which errors to ignore.
    36  // Option 2) Return the partially evaluated string if it is itself a valid
    37  //    string, otherwise return the empty string in case of error.
    38  // Option 3) Option 1 and 2.
    39  // Option 4) Always return an empty string for now and implement Option 1 as
    40  //    needed, and document that the return string may not be empty in case of
    41  //    error in the future.
    42  // I think Option 1 is best, but it is quite opinionated.
    43  
    44  // ToASCII is a wrapper for Punycode.ToASCII.
    45  func ToASCII(s string) (string, error) {
    46  	return Punycode.process(s, true)
    47  }
    48  
    49  // ToUnicode is a wrapper for Punycode.ToUnicode.
    50  func ToUnicode(s string) (string, error) {
    51  	return Punycode.process(s, false)
    52  }
    53  
// An Option configures a Profile at creation time. It is a function that
// mutates the options record of the Profile being built; New applies the
// supplied Options in order.
type Option func(*options)
    56  
    57  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    58  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    59  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    60  // compatibility. It is used by some browsers when resolving domain names. This
    61  // option is only meaningful if combined with MapForLookup.
    62  func Transitional(transitional bool) Option {
    63  	return func(o *options) { o.transitional = transitional }
    64  }
    65  
    66  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    67  // are longer than allowed by the RFC.
    68  //
    69  // This option corresponds to the VerifyDnsLength flag in UTS #46.
    70  func VerifyDNSLength(verify bool) Option {
    71  	return func(o *options) { o.verifyDNSLength = verify }
    72  }
    73  
    74  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    75  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    76  func RemoveLeadingDots(remove bool) Option {
    77  	return func(o *options) { o.removeLeadingDots = remove }
    78  }
    79  
    80  // ValidateLabels sets whether to check the mandatory label validation criteria
    81  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    82  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    83  // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
    84  // in UTS #46.
    85  func ValidateLabels(enable bool) Option {
    86  	return func(o *options) {
    87  		// Don't override existing mappings, but set one that at least checks
    88  		// normalization if it is not set.
    89  		if o.mapping == nil && enable {
    90  			o.mapping = normalize
    91  		}
    92  		o.trie = trie
    93  		o.checkJoiners = enable
    94  		o.checkHyphens = enable
    95  		if enable {
    96  			o.fromPuny = validateFromPunycode
    97  		} else {
    98  			o.fromPuny = nil
    99  		}
   100  	}
   101  }
   102  
   103  // CheckHyphens sets whether to check for correct use of hyphens ('-') in
   104  // labels. Most web browsers do not have this option set, since labels such as
   105  // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
   106  //
   107  // This option corresponds to the CheckHyphens flag in UTS #46.
   108  func CheckHyphens(enable bool) Option {
   109  	return func(o *options) { o.checkHyphens = enable }
   110  }
   111  
   112  // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
   113  // A of RFC 5892, concerning the use of joiner runes.
   114  //
   115  // This option corresponds to the CheckJoiners flag in UTS #46.
   116  func CheckJoiners(enable bool) Option {
   117  	return func(o *options) {
   118  		o.trie = trie
   119  		o.checkJoiners = enable
   120  	}
   121  }
   122  
   123  // StrictDomainName limits the set of permissible ASCII characters to those
   124  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
   125  // hyphen). This is set by default for MapForLookup and ValidateForRegistration,
   126  // but is only useful if ValidateLabels is set.
   127  //
   128  // This option is useful, for instance, for browsers that allow characters
   129  // outside this range, for example a '_' (U+005F LOW LINE). See
   130  // http://www.rfc-editor.org/std/std3.txt for more details.
   131  //
   132  // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
   133  func StrictDomainName(use bool) Option {
   134  	return func(o *options) { o.useSTD3Rules = use }
   135  }
   136  
   137  // NOTE: the following options pull in tables. The tables should not be linked
   138  // in as long as the options are not used.
   139  
   140  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   141  // that relies on proper validation of labels should include this rule.
   142  //
   143  // This option corresponds to the CheckBidi flag in UTS #46.
   144  func BidiRule() Option {
   145  	return func(o *options) { o.bidirule = bidirule.ValidString }
   146  }
   147  
   148  // ValidateForRegistration sets validation options to verify that a given IDN is
   149  // properly formatted for registration as defined by Section 4 of RFC 5891.
   150  func ValidateForRegistration() Option {
   151  	return func(o *options) {
   152  		o.mapping = validateRegistration
   153  		StrictDomainName(true)(o)
   154  		ValidateLabels(true)(o)
   155  		VerifyDNSLength(true)(o)
   156  		BidiRule()(o)
   157  	}
   158  }
   159  
   160  // MapForLookup sets validation and mapping options such that a given IDN is
   161  // transformed for domain name lookup according to the requirements set out in
   162  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   163  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   164  // to add this check.
   165  //
   166  // The mappings include normalization and mapping case, width and other
   167  // compatibility mappings.
   168  func MapForLookup() Option {
   169  	return func(o *options) {
   170  		o.mapping = validateAndMap
   171  		StrictDomainName(true)(o)
   172  		ValidateLabels(true)(o)
   173  		RemoveLeadingDots(true)(o)
   174  	}
   175  }
   176  
// options holds the settings of a Profile. The fields are filled in by the
// Option functions or by the literals of the predefined profiles.
type options struct {
	transitional      bool // set by Transitional; read by simplify for deviation runes
	useSTD3Rules      bool // set by StrictDomainName; read by simplify
	checkHyphens      bool // set by CheckHyphens and ValidateLabels
	checkJoiners      bool // set by CheckJoiners and ValidateLabels
	verifyDNSLength   bool // set by VerifyDNSLength
	removeLeadingDots bool // set by RemoveLeadingDots

	// trie is the table validateLabel consults for its modifier and joiner
	// checks. It is installed by ValidateLabels and CheckJoiners.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (string, error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   198  
// A Profile defines the configuration of an IDNA mapper. It embeds the
// options record that New and the Option functions populate.
type Profile struct {
	options
}
   203  
   204  func apply(o *options, opts []Option) {
   205  	for _, f := range opts {
   206  		f(o)
   207  	}
   208  }
   209  
   210  // New creates a new Profile.
   211  //
   212  // With no options, the returned Profile is the most permissive and equals the
   213  // Punycode Profile. Options can be passed to further restrict the Profile. The
   214  // MapForLookup and ValidateForRegistration options set a collection of options,
   215  // for lookup and registration purposes respectively, which can be tailored by
   216  // adding more fine-grained options, where later options override earlier
   217  // options.
   218  func New(o ...Option) *Profile {
   219  	p := &Profile{}
   220  	apply(&p.options, o)
   221  	return p
   222  }
   223  
   224  // ToASCII converts a domain or domain label to its ASCII form. For example,
   225  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   226  // ToASCII("golang") is "golang". If an error is encountered it will return
   227  // an error and a (partially) processed result.
   228  func (p *Profile) ToASCII(s string) (string, error) {
   229  	return p.process(s, true)
   230  }
   231  
   232  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   233  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   234  // ToUnicode("golang") is "golang". If an error is encountered it will return
   235  // an error and a (partially) processed result.
   236  func (p *Profile) ToUnicode(s string) (string, error) {
   237  	pp := *p
   238  	pp.transitional = false
   239  	return pp.process(s, false)
   240  }
   241  
   242  // String reports a string with a description of the profile for debugging
   243  // purposes. The string format may change with different versions.
   244  func (p *Profile) String() string {
   245  	s := ""
   246  	if p.transitional {
   247  		s = "Transitional"
   248  	} else {
   249  		s = "NonTransitional"
   250  	}
   251  	if p.useSTD3Rules {
   252  		s += ":UseSTD3Rules"
   253  	}
   254  	if p.checkHyphens {
   255  		s += ":CheckHyphens"
   256  	}
   257  	if p.checkJoiners {
   258  		s += ":CheckJoiners"
   259  	}
   260  	if p.verifyDNSLength {
   261  		s += ":VerifyDNSLength"
   262  	}
   263  	return s
   264  }
   265  
// The exported variables below are the predefined profiles. Each one points
// at the unexported value of the same (lower-case) name declared further down
// in this block.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode has no options set at all.
	punycode = &Profile{}
	// lookup is the only predefined profile with transitional set.
	lookup   = &Profile{options{
		transitional:      true,
		removeLeadingDots: true,
		useSTD3Rules:      true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// display has the same settings as lookup, minus transitional.
	display = &Profile{options{
		useSTD3Rules:      true,
		removeLeadingDots: true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// registration validates without mapping (validateRegistration) and
	// verifies DNS length; it does not remove leading dots.
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   321  
   322  type labelError struct{ label, code_ string }
   323  
   324  func (e labelError) code() string { return e.code_ }
   325  func (e labelError) Error() string {
   326  	return fmt.Sprintf("idna: invalid label %q", e.label)
   327  }
   328  
   329  type runeError rune
   330  
   331  func (e runeError) code() string { return "P1" }
   332  func (e runeError) Error() string {
   333  	return fmt.Sprintf("idna: disallowed rune %U", e)
   334  }
   335  
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// It runs the profile's mapping (if set), optionally strips leading dots,
// then walks the labels of s: labels carrying the ACE prefix are decoded and
// validated, all other labels are validated as they are. If toASCII is true,
// every non-ASCII label is subsequently encoded with the ACE prefix and the
// DNS length limits are checked when verifyDNSLength is set.
//
// Validation errors do not stop processing: after the mapping step only the
// first error is kept in err, and the function always returns the
// (partially) processed string alongside it.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	if p.mapping != nil {
		s, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggest we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	// First pass: decode and validate each label.
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// Replace the A-label by its decoded form in the result.
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// Second pass (ToASCII only): encode non-ASCII labels and check the
	// per-label length limit.
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				// The encoded label is stored even if encode reported an error.
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}
   417  
   418  func normalize(p *Profile, s string) (string, error) {
   419  	return norm.NFC.String(s), nil
   420  }
   421  
   422  func validateRegistration(p *Profile, s string) (string, error) {
   423  	if !norm.NFC.IsNormalString(s) {
   424  		return s, &labelError{s, "V1"}
   425  	}
   426  	for i := 0; i < len(s); {
   427  		v, sz := trie.lookupString(s[i:])
   428  		// Copy bytes not copied so far.
   429  		switch p.simplify(info(v).category()) {
   430  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   431  		// for strict conformance to IDNA2008.
   432  		case valid, deviation:
   433  		case disallowed, mapped, unknown, ignored:
   434  			r, _ := utf8.DecodeRuneInString(s[i:])
   435  			return s, runeError(r)
   436  		}
   437  		i += sz
   438  	}
   439  	return s, nil
   440  }
   441  
   442  func validateAndMap(p *Profile, s string) (string, error) {
   443  	var (
   444  		err error
   445  		b   []byte
   446  		k   int
   447  	)
   448  	for i := 0; i < len(s); {
   449  		v, sz := trie.lookupString(s[i:])
   450  		start := i
   451  		i += sz
   452  		// Copy bytes not copied so far.
   453  		switch p.simplify(info(v).category()) {
   454  		case valid:
   455  			continue
   456  		case disallowed:
   457  			if err == nil {
   458  				r, _ := utf8.DecodeRuneInString(s[start:])
   459  				err = runeError(r)
   460  			}
   461  			continue
   462  		case mapped, deviation:
   463  			b = append(b, s[k:start]...)
   464  			b = info(v).appendMapping(b, s[start:i])
   465  		case ignored:
   466  			b = append(b, s[k:start]...)
   467  			// drop the rune
   468  		case unknown:
   469  			b = append(b, s[k:start]...)
   470  			b = append(b, "\ufffd"...)
   471  		}
   472  		k = i
   473  	}
   474  	if k == 0 {
   475  		// No changes so far.
   476  		s = norm.NFC.String(s)
   477  	} else {
   478  		b = append(b, s[k:]...)
   479  		if norm.NFC.QuickSpan(b) != len(b) {
   480  			b = norm.NFC.Bytes(b)
   481  		}
   482  		// TODO: the punycode converters require strings as input.
   483  		s = string(b)
   484  	}
   485  	return s, err
   486  }
   487  
   488  // A labelIter allows iterating over domain name labels.
   489  type labelIter struct {
   490  	orig     string
   491  	slice    []string
   492  	curStart int
   493  	curEnd   int
   494  	i        int
   495  }
   496  
   497  func (l *labelIter) reset() {
   498  	l.curStart = 0
   499  	l.curEnd = 0
   500  	l.i = 0
   501  }
   502  
   503  func (l *labelIter) done() bool {
   504  	return l.curStart >= len(l.orig)
   505  }
   506  
   507  func (l *labelIter) result() string {
   508  	if l.slice != nil {
   509  		return strings.Join(l.slice, ".")
   510  	}
   511  	return l.orig
   512  }
   513  
   514  func (l *labelIter) label() string {
   515  	if l.slice != nil {
   516  		return l.slice[l.i]
   517  	}
   518  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   519  	l.curEnd = l.curStart + p
   520  	if p == -1 {
   521  		l.curEnd = len(l.orig)
   522  	}
   523  	return l.orig[l.curStart:l.curEnd]
   524  }
   525  
   526  // next sets the value to the next label. It skips the last label if it is empty.
   527  func (l *labelIter) next() {
   528  	l.i++
   529  	if l.slice != nil {
   530  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   531  			l.curStart = len(l.orig)
   532  		}
   533  	} else {
   534  		l.curStart = l.curEnd + 1
   535  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   536  			l.curStart = len(l.orig)
   537  		}
   538  	}
   539  }
   540  
   541  func (l *labelIter) set(s string) {
   542  	if l.slice == nil {
   543  		l.slice = strings.Split(l.orig, ".")
   544  	}
   545  	l.slice[l.i] = s
   546  }
   547  
// acePrefix is the ASCII Compatible Encoding prefix. process treats labels
// starting with it as punycode-encoded and passes it to encode when
// producing ASCII output.
const acePrefix = "xn--"
   550  
   551  func (p *Profile) simplify(cat category) category {
   552  	switch cat {
   553  	case disallowedSTD3Mapped:
   554  		if p.useSTD3Rules {
   555  			cat = disallowed
   556  		} else {
   557  			cat = mapped
   558  		}
   559  	case disallowedSTD3Valid:
   560  		if p.useSTD3Rules {
   561  			cat = disallowed
   562  		} else {
   563  			cat = valid
   564  		}
   565  	case deviation:
   566  		if !p.transitional {
   567  			cat = valid
   568  		}
   569  	case validNV8, validXV8:
   570  		// TODO: handle V2008
   571  		cat = valid
   572  	}
   573  	return cat
   574  }
   575  
   576  func validateFromPunycode(p *Profile, s string) error {
   577  	if !norm.NFC.IsNormalString(s) {
   578  		return &labelError{s, "V1"}
   579  	}
   580  	for i := 0; i < len(s); {
   581  		v, sz := trie.lookupString(s[i:])
   582  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   583  			return &labelError{s, "V6"}
   584  		}
   585  		i += sz
   586  	}
   587  	return nil
   588  }
   589  
// UTF-8 encodings of the two joiner runes that validateLabel looks for.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   594  
// joinState is a state of the joiner check in validateLabel. It is used as
// the first index into joinStates.
type joinState int8

// The states of the joiner check. stateStart is the zero value. validateLabel
// rejects a label whose final state is stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
   605  
// joinStates is the transition table of the joiner check in validateLabel:
// joinStates[state][joinType] is the state entered when a rune with the given
// join type is seen in the given state. Entries that are not listed keep the
// zero value, which is stateStart.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	// stateFAIL is absorbing: every input leads back to stateFAIL.
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   651  
   652  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   653  // already implicitly satisfied by the overall implementation.
   654  func (p *Profile) validateLabel(s string) error {
   655  	if s == "" {
   656  		if p.verifyDNSLength {
   657  			return &labelError{s, "A4"}
   658  		}
   659  		return nil
   660  	}
   661  	if p.bidirule != nil && !p.bidirule(s) {
   662  		return &labelError{s, "B"}
   663  	}
   664  	if p.checkHyphens {
   665  		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   666  			return &labelError{s, "V2"}
   667  		}
   668  		if s[0] == '-' || s[len(s)-1] == '-' {
   669  			return &labelError{s, "V3"}
   670  		}
   671  	}
   672  	if !p.checkJoiners {
   673  		return nil
   674  	}
   675  	trie := p.trie // p.checkJoiners is only set if trie is set.
   676  	// TODO: merge the use of this in the trie.
   677  	v, sz := trie.lookupString(s)
   678  	x := info(v)
   679  	if x.isModifier() {
   680  		return &labelError{s, "V5"}
   681  	}
   682  	// Quickly return in the absence of zero-width (non) joiners.
   683  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   684  		return nil
   685  	}
   686  	st := stateStart
   687  	for i := 0; ; {
   688  		jt := x.joinType()
   689  		if s[i:i+sz] == zwj {
   690  			jt = joinZWJ
   691  		} else if s[i:i+sz] == zwnj {
   692  			jt = joinZWNJ
   693  		}
   694  		st = joinStates[st][jt]
   695  		if x.isViramaModifier() {
   696  			st = joinStates[st][joinVirama]
   697  		}
   698  		if i += sz; i == len(s) {
   699  			break
   700  		}
   701  		v, sz = trie.lookupString(s[i:])
   702  		x = info(v)
   703  	}
   704  	if st == stateFAIL || st == stateAfter {
   705  		return &labelError{s, "C"}
   706  	}
   707  	return nil
   708  }
   709  
   710  func ascii(s string) bool {
   711  	for i := 0; i < len(s); i++ {
   712  		if s[i] >= utf8.RuneSelf {
   713  			return false
   714  		}
   715  	}
   716  	return true
   717  }
   718  

View as plain text