lookup.go

Documentation: golang.org/x/text/language/display

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package display
     6  
     7  // This file contains common lookup code that is shared between the various
     8  // implementations of Namer and Dictionaries.
     9  
    10  import (
    11  	"fmt"
    12  	"sort"
    13  	"strings"
    14  
    15  	"golang.org/x/text/language"
    16  )
    17  
    18  type namer interface {
    19  	// name gets the string for the given index. It should walk the
    20  	// inheritance chain if a value is not present in the base index.
    21  	name(idx int) string
    22  }
    23  
    24  func nameLanguage(n namer, x interface{}) string {
    25  	t, _ := language.All.Compose(x)
    26  	for {
    27  		i, _, _ := langTagSet.index(t.Raw())
    28  		if s := n.name(i); s != "" {
    29  			return s
    30  		}
    31  		if t = t.Parent(); t == language.Und {
    32  			return ""
    33  		}
    34  	}
    35  }
    36  
    37  func nameScript(n namer, x interface{}) string {
    38  	t, _ := language.DeprecatedScript.Compose(x)
    39  	_, s, _ := t.Raw()
    40  	return n.name(scriptIndex.index(s.String()))
    41  }
    42  
    43  func nameRegion(n namer, x interface{}) string {
    44  	t, _ := language.DeprecatedRegion.Compose(x)
    45  	_, _, r := t.Raw()
    46  	return n.name(regionIndex.index(r.String()))
    47  }
    48  
    49  func nameTag(langN, scrN, regN namer, x interface{}) string {
    50  	t, ok := x.(language.Tag)
    51  	if !ok {
    52  		return ""
    53  	}
    54  	const form = language.All &^ language.SuppressScript
    55  	if c, err := form.Canonicalize(t); err == nil {
    56  		t = c
    57  	}
    58  	_, sRaw, rRaw := t.Raw()
    59  	i, scr, reg := langTagSet.index(t.Raw())
    60  	for i != -1 {
    61  		if str := langN.name(i); str != "" {
    62  			if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
    63  				ss, sr := "", ""
    64  				if hasS {
    65  					ss = scrN.name(scriptIndex.index(scr.String()))
    66  				}
    67  				if hasR {
    68  					sr = regN.name(regionIndex.index(reg.String()))
    69  				}
    70  				// TODO: use patterns in CLDR or at least confirm they are the
    71  				// same for all languages.
    72  				if ss != "" && sr != "" {
    73  					return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
    74  				}
    75  				if ss != "" || sr != "" {
    76  					return fmt.Sprintf("%s (%s%s)", str, ss, sr)
    77  				}
    78  			}
    79  			return str
    80  		}
    81  		scr, reg = sRaw, rRaw
    82  		if t = t.Parent(); t == language.Und {
    83  			return ""
    84  		}
    85  		i, _, _ = langTagSet.index(t.Raw())
    86  	}
    87  	return ""
    88  }
    89  
    90  // header contains the data and indexes for a single namer.
    91  // data contains a series of strings concatenated into one. index contains the
    92  // offsets for a string in data. For example, consider a header that defines
    93  // strings for the languages de, el, en, fi, and nl:
    94  //
    95  //	header{
    96  //		data: "GermanGreekEnglishDutch",
    97  //		index: []uint16{0, 6, 11, 18, 18, 23},
    98  //	}
    99  //
   100  // For a language with index i, the string is defined by
   101  // data[index[i]:index[i+1]]. So the number of elements in index is always one
   102  // greater than the number of languages for which header defines a value.
   103  // A string for a language may be empty, which means the name is undefined. In
   104  // the above example, the name for fi (Finnish) is undefined.
   105  type header struct {
   106  	data  string
   107  	index []uint16
   108  }
   109  
   110  // name looks up the name for a tag in the dictionary, given its index.
   111  func (h *header) name(i int) string {
   112  	if 0 <= i && i < len(h.index)-1 {
   113  		return h.data[h.index[i]:h.index[i+1]]
   114  	}
   115  	return ""
   116  }
   117  
   118  // tagSet is used to find the index of a language in a set of tags.
   119  type tagSet struct {
   120  	single tagIndex
   121  	long   []string
   122  }
   123  
   124  var (
   125  	langTagSet = tagSet{
   126  		single: langIndex,
   127  		long:   langTagsLong,
   128  	}
   129  
   130  	// selfTagSet is used for indexing the language strings in their own
   131  	// language.
   132  	selfTagSet = tagSet{
   133  		single: selfIndex,
   134  		long:   selfTagsLong,
   135  	}
   136  
   137  	zzzz = language.MustParseScript("Zzzz")
   138  	zz   = language.MustParseRegion("ZZ")
   139  )
   140  
   141  // index returns the index of the tag for the given base, script and region or
   142  // its parent if the tag is not available. If the match is for a parent entry,
   143  // the excess script and region are returned.
   144  func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
   145  	lang := base.String()
   146  	index := -1
   147  	if (scr != language.Script{} || reg != language.Region{}) {
   148  		if scr == zzzz {
   149  			scr = language.Script{}
   150  		}
   151  		if reg == zz {
   152  			reg = language.Region{}
   153  		}
   154  
   155  		i := sort.SearchStrings(ts.long, lang)
   156  		// All entries have either a script or a region and not both.
   157  		scrStr, regStr := scr.String(), reg.String()
   158  		for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
   159  			if s := ts.long[i][len(lang)+1:]; s == scrStr {
   160  				scr = language.Script{}
   161  				index = i + ts.single.len()
   162  				break
   163  			} else if s == regStr {
   164  				reg = language.Region{}
   165  				index = i + ts.single.len()
   166  				break
   167  			}
   168  		}
   169  	}
   170  	if index == -1 {
   171  		index = ts.single.index(lang)
   172  	}
   173  	return index, scr, reg
   174  }
   175  
   176  func (ts *tagSet) Tags() []language.Tag {
   177  	tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
   178  	ts.single.keys(func(s string) {
   179  		tags = append(tags, language.Raw.MustParse(s))
   180  	})
   181  	for _, s := range ts.long {
   182  		tags = append(tags, language.Raw.MustParse(s))
   183  	}
   184  	return tags
   185  }
   186  
   187  func supportedScripts() []language.Script {
   188  	scr := make([]language.Script, 0, scriptIndex.len())
   189  	scriptIndex.keys(func(s string) {
   190  		scr = append(scr, language.MustParseScript(s))
   191  	})
   192  	return scr
   193  }
   194  
   195  func supportedRegions() []language.Region {
   196  	reg := make([]language.Region, 0, regionIndex.len())
   197  	regionIndex.keys(func(s string) {
   198  		reg = append(reg, language.MustParseRegion(s))
   199  	})
   200  	return reg
   201  }
   202  
   203  // tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
   204  // for each length, which can be used in combination with binary search to get
   205  // the index associated with a tag.
   206  // For example, a tagIndex{
   207  //
   208  //	"arenesfrruzh",  // 6 2-byte tags.
   209  //	"barwae",        // 2 3-byte tags.
   210  //	"",
   211  //
   212  // }
   213  // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
   214  // "wae" had an index of 7.
   215  type tagIndex [3]string
   216  
   217  func (t *tagIndex) index(s string) int {
   218  	sz := len(s)
   219  	if sz < 2 || 4 < sz {
   220  		return -1
   221  	}
   222  	a := t[sz-2]
   223  	index := sort.Search(len(a)/sz, func(i int) bool {
   224  		p := i * sz
   225  		return a[p:p+sz] >= s
   226  	})
   227  	p := index * sz
   228  	if end := p + sz; end > len(a) || a[p:end] != s {
   229  		return -1
   230  	}
   231  	// Add the number of tags for smaller sizes.
   232  	for i := 0; i < sz-2; i++ {
   233  		index += len(t[i]) / (i + 2)
   234  	}
   235  	return index
   236  }
   237  
   238  // len returns the number of tags that are contained in the tagIndex.
   239  func (t *tagIndex) len() (n int) {
   240  	for i, s := range t {
   241  		n += len(s) / (i + 2)
   242  	}
   243  	return n
   244  }
   245  
   246  // keys calls f for each tag.
   247  func (t *tagIndex) keys(f func(key string)) {
   248  	for i, s := range *t {
   249  		for ; s != ""; s = s[i+2:] {
   250  			f(s[:i+2])
   251  		}
   252  	}
   253  }
   254
View as plain text