...

Source file src/golang.org/x/text/encoding/htmlindex/gen.go

Documentation: golang.org/x/text/encoding/htmlindex

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  
     7  package main
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/json"
    12  	"fmt"
    13  	"log"
    14  	"strings"
    15  
    16  	"golang.org/x/text/internal/gen"
    17  )
    18  
    19  type group struct {
    20  	Encodings []struct {
    21  		Labels []string
    22  		Name   string
    23  	}
    24  }
    25  
    26  func main() {
    27  	gen.Init()
    28  
    29  	r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
    30  	var groups []group
    31  	if err := json.NewDecoder(r).Decode(&groups); err != nil {
    32  		log.Fatalf("Error reading encodings.json: %v", err)
    33  	}
    34  
    35  	w := &bytes.Buffer{}
    36  	fmt.Fprintln(w, "type htmlEncoding byte")
    37  	fmt.Fprintln(w, "const (")
    38  	for i, g := range groups {
    39  		for _, e := range g.Encodings {
    40  			key := strings.ToLower(e.Name)
    41  			name := consts[key]
    42  			if name == "" {
    43  				log.Fatalf("No const defined for %s.", key)
    44  			}
    45  			if i == 0 {
    46  				fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
    47  			} else {
    48  				fmt.Fprintf(w, "%s\n", name)
    49  			}
    50  		}
    51  	}
    52  	fmt.Fprintln(w, "numEncodings")
    53  	fmt.Fprint(w, ")\n\n")
    54  
    55  	fmt.Fprintln(w, "var canonical = [numEncodings]string{")
    56  	for _, g := range groups {
    57  		for _, e := range g.Encodings {
    58  			fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
    59  		}
    60  	}
    61  	fmt.Fprint(w, "}\n\n")
    62  
    63  	fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
    64  	for _, g := range groups {
    65  		for _, e := range g.Encodings {
    66  			for _, l := range e.Labels {
    67  				key := strings.ToLower(e.Name)
    68  				name := consts[key]
    69  				fmt.Fprintf(w, "%q: %s,\n", l, name)
    70  			}
    71  		}
    72  	}
    73  	fmt.Fprint(w, "}\n\n")
    74  
    75  	var tags []string
    76  	fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
    77  	for _, loc := range locales {
    78  		tags = append(tags, loc.tag)
    79  		fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
    80  	}
    81  	fmt.Fprint(w, "}\n\n")
    82  
    83  	fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
    84  
    85  	gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
    86  }
    87  
    88  // consts maps canonical encoding name to internal constant.
    89  var consts = map[string]string{
    90  	"utf-8":          "utf8",
    91  	"ibm866":         "ibm866",
    92  	"iso-8859-2":     "iso8859_2",
    93  	"iso-8859-3":     "iso8859_3",
    94  	"iso-8859-4":     "iso8859_4",
    95  	"iso-8859-5":     "iso8859_5",
    96  	"iso-8859-6":     "iso8859_6",
    97  	"iso-8859-7":     "iso8859_7",
    98  	"iso-8859-8":     "iso8859_8",
    99  	"iso-8859-8-i":   "iso8859_8I",
   100  	"iso-8859-10":    "iso8859_10",
   101  	"iso-8859-13":    "iso8859_13",
   102  	"iso-8859-14":    "iso8859_14",
   103  	"iso-8859-15":    "iso8859_15",
   104  	"iso-8859-16":    "iso8859_16",
   105  	"koi8-r":         "koi8r",
   106  	"koi8-u":         "koi8u",
   107  	"macintosh":      "macintosh",
   108  	"windows-874":    "windows874",
   109  	"windows-1250":   "windows1250",
   110  	"windows-1251":   "windows1251",
   111  	"windows-1252":   "windows1252",
   112  	"windows-1253":   "windows1253",
   113  	"windows-1254":   "windows1254",
   114  	"windows-1255":   "windows1255",
   115  	"windows-1256":   "windows1256",
   116  	"windows-1257":   "windows1257",
   117  	"windows-1258":   "windows1258",
   118  	"x-mac-cyrillic": "macintoshCyrillic",
   119  	"gbk":            "gbk",
   120  	"gb18030":        "gb18030",
   121  	// "hz-gb-2312":     "hzgb2312", // Was removed from WhatWG
   122  	"big5":           "big5",
   123  	"euc-jp":         "eucjp",
   124  	"iso-2022-jp":    "iso2022jp",
   125  	"shift_jis":      "shiftJIS",
   126  	"euc-kr":         "euckr",
   127  	"replacement":    "replacement",
   128  	"utf-16be":       "utf16be",
   129  	"utf-16le":       "utf16le",
   130  	"x-user-defined": "xUserDefined",
   131  }
   132  
   133  // locales is taken from
   134  // https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
   135  var locales = []struct{ tag, name string }{
   136  	// The default value. Explicitly state latin to benefit from the exact
   137  	// script option, while still making 1252 the default encoding for languages
   138  	// written in Latin script.
   139  	{"und_Latn", "windows-1252"},
   140  	{"ar", "windows-1256"},
   141  	{"ba", "windows-1251"},
   142  	{"be", "windows-1251"},
   143  	{"bg", "windows-1251"},
   144  	{"cs", "windows-1250"},
   145  	{"el", "iso-8859-7"},
   146  	{"et", "windows-1257"},
   147  	{"fa", "windows-1256"},
   148  	{"he", "windows-1255"},
   149  	{"hr", "windows-1250"},
   150  	{"hu", "iso-8859-2"},
   151  	{"ja", "shift_jis"},
   152  	{"kk", "windows-1251"},
   153  	{"ko", "euc-kr"},
   154  	{"ku", "windows-1254"},
   155  	{"ky", "windows-1251"},
   156  	{"lt", "windows-1257"},
   157  	{"lv", "windows-1257"},
   158  	{"mk", "windows-1251"},
   159  	{"pl", "iso-8859-2"},
   160  	{"ru", "windows-1251"},
   161  	{"sah", "windows-1251"},
   162  	{"sk", "windows-1250"},
   163  	{"sl", "iso-8859-2"},
   164  	{"sr", "windows-1251"},
   165  	{"tg", "windows-1251"},
   166  	{"th", "windows-874"},
   167  	{"tr", "windows-1254"},
   168  	{"tt", "windows-1251"},
   169  	{"uk", "windows-1251"},
   170  	{"vi", "windows-1258"},
   171  	{"zh-hans", "gb18030"},
   172  	{"zh-hant", "big5"},
   173  }
   174  

View as plain text