...

Source file src/golang.org/x/text/encoding/internal/identifier/identifier.go

Documentation: golang.org/x/text/encoding/internal/identifier

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen.go
     6  
     7  // Package identifier defines the contract between implementations of Encoding
     8  // and Index by defining identifiers that uniquely identify standardized coded
     9  // character sets (CCS) and character encoding schemes (CES), which we will
    10  // together refer to as encodings, for which Encoding implementations provide
    11  // converters to and from UTF-8. This package is typically only of concern to
    12  // implementers of Indexes and Encodings.
    13  //
    14  // One part of the identifier is the MIB code, which is defined by IANA and
    15  // uniquely identifies a CCS or CES. Each code is associated with data that
    16  // references authorities, official documentation as well as aliases and MIME
    17  // names.
    18  //
    19  // Not all CESs are covered by the IANA registry. The "other" string that is
    20  // returned by ID can be used to identify other character sets or versions of
    21  // existing ones.
    22  //
    23  // It is recommended that each package that provides a set of Encodings provide
    24  // the All and Common variables to reference all supported encodings and a
    25  // commonly used subset. This allows Index implementations to include all
    26  // available encodings without explicitly referencing or knowing about them.
    27  package identifier
    28  
    29  // Note: this package is internal, but could be made public if there is a need
    30  // for writing third-party Indexes and Encodings.
    31  
    32  // References:
    33  // - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt
    34  // - http://www.iana.org/assignments/character-sets/character-sets.xhtml
    35  // - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
    36  // - http://www.ietf.org/rfc/rfc2978.txt
    37  // - https://www.unicode.org/reports/tr22/
    38  // - http://www.w3.org/TR/encoding/
    39  // - https://encoding.spec.whatwg.org/
    40  // - https://encoding.spec.whatwg.org/encodings.json
    41  // - https://tools.ietf.org/html/rfc6657#section-5
    42  
    43  // Interface can be implemented by an Encoding to identify the CCS or CES for
    44  // which it implements conversions.
    45  type Interface interface {
    46  	// ID returns an encoding identifier. Exactly one of the mib and other
    47  	// values should be set, that is, a non-zero MIB or a non-empty string.
    48  	//
    49  	// In the usual case it is only necessary to indicate the MIB code. The
    50  	// other string can be used to specify encodings for which there is no MIB,
    51  	// such as "x-mac-dingbat".
    52  	//
    53  	// The other string may only contain the characters a-z, A-Z, 0-9, - and _.
    54  	ID() (mib MIB, other string)
    55  
    56  	// NOTE: the restrictions on the identifier are to allow extending the syntax
    57  	// with additional information such as versions, vendors and other variants.
    58  }
    59  
    60  // A MIB identifies an encoding. Its values are derived from the IANA MIB codes,
    61  // extended with identifiers (starting at Unofficial) for some encodings that
    62  // are not covered by the IANA standard.
    63  //
    64  // See http://www.iana.org/assignments/ianacharset-mib.
    65  type MIB uint16
    66  
    67  // These additional MIB values are not defined by IANA. They are added because
    68  // they are common and defined within the text repo.
    69  const (
    70  	// Unofficial (10000) marks the start of encodings not registered by IANA.
    71  	Unofficial MIB = 10000 + iota
    72  
    73  	// Replacement (10001) is the WHATWG replacement encoding.
    74  	Replacement
    75  
    76  	// XUserDefined (10002) is the code for x-user-defined.
    77  	XUserDefined
    78  
    79  	// MacintoshCyrillic (10003) is the code for x-mac-cyrillic.
    80  	MacintoshCyrillic
    81  )
    82  

View as plain text