...

Source file src/golang.org/x/text/internal/catmsg/catmsg.go

Documentation: golang.org/x/text/internal/catmsg

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package catmsg contains support types for package x/text/message/catalog.
     6  //
     7  // This package contains the low-level implementations of Message used by the
     8  // catalog package and provides primitives for other packages to implement their
     9  // own. For instance, the plural package provides functionality for selecting
    10  // translation strings based on the plural category of substitution arguments.
    11  //
    12  // # Encoding and Decoding
    13  //
    14  // Catalogs store Messages encoded as a single string. Compiling a message into
    15  // a string both results in compacter representation and speeds up evaluation.
    16  //
    17  // A Message must implement a Compile method to convert its arbitrary
    18  // representation to a string. The Compile method takes an Encoder which
    19  // facilitates serializing the message. Encoders also provide more context of
    20  // the message's creation (such as for which language the message is intended),
    21  // which may not be known at the time of the creation of the message.
    22  //
    23  // Each message type must also have an accompanying decoder registered to decode
    24  // the message. This decoder takes a Decoder argument which provides the
    25  // counterparts for the decoding.
    26  //
    27  // # Renderers
    28  //
    29  // A Decoder must be initialized with a Renderer implementation. These
    30  // implementations must be provided by packages that use Catalogs, typically
    31  // formatting packages such as x/text/message. A typical user will not need to
    32  // worry about this type; it is only relevant to packages that do string
    33  // formatting and want to use the catalog package to handle localized strings.
    34  //
    35  // A package that uses catalogs for selecting strings receives selection results
    36  // as a sequence of substrings passed to the Renderer. The following snippet shows
    37  // how to express the above example using the message package.
    38  //
    39  //	message.Set(language.English, "You are %d minute(s) late.",
    40  //		catalog.Var("minutes", plural.Select(1, "one", "minute")),
    41  //		catalog.String("You are %[1]d ${minutes} late."))
    42  //
    43  //	p := message.NewPrinter(language.English)
    44  //	p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
    45  //
    46  // To evaluate the Printf, package message wraps the arguments in a Renderer
    47  // that is passed to the catalog for message decoding. The call sequence that
    48  // results from evaluating the above message, assuming the person is rather
    49  // tardy, is:
    50  //
    51  //	Render("You are %[1]d ")
    52  //	Arg(1)
    53  //	Render("minutes")
    54  //	Render(" late.")
    55  //
    56  // The call to Arg is caused by the plural.Select execution, which evaluates
    57  // the argument to determine whether the singular or plural message form should
    58  // be selected. The calls to Render report the partial results to the message
    59  // package for further evaluation.
    60  package catmsg
    61  
    62  import (
    63  	"errors"
    64  	"fmt"
    65  	"strconv"
    66  	"strings"
    67  	"sync"
    68  
    69  	"golang.org/x/text/language"
    70  )
    71  
// A Handle refers to a registered message type.
//
// A Handle is an index into the package-level handlers slice: the built-in
// message types occupy fixed indices at the start of that slice and Register
// hands out the next free index.
type Handle int
    74  
// A Handler decodes and evaluates data compiled by a Message and sends the
// result to the Decoder. The output may depend on the value of the substitution
// arguments, accessible by the Decoder's Arg method. The Handler returns false
// if there is no translation for the given substitution arguments.
//
// A Handler is tied to a message type by passing it to Register.
type Handler func(d *Decoder) bool
    80  
    81  // Register records the existence of a message type and returns a Handle that
    82  // can be used in the Encoder's EncodeMessageType method to create such
    83  // messages. The prefix of the name should be the package path followed by
    84  // an optional disambiguating string.
    85  // Register will panic if a handle for the same name was already registered.
    86  func Register(name string, handler Handler) Handle {
    87  	mutex.Lock()
    88  	defer mutex.Unlock()
    89  
    90  	if _, ok := names[name]; ok {
    91  		panic(fmt.Errorf("catmsg: handler for %q already exists", name))
    92  	}
    93  	h := Handle(len(handlers))
    94  	names[name] = h
    95  	handlers = append(handlers, handler)
    96  	return h
    97  }
    98  
// Handles of the message types built into this package.
//
// These handlers require fixed positions in the handlers slice.
const (
	// msgVars wraps a message in a variable block.
	msgVars Handle = iota
	// msgFirst selects the first matching message of a sequence.
	msgFirst
	// msgRaw renders its data unchanged.
	msgRaw
	// msgString alternates string constants and variable substitutions.
	msgString
	// msgAffix adds a prefix and a suffix to another message.
	msgAffix
	// Leave some arbitrary room for future expansion: 20 should suffice.
	// numInternal is the initial length of the handlers slice, so handles
	// returned by Register start at this value.
	numInternal = 20
)
   109  
// prefix is prepended to the names under which the built-in message types are
// recorded in names.
const prefix = "golang.org/x/text/internal/catmsg."
   111  
var (
	// TODO: find a more stable way to link handles to message types.

	// mutex is held by Register while it updates names and handlers.
	mutex sync.Mutex
	// names maps the name of each registered message type to its Handle. It
	// starts out with the built-in message types.
	names = map[string]Handle{
		prefix + "Vars":   msgVars,
		prefix + "First":  msgFirst,
		prefix + "Raw":    msgRaw,
		prefix + "String": msgString,
		prefix + "Affix":  msgAffix,
	}
	// handlers holds the Handler of each message type, indexed by Handle. The
	// first numInternal slots are reserved for built-in message types, which
	// are filled in by init.
	handlers = make([]Handler, numInternal)
)
   124  
   125  func init() {
   126  	// This handler is a message type wrapper that initializes a decoder
   127  	// with a variable block. This message type, if present, is always at the
   128  	// start of an encoded message.
   129  	handlers[msgVars] = func(d *Decoder) bool {
   130  		blockSize := int(d.DecodeUint())
   131  		d.vars = d.data[:blockSize]
   132  		d.data = d.data[blockSize:]
   133  		return d.executeMessage()
   134  	}
   135  
   136  	// First takes the first message in a sequence that results in a match for
   137  	// the given substitution arguments.
   138  	handlers[msgFirst] = func(d *Decoder) bool {
   139  		for !d.Done() {
   140  			if d.ExecuteMessage() {
   141  				return true
   142  			}
   143  		}
   144  		return false
   145  	}
   146  
   147  	handlers[msgRaw] = func(d *Decoder) bool {
   148  		d.Render(d.data)
   149  		return true
   150  	}
   151  
   152  	// A String message alternates between a string constant and a variable
   153  	// substitution.
   154  	handlers[msgString] = func(d *Decoder) bool {
   155  		for !d.Done() {
   156  			if str := d.DecodeString(); str != "" {
   157  				d.Render(str)
   158  			}
   159  			if d.Done() {
   160  				break
   161  			}
   162  			d.ExecuteSubstitution()
   163  		}
   164  		return true
   165  	}
   166  
   167  	handlers[msgAffix] = func(d *Decoder) bool {
   168  		// TODO: use an alternative method for common cases.
   169  		prefix := d.DecodeString()
   170  		suffix := d.DecodeString()
   171  		if prefix != "" {
   172  			d.Render(prefix)
   173  		}
   174  		ret := d.ExecuteMessage()
   175  		if suffix != "" {
   176  			d.Render(suffix)
   177  		}
   178  		return ret
   179  	}
   180  }
   181  
var (
	// ErrIncomplete indicates a compiled message does not define translations
	// for all possible argument values. If this error is returned by Compile,
	// evaluating the message may result in the ErrNoMatch error.
	ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")

	// ErrNoMatch indicates no translation message matched the given input
	// parameters when evaluating a message.
	ErrNoMatch = errors.New("catmsg: no translation for inputs")
)
   192  
// A Message holds a collection of translations for the same phrase that may
// vary based on the values of substitution arguments.
type Message interface {
	// Compile encodes the format string(s) of the message as a string for later
	// evaluation.
	//
	// The first call Compile makes on the encoder must be EncodeMessageType.
	// The handle passed to this call may either be a handle returned by
	// Register to encode a single custom message, or HandleFirst followed by
	// a sequence of calls to EncodeMessage.
	//
	// Compile must return ErrIncomplete if it is possible for evaluation to
	// not match any translation for a given set of formatting parameters.
	// For example, selecting a translation based on plural form may not yield
	// a match if the form "Other" is not one of the selectors.
	//
	// Compile may return any other application-specific error. For backwards
	// compatibility with packages like fmt, which often do not do sanity
	// checking of format strings ahead of time, Compile should still make an
	// effort to have some sensible fallback in case of an error.
	Compile(e *Encoder) error
}
   215  
   216  // Compile converts a Message to a data string that can be stored in a Catalog.
   217  // The resulting string can subsequently be decoded by passing to the Execute
   218  // method of a Decoder.
   219  func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
   220  	// TODO: pass macros so they can be used for validation.
   221  	v := &Encoder{inBody: true} // encoder for variables
   222  	v.root = v
   223  	e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
   224  	err = m.Compile(e)
   225  	// This package serves te message package, which in turn is meant to be a
   226  	// drop-in replacement for fmt.  With the fmt package, format strings are
   227  	// evaluated lazily and errors are handled by substituting strings in the
   228  	// result, rather then returning an error. Dealing with multiple languages
   229  	// makes it more important to check errors ahead of time. We chose to be
   230  	// consistent and compatible and allow graceful degradation in case of
   231  	// errors.
   232  	buf := e.buf[stripPrefix(e.buf):]
   233  	if len(v.buf) > 0 {
   234  		// Prepend variable block.
   235  		b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
   236  		b[0] = byte(msgVars)
   237  		b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
   238  		b = append(b, v.buf...)
   239  		b = append(b, buf...)
   240  		buf = b
   241  	}
   242  	if err == nil {
   243  		err = v.err
   244  	}
   245  	return string(buf), err
   246  }
   247  
   248  // FirstOf is a message type that prints the first message in the sequence that
   249  // resolves to a match for the given substitution arguments.
   250  type FirstOf []Message
   251  
   252  // Compile implements Message.
   253  func (s FirstOf) Compile(e *Encoder) error {
   254  	e.EncodeMessageType(msgFirst)
   255  	err := ErrIncomplete
   256  	for i, m := range s {
   257  		if err == nil {
   258  			return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1)
   259  		}
   260  		err = e.EncodeMessage(m)
   261  	}
   262  	return err
   263  }
   264  
   265  // Var defines a message that can be substituted for a placeholder of the same
   266  // name. If an expression does not result in a string after evaluation, Name is
   267  // used as the substitution. For example:
   268  //
   269  //	Var{
   270  //	  Name:    "minutes",
   271  //	  Message: plural.Select(1, "one", "minute"),
   272  //	}
   273  //
   274  // will resolve to minute for singular and minutes for plural forms.
   275  type Var struct {
   276  	Name    string
   277  	Message Message
   278  }
   279  
   280  var errIsVar = errors.New("catmsg: variable used as message")
   281  
   282  // Compile implements Message.
   283  //
   284  // Note that this method merely registers a variable; it does not create an
   285  // encoded message.
   286  func (v *Var) Compile(e *Encoder) error {
   287  	if err := e.addVar(v.Name, v.Message); err != nil {
   288  		return err
   289  	}
   290  	// Using a Var by itself is an error. If it is in a sequence followed by
   291  	// other messages referring to it, this error will be ignored.
   292  	return errIsVar
   293  }
   294  
   295  // Raw is a message consisting of a single format string that is passed as is
   296  // to the Renderer.
   297  //
   298  // Note that a Renderer may still do its own variable substitution.
   299  type Raw string
   300  
   301  // Compile implements Message.
   302  func (r Raw) Compile(e *Encoder) (err error) {
   303  	e.EncodeMessageType(msgRaw)
   304  	// Special case: raw strings don't have a size encoding and so don't use
   305  	// EncodeString.
   306  	e.buf = append(e.buf, r...)
   307  	return nil
   308  }
   309  
   310  // String is a message consisting of a single format string which contains
   311  // placeholders that may be substituted with variables.
   312  //
   313  // Variable substitutions are marked with placeholders and a variable name of
   314  // the form ${name}. Any other substitutions such as Go templates or
   315  // printf-style substitutions are left to be done by the Renderer.
   316  //
   317  // When evaluation a string interpolation, a Renderer will receive separate
   318  // calls for each placeholder and interstitial string. For example, for the
   319  // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
   320  // is:
   321  //
   322  //	d.Render("%[1]v ")
   323  //	d.Arg(1)
   324  //	d.Render(resultOfInvites)
   325  //	d.Render(" %[2]v to ")
   326  //	d.Arg(2)
   327  //	d.Render(resultOfTheir)
   328  //	d.Render(" party.")
   329  //
   330  // where the messages for "invites" and "their" both use a plural.Select
   331  // referring to the first argument.
   332  //
   333  // Strings may also invoke macros. Macros are essentially variables that can be
   334  // reused. Macros may, for instance, be used to make selections between
   335  // different conjugations of a verb. See the catalog package description for an
   336  // overview of macros.
   337  type String string
   338  
   339  // Compile implements Message. It parses the placeholder formats and returns
   340  // any error.
   341  func (s String) Compile(e *Encoder) (err error) {
   342  	msg := string(s)
   343  	const subStart = "${"
   344  	hasHeader := false
   345  	p := 0
   346  	b := []byte{}
   347  	for {
   348  		i := strings.Index(msg[p:], subStart)
   349  		if i == -1 {
   350  			break
   351  		}
   352  		b = append(b, msg[p:p+i]...)
   353  		p += i + len(subStart)
   354  		if i = strings.IndexByte(msg[p:], '}'); i == -1 {
   355  			b = append(b, "$!(MISSINGBRACE)"...)
   356  			err = fmt.Errorf("catmsg: missing '}'")
   357  			p = len(msg)
   358  			break
   359  		}
   360  		name := strings.TrimSpace(msg[p : p+i])
   361  		if q := strings.IndexByte(name, '('); q == -1 {
   362  			if !hasHeader {
   363  				hasHeader = true
   364  				e.EncodeMessageType(msgString)
   365  			}
   366  			e.EncodeString(string(b))
   367  			e.EncodeSubstitution(name)
   368  			b = b[:0]
   369  		} else if j := strings.IndexByte(name[q:], ')'); j == -1 {
   370  			// TODO: what should the error be?
   371  			b = append(b, "$!(MISSINGPAREN)"...)
   372  			err = fmt.Errorf("catmsg: missing ')'")
   373  		} else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
   374  			// TODO: handle more than one argument
   375  			b = append(b, "$!(BADNUM)"...)
   376  			err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
   377  		} else {
   378  			if !hasHeader {
   379  				hasHeader = true
   380  				e.EncodeMessageType(msgString)
   381  			}
   382  			e.EncodeString(string(b))
   383  			e.EncodeSubstitution(name[:q], int(x))
   384  			b = b[:0]
   385  		}
   386  		p += i + 1
   387  	}
   388  	b = append(b, msg[p:]...)
   389  	if !hasHeader {
   390  		// Simplify string to a raw string.
   391  		Raw(string(b)).Compile(e)
   392  	} else if len(b) > 0 {
   393  		e.EncodeString(string(b))
   394  	}
   395  	return err
   396  }
   397  
   398  // Affix is a message that adds a prefix and suffix to another message.
   399  // This is mostly used add back whitespace to a translation that was stripped
   400  // before sending it out.
   401  type Affix struct {
   402  	Message Message
   403  	Prefix  string
   404  	Suffix  string
   405  }
   406  
   407  // Compile implements Message.
   408  func (a Affix) Compile(e *Encoder) (err error) {
   409  	// TODO: consider adding a special message type that just adds a single
   410  	// return. This is probably common enough to handle the majority of cases.
   411  	// Get some stats first, though.
   412  	e.EncodeMessageType(msgAffix)
   413  	e.EncodeString(a.Prefix)
   414  	e.EncodeString(a.Suffix)
   415  	e.EncodeMessage(a.Message)
   416  	return nil
   417  }
   418  

View as plain text