// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package catmsg contains support types for package x/text/message/catalog.
//
// This package contains the low-level implementations of Message used by the
// catalog package and provides primitives for other packages to implement their
// own. For instance, the plural package provides functionality for selecting
// translation strings based on the plural category of substitution arguments.
//
// # Encoding and Decoding
//
// Catalogs store Messages encoded as a single string. Compiling a message into
// a string both results in a more compact representation and speeds up
// evaluation.
//
// A Message must implement a Compile method to convert its arbitrary
// representation to a string. The Compile method takes an Encoder which
// facilitates serializing the message. Encoders also provide more context of
// the message's creation (such as for which language the message is intended),
// which may not be known at the time of the creation of the message.
//
// Each message type must also have an accompanying decoder registered to decode
// the message. This decoder takes a Decoder argument which provides the
// counterparts for the decoding.
//
// # Renderers
//
// A Decoder must be initialized with a Renderer implementation. These
// implementations must be provided by packages that use Catalogs, typically
// formatting packages such as x/text/message. A typical user will not need to
// worry about this type; it is only relevant to packages that do string
// formatting and want to use the catalog package to handle localized strings.
//
// A package that uses catalogs for selecting strings receives selection results
// as a sequence of substrings passed to the Renderer. The following snippet
// shows how to define a message with a plural-dependent variable using the
// message package.
//
//	message.Set(language.English, "You are %d minute(s) late.",
//		catalog.Var("minutes", plural.Select(1, "one", "minute")),
//		catalog.String("You are %[1]d ${minutes} late."))
//
//	p := message.NewPrinter(language.English)
//	p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
//
// To evaluate the Printf, package message wraps the arguments in a Renderer
// that is passed to the catalog for message decoding. The call sequence that
// results from evaluating the above message, assuming the person is rather
// tardy, is:
//
//	Render("You are %[1]d ")
//	Arg(1)
//	Render("minutes")
//	Render(" late.")
//
// The call to Arg is caused by the plural.Select execution, which evaluates
// the argument to determine whether the singular or plural message form should
// be selected. The calls to Render report the partial results to the message
// package for further evaluation.
package catmsg

import (
	"errors"
	"fmt"
	"strconv"
	"strings"
	"sync"

	"golang.org/x/text/language"
)

// A Handle refers to a registered message type.
type Handle int

// A Handler decodes and evaluates data compiled by a Message and sends the
// result to the Decoder. The output may depend on the value of the substitution
// arguments, accessible by the Decoder's Arg method. The Handler returns false
// if there is no translation for the given substitution arguments.
type Handler func(d *Decoder) bool

// Register records the existence of a message type and returns a Handle that
// can be used in the Encoder's EncodeMessageType method to create such
// messages. The prefix of the name should be the package path followed by
// an optional disambiguating string.
// Register will panic if a handle for the same name was already registered.
86 func Register(name string, handler Handler) Handle { 87 mutex.Lock() 88 defer mutex.Unlock() 89 90 if _, ok := names[name]; ok { 91 panic(fmt.Errorf("catmsg: handler for %q already exists", name)) 92 } 93 h := Handle(len(handlers)) 94 names[name] = h 95 handlers = append(handlers, handler) 96 return h 97 } 98 99 // These handlers require fixed positions in the handlers slice. 100 const ( 101 msgVars Handle = iota 102 msgFirst 103 msgRaw 104 msgString 105 msgAffix 106 // Leave some arbitrary room for future expansion: 20 should suffice. 107 numInternal = 20 108 ) 109 110 const prefix = "golang.org/x/text/internal/catmsg." 111 112 var ( 113 // TODO: find a more stable way to link handles to message types. 114 mutex sync.Mutex 115 names = map[string]Handle{ 116 prefix + "Vars": msgVars, 117 prefix + "First": msgFirst, 118 prefix + "Raw": msgRaw, 119 prefix + "String": msgString, 120 prefix + "Affix": msgAffix, 121 } 122 handlers = make([]Handler, numInternal) 123 ) 124 125 func init() { 126 // This handler is a message type wrapper that initializes a decoder 127 // with a variable block. This message type, if present, is always at the 128 // start of an encoded message. 129 handlers[msgVars] = func(d *Decoder) bool { 130 blockSize := int(d.DecodeUint()) 131 d.vars = d.data[:blockSize] 132 d.data = d.data[blockSize:] 133 return d.executeMessage() 134 } 135 136 // First takes the first message in a sequence that results in a match for 137 // the given substitution arguments. 138 handlers[msgFirst] = func(d *Decoder) bool { 139 for !d.Done() { 140 if d.ExecuteMessage() { 141 return true 142 } 143 } 144 return false 145 } 146 147 handlers[msgRaw] = func(d *Decoder) bool { 148 d.Render(d.data) 149 return true 150 } 151 152 // A String message alternates between a string constant and a variable 153 // substitution. 
154 handlers[msgString] = func(d *Decoder) bool { 155 for !d.Done() { 156 if str := d.DecodeString(); str != "" { 157 d.Render(str) 158 } 159 if d.Done() { 160 break 161 } 162 d.ExecuteSubstitution() 163 } 164 return true 165 } 166 167 handlers[msgAffix] = func(d *Decoder) bool { 168 // TODO: use an alternative method for common cases. 169 prefix := d.DecodeString() 170 suffix := d.DecodeString() 171 if prefix != "" { 172 d.Render(prefix) 173 } 174 ret := d.ExecuteMessage() 175 if suffix != "" { 176 d.Render(suffix) 177 } 178 return ret 179 } 180 } 181 182 var ( 183 // ErrIncomplete indicates a compiled message does not define translations 184 // for all possible argument values. If this message is returned, evaluating 185 // a message may result in the ErrNoMatch error. 186 ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs") 187 188 // ErrNoMatch indicates no translation message matched the given input 189 // parameters when evaluating a message. 190 ErrNoMatch = errors.New("catmsg: no translation for inputs") 191 ) 192 193 // A Message holds a collection of translations for the same phrase that may 194 // vary based on the values of substitution arguments. 195 type Message interface { 196 // Compile encodes the format string(s) of the message as a string for later 197 // evaluation. 198 // 199 // The first call Compile makes on the encoder must be EncodeMessageType. 200 // The handle passed to this call may either be a handle returned by 201 // Register to encode a single custom message, or HandleFirst followed by 202 // a sequence of calls to EncodeMessage. 203 // 204 // Compile must return ErrIncomplete if it is possible for evaluation to 205 // not match any translation for a given set of formatting parameters. 206 // For example, selecting a translation based on plural form may not yield 207 // a match if the form "Other" is not one of the selectors. 
208 // 209 // Compile may return any other application-specific error. For backwards 210 // compatibility with package like fmt, which often do not do sanity 211 // checking of format strings ahead of time, Compile should still make an 212 // effort to have some sensible fallback in case of an error. 213 Compile(e *Encoder) error 214 } 215 216 // Compile converts a Message to a data string that can be stored in a Catalog. 217 // The resulting string can subsequently be decoded by passing to the Execute 218 // method of a Decoder. 219 func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) { 220 // TODO: pass macros so they can be used for validation. 221 v := &Encoder{inBody: true} // encoder for variables 222 v.root = v 223 e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages 224 err = m.Compile(e) 225 // This package serves te message package, which in turn is meant to be a 226 // drop-in replacement for fmt. With the fmt package, format strings are 227 // evaluated lazily and errors are handled by substituting strings in the 228 // result, rather then returning an error. Dealing with multiple languages 229 // makes it more important to check errors ahead of time. We chose to be 230 // consistent and compatible and allow graceful degradation in case of 231 // errors. 232 buf := e.buf[stripPrefix(e.buf):] 233 if len(v.buf) > 0 { 234 // Prepend variable block. 235 b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf)) 236 b[0] = byte(msgVars) 237 b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))] 238 b = append(b, v.buf...) 239 b = append(b, buf...) 240 buf = b 241 } 242 if err == nil { 243 err = v.err 244 } 245 return string(buf), err 246 } 247 248 // FirstOf is a message type that prints the first message in the sequence that 249 // resolves to a match for the given substitution arguments. 250 type FirstOf []Message 251 252 // Compile implements Message. 
253 func (s FirstOf) Compile(e *Encoder) error { 254 e.EncodeMessageType(msgFirst) 255 err := ErrIncomplete 256 for i, m := range s { 257 if err == nil { 258 return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1) 259 } 260 err = e.EncodeMessage(m) 261 } 262 return err 263 } 264 265 // Var defines a message that can be substituted for a placeholder of the same 266 // name. If an expression does not result in a string after evaluation, Name is 267 // used as the substitution. For example: 268 // 269 // Var{ 270 // Name: "minutes", 271 // Message: plural.Select(1, "one", "minute"), 272 // } 273 // 274 // will resolve to minute for singular and minutes for plural forms. 275 type Var struct { 276 Name string 277 Message Message 278 } 279 280 var errIsVar = errors.New("catmsg: variable used as message") 281 282 // Compile implements Message. 283 // 284 // Note that this method merely registers a variable; it does not create an 285 // encoded message. 286 func (v *Var) Compile(e *Encoder) error { 287 if err := e.addVar(v.Name, v.Message); err != nil { 288 return err 289 } 290 // Using a Var by itself is an error. If it is in a sequence followed by 291 // other messages referring to it, this error will be ignored. 292 return errIsVar 293 } 294 295 // Raw is a message consisting of a single format string that is passed as is 296 // to the Renderer. 297 // 298 // Note that a Renderer may still do its own variable substitution. 299 type Raw string 300 301 // Compile implements Message. 302 func (r Raw) Compile(e *Encoder) (err error) { 303 e.EncodeMessageType(msgRaw) 304 // Special case: raw strings don't have a size encoding and so don't use 305 // EncodeString. 306 e.buf = append(e.buf, r...) 307 return nil 308 } 309 310 // String is a message consisting of a single format string which contains 311 // placeholders that may be substituted with variables. 
312 // 313 // Variable substitutions are marked with placeholders and a variable name of 314 // the form ${name}. Any other substitutions such as Go templates or 315 // printf-style substitutions are left to be done by the Renderer. 316 // 317 // When evaluation a string interpolation, a Renderer will receive separate 318 // calls for each placeholder and interstitial string. For example, for the 319 // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls 320 // is: 321 // 322 // d.Render("%[1]v ") 323 // d.Arg(1) 324 // d.Render(resultOfInvites) 325 // d.Render(" %[2]v to ") 326 // d.Arg(2) 327 // d.Render(resultOfTheir) 328 // d.Render(" party.") 329 // 330 // where the messages for "invites" and "their" both use a plural.Select 331 // referring to the first argument. 332 // 333 // Strings may also invoke macros. Macros are essentially variables that can be 334 // reused. Macros may, for instance, be used to make selections between 335 // different conjugations of a verb. See the catalog package description for an 336 // overview of macros. 337 type String string 338 339 // Compile implements Message. It parses the placeholder formats and returns 340 // any error. 341 func (s String) Compile(e *Encoder) (err error) { 342 msg := string(s) 343 const subStart = "${" 344 hasHeader := false 345 p := 0 346 b := []byte{} 347 for { 348 i := strings.Index(msg[p:], subStart) 349 if i == -1 { 350 break 351 } 352 b = append(b, msg[p:p+i]...) 353 p += i + len(subStart) 354 if i = strings.IndexByte(msg[p:], '}'); i == -1 { 355 b = append(b, "$!(MISSINGBRACE)"...) 
356 err = fmt.Errorf("catmsg: missing '}'") 357 p = len(msg) 358 break 359 } 360 name := strings.TrimSpace(msg[p : p+i]) 361 if q := strings.IndexByte(name, '('); q == -1 { 362 if !hasHeader { 363 hasHeader = true 364 e.EncodeMessageType(msgString) 365 } 366 e.EncodeString(string(b)) 367 e.EncodeSubstitution(name) 368 b = b[:0] 369 } else if j := strings.IndexByte(name[q:], ')'); j == -1 { 370 // TODO: what should the error be? 371 b = append(b, "$!(MISSINGPAREN)"...) 372 err = fmt.Errorf("catmsg: missing ')'") 373 } else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil { 374 // TODO: handle more than one argument 375 b = append(b, "$!(BADNUM)"...) 376 err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j])) 377 } else { 378 if !hasHeader { 379 hasHeader = true 380 e.EncodeMessageType(msgString) 381 } 382 e.EncodeString(string(b)) 383 e.EncodeSubstitution(name[:q], int(x)) 384 b = b[:0] 385 } 386 p += i + 1 387 } 388 b = append(b, msg[p:]...) 389 if !hasHeader { 390 // Simplify string to a raw string. 391 Raw(string(b)).Compile(e) 392 } else if len(b) > 0 { 393 e.EncodeString(string(b)) 394 } 395 return err 396 } 397 398 // Affix is a message that adds a prefix and suffix to another message. 399 // This is mostly used add back whitespace to a translation that was stripped 400 // before sending it out. 401 type Affix struct { 402 Message Message 403 Prefix string 404 Suffix string 405 } 406 407 // Compile implements Message. 408 func (a Affix) Compile(e *Encoder) (err error) { 409 // TODO: consider adding a special message type that just adds a single 410 // return. This is probably common enough to handle the majority of cases. 411 // Get some stats first, though. 412 e.EncodeMessageType(msgAffix) 413 e.EncodeString(a.Prefix) 414 e.EncodeString(a.Suffix) 415 e.EncodeMessage(a.Message) 416 return nil 417 } 418