...

Source file src/golang.org/x/text/message/pipeline/pipeline.go

Documentation: golang.org/x/text/message/pipeline

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package pipeline provides tools for creating translation pipelines.
     6  //
     7  // NOTE: UNDER DEVELOPMENT. API MAY CHANGE.
     8  package pipeline
     9  
    10  import (
    11  	"bytes"
    12  	"encoding/json"
    13  	"fmt"
    14  	"go/build"
    15  	"go/parser"
    16  	"io/ioutil"
    17  	"log"
    18  	"os"
    19  	"path/filepath"
    20  	"regexp"
    21  	"strings"
    22  	"text/template"
    23  	"unicode"
    24  
    25  	"golang.org/x/text/internal"
    26  	"golang.org/x/text/language"
    27  	"golang.org/x/text/runes"
    28  	"golang.org/x/tools/go/loader"
    29  )
    30  
const (
	// extractFile and outFile are file names that end in gotextSuffix. Neither
	// is referenced in this file.
	// NOTE(review): presumably the names of the extraction file and of the
	// per-language output file used elsewhere in the package — confirm against
	// their users.
	extractFile  = "extracted.gotext.json"
	outFile      = "out.gotext.json"
	// gotextSuffix is the fallback extension for generated translation files
	// and the suffix the default translation file matcher is built from.
	gotextSuffix = "gotext.json"
)
    36  
// Config contains configuration for the translation pipeline.
type Config struct {
	// Supported indicates the languages for which data should be generated.
	// The default is to support all locales for which there are matching
	// translation files.
	Supported []language.Tag

	// --- Extraction

	// SourceLanguage is the language of the extracted messages. When merging,
	// a message in this language that has no translation gets its original
	// text as translation and is marked fuzzy.
	SourceLanguage language.Tag

	// Packages is not referenced in this file.
	// NOTE(review): presumably the packages from which messages are
	// extracted — confirm against the extraction code.
	Packages []string

	// --- File structure

	// Dir is the root dir for all operations. The default is "./locales".
	Dir string

	// TranslationsPattern is a regular expression to match incoming translation
	// files. These files may appear in any directory rooted at Dir. If empty,
	// a default pattern built from the "gotext.json" suffix is used. The
	// language for the translation files is determined as follows:
	//   1. From the Language field in the file.
	//   2. If not present, from a valid language tag in the filename, separated
	//      by dots (e.g. "en-US.json" or "incoming.pt_PT.xmb").
	//   3. If not present, from the closest subdirectory in which the file
	//      is contained that parses as a valid language tag.
	// NOTE(review): the import code in this file only uses the tag obtained
	// from steps 2 and 3 to recognize output files; confirm whether it is
	// applied to the loaded messages elsewhere.
	TranslationsPattern string

	// OutPattern defines the location for translation files for a certain
	// language. It is parsed as a text/template in which Dir, Language and Ext
	// are available. The default is "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
	OutPattern string

	// Format defines the file format for generated translation files.
	// The default is XMB. Alternatives are GetText, XLIFF, L20n, GoText.
	// In this file Format is only consulted as the fallback value for Ext.
	Format string

	// Ext is the value substituted for {{.Ext}} in OutPattern. If empty, Format
	// is used instead, and if that is empty as well, "gotext.json".
	Ext string

	// TODO:
	// Actions are additional actions to be performed after the initial extract
	// and merge.
	// Actions []struct {
	// 	Name    string
	// 	Options map[string]string
	// }

	// --- Generation

	// GenFile may be in a different package. If it is not defined, it will
	// be written to stdout.
	GenFile string

	// GenPackage is the package or relative path into which to generate the
	// file. If not specified it is relative to the current directory.
	GenPackage string

	// DeclareVar defines a variable to which to assign the generated Catalog.
	DeclareVar string

	// SetDefault determines whether to assign the generated Catalog to
	// message.DefaultCatalog. The default for this is true if DeclareVar is
	// not defined, false otherwise.
	SetDefault bool

	// TODO:
	// - Printf-style configuration
	// - Template-style configuration
	// - Extraction options
	// - Rewrite options
	// - Generation options
}
   108  
   109  // Operations:
   110  // - extract:       get the strings
   111  // - disambiguate:  find messages with the same key, but possible different meaning.
   112  // - create out:    create a list of messages that need translations
   113  // - load trans:    load the list of current translations
   114  // - merge:         assign list of translations as done
   115  // - (action)expand:    analyze features and create example sentences for each version.
   116  // - (action)googletrans:   pre-populate messages with automatic translations.
   117  // - (action)export:    send out messages somewhere non-standard
   118  // - (action)import:    load messages from somewhere non-standard
   119  // - vet program:   don't pass "foo" + var + "bar" strings. Not using funcs for translated strings.
   120  // - vet trans:     coverage: all translations/ all features.
   121  // - generate:      generate Go code
   122  
// State holds all accumulated information on translations during processing.
type State struct {
	// Config is the pipeline configuration the operations on State consult.
	Config Config

	// Package and program are not referenced in this file.
	// NOTE(review): presumably filled in while loading the packages to
	// extract from — confirm against the extraction code.
	Package string
	program *loader.Program

	// Extracted holds the messages found in the source code. Merge reads them
	// as its starting point.
	Extracted Messages `json:"messages"`

	// Messages includes all messages for which there need to be translations.
	// Duplicates may be eliminated. Generation will be done from these messages
	// (usually after merging). Merge appends one entry per language and Export
	// writes each entry to its own file.
	Messages []Messages

	// Translations are incoming translations for the application messages.
	// Import appends one entry for every translation file it loads.
	Translations []Messages
}
   140  
   141  func (s *State) dir() string {
   142  	if d := s.Config.Dir; d != "" {
   143  		return d
   144  	}
   145  	return "./locales"
   146  }
   147  
   148  func outPattern(s *State) (string, error) {
   149  	c := s.Config
   150  	pat := c.OutPattern
   151  	if pat == "" {
   152  		pat = "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
   153  	}
   154  
   155  	ext := c.Ext
   156  	if ext == "" {
   157  		ext = c.Format
   158  	}
   159  	if ext == "" {
   160  		ext = gotextSuffix
   161  	}
   162  	t, err := template.New("").Parse(pat)
   163  	if err != nil {
   164  		return "", wrap(err, "error parsing template")
   165  	}
   166  	buf := bytes.Buffer{}
   167  	err = t.Execute(&buf, map[string]string{
   168  		"Dir":      s.dir(),
   169  		"Language": "%s",
   170  		"Ext":      ext,
   171  	})
   172  	return filepath.FromSlash(buf.String()), wrap(err, "incorrect OutPattern")
   173  }
   174  
   175  var transRE = regexp.MustCompile(`.*\.` + gotextSuffix)
   176  
   177  // Import loads existing translation files.
   178  func (s *State) Import() error {
   179  	outPattern, err := outPattern(s)
   180  	if err != nil {
   181  		return err
   182  	}
   183  	re := transRE
   184  	if pat := s.Config.TranslationsPattern; pat != "" {
   185  		if re, err = regexp.Compile(pat); err != nil {
   186  			return wrapf(err, "error parsing regexp %q", s.Config.TranslationsPattern)
   187  		}
   188  	}
   189  	x := importer{s, outPattern, re}
   190  	return x.walkImport(s.dir(), s.Config.SourceLanguage)
   191  }
   192  
// importer carries the settings shared by all steps of a recursive import.
type importer struct {
	// state receives the translations that are loaded.
	state *State
	// outPattern is a fmt format string that, given a language tag, yields the
	// path of the output file for that language. Such files are skipped.
	outPattern string
	// transFile decides, by file name, which files are translation files.
	transFile *regexp.Regexp
}
   198  
   199  func (i *importer) walkImport(path string, tag language.Tag) error {
   200  	files, err := ioutil.ReadDir(path)
   201  	if err != nil {
   202  		return nil
   203  	}
   204  	for _, f := range files {
   205  		name := f.Name()
   206  		tag := tag
   207  		if f.IsDir() {
   208  			if t, err := language.Parse(name); err == nil {
   209  				tag = t
   210  			}
   211  			// We ignore errors
   212  			if err := i.walkImport(filepath.Join(path, name), tag); err != nil {
   213  				return err
   214  			}
   215  			continue
   216  		}
   217  		for _, l := range strings.Split(name, ".") {
   218  			if t, err := language.Parse(l); err == nil {
   219  				tag = t
   220  			}
   221  		}
   222  		file := filepath.Join(path, name)
   223  		// TODO: Should we skip files that match output files?
   224  		if fmt.Sprintf(i.outPattern, tag) == file {
   225  			continue
   226  		}
   227  		// TODO: handle different file formats.
   228  		if !i.transFile.MatchString(name) {
   229  			continue
   230  		}
   231  		b, err := ioutil.ReadFile(file)
   232  		if err != nil {
   233  			return wrap(err, "read file failed")
   234  		}
   235  		var translations Messages
   236  		if err := json.Unmarshal(b, &translations); err != nil {
   237  			return wrap(err, "parsing translation file failed")
   238  		}
   239  		i.state.Translations = append(i.state.Translations, translations)
   240  	}
   241  	return nil
   242  }
   243  
   244  // Merge merges the extracted messages with the existing translations.
   245  func (s *State) Merge() error {
   246  	if s.Messages != nil {
   247  		panic("already merged")
   248  	}
   249  	// Create an index for each unique message.
   250  	// Duplicates are okay as long as the substitution arguments are okay as
   251  	// well.
   252  	// Top-level messages are okay to appear in multiple substitution points.
   253  
   254  	// Collect key equivalence.
   255  	msgs := []*Message{}
   256  	keyToIDs := map[string]*Message{}
   257  	for _, m := range s.Extracted.Messages {
   258  		m := m
   259  		if prev, ok := keyToIDs[m.Key]; ok {
   260  			if err := checkEquivalence(&m, prev); err != nil {
   261  				warnf("Key %q matches conflicting messages: %v and %v", m.Key, prev.ID, m.ID)
   262  				// TODO: track enough information so that the rewriter can
   263  				// suggest/disambiguate messages.
   264  			}
   265  			// TODO: add position to message.
   266  			continue
   267  		}
   268  		i := len(msgs)
   269  		msgs = append(msgs, &m)
   270  		keyToIDs[m.Key] = msgs[i]
   271  	}
   272  
   273  	// Messages with different keys may still refer to the same translated
   274  	// message (e.g. different whitespace). Filter these.
   275  	idMap := map[string]bool{}
   276  	filtered := []*Message{}
   277  	for _, m := range msgs {
   278  		found := false
   279  		for _, id := range m.ID {
   280  			found = found || idMap[id]
   281  		}
   282  		if !found {
   283  			filtered = append(filtered, m)
   284  		}
   285  		for _, id := range m.ID {
   286  			idMap[id] = true
   287  		}
   288  	}
   289  
   290  	// Build index of translations.
   291  	translations := map[language.Tag]map[string]Message{}
   292  	languages := append([]language.Tag{}, s.Config.Supported...)
   293  
   294  	for _, t := range s.Translations {
   295  		tag := t.Language
   296  		if _, ok := translations[tag]; !ok {
   297  			translations[tag] = map[string]Message{}
   298  			languages = append(languages, tag)
   299  		}
   300  		for _, m := range t.Messages {
   301  			if !m.Translation.IsEmpty() {
   302  				for _, id := range m.ID {
   303  					if _, ok := translations[tag][id]; ok {
   304  						warnf("Duplicate translation in locale %q for message %q", tag, id)
   305  					}
   306  					translations[tag][id] = m
   307  				}
   308  			}
   309  		}
   310  	}
   311  	languages = internal.UniqueTags(languages)
   312  
   313  	for _, tag := range languages {
   314  		ms := Messages{Language: tag}
   315  		for _, orig := range filtered {
   316  			m := *orig
   317  			m.Key = ""
   318  			m.Position = ""
   319  
   320  			for _, id := range m.ID {
   321  				if t, ok := translations[tag][id]; ok {
   322  					m.Translation = t.Translation
   323  					if t.TranslatorComment != "" {
   324  						m.TranslatorComment = t.TranslatorComment
   325  						m.Fuzzy = t.Fuzzy
   326  					}
   327  					break
   328  				}
   329  			}
   330  			if tag == s.Config.SourceLanguage && m.Translation.IsEmpty() {
   331  				m.Translation = m.Message
   332  				if m.TranslatorComment == "" {
   333  					m.TranslatorComment = "Copied from source."
   334  					m.Fuzzy = true
   335  				}
   336  			}
   337  			// TODO: if translation is empty: pre-expand based on available
   338  			// linguistic features. This may also be done as a plugin.
   339  			ms.Messages = append(ms.Messages, m)
   340  		}
   341  		s.Messages = append(s.Messages, ms)
   342  	}
   343  	return nil
   344  }
   345  
   346  // Export writes out the messages to translation out files.
   347  func (s *State) Export() error {
   348  	path, err := outPattern(s)
   349  	if err != nil {
   350  		return wrap(err, "export failed")
   351  	}
   352  	for _, out := range s.Messages {
   353  		// TODO: inject translations from existing files to avoid retranslation.
   354  		data, err := json.MarshalIndent(out, "", "    ")
   355  		if err != nil {
   356  			return wrap(err, "JSON marshal failed")
   357  		}
   358  		file := fmt.Sprintf(path, out.Language)
   359  		if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil {
   360  			return wrap(err, "dir create failed")
   361  		}
   362  		if err := ioutil.WriteFile(file, data, 0644); err != nil {
   363  			return wrap(err, "write failed")
   364  		}
   365  	}
   366  	return nil
   367  }
   368  
// Rune predicates used for trimming.
var (
	// ws reports whether a rune is in unicode.White_Space.
	ws    = runes.In(unicode.White_Space).Contains
	// notWS reports whether a rune is not in unicode.White_Space.
	notWS = runes.NotIn(unicode.White_Space).Contains
)
   373  
   374  func trimWS(s string) (trimmed, leadWS, trailWS string) {
   375  	trimmed = strings.TrimRightFunc(s, ws)
   376  	trailWS = s[len(trimmed):]
   377  	if i := strings.IndexFunc(trimmed, notWS); i > 0 {
   378  		leadWS = trimmed[:i]
   379  		trimmed = trimmed[i:]
   380  	}
   381  	return trimmed, leadWS, trailWS
   382  }
   383  
   384  // NOTE: The command line tool already prefixes with "gotext:".
   385  var (
   386  	wrap = func(err error, msg string) error {
   387  		if err == nil {
   388  			return nil
   389  		}
   390  		return fmt.Errorf("%s: %v", msg, err)
   391  	}
   392  	wrapf = func(err error, msg string, args ...interface{}) error {
   393  		if err == nil {
   394  			return nil
   395  		}
   396  		return wrap(err, fmt.Sprintf(msg, args...))
   397  	}
   398  	errorf = fmt.Errorf
   399  )
   400  
// warnf reports a non-fatal problem. The message is formatted like
// fmt.Printf and currently emitted through the standard logger.
func warnf(format string, args ...interface{}) {
	// TODO: don't log.
	log.Printf(format, args...)
}
   405  
   406  func loadPackages(conf *loader.Config, args []string) (*loader.Program, error) {
   407  	if len(args) == 0 {
   408  		args = []string{"."}
   409  	}
   410  
   411  	conf.Build = &build.Default
   412  	conf.ParserMode = parser.ParseComments
   413  
   414  	// Use the initial packages from the command line.
   415  	args, err := conf.FromArgs(args, false)
   416  	if err != nil {
   417  		return nil, wrap(err, "loading packages failed")
   418  	}
   419  
   420  	// Load, parse and type-check the whole program.
   421  	return conf.Load()
   422  }
   423  

View as plain text