1
2
3
4
5 package pipeline
6
7 import (
8 "bytes"
9 "errors"
10 "fmt"
11 "go/ast"
12 "go/constant"
13 "go/format"
14 "go/token"
15 "go/types"
16 "path/filepath"
17 "sort"
18 "strings"
19 "unicode"
20 "unicode/utf8"
21
22 fmtparser "golang.org/x/text/internal/format"
23 "golang.org/x/tools/go/callgraph"
24 "golang.org/x/tools/go/callgraph/cha"
25 "golang.org/x/tools/go/loader"
26 "golang.org/x/tools/go/ssa"
27 "golang.org/x/tools/go/ssa/ssautil"
28 )
29
// debug enables the diagnostic trace printed by (*extracter).debug and the
// duplicate-format warning in visitFormats. It is a compile-time switch.
const debug = false
31
32
33
34
35
36
37
38
39
40
41 func Extract(c *Config) (*State, error) {
42 x, err := newExtracter(c)
43 if err != nil {
44 return nil, wrap(err, "")
45 }
46
47 if err := x.seedEndpoints(); err != nil {
48 return nil, err
49 }
50 x.extractMessages()
51
52 return &State{
53 Config: *c,
54 program: x.iprog,
55 Extracted: Messages{
56 Language: c.SourceLanguage,
57 Messages: x.messages,
58 },
59 }, nil
60 }
61
// extracter holds the state shared by all phases of message extraction.
type extracter struct {
	conf      loader.Config    // loader configuration; its Fset is used to render positions
	iprog     *loader.Program  // the loaded packages
	prog      *ssa.Program     // SSA program created from iprog
	callGraph *callgraph.Graph // call graph of prog (built with cha.CallGraph)

	// globals maps the position of a global variable to the string
	// constants collected for it.
	globals map[token.Pos]*constData
	// funcs maps the position of a call site to the data gathered for it.
	funcs map[token.Pos]*callData
	// messages accumulates the extracted messages.
	messages []Message
}
73
74 func newExtracter(c *Config) (x *extracter, err error) {
75 x = &extracter{
76 conf: loader.Config{},
77 globals: map[token.Pos]*constData{},
78 funcs: map[token.Pos]*callData{},
79 }
80
81 x.iprog, err = loadPackages(&x.conf, c.Packages)
82 if err != nil {
83 return nil, wrap(err, "")
84 }
85
86 x.prog = ssautil.CreateProgram(x.iprog, ssa.GlobalDebug|ssa.BareInits)
87 x.prog.Build()
88
89 x.callGraph = cha.CallGraph(x.prog)
90
91 return x, nil
92 }
93
94 func (x *extracter) globalData(pos token.Pos) *constData {
95 cd := x.globals[pos]
96 if cd == nil {
97 cd = &constData{}
98 x.globals[pos] = cd
99 }
100 return cd
101 }
102
103 func (x *extracter) seedEndpoints() error {
104 pkgInfo := x.iprog.Package("golang.org/x/text/message")
105 if pkgInfo == nil {
106 return errors.New("pipeline: golang.org/x/text/message is not imported")
107 }
108 pkg := x.prog.Package(pkgInfo.Pkg)
109 typ := types.NewPointer(pkg.Type("Printer").Type())
110
111 x.processGlobalVars()
112
113 x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Printf"), &callData{
114 formatPos: 1,
115 argPos: 2,
116 isMethod: true,
117 })
118 x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Sprintf"), &callData{
119 formatPos: 1,
120 argPos: 2,
121 isMethod: true,
122 })
123 x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Fprintf"), &callData{
124 formatPos: 2,
125 argPos: 3,
126 isMethod: true,
127 })
128 return nil
129 }
130
131
132
133 func (x *extracter) processGlobalVars() {
134 for _, p := range x.prog.AllPackages() {
135 m, ok := p.Members["init"]
136 if !ok {
137 continue
138 }
139 for _, b := range m.(*ssa.Function).Blocks {
140 for _, i := range b.Instrs {
141 s, ok := i.(*ssa.Store)
142 if !ok {
143 continue
144 }
145 a, ok := s.Addr.(*ssa.Global)
146 if !ok {
147 continue
148 }
149 t := a.Type()
150 for {
151 p, ok := t.(*types.Pointer)
152 if !ok {
153 break
154 }
155 t = p.Elem()
156 }
157 if b, ok := t.(*types.Basic); !ok || b.Kind() != types.String {
158 continue
159 }
160 x.visitInit(a, s.Val)
161 }
162 }
163 }
164 }
165
// constData collects what is known about the values of one global variable.
type constData struct {
	call   *callData   // call that uses this global as its format string, if one was found
	values []constVal  // message-like string constants assigned to the global
	others []token.Pos // positions of other globals whose values flow into this one
}
171
172 func (d *constData) visit(x *extracter, f func(c constant.Value)) {
173 for _, v := range d.values {
174 f(v.value)
175 }
176 for _, p := range d.others {
177 if od, ok := x.globals[p]; ok {
178 od.visit(x, f)
179 }
180 }
181 }
182
// constVal is a constant value together with the position it was found at.
type constVal struct {
	value constant.Value
	pos   token.Pos
}
187
// callData describes either a formatting endpoint or a call site that
// (transitively) reaches one.
type callData struct {
	call    ssa.CallInstruction // the SSA call instruction; unset for the seeded endpoints
	expr    *ast.CallExpr       // matching AST call, filled in by extractMessages
	formats []constant.Value    // constant format strings found for this call

	callee   *callData // data of the function being called
	isMethod bool      // whether the function's parameter list includes a receiver
	// formatPos and argPos are indexes into the function's SSA parameter
	// list (receiver included) of the format string and of the arguments;
	// they are -1 until known.
	formatPos int
	argPos    int
	argTypes  []int // not referenced in this part of the file
}
199
200 func (c *callData) callFormatPos() int {
201 c = c.callee
202 if c.isMethod {
203 return c.formatPos - 1
204 }
205 return c.formatPos
206 }
207
208 func (c *callData) callArgsStart() int {
209 c = c.callee
210 if c.isMethod {
211 return c.argPos - 1
212 }
213 return c.argPos
214 }
215
216 func (c *callData) Pos() token.Pos { return c.call.Pos() }
217 func (c *callData) Pkg() *types.Package { return c.call.Parent().Pkg.Pkg }
218
219 func (x *extracter) handleFunc(f *ssa.Function, fd *callData) {
220 for _, e := range x.callGraph.Nodes[f].In {
221 if e.Pos() == 0 {
222 continue
223 }
224
225 call := e.Site
226 caller := x.funcs[call.Pos()]
227 if caller != nil {
228
229
230 continue
231 }
232 x.debug(call, "CALL", f.String())
233
234 caller = &callData{
235 call: call,
236 callee: fd,
237 formatPos: -1,
238 argPos: -1,
239 }
240
241 offset := 0
242 if call.Common().IsInvoke() {
243 offset = -1
244 }
245 x.funcs[call.Pos()] = caller
246 if fd.argPos >= 0 {
247 x.visitArgs(caller, call.Common().Args[fd.argPos+offset])
248 }
249 x.visitFormats(caller, call.Common().Args[fd.formatPos+offset])
250 }
251 }
252
// posser is the subset of SSA value/instruction behaviour needed by
// (*extracter).debug: a source position and the enclosing function.
type posser interface {
	Pos() token.Pos
	Parent() *ssa.Function
}
257
258 func (x *extracter) debug(v posser, header string, args ...interface{}) {
259 if debug {
260 pos := ""
261 if p := v.Parent(); p != nil {
262 pos = posString(&x.conf, p.Package().Pkg, v.Pos())
263 }
264 if header != "CALL" && header != "INSERT" {
265 header = " " + header
266 }
267 fmt.Printf("%-32s%-10s%-15T ", pos+fmt.Sprintf("@%d", v.Pos()), header, v)
268 for _, a := range args {
269 fmt.Printf(" %v", a)
270 }
271 fmt.Println()
272 }
273 }
274
275
276
277 func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) {
278 if v == nil {
279 return
280 }
281 x.debug(v, "GLOBAL", v)
282
283 switch v := v.(type) {
284 case *ssa.Phi:
285 for _, e := range v.Edges {
286 x.visitInit(global, e)
287 }
288
289 case *ssa.Const:
290
291 if str := constant.StringVal(v.Value); isMsg(str) {
292 cd := x.globalData(global.Pos())
293 cd.values = append(cd.values, constVal{v.Value, v.Pos()})
294 }
295
296
297 case *ssa.Global:
298 cd := x.globalData(global.Pos())
299 cd.others = append(cd.others, v.Pos())
300
301 case *ssa.FieldAddr, *ssa.Field:
302
303
304
305 case *ssa.Slice:
306 if v.Low == nil && v.High == nil && v.Max == nil {
307 x.visitInit(global, v.X)
308 }
309
310 case *ssa.Alloc:
311 if ref := v.Referrers(); ref == nil {
312 for _, r := range *ref {
313 values := []ssa.Value{}
314 for _, o := range r.Operands(nil) {
315 if o == nil || *o == v {
316 continue
317 }
318 values = append(values, *o)
319 }
320
321
322 if len(values) == 1 {
323 x.visitInit(global, values[0])
324 }
325 }
326 }
327
328 case ssa.Instruction:
329 rands := v.Operands(nil)
330 if len(rands) == 1 && rands[0] != nil {
331 x.visitInit(global, *rands[0])
332 }
333 }
334 return
335 }
336
337
338
339
340 func (x *extracter) visitFormats(call *callData, v ssa.Value) {
341 if v == nil {
342 return
343 }
344 x.debug(v, "VALUE", v)
345
346 switch v := v.(type) {
347 case *ssa.Phi:
348 for _, e := range v.Edges {
349 x.visitFormats(call, e)
350 }
351
352 case *ssa.Const:
353
354 if isMsg(constant.StringVal(v.Value)) {
355 x.debug(call.call, "FORMAT", v.Value.ExactString())
356 call.formats = append(call.formats, v.Value)
357 }
358
359
360 case *ssa.Global:
361 x.globalData(v.Pos()).call = call
362
363 case *ssa.FieldAddr, *ssa.Field:
364
365
366
367 case *ssa.Slice:
368 if v.Low == nil && v.High == nil && v.Max == nil {
369 x.visitFormats(call, v.X)
370 }
371
372 case *ssa.Parameter:
373
374 f := v.Parent()
375 for i, p := range f.Params {
376 if p == v {
377 if call.formatPos < 0 {
378 call.formatPos = i
379
380
381 call.isMethod = len(f.Params) > f.Signature.Params().Len()
382 x.handleFunc(v.Parent(), call)
383 } else if debug && i != call.formatPos {
384
385 fmt.Printf("WARNING:%s: format string passed to arg %d and %d\n",
386 posString(&x.conf, call.Pkg(), call.Pos()),
387 call.formatPos, i)
388 }
389 }
390 }
391
392 case *ssa.Alloc:
393 if ref := v.Referrers(); ref == nil {
394 for _, r := range *ref {
395 values := []ssa.Value{}
396 for _, o := range r.Operands(nil) {
397 if o == nil || *o == v {
398 continue
399 }
400 values = append(values, *o)
401 }
402
403
404 if len(values) == 1 {
405 x.visitFormats(call, values[0])
406 }
407 }
408 }
409
410
411
412
413
414
415
416
417
418 case *ssa.FreeVar:
419
420
421
422
423
424
425
426
427
428 case *ssa.Call:
429
430 case ssa.Instruction:
431 rands := v.Operands(nil)
432 if len(rands) == 1 && rands[0] != nil {
433 x.visitFormats(call, *rands[0])
434 }
435 }
436 }
437
438
439
440
441
442 func (x *extracter) visitArgs(fd *callData, v ssa.Value) {
443 if v == nil {
444 return
445 }
446 x.debug(v, "ARGV", v)
447 switch v := v.(type) {
448
449 case *ssa.Slice:
450 if v.Low == nil && v.High == nil && v.Max == nil {
451 x.visitArgs(fd, v.X)
452 }
453
454 case *ssa.Parameter:
455
456 f := v.Parent()
457 for i, p := range f.Params {
458 if p == v {
459 fd.argPos = i
460 }
461 }
462
463 case *ssa.Alloc:
464 if ref := v.Referrers(); ref == nil {
465 for _, r := range *ref {
466 values := []ssa.Value{}
467 for _, o := range r.Operands(nil) {
468 if o == nil || *o == v {
469 continue
470 }
471 values = append(values, *o)
472 }
473
474
475 if len(values) == 1 {
476 x.visitArgs(fd, values[0])
477 }
478 }
479 }
480
481 case ssa.Instruction:
482 rands := v.Operands(nil)
483 if len(rands) == 1 && rands[0] != nil {
484 x.visitArgs(fd, *rands[0])
485 }
486 }
487 }
488
489
490 func (x *extracter) print(n ast.Node) string {
491 var buf bytes.Buffer
492 format.Node(&buf, x.conf.Fset, n)
493 return buf.String()
494 }
495
// packageExtracter bundles the per-file state used while walking one source
// file of a package.
type packageExtracter struct {
	f    *ast.File           // the file being walked
	x    *extracter          // shared extraction state
	info *loader.PackageInfo // information about the package that contains f
	cmap ast.CommentMap      // comment map built from f and f.Comments
}
502
503 func (px packageExtracter) getComment(n ast.Node) string {
504 cs := px.cmap.Filter(n).Comments()
505 if len(cs) > 0 {
506 return strings.TrimSpace(cs[0].Text())
507 }
508 return ""
509 }
510
511 func (x *extracter) extractMessages() {
512 prog := x.iprog
513 keys := make([]*types.Package, 0, len(x.iprog.AllPackages))
514 for k := range x.iprog.AllPackages {
515 keys = append(keys, k)
516 }
517 sort.Slice(keys, func(i, j int) bool { return keys[i].Path() < keys[j].Path() })
518 files := []packageExtracter{}
519 for _, k := range keys {
520 info := x.iprog.AllPackages[k]
521 for _, f := range info.Files {
522
523 px := packageExtracter{
524 f, x, info,
525 ast.NewCommentMap(prog.Fset, f, f.Comments),
526 }
527 files = append(files, px)
528 }
529 }
530 for _, px := range files {
531 ast.Inspect(px.f, func(n ast.Node) bool {
532 switch v := n.(type) {
533 case *ast.CallExpr:
534 if d := x.funcs[v.Lparen]; d != nil {
535 d.expr = v
536 }
537 }
538 return true
539 })
540 }
541 for _, px := range files {
542 ast.Inspect(px.f, func(n ast.Node) bool {
543 switch v := n.(type) {
544 case *ast.CallExpr:
545 return px.handleCall(v)
546 case *ast.ValueSpec:
547 return px.handleGlobal(v)
548 }
549 return true
550 })
551 }
552 }
553
554 func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool {
555 comment := px.getComment(spec)
556
557 for _, ident := range spec.Names {
558 data, ok := px.x.globals[ident.Pos()]
559 if !ok {
560 continue
561 }
562 name := ident.Name
563 var arguments []argument
564 if data.call != nil {
565 arguments = px.getArguments(data.call)
566 } else if !strings.HasPrefix(name, "msg") && !strings.HasPrefix(name, "Msg") {
567 continue
568 }
569 data.visit(px.x, func(c constant.Value) {
570 px.addMessage(spec.Pos(), []string{name}, c, comment, arguments)
571 })
572 }
573
574 return true
575 }
576
577 func (px packageExtracter) handleCall(call *ast.CallExpr) bool {
578 x := px.x
579 data := x.funcs[call.Lparen]
580 if data == nil || len(data.formats) == 0 {
581 return true
582 }
583 if data.expr != call {
584 panic("invariant `data.call != call` failed")
585 }
586 x.debug(data.call, "INSERT", data.formats)
587
588 argn := data.callFormatPos()
589 if argn >= len(call.Args) {
590 return true
591 }
592 format := call.Args[argn]
593
594 arguments := px.getArguments(data)
595
596 comment := ""
597 key := []string{}
598 if ident, ok := format.(*ast.Ident); ok {
599 key = append(key, ident.Name)
600 if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil {
601
602 comment = v.Comment.Text()
603 }
604 }
605 if c := px.getComment(call.Args[0]); c != "" {
606 comment = c
607 }
608
609 formats := data.formats
610 for _, c := range formats {
611 px.addMessage(call.Lparen, key, c, comment, arguments)
612 }
613 return true
614 }
615
616 func (px packageExtracter) getArguments(data *callData) []argument {
617 arguments := []argument{}
618 x := px.x
619 info := px.info
620 if data.callArgsStart() >= 0 {
621 args := data.expr.Args[data.callArgsStart():]
622 for i, arg := range args {
623 expr := x.print(arg)
624 val := ""
625 if v := info.Types[arg].Value; v != nil {
626 val = v.ExactString()
627 switch arg.(type) {
628 case *ast.BinaryExpr, *ast.UnaryExpr:
629 expr = val
630 }
631 }
632 arguments = append(arguments, argument{
633 ArgNum: i + 1,
634 Type: info.Types[arg].Type.String(),
635 UnderlyingType: info.Types[arg].Type.Underlying().String(),
636 Expr: expr,
637 Value: val,
638 Comment: px.getComment(arg),
639 Position: posString(&x.conf, info.Pkg, arg.Pos()),
640
641
642
643 })
644 }
645 }
646 return arguments
647 }
648
649 func (px packageExtracter) addMessage(
650 pos token.Pos,
651 key []string,
652 c constant.Value,
653 comment string,
654 arguments []argument) {
655 x := px.x
656 fmtMsg := constant.StringVal(c)
657
658 ph := placeholders{index: map[string]string{}}
659
660 trimmed, _, _ := trimWS(fmtMsg)
661
662 p := fmtparser.Parser{}
663 simArgs := make([]interface{}, len(arguments))
664 for i, v := range arguments {
665 simArgs[i] = v
666 }
667 msg := ""
668 p.Reset(simArgs)
669 for p.SetFormat(trimmed); p.Scan(); {
670 name := ""
671 var arg *argument
672 switch p.Status {
673 case fmtparser.StatusText:
674 msg += p.Text()
675 continue
676 case fmtparser.StatusSubstitution,
677 fmtparser.StatusBadWidthSubstitution,
678 fmtparser.StatusBadPrecSubstitution:
679 arguments[p.ArgNum-1].used = true
680 arg = &arguments[p.ArgNum-1]
681 name = getID(arg)
682 case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg:
683 arg = &argument{
684 ArgNum: p.ArgNum,
685 Position: posString(&x.conf, px.info.Pkg, pos),
686 }
687 name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum)
688 }
689 sub := p.Text()
690 if !p.HasIndex {
691 r, sz := utf8.DecodeLastRuneInString(sub)
692 sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
693 }
694 msg += fmt.Sprintf("{%s}", ph.addArg(arg, name, sub))
695 }
696 key = append(key, msg)
697
698
699
700 for _, arg := range arguments {
701 if arg.used {
702 continue
703 }
704 ph.addArg(&arg, getID(&arg), fmt.Sprintf("%%[%d]v", arg.ArgNum))
705 }
706
707 x.messages = append(x.messages, Message{
708 ID: key,
709 Key: fmtMsg,
710 Message: Text{Msg: msg},
711
712 Comment: comment,
713 Placeholders: ph.slice,
714 Position: posString(&x.conf, px.info.Pkg, pos),
715 })
716 }
717
718 func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string {
719 p := conf.Fset.Position(pos)
720 file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column)
721 return filepath.Join(pkg.Path(), file)
722 }
723
724 func getID(arg *argument) string {
725 s := getLastComponent(arg.Expr)
726 s = strip(s)
727 s = strings.Replace(s, " ", "", -1)
728
729 if len(s) <= 2 && arg.UnderlyingType != arg.Type {
730 s = getLastComponent(arg.Type)
731 }
732 return strings.Title(s)
733 }
734
735
// strip normalizes s for use in an identifier: whitespace and '-' become '_',
// every other rune that is not a letter, mark or number is dropped, and a
// leading "Get"/"get" is removed when it is a getter prefix.
//
// The prefix counts as a getter prefix only when something follows it and the
// rune that follows is not a lowercase letter or mark, so "GetName" and
// "getName" become "Name" while "Getter" and "getaway" are left alone.
func strip(s string) string {
	s = strings.Map(func(r rune) rune {
		if unicode.IsSpace(r) || r == '-' {
			return '_'
		}
		if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) {
			return -1
		}
		return r
	}, s)

	if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") {
		// Inspect the rune after the prefix. Decoding the start of s
		// instead would always yield 'G' or 'g' and say nothing about
		// whether this is a getter.
		if rest := s[len("get"):]; rest != "" {
			r, _ := utf8.DecodeRuneInString(rest)
			if !unicode.In(r, unicode.Ll, unicode.M) {
				s = rest
			}
		}
	}
	return s
}
757
758
759
760
// verbToPlaceholder picks a placeholder name and an underlying type for a
// substitution from the final rune (the verb) of sub:
//
//	d        -> "Integer", "int"
//	e, f, g  -> "Number",  "float64"
//	m        -> "Message", "string"
//	s, q     -> "Arg_<pos>", "string"
//	other    -> "Arg_<pos>", "interface{}"
func verbToPlaceholder(sub string, pos int) (name, underlying string) {
	verb, _ := utf8.DecodeLastRuneInString(sub)
	switch verb {
	case 'd':
		return "Integer", "int"
	case 'e', 'f', 'g':
		return "Number", "float64"
	case 'm':
		return "Message", "string"
	}

	name = fmt.Sprintf("Arg_%d", pos)
	if verb == 's' || verb == 'q' {
		return name, "string"
	}
	return name, "interface{}"
}
781
// placeholders collects the placeholders of a single message.
type placeholders struct {
	index map[string]string // placeholder ID -> substitution string registered under it
	slice []Placeholder     // placeholders in the order they were added
}
786
787 func (p *placeholders) addArg(arg *argument, name, sub string) (id string) {
788 id = name
789 alt, ok := p.index[id]
790 for i := 1; ok && alt != sub; i++ {
791 id = fmt.Sprintf("%s_%d", name, i)
792 alt, ok = p.index[id]
793 }
794 p.index[id] = sub
795 p.slice = append(p.slice, Placeholder{
796 ID: id,
797 String: sub,
798 Type: arg.Type,
799 UnderlyingType: arg.UnderlyingType,
800 ArgNum: arg.ArgNum,
801 Expr: arg.Expr,
802 Comment: arg.Comment,
803 })
804 return id
805 }
806
// getLastComponent returns the part of s after its last '.', or all of s if
// it contains no '.'.
func getLastComponent(s string) string {
	if dot := strings.LastIndexByte(s, '.'); dot >= 0 {
		return s[dot+1:]
	}
	return s
}
810
811
// isMsg reports whether s contains at least one letter, which is the
// criterion used here to treat a string as a message. Letters that belong to
// format verbs count as well.
func isMsg(s string) bool {
	return strings.IndexFunc(s, unicode.IsLetter) >= 0
}
822
View as plain text