...

Source file src/google.golang.org/protobuf/internal/strs/strings.go

Documentation: google.golang.org/protobuf/internal/strs

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package strs provides string manipulation functionality specific to protobuf.
     6  package strs
     7  
     8  import (
     9  	"go/token"
    10  	"strings"
    11  	"unicode"
    12  	"unicode/utf8"
    13  
    14  	"google.golang.org/protobuf/internal/flags"
    15  	"google.golang.org/protobuf/reflect/protoreflect"
    16  )
    17  
    18  // EnforceUTF8 reports whether to enforce strict UTF-8 validation.
    19  func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
    20  	if flags.ProtoLegacy {
    21  		if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
    22  			return fd.EnforceUTF8()
    23  		}
    24  	}
    25  	return fd.Syntax() == protoreflect.Proto3
    26  }
    27  
    28  // GoCamelCase camel-cases a protobuf name for use as a Go identifier.
    29  //
    30  // If there is an interior underscore followed by a lower case letter,
    31  // drop the underscore and convert the letter to upper case.
    32  func GoCamelCase(s string) string {
    33  	// Invariant: if the next letter is lower case, it must be converted
    34  	// to upper case.
    35  	// That is, we process a word at a time, where words are marked by _ or
    36  	// upper case letter. Digits are treated as words.
    37  	var b []byte
    38  	for i := 0; i < len(s); i++ {
    39  		c := s[i]
    40  		switch {
    41  		case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
    42  			// Skip over '.' in ".{{lowercase}}".
    43  		case c == '.':
    44  			b = append(b, '_') // convert '.' to '_'
    45  		case c == '_' && (i == 0 || s[i-1] == '.'):
    46  			// Convert initial '_' to ensure we start with a capital letter.
    47  			// Do the same for '_' after '.' to match historic behavior.
    48  			b = append(b, 'X') // convert '_' to 'X'
    49  		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
    50  			// Skip over '_' in "_{{lowercase}}".
    51  		case isASCIIDigit(c):
    52  			b = append(b, c)
    53  		default:
    54  			// Assume we have a letter now - if not, it's a bogus identifier.
    55  			// The next word is a sequence of characters that must start upper case.
    56  			if isASCIILower(c) {
    57  				c -= 'a' - 'A' // convert lowercase to uppercase
    58  			}
    59  			b = append(b, c)
    60  
    61  			// Accept lower case sequence that follows.
    62  			for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
    63  				b = append(b, s[i+1])
    64  			}
    65  		}
    66  	}
    67  	return string(b)
    68  }
    69  
    70  // GoSanitized converts a string to a valid Go identifier.
    71  func GoSanitized(s string) string {
    72  	// Sanitize the input to the set of valid characters,
    73  	// which must be '_' or be in the Unicode L or N categories.
    74  	s = strings.Map(func(r rune) rune {
    75  		if unicode.IsLetter(r) || unicode.IsDigit(r) {
    76  			return r
    77  		}
    78  		return '_'
    79  	}, s)
    80  
    81  	// Prepend '_' in the event of a Go keyword conflict or if
    82  	// the identifier is invalid (does not start in the Unicode L category).
    83  	r, _ := utf8.DecodeRuneInString(s)
    84  	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
    85  		return "_" + s
    86  	}
    87  	return s
    88  }
    89  
    90  // JSONCamelCase converts a snake_case identifier to a camelCase identifier,
    91  // according to the protobuf JSON specification.
    92  func JSONCamelCase(s string) string {
    93  	var b []byte
    94  	var wasUnderscore bool
    95  	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
    96  		c := s[i]
    97  		if c != '_' {
    98  			if wasUnderscore && isASCIILower(c) {
    99  				c -= 'a' - 'A' // convert to uppercase
   100  			}
   101  			b = append(b, c)
   102  		}
   103  		wasUnderscore = c == '_'
   104  	}
   105  	return string(b)
   106  }
   107  
   108  // JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
   109  // according to the protobuf JSON specification.
   110  func JSONSnakeCase(s string) string {
   111  	var b []byte
   112  	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
   113  		c := s[i]
   114  		if isASCIIUpper(c) {
   115  			b = append(b, '_')
   116  			c += 'a' - 'A' // convert to lowercase
   117  		}
   118  		b = append(b, c)
   119  	}
   120  	return string(b)
   121  }
   122  
   123  // MapEntryName derives the name of the map entry message given the field name.
   124  // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
   125  func MapEntryName(s string) string {
   126  	var b []byte
   127  	upperNext := true
   128  	for _, c := range s {
   129  		switch {
   130  		case c == '_':
   131  			upperNext = true
   132  		case upperNext:
   133  			b = append(b, byte(unicode.ToUpper(c)))
   134  			upperNext = false
   135  		default:
   136  			b = append(b, byte(c))
   137  		}
   138  	}
   139  	b = append(b, "Entry"...)
   140  	return string(b)
   141  }
   142  
   143  // EnumValueName derives the camel-cased enum value name.
   144  // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
   145  func EnumValueName(s string) string {
   146  	var b []byte
   147  	upperNext := true
   148  	for _, c := range s {
   149  		switch {
   150  		case c == '_':
   151  			upperNext = true
   152  		case upperNext:
   153  			b = append(b, byte(unicode.ToUpper(c)))
   154  			upperNext = false
   155  		default:
   156  			b = append(b, byte(unicode.ToLower(c)))
   157  			upperNext = false
   158  		}
   159  	}
   160  	return string(b)
   161  }
   162  
   163  // TrimEnumPrefix trims the enum name prefix from an enum value name,
   164  // where the prefix is all lowercase without underscores.
   165  // See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
   166  func TrimEnumPrefix(s, prefix string) string {
   167  	s0 := s // original input
   168  	for len(s) > 0 && len(prefix) > 0 {
   169  		if s[0] == '_' {
   170  			s = s[1:]
   171  			continue
   172  		}
   173  		if unicode.ToLower(rune(s[0])) != rune(prefix[0]) {
   174  			return s0 // no prefix match
   175  		}
   176  		s, prefix = s[1:], prefix[1:]
   177  	}
   178  	if len(prefix) > 0 {
   179  		return s0 // no prefix match
   180  	}
   181  	s = strings.TrimLeft(s, "_")
   182  	if len(s) == 0 {
   183  		return s0 // avoid returning empty string
   184  	}
   185  	return s
   186  }
   187  
   188  func isASCIILower(c byte) bool {
   189  	return 'a' <= c && c <= 'z'
   190  }
   191  func isASCIIUpper(c byte) bool {
   192  	return 'A' <= c && c <= 'Z'
   193  }
   194  func isASCIIDigit(c byte) bool {
   195  	return '0' <= c && c <= '9'
   196  }
   197  

View as plain text