Jelajahi Sumber

Issue #40: Expanded testing of magic field/enum/message names

This mines the SwiftProtobuf library source code to generate
a mined_words.txt list of ~600 Swift names that might potentially
cause problems.  This list includes Swift type names and reserved
words as well as identifiers used by the library that might
conflict.

This list is used to generate several proto files that use these words
as the names of messages, fields, enums, and enum cases.  These proto
files are rebuilt as part of `make regenerate` so they should remain
up-to-date over time.

Based on this test, add several names to our ReservedWords list
and update the sanitizing logic to better handle certain cases.
Tim Kientzle 9 tahun lalu
induk
melakukan
2151854576

+ 1 - 0
.gitignore

@@ -7,6 +7,7 @@ xcbaselines
 /_test
 /docs
 /build
+mined_words.txt
 
 # Intermediate conformance test outputs
 failing_tests.txt

+ 90 - 1
Makefile

@@ -80,6 +80,10 @@ CONFORMANCE_HOST=${GOOGLE_PROTOBUF_CHECKOUT}/conformance/conformance-test-runner
 # Protos used for the unit and functional tests
 TEST_PROTOS= \
 	Protos/conformance/conformance.proto \
+	Protos/generated_swift_names_enums.proto \
+	Protos/generated_swift_names_enum_cases.proto \
+	Protos/generated_swift_names_fields.proto \
+	Protos/generated_swift_names_messages.proto \
 	Protos/google/protobuf/any_test.proto \
 	Protos/google/protobuf/descriptor.proto \
 	Protos/google/protobuf/map_proto2_unittest.proto \
@@ -374,7 +378,7 @@ regenerate-plugin-protos: build ${PROTOC_GEN_SWIFT}
 # Note: Some of these protos define the same package.(message|enum)s, so they
 # can't be done in a single protoc/proto-gen-swift invoke and have to be done
 # one at a time instead.
-regenerate-test-protos: build ${PROTOC_GEN_SWIFT}
+regenerate-test-protos: build ${PROTOC_GEN_SWIFT} Protos/generated_swift_names_enums.proto Protos/generated_swift_names_enum_cases.proto Protos/generated_swift_names_fields.proto Protos/generated_swift_names_messages.proto
 	for t in ${TEST_PROTOS}; do \
 		${GENERATE_SRCS} \
 			--tfiws_opt=FileNaming=DropPath \
@@ -382,6 +386,91 @@ regenerate-test-protos: build ${PROTOC_GEN_SWIFT}
 			$$t; \
 	done
 
+#
+# Collect a list of words that appear in the SwiftProtobuf library
+# source.  These are words that may cause problems for generated code.
+#
+# The logic here builds a word list as follows:
+#  = Look at every Swift source file in the library
+#  = Take every line with the word 'public', 'func', or 'var'
+#  = Break each such line into words (stripping all punctuation)
+#  = Remove words that differ only in case
+#
+# Selecting lines with 'public', 'func' or 'var' ensures we get every
+# public protocol, struct, enum, or class name, as well as every
+# method or property defined in a public protocol, struct, or class.
+# It also gives us a large collection of Swift names.
+Protos/mined_words.txt: Sources/SwiftProtobuf/*
+	@echo Building $@
+	@cat Sources/SwiftProtobuf/* | \
+	grep ' \(public\|func\|var\) ' | \
+	grep -v ' \(private\|internal\) ' | \
+	sed -e 's/[^a-zA-Z0-9_]/ /g' | \
+	tr " " "\n" | \
+	sed -e 's/^_//' | \
+	sort -f | \
+	uniq -i | \
+	grep '^[a-zA-Z_]' > $@
+
+# Build some proto files full of landmines
+#
+# This takes the word list Protos/mined_words.txt and uses
+# it to build several proto files:
+#  = Build a message with one `int32` field for each word
+#  = Build an enum with a case for each such word
+#  = Build a message with a submessage named with each word
+#  = Build a message with an enum named with each word
+#
+# If the Swift compiler can actually compile the result, that suggests
+# we can correctly handle every symbol in the library itself that
+# might cause problems.  Failures compiling this indicate weaknesses
+# in protoc-gen-swift's name sanitization logic.
+#
+Protos/generated_swift_names_fields.proto: Protos/mined_words.txt
+	@echo Building $@
+	@echo '// See Makefile for the logic that generates this' > $@
+	@echo '// Protoc errors imply this file is being generated incorrectly' > $@
+	@echo '// Swift compile errors are probably bugs in protoc-gen-swift' > $@
+	@echo 'syntax = "proto3";' > $@
+	@echo 'package protobuf_unittest;' >> $@
+	@echo 'message GeneratedSwiftReservedFields {' >> $@
+	@cat Protos/mined_words.txt | awk 'BEGIN{n = 1} {print "  int32 " $$1 " = " n ";"; n += 1 }' >> $@
+	@echo '}' >> $@
+
+Protos/generated_swift_names_enum_cases.proto: Protos/mined_words.txt
+	@echo Building $@
+	@echo '// See Makefile for the logic that generates this' > $@
+	@echo '// Protoc errors imply this file is being generated incorrectly' > $@
+	@echo '// Swift compile errors are probably bugs in protoc-gen-swift' > $@
+	@echo 'syntax = "proto3";' > $@
+	@echo 'package protobuf_unittest;' >> $@
+	@echo 'enum GeneratedSwiftReservedEnum {' >> $@
+	@echo '  NONE = 0;' >> $@
+	@cat Protos/mined_words.txt | awk 'BEGIN{n = 1} {print "  " $$1 " = " n ";"; n += 1 }' >> $@
+	@echo '}' >> $@
+
+Protos/generated_swift_names_messages.proto: Protos/mined_words.txt
+	@echo Building $@
+	@echo '// See Makefile for the logic that generates this' > $@
+	@echo '// Protoc errors imply this file is being generated incorrectly' > $@
+	@echo '// Swift compile errors are probably bugs in protoc-gen-swift' > $@
+	@echo 'syntax = "proto3";' > $@
+	@echo 'package protobuf_unittest;' >> $@
+	@echo 'message GeneratedSwiftReservedMessages {' >> $@
+	@cat Protos/mined_words.txt | awk '{print "  message " $$1 " { int32 a = 1; }"}' >> $@
+	@echo '}' >> $@
+
+Protos/generated_swift_names_enums.proto: Protos/mined_words.txt
+	@echo Building $@
+	@echo '// See Makefile for the logic that generates this' > $@
+	@echo '// Protoc errors imply this file is being generated incorrectly' > $@
+	@echo '// Swift compile errors are probably bugs in protoc-gen-swift' > $@
+	@echo 'syntax = "proto3";' > $@
+	@echo 'package protobuf_unittest;' >> $@
+	@echo 'message GeneratedSwiftReservedEnums {' >> $@
+	@cat Protos/mined_words.txt | awk '{print "  enum " $$1 " { NONE_" $$1 " = 0; }"}' >> $@
+	@echo '}' >> $@
+
 # Rebuild just the protos used by the conformance test runner.
 regenerate-conformance-protos: build ${PROTOC_GEN_SWIFT}
 	${GENERATE_SRCS} \

+ 1 - 1
Sources/PluginLibrary/SwiftLanguage.swift

@@ -155,7 +155,7 @@ public let swiftKeywordsReservedInParticularContexts: Set<String> = [
 /// These are standard Swift types that are heavily used, although
 /// they are not technically reserved.  Defining fields or structs
 /// with these names would break our generated code quite badly:
-public let swiftCommonTypes: Set<String> = [ "Data", "Double", "Float", "Int",
+public let swiftCommonTypes: Set<String> = [ "Bool", "Data", "Double", "Float", "Int",
     "Int32", "Int64", "String", "UInt", "UInt32", "UInt64",
 ]
 

+ 18 - 18
Sources/protoc-gen-swift/ReservedWords.swift

@@ -22,13 +22,15 @@ private let reservedTypeNames: Set<String> = {
     var names: Set<String> = [
         "anyTypeURL",
         "debugDescription",
-        "decodeField",
+        "decodeMessage",
         "description",
         "dynamicType",
         "hashValue",
         "isEmpty",
         "isEqual",
         "jsonFieldNames",
+        "protoMessageName",
+        "SwiftProtobuf",
         "traverse",
         "unknownFields",
     ]
@@ -47,35 +49,30 @@ private let reservedTypeNames: Set<String> = {
     return names
 }()
 
-func sanitizeMessageTypeName(_ s: String) -> String {
+private func sanitizeTypeName(_ s: String, disambiguator: String) -> String {
     if reservedTypeNames.contains(s) {
-        return s + "Message"
+        return s + disambiguator
     } else if isAllUnderscore(s) {
-        return s + "Message"
+        return s + disambiguator
+    } else if s.hasSuffix(disambiguator) {
+        let e = s.index(s.endIndex, offsetBy: -disambiguator.characters.count)
+        let truncated = s.substring(to: e)
+        return sanitizeTypeName(truncated, disambiguator: disambiguator) + disambiguator
     } else {
         return s
     }
 }
 
+func sanitizeMessageTypeName(_ s: String) -> String {
+    return sanitizeTypeName(s, disambiguator: "Message")
+}
 
 func sanitizeEnumTypeName(_ s: String) -> String {
-    if reservedTypeNames.contains(s) {
-        return s + "Enum"
-    } else if isAllUnderscore(s) {
-        return s + "Enum"
-    } else {
-        return s
-    }
+    return sanitizeTypeName(s, disambiguator: "Enum")
 }
 
 func sanitizeOneofTypeName(_ s: String) -> String {
-    if reservedTypeNames.contains(s) {
-        return s + "Oneof"
-    } else if isAllUnderscore(s) {
-        return s + "Oneof"
-    } else {
-        return s
-    }
+    return sanitizeTypeName(s, disambiguator: "Oneof")
 }
 
 private let reservedFieldNames: Set<String> =  {
@@ -173,6 +170,9 @@ private let quotableMessageScopedExtensionNames: Set<String> = quotableEnumCases
 /// enum case names are sanitized by adding
 /// backticks `` around them.
 func isAllUnderscore(_ s: String) -> Bool {
+    if s.isEmpty {
+        return false
+    }
     for c in s.characters {
         if c != "_" {return false}
     }