NamingUtils.swift 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. // Sources/SwiftProtobufPluginLibrary/NamingUtils.swift - Utilities for generating names
  2. //
  3. // Copyright (c) 2014 - 2017 Apple Inc. and the project authors
  4. // Licensed under Apache License v2.0 with Runtime Library Exception
  5. //
  6. // See LICENSE.txt for license information:
  7. // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
  8. //
  9. // -----------------------------------------------------------------------------
  10. ///
  11. /// This provides some utilities for generating names.
  12. ///
  /// NOTE: Only a very small subset of this is public. The intent is for this to
  /// expose a defined api within the PluginLib, but for the SwiftProtobufNamer
  /// to be what exposes the reusable parts at a much higher level. This reduces
  /// the chances of something being reimplemented but with minor differences.
  17. ///
  18. // -----------------------------------------------------------------------------
  19. import Foundation
  20. import SwiftProtobuf
  ///
  /// Names that are never used as-is for generated types (structs, enums).
  /// `sanitizeTypeName` appends a disambiguator to any type name found here.
  ///
  private let reservedTypeNames: Set<String> = {
      var reserved: Set<String> = [
          // Main SwiftProtobuf namespace. Shadowing this leads to Bad Things.
          "SwiftProtobuf",
          // Subtype of many messages, used to scope nested extensions.
          "Extensions",
          // Subtypes are static references, so can conflict with static
          // class properties:
          "protoMessageName",
          // Methods on Message that we need to avoid shadowing. Testing
          // shows we do not need to avoid `serializedData` or `isEqualTo`,
          // but it's not obvious what's different about them. Maybe
          // because these two are generic? Because they throw?
          "decodeMessage",
          "traverse",
          // Basic Message properties we don't want to shadow:
          "isInitialized",
          "unknownFields",
          // Standard Swift property names we don't want to conflict with:
          "debugDescription",
          "description",
          "dynamicType",
          "hashValue",
          // We don't need to protect all of the keywords reserved in
          // particular contexts (swiftKeywordsReservedInParticularContexts),
          // just the ones that interfere with type expressions:
          "Type",
          "Protocol",
          // Getting something called "Swift" would be bad as it blocks access
          // to built in things.
          "Swift",
          // And getting things on some of the common protocols could create
          // some odd confusion.
          "Equatable",
          "Hashable",
          "Sendable",
      ]
      // Fold in the shared keyword / common-name tables.
      reserved.formUnion(swiftKeywordsUsedInDeclarations)
      reserved.formUnion(swiftKeywordsUsedInStatements)
      reserved.formUnion(swiftKeywordsUsedInExpressionsAndTypes)
      reserved.formUnion(swiftCommonTypes)
      reserved.formUnion(swiftSpecialVariables)
      return reserved
  }()
  ///
  /// Swift reserved words that are still usable as field names once they are
  /// wrapped in backticks.
  ///
  private let quotableFieldNames: Set<String> = {
      var quotable = Set<String>()
      quotable.formUnion(swiftKeywordsUsedInDeclarations)
      quotable.formUnion(swiftKeywordsUsedInStatements)
      quotable.formUnion(swiftKeywordsUsedInExpressionsAndTypes)
      return quotable
  }()
  ///
  /// Field names that cannot be used even with backticks; `sanitize(fieldName:)`
  /// appends a `_p` suffix to these.
  ///
  /// Properties are instance names, so they can't shadow static class
  /// properties such as `protoMessageName`, and properties can't shadow
  /// methods (for example, `isEqualTo` does not need to be avoided).
  ///
  private let reservedFieldNames: Set<String> = {
      var reserved: Set<String> = [
          // Basic Message properties that we don't want to shadow:
          "isInitialized",
          "unknownFields",
          // Standard Swift property names we don't want to conflict with:
          "debugDescription",
          "description",
          "dynamicType",
          "hashValue",
          "init",
          "self",
          // We don't need to protect all of the keywords reserved in
          // particular contexts (swiftKeywordsReservedInParticularContexts),
          // just the ones that interfere with type expressions:
          "Type",
          "Protocol",
      ]
      reserved.formUnion(swiftCommonTypes)
      reserved.formUnion(swiftSpecialVariables)
      return reserved
  }()
  ///
  /// Swift reserved words that are still usable as enum cases once they are
  /// wrapped in backticks.
  ///
  private let quotableEnumCases: Set<String> = {
      // We don't need to protect all of the keywords reserved in particular
      // contexts (swiftKeywordsReservedInParticularContexts), just the ones
      // that interfere with enum cases:
      var quotable: Set<String> = [
          "associativity",
          "dynamicType",
          "optional",
          "required",
      ]
      quotable.formUnion(swiftKeywordsUsedInDeclarations)
      quotable.formUnion(swiftKeywordsUsedInStatements)
      quotable.formUnion(swiftKeywordsUsedInExpressionsAndTypes)
      // Common type and variable names (swiftCommonTypes,
      // swiftSpecialVariables) are deliberately not included: they don't
      // cause problems as enum cases, because enum case names only appear in
      // special contexts.
      return quotable
  }()
  ///
  /// Words that cannot be used for enum cases even when quoted with backticks,
  /// because they would collide with standard Swift property names.
  ///
  private let reservedEnumCases: Set<String> = [
      "allCases", "rawValue",
      "debugDescription", "description",
      "dynamicType", "hashValue",
      "init", "self",
  ]
  ///
  /// Message scoped extensions live inside the Message struct in an
  /// `enum Extensions { ... }`, so they reuse the enum case sets for deciding
  /// what needs backticks and what is reserved.
  ///
  private let quotableMessageScopedExtensionNames = quotableEnumCases
  private let reservedMessageScopedExtensionNames = reservedEnumCases
  /// Returns true when `s` is non-empty and consists solely of `_` scalars.
  private func isAllUnderscore(_ s: String) -> Bool {
      guard !s.isEmpty else { return false }
      return s.unicodeScalars.allSatisfy { $0 == "_" }
  }
  /// Makes `s` safe to use as a generated type name by appending
  /// `disambiguator` when needed.
  ///
  /// NOTE: This relies on protoc having validated the name as an _identifier_
  /// (defined in Tokenizer::Next() as `[a-zA-Z_][a-zA-Z0-9_]*`), so no complex
  /// validation or handling of characters outside those ranges is needed.
  /// Those rules prevent a leading digit, so any explicitly used Message or
  /// Enum name will be valid. The one exception is that this code is also used
  /// for determining the OneOf enums, but that code is responsible for dealing
  /// with the issues in the transforms it makes.
  ///
  /// - Parameters:
  ///   - s: The candidate type name.
  ///   - disambiguator: Suffix appended to resolve a conflict.
  ///   - forbiddenTypeNames: Extra, caller-supplied names to avoid (used for
  ///     module imports that are configurable, like SwiftProtobuf renaming).
  /// - Returns: `s`, possibly with one or more `disambiguator` suffixes added.
  private func sanitizeTypeName(_ s: String, disambiguator: String, forbiddenTypeNames: Set<String>) -> String {
      // Built-in reserved names and all-underscore names always get the suffix.
      if reservedTypeNames.contains(s) || isAllUnderscore(s) {
          return s + disambiguator
      }

      if s.hasSuffix(disambiguator) {
          // If `foo` and `fooMessage` both exist, and `foo` gets expanded to
          // `fooMessage`, then `fooMessage` must become `fooMessageMessage` to
          // avoid creating a new conflict. Resolve this recursively: strip the
          // disambiguator, sanitize the root, then re-add the disambiguator.
          let root = String(s.dropLast(disambiguator.count))
          let sanitizedRoot = sanitizeTypeName(
              root,
              disambiguator: disambiguator,
              forbiddenTypeNames: forbiddenTypeNames
          )
          return sanitizedRoot + disambiguator
      }

      // NOTE: It is important that this check runs after the hasSuffix case.
      // The forbidden set is not fixed and may contain something like
      // "FooMessage". If it does, and `s` is "FooMessage" with a disambiguator
      // of "Message", we want to sanitize on the basis of the suffix rather
      // than simply appending the disambiguator.
      return forbiddenTypeNames.contains(s) ? s + disambiguator : s
  }
  /// Reports whether the unicode scalar at offset `index` in `s` is an ASCII
  /// uppercase letter.
  ///
  /// - Parameters:
  ///   - s: The string to inspect (indexed by unicode scalar, not Character).
  ///   - index: Zero-based scalar offset from the start of `s`.
  /// - Returns: `false` when `index` is negative, at the end of the string, or
  ///   past it; otherwise whether that scalar is ASCII uppercase.
  private func isCharacterUppercase(_ s: String, index: Int) -> Bool {
      let scalars = s.unicodeScalars
      // Use the bounded form so an offset beyond the end reports "not
      // uppercase" instead of trapping inside `index(_:offsetBy:)`.
      guard index >= 0,
          let position = scalars.index(scalars.startIndex, offsetBy: index, limitedBy: scalars.endIndex),
          position != scalars.endIndex
      else {
          // It ended, so just say the next character wasn't uppercase.
          return false
      }
      return scalars[position].isASCUppercase
  }
  /// Builds a `String.UnicodeScalarView` containing exactly `unicodeScalar`.
  private func makeUnicodeScalarView(
      from unicodeScalar: UnicodeScalar
  ) -> String.UnicodeScalarView {
      String.UnicodeScalarView(CollectionOfOne(unicodeScalar))
  }
  /// Namespace for the camel-casing logic behind
  /// `NamingUtils.toUpperCamelCase` / `NamingUtils.toLowerCamelCase`.
  private enum CamelCaser {
      // Abbreviations that should be all uppercase when camelcasing. Used in
      // transform(_:initialUpperCase:).
      static let abbreviations: Set<String> = ["url", "http", "https", "id"]

      // The different "classes" a character can belong to for segmenting.
      enum CharClass {
          case digit
          case lower
          case upper
          case underscore
          case other

          /// Classifies an ASCII digit, letter or underscore; anything else
          /// is `.other`.
          init(_ from: UnicodeScalar) {
              switch from {
              case "0"..."9": self = .digit
              case "a"..."z": self = .lower
              case "A"..."Z": self = .upper
              case "_": self = .underscore
              default: self = .other
              }
          }
      }

      /// Transforms the input into a camelcase name that is a valid Swift
      /// identifier. The input is assumed to be a protocol buffer identifier
      /// (or something like that), meaning that it is a "snake_case_name" and
      /// the underscores can be used to split it into segments that are then
      /// capitalized as needed. The splits happen based on underscores and/or
      /// changes in case and/or use of digits. If underscores are repeated,
      /// then the "extras" (past the first) are carried over into the output.
      ///
      /// NOTE: protoc validation of an _identifier_ is defined (in
      /// Tokenizer::Next()) as `[a-zA-Z_][a-zA-Z0-9_]*`. Since leading
      /// underscores are removed, this has to handle the case where the result
      /// would start with a digit; when that happens an underscore is added
      /// before it.
      ///
      /// - Parameters:
      ///   - s: The name to transform.
      ///   - initialUpperCase: Whether the first segment is capitalized.
      /// - Returns: The camel-cased identifier.
      static func transform(_ s: String, initialUpperCase: Bool) -> String {
          var output = String()
          var segment = String.UnicodeScalarView()  // Collected in lowercase.
          var previous = CharClass("\0")

          // Appends the pending segment (if any) to `output`, applying the
          // capitalization rules, and starts a new empty segment.
          func flushSegment() {
              if segment.isEmpty {
                  return
              }
              let lowered = String(segment)
              segment = String.UnicodeScalarView()

              let isLeadingLowerSegment = output.isEmpty && !initialUpperCase
              if isLeadingLowerSegment {
                  // Want it to stay lowercase.
                  output += lowered
              } else if abbreviations.contains(lowered) {
                  output += lowered.uppercased()
              } else {
                  output += NamingUtils.uppercaseFirstCharacter(lowered)
              }
          }

          for scalar in s.unicodeScalars {
              let kind = CharClass(scalar)
              switch kind {
              case .digit:
                  if previous != .digit { flushSegment() }
                  // Don't want to start with a number for the very first thing.
                  if output.isEmpty { output += "_" }
                  segment.append(scalar)

              case .upper:
                  if previous != .upper { flushSegment() }
                  segment.append(scalar.ascLowercased())

              case .lower:
                  // Lowercase continues a segment started by letters of
                  // either case.
                  if !(previous == .lower || previous == .upper) { flushSegment() }
                  segment.append(scalar)

              case .underscore:
                  flushSegment()
                  // Only the "extra" underscores of a run are carried over.
                  if previous == .underscore { output += "_" }

              case .other:
                  flushSegment()
                  let isAllowedHere =
                      output.isEmpty
                      ? isSwiftIdentifierHeadCharacter(scalar)
                      : isSwiftIdentifierCharacter(scalar)
                  if isAllowedHere {
                      segment.append(scalar)
                  } else {
                      // Escape as `_u` followed by the scalar's numeric value.
                      output += "_u\(scalar.value)"
                  }
              }
              previous = kind
          }

          // Add the last segment collected.
          flushSegment()
          // If things end in an underscore, add one also.
          if previous == .underscore {
              output += "_"
          }
          return output
      }
  }
  // Scope for the utilities so they are less likely to conflict when imported
  // into generators.
  public enum NamingUtils {

      /// Returns the Swift type prefix to use for types generated from a proto
      /// file.
      ///
      /// - Parameters:
      ///   - protoPackage: The proto package of the file (may be empty).
      ///   - fileOptions: The file's options; an explicit `swiftPrefix`
      ///     (including a blank one) takes precedence over the package.
      /// - Returns: The explicit prefix, an empty string for an empty package,
      ///   or a prefix derived from the package that ends in `_`.
      package static func typePrefix(protoPackage: String, fileOptions: Google_Protobuf_FileOptions) -> String {
          // Explicit option (including blank), wins.
          if fileOptions.hasSwiftPrefix {
              return fileOptions.swiftPrefix
          }

          if protoPackage.isEmpty {
              return String()
          }

          // NOTE: This code relies on the protoc validation of proto packages.
          // Look at Parser::ParsePackage() to see the logic, it comes down to
          // reading _identifiers_ joined by '.'. And _identifier_ is defined (in
          // Tokenizer::Next()) as `[a-zA-Z_][a-zA-Z0-9_]*`, so this does not
          // need any complex validation or handling of characters outside those
          // ranges. It just has to deal with ending up with a leading digit
          // after the pruning of '_'s.

          // Transforms:
          //  "package.name" -> "Package_Name"
          //  "package_name" -> "PackageName"
          //  "package.some_name" -> "Package_SomeName"
          var prefix = String.UnicodeScalarView()
          var makeUpper = true
          for c in protoPackage.unicodeScalars {
              if c == "_" {
                  makeUpper = true
              } else if c == "." {
                  makeUpper = true
                  prefix.append("_")
              } else {
                  if prefix.isEmpty && c.isASCDigit {
                      // If the first character is going to be a digit, add an
                      // underscore to ensure it is a valid Swift identifier.
                      prefix.append("_")
                  }
                  if makeUpper {
                      prefix.append(c.ascUppercased())
                      makeUpper = false
                  } else {
                      prefix.append(c)
                  }
              }
          }
          // End in an underscore to split off anything that gets added to it.
          return String(prefix) + "_"
      }

      /// Helper to strip a proto prefix from strings. A proto prefix means
      /// underscores and letter case are ignored.
      ///
      /// NOTE: Since this is acting on proto enum names and enum cases, we know
      /// the values must be _identifier_s, which is defined (in
      /// Tokenizer::Next()) as `[a-zA-Z_][a-zA-Z0-9_]*`, so this code is based
      /// on that limited input.
      package struct PrefixStripper {
          /// The prefix, lowercased and with all underscores removed.
          private let prefixChars: String.UnicodeScalarView

          package init(prefix: String) {
              self.prefixChars = prefix.lowercased().replacingOccurrences(of: "_", with: "").unicodeScalars
          }

          /// Strip the prefix and return the result, or return nil if it can't
          /// be stripped.
          ///
          /// - Parameter from: The name to strip the prefix from.
          /// - Returns: The remainder of `from` (original casing) after the
          ///   prefix and any following underscores, or nil when the prefix
          ///   doesn't match, nothing would remain, or the remainder would not
          ///   start with a letter.
          package func strip(from: String) -> String? {
              var prefixIndex = prefixChars.startIndex
              let prefixEnd = prefixChars.endIndex

              let fromChars = from.lowercased().unicodeScalars
              var fromIndex = fromChars.startIndex
              let fromEnd = fromChars.endIndex

              while prefixIndex != prefixEnd {
                  if fromIndex == fromEnd {
                      // Reached the end of the string while still having prefix
                      // to go; nothing to strip.
                      return nil
                  }

                  if fromChars[fromIndex] == "_" {
                      // Underscores in the input are ignored while matching.
                      fromIndex = fromChars.index(after: fromIndex)
                      continue
                  }

                  if prefixChars[prefixIndex] != fromChars[fromIndex] {
                      // They differed before the end of the prefix, can't drop.
                      return nil
                  }

                  prefixIndex = prefixChars.index(after: prefixIndex)
                  fromIndex = fromChars.index(after: fromIndex)
              }

              // Remove any more underscores.
              while fromIndex != fromEnd && fromChars[fromIndex] == "_" {
                  fromIndex = fromChars.index(after: fromIndex)
              }

              if fromIndex == fromEnd {
                  // They matched, can't strip.
                  return nil
              }

              guard fromChars[fromIndex].isASCLowercase else {
                  // Next character isn't a lowercase letter (it must be a digit
                  // (fromChars was lowercased)), that would mean to make an enum
                  // value it would have to get prefixed with an underscore which
                  // most folks wouldn't consider to be a better Swift naming, so
                  // don't strip the prefix.
                  return nil
              }

              // `count` is measured in unicode scalars, so the cut has to be
              // applied to the scalar view of the original string; offsetting
              // the Character view would land in the wrong place whenever a
              // Character is made of several scalars. The bounded offset keeps
              // an unexpected length mismatch from trapping.
              let count = fromChars.distance(from: fromChars.startIndex, to: fromIndex)
              let originalScalars = from.unicodeScalars
              guard
                  let cut = originalScalars.index(
                      originalScalars.startIndex,
                      offsetBy: count,
                      limitedBy: originalScalars.endIndex
                  )
              else {
                  return nil
              }
              return String(originalScalars[cut...])
          }
      }

      /// Sanitizes a message type name, using "Message" as the disambiguator.
      package static func sanitize(messageName s: String, forbiddenTypeNames: Set<String>) -> String {
          sanitizeTypeName(s, disambiguator: "Message", forbiddenTypeNames: forbiddenTypeNames)
      }

      /// Sanitizes an enum type name, using "Enum" as the disambiguator.
      package static func sanitize(enumName s: String, forbiddenTypeNames: Set<String>) -> String {
          sanitizeTypeName(s, disambiguator: "Enum", forbiddenTypeNames: forbiddenTypeNames)
      }

      /// Sanitizes a oneof type name, using "Oneof" as the disambiguator.
      package static func sanitize(oneofName s: String, forbiddenTypeNames: Set<String>) -> String {
          sanitizeTypeName(s, disambiguator: "Oneof", forbiddenTypeNames: forbiddenTypeNames)
      }

      /// Sanitizes the field-derived name `s`, deciding what to do by looking
      /// at `basedOn`.
      ///
      /// - Parameters:
      ///   - s: The name to sanitize.
      ///   - basedOn: The base name the decision is made on.
      /// - Returns: `s` with a `_p` suffix when `basedOn` looks like a
      ///   `clearFoo`/`hasFoo` accessor or is a reserved field name; `s` in
      ///   backticks when it is itself a quotable keyword; `s` with `__`
      ///   appended when `basedOn` is all underscores; otherwise `s`.
      package static func sanitize(fieldName s: String, basedOn: String) -> String {
          if basedOn.hasPrefix("clear") && isCharacterUppercase(basedOn, index: 5) {
              return s + "_p"
          } else if basedOn.hasPrefix("has") && isCharacterUppercase(basedOn, index: 3) {
              return s + "_p"
          } else if reservedFieldNames.contains(basedOn) {
              return s + "_p"
          } else if basedOn == s && quotableFieldNames.contains(basedOn) {
              // Backticks are only used on the base names; if we're sanitizing
              // based on something else this is skipped (the "hasFoo" doesn't
              // get backticks just because the "foo" does).
              return "`\(s)`"
          } else if isAllUnderscore(basedOn) {
              return s + "__"
          } else {
              return s
          }
      }

      /// Sanitizes a field name based on itself.
      package static func sanitize(fieldName s: String) -> String {
          sanitize(fieldName: s, basedOn: s)
      }

      /// Sanitizes an enum case name: reserved names get a trailing `_`,
      /// quotable keywords get backticks, all-underscore names get `__`.
      package static func sanitize(enumCaseName s: String) -> String {
          sanitizeCaseStyleName(s, reserved: reservedEnumCases, quotable: quotableEnumCases)
      }

      /// Sanitizes a message scoped extension name using the same rules as
      /// enum cases, but with the message scoped extension name sets.
      package static func sanitize(messageScopedExtensionName s: String) -> String {
          sanitizeCaseStyleName(
              s,
              reserved: reservedMessageScopedExtensionNames,
              quotable: quotableMessageScopedExtensionNames
          )
      }

      /// Shared rules for names that live in an enum-like scope: reserved
      /// names get a trailing `_`, quotable names get backticks, and
      /// all-underscore names get `__` appended.
      private static func sanitizeCaseStyleName(
          _ s: String,
          reserved: Set<String>,
          quotable: Set<String>
      ) -> String {
          if reserved.contains(s) {
              return "\(s)_"
          } else if quotable.contains(s) {
              return "`\(s)`"
          } else if isAllUnderscore(s) {
              return s + "__"
          } else {
              return s
          }
      }

      /// Forces the first character to be uppercase (if possible) and leaves
      /// the rest of the characters in their existing case.
      ///
      /// Use toUpperCamelCase() to get leading "HTTP", "URL", etc. correct.
      package static func uppercaseFirstCharacter(_ s: String) -> String {
          let out = s.unicodeScalars
          guard let first = out.first else {
              // Empty input: nothing to uppercase.
              return s
          }
          var result = makeUnicodeScalarView(from: first.ascUppercased())
          result.append(contentsOf: out.dropFirst())
          return String(result)
      }

      /// Accepts any input and transforms it into a leading
      /// UpperCaseCamelCased Swift identifier. It follows the same conventions
      /// that are used for mapping field names into the Message property names.
      public static func toUpperCamelCase(_ s: String) -> String {
          CamelCaser.transform(s, initialUpperCase: true)
      }

      /// Accepts any input and transforms it into a leading
      /// lowerCaseCamelCased Swift identifier. It follows the same conventions
      /// that are used for mapping field names into the Message property names.
      public static func toLowerCamelCase(_ s: String) -> String {
          CamelCaser.transform(s, initialUpperCase: false)
      }

      /// Removes one surrounding pair of backticks from `s`, if present.
      package static func trimBackticks(_ s: String) -> String {
          // This only has to deal with the backticks added when computing
          // relative names, so they are always matched and a single set.
          let backtick = "`"
          guard s.hasPrefix(backtick) else {
              assert(!s.hasSuffix(backtick))
              return s
          }
          assert(s.hasSuffix(backtick))
          let result = s.dropFirst().dropLast()
          assert(!result.hasPrefix(backtick) && !result.hasSuffix(backtick))
          return String(result)
      }

      /// Replaces every `.` in `s` with `_`.
      static func periodsToUnderscores(_ s: String) -> String {
          s.replacingOccurrences(of: ".", with: "_")
      }

      /// This must be exactly the same as the corresponding code in the
      /// SwiftProtobuf library. Changing it will break compatibility of
      /// the generated code with old library versions.
      public static func toJsonFieldName(_ s: String) -> String {
          var result = String.UnicodeScalarView()
          var capitalizeNext = false
          for c in s.unicodeScalars {
              if c == "_" {
                  // Underscores are dropped; the next scalar gets uppercased.
                  capitalizeNext = true
              } else if capitalizeNext {
                  result.append(c.ascUppercased())
                  capitalizeNext = false
              } else {
                  result.append(c)
              }
          }
          return String(result)
      }
  }