// Sources/SwiftProtobuf/TextFormatEncodingVisitor.swift - Text format encoding support
//
// Copyright (c) 2014 - 2016 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Text format serialization engine.
///
// -----------------------------------------------------------------------------

import Foundation

/// Field names used while encoding the entries of a map field: field 1 is
/// emitted as `key` and field 2 as `value`.
private let mapNameResolver: [Int: StaticString] = [1: "key", 2: "value"]

/// Visitor that serializes a message into protobuf text format.
internal struct TextFormatEncodingVisitor: Visitor {

    private var encoder: TextFormatEncoder
    /// Name map of the message currently being encoded, if it provides one.
    private var nameMap: _NameMap?
    /// Fallback field-number-to-name table (used for map entries).
    private var nameResolver: [Int: StaticString]
    /// Extension values of the message currently being encoded, if any.
    private var extensions: ExtensionFieldValueSet?
    private let options: TextFormatEncodingOptions

    /// The protobuf text produced by the visitor.
    var result: String {
        encoder.stringResult
    }

    /// Creates a new visitor that serializes the given message to protobuf text
    /// format.
    init(message: any Message, options: TextFormatEncodingOptions) {
        let nameMap: _NameMap?
        if let nameProviding = message as? (any _ProtoNameProviding) {
            nameMap = type(of: nameProviding)._protobuf_nameMap
        } else {
            nameMap = nil
        }
        let extensions = (message as? (any ExtensibleMessage))?._protobuf_extensionFieldValues

        self.nameMap = nameMap
        self.nameResolver = [:]
        self.extensions = extensions
        self.encoder = TextFormatEncoder()
        self.options = options
    }

    // TODO: This largely duplicates emitFieldName() below.
    // But, it's slower so we don't want to just have emitFieldName() use
    // formatFieldName().  Also, we need to measure whether the optimization
    // this provides to repeated fields is worth the effort; consider just
    // removing this and having repeated fields just re-run emitFieldName()
    // for each item.

    /// Returns the UTF-8 bytes of the text-format name for `fieldNumber`.
    ///
    /// Lookup order: the current `nameMap`, then `nameResolver`, then the
    /// current `extensions` (rendered as `[name]`), and finally the decimal
    /// field number itself.
    private func formatFieldName(lookingUp fieldNumber: Int) -> [UInt8] {
        var bytes = [UInt8]()
        if let protoName = nameMap?.names(for: fieldNumber)?.proto {
            bytes.append(contentsOf: protoName.utf8Buffer)
        } else if let protoName = nameResolver[fieldNumber] {
            let buff = UnsafeBufferPointer(
                start: protoName.utf8Start,
                count: protoName.utf8CodeUnitCount
            )
            bytes.append(contentsOf: buff)
        } else if let extensionName = extensions?[fieldNumber]?.protobufExtension.fieldName {
            bytes.append(UInt8(ascii: "["))
            bytes.append(contentsOf: extensionName.utf8)
            bytes.append(UInt8(ascii: "]"))
        } else {
            bytes.append(contentsOf: fieldNumber.description.utf8)
        }
        return bytes
    }

    /// Emits the name for `fieldNumber` directly into the encoder, using the
    /// same lookup order as `formatFieldName(lookingUp:)`.
    private mutating func emitFieldName(lookingUp fieldNumber: Int) {
        if let protoName = nameMap?.names(for: fieldNumber)?.proto {
            encoder.emitFieldName(name: protoName.utf8Buffer)
        } else if let protoName = nameResolver[fieldNumber] {
            encoder.emitFieldName(name: protoName)
        } else if let extensionName = extensions?[fieldNumber]?.protobufExtension.fieldName {
            encoder.emitExtensionFieldName(name: extensionName)
        } else {
            encoder.emitFieldNumber(number: fieldNumber)
        }
    }

    /// Prints the unknown fields stored in `bytes`, but only when
    /// `options.printUnknownFields` is set; otherwise does nothing.
    mutating func visitUnknown(bytes: Data) throws {
        if options.printUnknownFields {
            try bytes.withUnsafeBytes { (body: UnsafeRawBufferPointer) -> Void in
                if let baseAddress = body.baseAddress, body.count > 0 {
                    // All fields will be directly handled, so there is no need for
                    // the unknown field buffering/collection (when scanning to see
                    // if something is a message, this would be extremely wasteful).
                    var binaryOptions = BinaryDecodingOptions()
                    binaryOptions.discardUnknownFields = true
                    var decoder = BinaryDecoder(
                        forReadingFrom: baseAddress,
                        count: body.count,
                        options: binaryOptions
                    )
                    try visitUnknown(decoder: &decoder)
                }
            }
        }
    }

    /// Helper for printing out unknowns.
    ///
    /// The implementation tries to be "helpful" and if a length delimited field
    /// appears to be a submessage, it prints it as such. However, that opens the
    /// door to someone sending a message with an unknown field that is a stack
    /// bomb, i.e. - it causes this code to recurse, exhausting the stack and
    /// thus opening up an attack vector. To keep this "help", but avoid the
    /// attack, a limit is placed on how many times it will recurse before just
    /// treating the length delimited fields as bytes and not trying to decode
    /// them.
    ///
    /// - Parameters:
    ///   - decoder: Decoder positioned over the unknown-field bytes to print.
    ///   - recursionBudget: Remaining number of nesting levels for which a
    ///     length-delimited field may still be printed as a submessage.
    private mutating func visitUnknown(
        decoder: inout BinaryDecoder,
        recursionBudget: Int = 10
    ) throws {
        // This stack serves to avoid recursion for groups within groups within
        // groups..., this avoids the stack attack that the message detection
        // hits. No limit is placed on this because there is no stack risk with
        // recursion, and because if a limit was hit, there is no other way to
        // encode the group (the message field can just print as length
        // delimited, groups don't have an option like that).
        var groupFieldNumberStack: [Int] = []

        while let tag = try decoder.getTag() {
            switch tag.wireFormat {
            case .varint:
                encoder.emitFieldNumber(number: tag.fieldNumber)
                var value: UInt64 = 0
                encoder.startRegularField()
                try decoder.decodeSingularUInt64Field(value: &value)
                encoder.putUInt64(value: value)
                encoder.endRegularField()
            case .fixed64:
                encoder.emitFieldNumber(number: tag.fieldNumber)
                var value: UInt64 = 0
                encoder.startRegularField()
                try decoder.decodeSingularFixed64Field(value: &value)
                encoder.putUInt64Hex(value: value, digits: 16)
                encoder.endRegularField()
            case .lengthDelimited:
                encoder.emitFieldNumber(number: tag.fieldNumber)
                var bytes = Data()
                try decoder.decodeSingularBytesField(value: &bytes)
                var encodeAsBytes = true
                if bytes.count > 0 && recursionBudget > 0 {
                    bytes.withUnsafeBytes { (body: UnsafeRawBufferPointer) -> Void in
                        if let baseAddress = body.baseAddress, body.count > 0 {
                            do {
                                // Walk all the fields to test if it looks like a message
                                var testDecoder = BinaryDecoder(
                                    forReadingFrom: baseAddress,
                                    count: body.count,
                                    parent: decoder
                                )
                                while let _ = try testDecoder.nextFieldNumber() {
                                }
                                // No error?  Output the message body.
                                encodeAsBytes = false
                                var subDecoder = BinaryDecoder(
                                    forReadingFrom: baseAddress,
                                    count: body.count,
                                    parent: decoder
                                )
                                encoder.startMessageField()
                                try visitUnknown(
                                    decoder: &subDecoder,
                                    recursionBudget: recursionBudget - 1
                                )
                                encoder.endMessageField()
                            } catch {
                                // Anything that fails to parse falls back to
                                // being printed as a plain bytes value.
                                encodeAsBytes = true
                            }
                        }
                    }
                }
                if encodeAsBytes {
                    encoder.startRegularField()
                    encoder.putBytesValue(value: bytes)
                    encoder.endRegularField()
                }
            case .startGroup:
                encoder.emitFieldNumber(number: tag.fieldNumber)
                encoder.startMessageField()
                groupFieldNumberStack.append(tag.fieldNumber)
            case .endGroup:
                let groupFieldNumber = groupFieldNumberStack.popLast()
                // Unknown data is scanned and verified by the
                // binary parser, so this can never fail.
                assert(tag.fieldNumber == groupFieldNumber)
                encoder.endMessageField()
            case .fixed32:
                encoder.emitFieldNumber(number: tag.fieldNumber)
                var value: UInt32 = 0
                encoder.startRegularField()
                try decoder.decodeSingularFixed32Field(value: &value)
                encoder.putUInt64Hex(value: UInt64(value), digits: 8)
                encoder.endRegularField()
            }
        }

        // Unknown data is scanned and verified by the binary parser, so this can
        // never fail.
        assert(groupFieldNumberStack.isEmpty)
    }

    // Visitor.swift defines default versions for other singular field types
    // that simply widen and dispatch to one of the following.  Since Text format
    // does not distinguish e.g., Fixed64 vs. UInt64, this is sufficient.

    mutating func visitSingularFloatField(value: Float, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putFloatValue(value: value)
        encoder.endRegularField()
    }

    mutating func visitSingularDoubleField(value: Double, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putDoubleValue(value: value)
        encoder.endRegularField()
    }

    mutating func visitSingularInt64Field(value: Int64, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putInt64(value: value)
        encoder.endRegularField()
    }

    mutating func visitSingularUInt64Field(value: UInt64, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putUInt64(value: value)
        encoder.endRegularField()
    }

    mutating func visitSingularBoolField(value: Bool, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putBoolValue(value: value)
        encoder.endRegularField()
    }

    mutating func visitSingularStringField(value: String, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putStringValue(value: value)
        encoder.endRegularField()
    }

    mutating func visitSingularBytesField(value: Data, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putBytesValue(value: value)
        encoder.endRegularField()
    }

    mutating func visitSingularEnumField<E: Enum>(value: E, fieldNumber: Int) throws {
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        encoder.putEnumValue(value: value)
        encoder.endRegularField()
    }

    /// Encodes a nested message field. The visitor's name/extension lookup
    /// state is switched to the submessage for the duration of the traversal
    /// and restored afterwards.
    mutating func visitSingularMessageField<M: Message>(
        value: M,
        fieldNumber: Int
    ) throws {
        emitFieldName(lookingUp: fieldNumber)

        // Cache old visitor configuration
        let oldNameMap = self.nameMap
        let oldNameResolver = self.nameResolver
        let oldExtensions = self.extensions
        // Update configuration for new message
        self.nameMap = (M.self as? any _ProtoNameProviding.Type)?._protobuf_nameMap
        self.nameResolver = [:]
        self.extensions = (value as? (any ExtensibleMessage))?._protobuf_extensionFieldValues
        // Encode submessage
        encoder.startMessageField()
        if let any = value as? Google_Protobuf_Any {
            any.textTraverse(visitor: &self)
        } else {
            // NOTE(review): `try!` traps if traversal throws; presumably text
            // traversal is not expected to fail here - confirm.
            try! value.traverse(visitor: &self)
        }
        encoder.endMessageField()
        // Restore configuration before returning
        self.extensions = oldExtensions
        self.nameResolver = oldNameResolver
        self.nameMap = oldNameMap
    }

    // Emit the full "verbose" form of an Any.  This writes the typeURL
    // as a field name in `[...]` followed by the fields of the
    // contained message.
    internal mutating func visitAnyVerbose(value: any Message, typeURL: String) {
        encoder.emitExtensionFieldName(name: typeURL)
        encoder.startMessageField()

        // Cache old visitor configuration
        let oldNameMap = self.nameMap
        let oldNameResolver = self.nameResolver
        let oldExtensions = self.extensions
        // Update configuration for new message
        self.nameMap = (type(of: value) as? any _ProtoNameProviding.Type)?._protobuf_nameMap
        self.nameResolver = [:]
        self.extensions = (value as? (any ExtensibleMessage))?._protobuf_extensionFieldValues

        if let any = value as? Google_Protobuf_Any {
            any.textTraverse(visitor: &self)
        } else {
            // NOTE(review): this method is non-throwing, so a traversal error
            // traps here - confirm that is intended.
            try! value.traverse(visitor: &self)
        }

        // Restore configuration before returning
        self.extensions = oldExtensions
        self.nameResolver = oldNameResolver
        self.nameMap = oldNameMap

        encoder.endMessageField()
    }

    // Write a single special field called "#json".  This
    // is used for Any objects with undecoded JSON contents.
    internal mutating func visitAnyJSONBytesField(value: Data) {
        encoder.indent()
        encoder.append(staticText: "#json: ")
        encoder.putBytesValue(value: value)
        encoder.append(staticText: "\n")
    }

    // The default implementations in Visitor.swift provide the correct
    // results, but we get significantly better performance by only doing
    // the name lookup once for the array, rather than once for each element:

    mutating func visitRepeatedFloatField(value: [Float], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putFloatValue(value: v)
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedDoubleField(value: [Double], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putDoubleValue(value: v)
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedInt32Field(value: [Int32], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putInt64(value: Int64(v))
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedInt64Field(value: [Int64], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putInt64(value: v)
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedUInt32Field(value: [UInt32], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putUInt64(value: UInt64(v))
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedUInt64Field(value: [UInt64], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putUInt64(value: v)
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedSInt32Field(value: [Int32], fieldNumber: Int) throws {
        try visitRepeatedInt32Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitRepeatedSInt64Field(value: [Int64], fieldNumber: Int) throws {
        try visitRepeatedInt64Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitRepeatedFixed32Field(value: [UInt32], fieldNumber: Int) throws {
        try visitRepeatedUInt32Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitRepeatedFixed64Field(value: [UInt64], fieldNumber: Int) throws {
        try visitRepeatedUInt64Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitRepeatedSFixed32Field(value: [Int32], fieldNumber: Int) throws {
        try visitRepeatedInt32Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitRepeatedSFixed64Field(value: [Int64], fieldNumber: Int) throws {
        try visitRepeatedInt64Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitRepeatedBoolField(value: [Bool], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putBoolValue(value: v)
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedStringField(value: [String], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putStringValue(value: v)
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedBytesField(value: [Data], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putBytesValue(value: v)
            encoder.endRegularField()
        }
    }

    mutating func visitRepeatedEnumField<E: Enum>(value: [E], fieldNumber: Int) throws {
        assert(!value.isEmpty)
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        for v in value {
            encoder.emitFieldName(name: fieldName)
            encoder.startRegularField()
            encoder.putEnumValue(value: v)
            encoder.endRegularField()
        }
    }

    // Messages and groups

    /// Encodes each element of a repeated message field as its own
    /// `name { ... }` entry. The field name is resolved once against the
    /// containing message; the lookup state is then switched to the element
    /// type for the traversals and restored afterwards.
    mutating func visitRepeatedMessageField<M: Message>(
        value: [M],
        fieldNumber: Int
    ) throws {
        assert(!value.isEmpty)
        // Look up field name against outer message encoding state
        let fieldName = formatFieldName(lookingUp: fieldNumber)
        // Cache old visitor state
        let oldNameMap = self.nameMap
        let oldNameResolver = self.nameResolver
        let oldExtensions = self.extensions
        // Update encoding state for new message type
        self.nameMap = (M.self as? any _ProtoNameProviding.Type)?._protobuf_nameMap
        self.nameResolver = [:]
        // Iterate and encode each message
        for v in value {
            // Extension values belong to each individual element, so they must
            // be looked up per message. (Casting the whole array to
            // `ExtensibleMessage` can never succeed and would leave extension
            // fields to be printed as bare field numbers.)
            self.extensions = (v as? (any ExtensibleMessage))?._protobuf_extensionFieldValues
            encoder.emitFieldName(name: fieldName)
            encoder.startMessageField()
            if let any = v as? Google_Protobuf_Any {
                any.textTraverse(visitor: &self)
            } else {
                // NOTE(review): `try!` traps if traversal throws; presumably
                // text traversal is not expected to fail here - confirm.
                try! v.traverse(visitor: &self)
            }
            encoder.endMessageField()
        }
        // Restore state
        self.extensions = oldExtensions
        self.nameResolver = oldNameResolver
        self.nameMap = oldNameMap
    }

    // Google's C++ implementation of Text format supports two formats
    // for repeated numeric fields: "short" format writes the list as a
    // single field with values enclosed in `[...]`, "long" format
    // writes a separate field name/value for each item.  They provide
    // an option for callers to select which output version they prefer.

    // Since this distinction mirrors the difference in Protobuf Binary
    // between "packed" and "non-packed", I've chosen to use the short
    // format for packed fields and the long version for repeated
    // fields.  This provides a clear visual distinction between these
    // fields (including proto3's default use of packed) without
    // introducing the baggage of a separate option.

    /// Emits a packed field in the short `name: [v1, v2, ...]` form, calling
    /// `encode` to write each individual value.
    private mutating func iterateAndEncode<T>(
        packedValue: [T],
        fieldNumber: Int,
        encode: (T, inout TextFormatEncoder) -> Void
    ) throws {
        assert(!packedValue.isEmpty)
        emitFieldName(lookingUp: fieldNumber)
        encoder.startRegularField()
        var firstItem = true
        encoder.startArray()
        for v in packedValue {
            if !firstItem {
                encoder.arraySeparator()
            }
            encode(v, &encoder)
            firstItem = false
        }
        encoder.endArray()
        encoder.endRegularField()
    }

    mutating func visitPackedFloatField(value: [Float], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: Float, encoder: inout TextFormatEncoder) in
            encoder.putFloatValue(value: v)
        }
    }

    mutating func visitPackedDoubleField(value: [Double], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: Double, encoder: inout TextFormatEncoder) in
            encoder.putDoubleValue(value: v)
        }
    }

    mutating func visitPackedInt32Field(value: [Int32], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: Int32, encoder: inout TextFormatEncoder) in
            encoder.putInt64(value: Int64(v))
        }
    }

    mutating func visitPackedInt64Field(value: [Int64], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: Int64, encoder: inout TextFormatEncoder) in
            encoder.putInt64(value: v)
        }
    }

    mutating func visitPackedUInt32Field(value: [UInt32], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: UInt32, encoder: inout TextFormatEncoder) in
            encoder.putUInt64(value: UInt64(v))
        }
    }

    mutating func visitPackedUInt64Field(value: [UInt64], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: UInt64, encoder: inout TextFormatEncoder) in
            encoder.putUInt64(value: v)
        }
    }

    mutating func visitPackedSInt32Field(value: [Int32], fieldNumber: Int) throws {
        try visitPackedInt32Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitPackedSInt64Field(value: [Int64], fieldNumber: Int) throws {
        try visitPackedInt64Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitPackedFixed32Field(value: [UInt32], fieldNumber: Int) throws {
        try visitPackedUInt32Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitPackedFixed64Field(value: [UInt64], fieldNumber: Int) throws {
        try visitPackedUInt64Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitPackedSFixed32Field(value: [Int32], fieldNumber: Int) throws {
        try visitPackedInt32Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitPackedSFixed64Field(value: [Int64], fieldNumber: Int) throws {
        try visitPackedInt64Field(value: value, fieldNumber: fieldNumber)
    }

    mutating func visitPackedBoolField(value: [Bool], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: Bool, encoder: inout TextFormatEncoder) in
            encoder.putBoolValue(value: v)
        }
    }

    mutating func visitPackedEnumField<E: Enum>(value: [E], fieldNumber: Int) throws {
        try iterateAndEncode(packedValue: value, fieldNumber: fieldNumber) {
            (v: E, encoder: inout TextFormatEncoder) in
            encoder.putEnumValue(value: v)
        }
    }

    /// Helper to encapsulate the common structure of iterating over a map
    /// and encoding the keys and values.
    ///
    /// Entries are emitted in the order given by `isOrderedBefore` applied to
    /// the keys. Each entry is written as a message field named after the map
    /// field, inside which `encode` is expected to visit the key and value;
    /// while it runs, field numbers are resolved through `mapNameResolver`.
    private mutating func iterateAndEncode<K, V>(
        map: [K: V],
        fieldNumber: Int,
        isOrderedBefore: (K, K) -> Bool,
        encode: (inout TextFormatEncodingVisitor, K, V) throws -> Void
    ) throws {
        // Cache old visitor configuration
        let oldNameMap = self.nameMap
        let oldNameResolver = self.nameResolver
        let oldExtensions = self.extensions

        for (k, v) in map.sorted(by: { isOrderedBefore($0.0, $1.0) }) {
            emitFieldName(lookingUp: fieldNumber)
            encoder.startMessageField()

            // Update visitor configuration for map
            self.nameMap = nil
            self.nameResolver = mapNameResolver
            self.extensions = nil

            try encode(&self, k, v)

            // Restore configuration before resuming containing message
            self.extensions = oldExtensions
            self.nameResolver = oldNameResolver
            self.nameMap = oldNameMap

            encoder.endMessageField()
        }
    }

    mutating func visitMapField<KeyType, ValueType: MapValueType>(
        fieldType: _ProtobufMap<KeyType, ValueType>.Type,
        value: _ProtobufMap<KeyType, ValueType>.BaseType,
        fieldNumber: Int
    ) throws {
        try iterateAndEncode(map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan) {
            (visitor: inout TextFormatEncodingVisitor, key, value) throws -> Void in
            try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
            try ValueType.visitSingular(value: value, fieldNumber: 2, with: &visitor)
        }
    }

    mutating func visitMapField<KeyType, ValueType>(
        fieldType: _ProtobufEnumMap<KeyType, ValueType>.Type,
        value: _ProtobufEnumMap<KeyType, ValueType>.BaseType,
        fieldNumber: Int
    ) throws where ValueType.RawValue == Int {
        try iterateAndEncode(map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan) {
            (visitor: inout TextFormatEncodingVisitor, key, value) throws -> Void in
            try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
            try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
        }
    }

    mutating func visitMapField<KeyType, ValueType>(
        fieldType: _ProtobufMessageMap<KeyType, ValueType>.Type,
        value: _ProtobufMessageMap<KeyType, ValueType>.BaseType,
        fieldNumber: Int
    ) throws {
        try iterateAndEncode(map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan) {
            (visitor: inout TextFormatEncodingVisitor, key, value) throws -> Void in
            try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
            try visitor.visitSingularMessageField(value: value, fieldNumber: 2)
        }
    }
}