// Sources/SwiftProtobuf/TextFormatScanner.swift - Text format decoding
//
// Copyright (c) 2014 - 2019 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Text format decoding engine.
///
// -----------------------------------------------------------------------------

import Foundation

// Named ASCII byte values used throughout this file, so the scanning code
// can compare raw UTF-8 bytes against readable names rather than literals.

// Control characters (these are the bytes produced by the \a \b \t \n \v \f \r
// escapes handled further down).
private let asciiBell = UInt8(7)
private let asciiBackspace = UInt8(8)
private let asciiTab = UInt8(9)
private let asciiNewLine = UInt8(10)
private let asciiVerticalTab = UInt8(11)
private let asciiFormFeed = UInt8(12)
private let asciiCarriageReturn = UInt8(13)
// Digits that act as range boundaries (octal 0...7, decimal 0...9).
private let asciiZero = UInt8(ascii: "0")
private let asciiOne = UInt8(ascii: "1")
private let asciiThree = UInt8(ascii: "3")
private let asciiSeven = UInt8(ascii: "7")
private let asciiNine = UInt8(ascii: "9")
// Punctuation.
private let asciiColon = UInt8(ascii: ":")
private let asciiPeriod = UInt8(ascii: ".")
private let asciiPlus = UInt8(ascii: "+")
private let asciiComma = UInt8(ascii: ",")
private let asciiSemicolon = UInt8(ascii: ";")
private let asciiDoubleQuote = UInt8(ascii: "\"")
private let asciiSingleQuote = UInt8(ascii: "\'")
private let asciiBackslash = UInt8(ascii: "\\")
private let asciiForwardSlash = UInt8(ascii: "/")
private let asciiHash = UInt8(ascii: "#")
private let asciiUnderscore = UInt8(ascii: "_")
private let asciiQuestionMark = UInt8(ascii: "?")
private let asciiSpace = UInt8(ascii: " ")
// Brackets.
private let asciiOpenSquareBracket = UInt8(ascii: "[")
private let asciiCloseSquareBracket = UInt8(ascii: "]")
private let asciiOpenCurlyBracket = UInt8(ascii: "{")
private let asciiCloseCurlyBracket = UInt8(ascii: "}")
private let asciiOpenAngleBracket = UInt8(ascii: "<")
private let asciiCloseAngleBracket = UInt8(ascii: ">")
private let asciiMinus = UInt8(ascii: "-")
// Letters: range boundaries (a/z, A/Z, a/f, A/F) plus the individual letters
// that appear as escape characters or numeric markers.
private let asciiLowerA = UInt8(ascii: "a")
private let asciiUpperA = UInt8(ascii: "A")
private let asciiLowerB = UInt8(ascii: "b")
private let asciiLowerE = UInt8(ascii: "e")
private let asciiUpperE = UInt8(ascii: "E")
private let asciiLowerF = UInt8(ascii: "f")
private let asciiUpperF = UInt8(ascii: "F")
private let asciiLowerI = UInt8(ascii: "i")
private let asciiLowerL = UInt8(ascii: "l")
private let asciiLowerN = UInt8(ascii: "n")
private let asciiLowerR = UInt8(ascii: "r")
private let asciiLowerS = UInt8(ascii: "s")
private let asciiLowerT = UInt8(ascii: "t")
private let asciiUpperT = UInt8(ascii: "T")
private let asciiLowerU = UInt8(ascii: "u")
private let asciiUpperU = UInt8(ascii: "U")
private let asciiLowerV = UInt8(ascii: "v")
private let asciiLowerX = UInt8(ascii: "x")
private let asciiLowerY = UInt8(ascii: "y")
private let asciiLowerZ = UInt8(ascii: "z")
private let asciiUpperZ = UInt8(ascii: "Z")

// https://protobuf.dev/programming-guides/proto2/#assigning
// Field numbers can be between 1 and 536,870,911, i.e. at most nine decimal
// digits.  So we can stop parsing a raw number once it has more digits than
// this (which also avoids integer rollover while accumulating the value).
private let maxFieldNumLength: Int = 9

/// Map an ASCII hex digit (`0-9`, `A-F`, `a-f`) to its numeric value 0...15.
///
/// - Parameter c: The raw byte to classify.
/// - Returns: The digit's value, or `nil` when `c` is not a hex digit.
private func fromHexDigit(_ c: UInt8) -> UInt8? {
    switch c {
    case asciiZero...asciiNine:
        return c - asciiZero
    case asciiUpperA...asciiUpperF:
        return c - asciiUpperA + UInt8(10)
    case asciiLowerA...asciiLowerF:
        return c - asciiLowerA + UInt8(10)
    default:
        return nil
    }
}

/// Same as `fromHexDigit(_:)`, but widens the result to `UInt32` so callers
/// can shift it into a code point without converting at each use.
///
/// - Parameter c: The raw byte to classify.
/// - Returns: The digit's value, or `nil` when `c` is not a hex digit.
private func uint32FromHexDigit(_ c: UInt8) -> UInt32? {
    fromHexDigit(c).map { UInt32($0) }
}

// Protobuf Text format string literals are defined in terms of UTF-8 bytes.
// This routine walks the UTF-8 view of `s`, expands every backslash escape
// into the byte(s) it denotes, and then converts the resulting byte buffer
// back into a String.  It returns nil for a malformed escape, for a trailing
// backslash, or when the final conversion back to a String yields nil.
private func decodeString(_ s: String) -> String? {

    // Consume exactly four hex digits and combine them into one value.
    // Returns nil if the input runs out or a byte is not a hex digit.
    func read4HexDigits(_ source: inout String.UTF8View.Iterator) -> UInt32? {
        var value: UInt32 = 0
        for _ in 0..<4 {
            guard let raw = source.next(), let nibble = uint32FromHexDigit(raw) else {
                return nil
            }
            value = (value << 4) + nibble
        }
        return value
    }

    var decoded = [UInt8]()
    var input = s.utf8.makeIterator()
    while let current = input.next() {
        // Anything that is not a backslash is copied through untouched.
        guard current == asciiBackslash else {
            decoded.append(current)
            continue
        }
        guard let escaped = input.next() else {
            return nil  // Input ends with backslash
        }
        switch escaped {
        case asciiZero...asciiSeven:
            // Octal escape: the C standard allows 1, 2, or 3 octal digits.
            // The largest meaningful value is \377, but the C++ protobuf code
            // (strutil.cc:UnescapeCEscapeSequences()) decodes with rollover,
            // so wrapping arithmetic is used to match it across languages.
            var value = escaped - asciiZero
            for _ in 0..<2 {
                // Remember where we were so a non-octal byte can be "un-read".
                let rewind = input
                guard let next = input.next(), next >= asciiZero, next <= asciiSeven else {
                    input = rewind
                    break
                }
                value = value &* 8 &+ (next - asciiZero)
            }
            decoded.append(value)
        case asciiLowerU, asciiUpperU:
            // \u is followed by 4 hex digits, \U by 8.
            guard var codePoint = read4HexDigits(&input) else { return nil }
            if escaped == asciiUpperU {
                guard let lowHalf = read4HexDigits(&input) else { return nil }
                codePoint = (codePoint << 16) + lowHalf
            }
            // Emit the UTF-8 encoding of the code point.
            switch codePoint {
            case 0...0x7f:
                // 1 byte encoding
                decoded.append(UInt8(truncatingIfNeeded: codePoint))
            case 0x80...0x7ff:
                // 2 byte encoding
                decoded.append(0xC0 | UInt8(truncatingIfNeeded: codePoint >> 6))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F))
            case 0x800...0xffff:
                // 3 byte encoding
                decoded.append(0xE0 | UInt8(truncatingIfNeeded: codePoint >> 12))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F))
            case 0x10000...0x10FFFF:
                // 4 byte encoding
                decoded.append(0xF0 | UInt8(truncatingIfNeeded: codePoint >> 18))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F))
            default:
                return nil  // Beyond the Unicode range
            }
        case asciiLowerX:
            // Unlike C/C++, protobuf only allows 1 or 2 hex digits here.
            guard let firstByte = input.next(), let high = fromHexDigit(firstByte) else {
                return nil  // Hex escape must have at least 1 digit
            }
            let rewind = input
            if let secondByte = input.next(), let low = fromHexDigit(secondByte) {
                decoded.append(high &* 16 + low)
            } else {
                // No second digit; put back whatever was just read.
                input = rewind
                decoded.append(high)
            }
        case asciiLowerA:  // \a
            decoded.append(asciiBell)
        case asciiLowerB:  // \b
            decoded.append(asciiBackspace)
        case asciiLowerF:  // \f
            decoded.append(asciiFormFeed)
        case asciiLowerN:  // \n
            decoded.append(asciiNewLine)
        case asciiLowerR:  // \r
            decoded.append(asciiCarriageReturn)
        case asciiLowerT:  // \t
            decoded.append(asciiTab)
        case asciiLowerV:  // \v
            decoded.append(asciiVerticalTab)
        case asciiDoubleQuote, asciiSingleQuote, asciiQuestionMark, asciiBackslash:
            // \" \' \? \\ stand for the character itself.
            decoded.append(escaped)
        default:
            return nil  // Unrecognized escape
        }
    }

    // Turn the accumulated bytes back into a String.  An empty array may have
    // no base address, in which case the result is the empty string.
    return decoded.withUnsafeBufferPointer { buffer -> String? in
        guard let base = buffer.baseAddress else {
            return String()
        }
        return utf8ToString(bytes: base, count: buffer.count)
    }
}

///
/// TextFormatScanner has no public members.
///
internal struct TextFormatScanner {
    /// Optional extension map handed to `init`; stored as-is.
    internal let extensions: (any ExtensionMap)?
    /// Current read position within the input bytes.
    private var p: UnsafeRawPointer
    /// One past the last input byte (`init` computes it as start + count).
    private let end: UnsafeRawPointer
    /// Helper used when converting a run of UTF-8 bytes to a `Double`.
    private let doubleParser = DoubleParser()

    /// Decoding options handed to `init`; `messageDepthLimit` is read from here.
    internal let options: TextFormatDecodingOptions
    /// Remaining nesting budget: reduced by `incrementRecursionDepth()` and
    /// restored by `decrementRecursionDepth()`.
    internal var recursionBudget: Int

    /// True once the read position has reached the end of the input.
    internal var complete: Bool { p == end }

    /// Create a scanner over `count` bytes of UTF-8 starting at `utf8Pointer`.
    ///
    /// Leading whitespace and `#` comments are skipped immediately, so the
    /// scanner is positioned at the first significant byte (or at the end).
    ///
    /// - Parameters:
    ///   - utf8Pointer: Start of the input bytes.
    ///   - count: Number of input bytes.
    ///   - options: Decoding options; supplies `messageDepthLimit`.
    ///   - extensions: Optional extension map, stored for later use.
    internal init(
        utf8Pointer: UnsafeRawPointer,
        count: Int,
        options: TextFormatDecodingOptions,
        extensions: (any ExtensionMap)? = nil
    ) {
        self.options = options
        self.extensions = extensions
        self.p = utf8Pointer
        self.end = utf8Pointer + count
        // The root message is never opened via `skipObjectStart`, so one
        // level of the depth limit is spent up front to account for it.
        self.recursionBudget = options.messageDepthLimit - 1
        self.skipWhitespace()
    }

    /// Spend one level of the nesting budget.
    ///
    /// - Throws: `TextFormatDecodingError.messageDepthLimit` when the budget
    ///   drops below zero.
    private mutating func incrementRecursionDepth() throws {
        recursionBudget -= 1
        guard recursionBudget >= 0 else {
            throw TextFormatDecodingError.messageDepthLimit
        }
    }

    /// Give back one level of the nesting budget.
    ///
    /// Traps if the budget ends up above the configured depth limit: that
    /// should be impossible, and if it happens memory is probably being
    /// corrupted, so throwing an error would not be a meaningful response.
    private mutating func decrementRecursionDepth() {
        recursionBudget += 1
        guard recursionBudget <= options.messageDepthLimit else {
            fatalError("Somehow TextFormatDecoding unwound more objects than it started")
        }
    }

    /// Advance past whitespace (space, tab, NL, CR) and `#` comments.
    ///
    /// A comment runs from `#` through the next newline or carriage return
    /// (inclusive), or to the end of the input.
    private mutating func skipWhitespace() {
        while p != end {
            switch p[0] {
            case asciiSpace, asciiTab, asciiNewLine, asciiCarriageReturn:
                p += 1
            case asciiHash:
                // Drop the '#', then everything up to and including the
                // line terminator.
                p += 1
                var reachedLineEnd = false
                while !reachedLineEnd, p != end {
                    let commentByte = p[0]
                    p += 1
                    reachedLineEnd = (commentByte == asciiNewLine || commentByte == asciiCarriageReturn)
                }
            default:
                return
            }
        }
    }

    /// Consume an identifier and return a view of its raw UTF-8 bytes.
    ///
    /// An identifier is a run of ASCII letters, digits, and underscores.
    /// The caller is expected to have checked that the current byte is a
    /// valid identifier start.  Trailing whitespace/comments are skipped
    /// before returning.
    private mutating func parseUTF8Identifier() -> UnsafeRawBufferPointer {
        func isIdentifierByte(_ byte: UInt8) -> Bool {
            switch byte {
            case asciiUnderscore,
                asciiZero...asciiNine,
                asciiUpperA...asciiUpperZ,
                asciiLowerA...asciiLowerZ:
                return true
            default:
                return false
            }
        }

        let first = p
        while p != end, isIdentifierByte(p[0]) {
            p += 1
        }
        let identifier = UnsafeRawBufferPointer(start: first, count: p - first)
        skipWhitespace()
        return identifier
    }

    /// Consume the next identifier and return it as a `String`.
    private mutating func parseIdentifier() -> String {
        let raw = parseUTF8Identifier()
        // Both force-unwraps are OK: the bytes just scanned are plain ASCII
        // identifier characters, so we never have invalid UTF8 at this point.
        return utf8ToString(bytes: raw.baseAddress!, count: raw.count)!
    }

    /// Scan a quoted literal that encodes a bytes field, check that it is
    /// well formed, and return the number of bytes it will decode to.
    ///
    /// Scanning starts at `p`, i.e. the caller has already consumed the
    /// opening quote.  On success `p` is rewound to where it was on entry, so
    /// the caller can then either copy the body verbatim (no escapes) or
    /// decode it with `parseBytesFromString()`.  When an error is thrown, `p`
    /// is left wherever the scan stopped.
    ///
    /// - Parameters:
    ///   - terminator: The quote byte that closes the literal.
    ///   - sawBackslash: Reset to `false` on entry; set to `true` if the
    ///     literal contains at least one backslash.
    /// - Returns: The decoded byte count.
    /// - Throws: `TextFormatDecodingError.malformedText` for a raw newline or
    ///   carriage return inside the literal, an invalid or out-of-range
    ///   escape, or input that ends before the terminator is found.
    private mutating func validateAndCountBytesFromString(terminator: UInt8, sawBackslash: inout Bool) throws -> Int {
        var count = 0
        let start = p
        sawBackslash = false
        while p != end {
            let byte = p[0]
            p += 1
            if byte == terminator {
                // Found the closing quote: rewind so the caller can re-read the body.
                p = start
                return count
            }
            switch byte {
            case asciiNewLine, asciiCarriageReturn:
                // Can't have a newline in the middle of a bytes string.
                throw TextFormatDecodingError.malformedText
            case asciiBackslash:  //  "\\"
                sawBackslash = true
                // A backslash as the very last input byte falls through here;
                // the outer loop then ends and the final throw reports it.
                if p != end {
                    let escaped = p[0]
                    p += 1
                    switch escaped {
                    case asciiZero...asciiSeven:  // '0'...'7'
                        // C standard allows 1, 2, or 3 octal digits.
                        if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                            p += 1
                            if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                                if escaped > asciiThree {
                                    // Out of range octal: three digits and first digit is greater than 3
                                    throw TextFormatDecodingError.malformedText
                                }
                                p += 1
                            }
                        }
                        // Any octal escape yields exactly one byte.
                        count += 1
                    case asciiLowerU, asciiUpperU:  // 'u' or 'U' unicode escape
                        let numDigits = (escaped == asciiLowerU) ? 4 : 8
                        guard (end - p) >= numDigits else {
                            throw TextFormatDecodingError.malformedText  // unicode escape must have 4/8 digits
                        }
                        var codePoint: UInt32 = 0
                        for i in 0..<numDigits {
                            if let digit = uint32FromHexDigit(p[i]) {
                                codePoint = (codePoint << 4) + digit
                            } else {
                                throw TextFormatDecodingError.malformedText  // wasn't a hex digit
                            }
                        }
                        p += numDigits
                        // Count the bytes of the code point's UTF-8 encoding.
                        switch codePoint {
                        case 0...0x7f:
                            // 1 byte encoding
                            count += 1
                        case 0x80...0x7ff:
                            // 2 byte encoding
                            count += 2
                        case 0xD800...0xDFFF:
                            // Surrogate (low or high), shouldn't get a unicode literal of those.
                            // This case must stay ahead of the 3 byte range below, which overlaps it.
                            throw TextFormatDecodingError.malformedText
                        case 0x800...0xffff:
                            // 3 byte encoding
                            count += 3
                        case 0x10000...0x10FFFF:
                            // 4 byte encoding
                            count += 4
                        default:
                            throw TextFormatDecodingError.malformedText  // Isn't a valid unicode character
                        }
                    case asciiLowerX:  // 'x' hexadecimal escape
                        // One or two hex digits; either way the escape yields one byte.
                        if p != end && fromHexDigit(p[0]) != nil {
                            p += 1
                            if p != end && fromHexDigit(p[0]) != nil {
                                p += 1
                            }
                        } else {
                            throw TextFormatDecodingError.malformedText  // Hex escape must have at least 1 digit
                        }
                        count += 1
                    case asciiLowerA,  // \a ("alert")
                        asciiLowerB,  // \b
                        asciiLowerF,  // \f
                        asciiLowerN,  // \n
                        asciiLowerR,  // \r
                        asciiLowerT,  // \t
                        asciiLowerV,  // \v
                        asciiSingleQuote,  // \'
                        asciiDoubleQuote,  // \"
                        asciiQuestionMark,  // \?
                        asciiBackslash:  // \\
                        count += 1
                    default:
                        throw TextFormatDecodingError.malformedText  // Unrecognized escape
                    }
                }
            default:
                // Ordinary byte: copied through as-is.
                count += 1
            }
        }
        // Ran off the end of the input without seeing the terminator.
        throw TextFormatDecodingError.malformedText
    }

    /// Decode the quoted bytes literal at `p` into `data`, then consume the
    /// closing quote.
    ///
    /// Protobuf Text format uses C ASCII conventions for
    /// encoding byte sequences, including the use of octal
    /// and hexadecimal escapes.
    ///
    /// Assumes that validateAndCountBytesFromString() has already
    /// verified the correctness.  So we get to avoid error checks here:
    /// no bounds checks are made against `end`, and hex digits are
    /// force-unwrapped.
    ///
    /// - Parameters:
    ///   - terminator: The quote byte that closes the literal.
    ///   - data: Destination buffer.  Decoded bytes are written from its
    ///     start without checking its length, so it must already be sized to
    ///     the decoded byte count.  If it is empty, nothing is read or
    ///     consumed (not even the terminator).
    private mutating func parseBytesFromString(terminator: UInt8, into data: inout Data) {
        data.withUnsafeMutableBytes {
            (body: UnsafeMutableRawBufferPointer) in
            // `out` is the write cursor into `data`; it advances as bytes are produced.
            if var out = body.baseAddress, body.count > 0 {
                while p[0] != terminator {
                    let byte = p[0]
                    p += 1
                    switch byte {
                    case asciiBackslash:  //  "\\"
                        let escaped = p[0]
                        p += 1
                        switch escaped {
                        case asciiZero...asciiSeven:  // '0'...'7'
                            // C standard allows 1, 2, or 3 octal digits.
                            let digit1Value = escaped - asciiZero
                            let digit2 = p[0]
                            if digit2 >= asciiZero, digit2 <= asciiSeven {
                                p += 1
                                let digit2Value = digit2 - asciiZero
                                let digit3 = p[0]
                                if digit3 >= asciiZero, digit3 <= asciiSeven {
                                    p += 1
                                    let digit3Value = digit3 - asciiZero
                                    // Validation rejected three-digit escapes whose first
                                    // digit exceeds 3, so this sum fits in a byte.
                                    out[0] = digit1Value &* 64 + digit2Value * 8 + digit3Value
                                    out += 1
                                } else {
                                    out[0] = digit1Value * 8 + digit2Value
                                    out += 1
                                }
                            } else {
                                out[0] = digit1Value
                                out += 1
                            }
                        case asciiLowerU, asciiUpperU:
                            // \u carries 4 hex digits, \U carries 8.
                            let numDigits = (escaped == asciiLowerU) ? 4 : 8
                            var codePoint: UInt32 = 0
                            for i in 0..<numDigits {
                                codePoint = (codePoint << 4) + uint32FromHexDigit(p[i])!
                            }
                            p += numDigits
                            // Write the code point's UTF-8 encoding.
                            switch codePoint {
                            case 0...0x7f:
                                // 1 byte encoding
                                out[0] = UInt8(truncatingIfNeeded: codePoint)
                                out += 1
                            case 0x80...0x7ff:
                                // 2 byte encoding
                                out[0] = 0xC0 + UInt8(truncatingIfNeeded: codePoint >> 6)
                                out[1] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                                out += 2
                            case 0x800...0xffff:
                                // 3 byte encoding
                                out[0] = 0xE0 + UInt8(truncatingIfNeeded: codePoint >> 12)
                                out[1] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                                out[2] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                                out += 3
                            case 0x10000...0x10FFFF:
                                // 4 byte encoding
                                out[0] = 0xF0 + UInt8(truncatingIfNeeded: codePoint >> 18)
                                out[1] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F)
                                out[2] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                                out[3] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                                out += 4
                            default:
                                preconditionFailure()  // Already validated, can't happen
                            }
                        case asciiLowerX:  // 'x' hexadecimal escape
                            // We already validated, so we know there's at least one digit:
                            var n = fromHexDigit(p[0])!
                            p += 1
                            // Optional second digit.
                            if let digit = fromHexDigit(p[0]) {
                                n = n &* 16 &+ digit
                                p += 1
                            }
                            out[0] = n
                            out += 1
                        case asciiLowerA:  // \a ("alert")
                            out[0] = asciiBell
                            out += 1
                        case asciiLowerB:  // \b
                            out[0] = asciiBackspace
                            out += 1
                        case asciiLowerF:  // \f
                            out[0] = asciiFormFeed
                            out += 1
                        case asciiLowerN:  // \n
                            out[0] = asciiNewLine
                            out += 1
                        case asciiLowerR:  // \r
                            out[0] = asciiCarriageReturn
                            out += 1
                        case asciiLowerT:  // \t
                            out[0] = asciiTab
                            out += 1
                        case asciiLowerV:  // \v
                            out[0] = asciiVerticalTab
                            out += 1
                        default:
                            // Remaining validated escapes (\' \" \? \\) stand for the
                            // escaped character itself.
                            out[0] = escaped
                            out += 1
                        }
                    default:
                        // Ordinary byte: copy through.
                        out[0] = byte
                        out += 1
                    }
                }
                p += 1  // Consume terminator
            }
        }
    }

    /// Parse one quoted string segment, up to and including `terminator`.
    ///
    /// Assumes the leading quote has already been consumed.  After the
    /// closing quote, trailing whitespace/comments are skipped.
    ///
    /// - Parameter terminator: The quote byte that closes the segment.
    /// - Returns: The segment's text with escapes expanded, or `nil` if the
    ///   segment is unterminated, contains a raw newline or carriage return,
    ///   ends right after a backslash, or cannot be converted/decoded.
    private mutating func parseStringSegment(terminator: UInt8) -> String? {
        let segmentStart = p
        var needsUnescape = false
        while p != end {
            let byte = p[0]
            switch byte {
            case terminator:
                let raw = utf8ToString(bytes: segmentStart, count: p - segmentStart)
                p += 1
                skipWhitespace()
                // Only pay for escape processing when a backslash was seen.
                guard let text = raw, needsUnescape else {
                    return raw
                }
                return decodeString(text)
            case asciiBackslash:  //  \
                p += 1
                guard p != end else {
                    return nil
                }
                needsUnescape = true
                // Step over the escaped byte so an escaped quote is not
                // mistaken for the terminator.
                p += 1
            case asciiNewLine, asciiCarriageReturn:
                // Can't have a newline in the middle of a raw string.
                p += 1
                return nil
            default:
                p += 1
            }
        }
        return nil  // Unterminated quoted string
    }

    /// Parse an unsigned integer literal at the current position.
    ///
    /// Three forms are recognized: hexadecimal (`0x` followed by hex
    /// digits), octal (a leading `0` followed by octal digits), and decimal
    /// (a leading `1`...`9`).  Digits are consumed until the first byte that
    /// does not belong to the literal; whitespace/comments after the literal
    /// are then skipped.
    ///
    /// - Returns: The parsed value.
    /// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
    ///   exhausted, does not start with a digit, or the value does not fit
    ///   in a `UInt64`.
    internal mutating func nextUInt() throws -> UInt64 {
        guard p != end else {
            throw TextFormatDecodingError.malformedNumber
        }
        let lead = p[0]
        p += 1
        switch lead {
        case asciiZero:  // leading '0' precedes octal or hex
            guard p != end else {
                // The TextFormat ended with a field value of zero.
                return 0
            }
            var value: UInt64 = 0
            if p[0] == asciiLowerX {  // 'x' => hex
                p += 1
                while p != end, let nibble = fromHexDigit(p[0]) {
                    // Another digit would shift bits off the top.
                    guard value <= UInt64.max / 16 else {
                        throw TextFormatDecodingError.malformedNumber
                    }
                    p += 1
                    value = value * 16 + UInt64(nibble)
                }
            } else {  // octal
                while p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                    let digit = UInt64(p[0] - asciiZero)
                    guard value <= UInt64.max / 8 else {
                        throw TextFormatDecodingError.malformedNumber
                    }
                    p += 1
                    value = value * 8 + digit
                }
            }
            skipWhitespace()
            return value
        case asciiOne...asciiNine:  // 1...9
            var value = UInt64(lead - asciiZero)
            while p != end, p[0] >= asciiZero, p[0] <= asciiNine {
                let digit = UInt64(p[0] - asciiZero)
                let (scaled, scaleOverflowed) = value.multipliedReportingOverflow(by: 10)
                let (next, addOverflowed) = scaled.addingReportingOverflow(digit)
                guard !scaleOverflowed, !addOverflowed else {
                    throw TextFormatDecodingError.malformedNumber
                }
                p += 1
                value = next
            }
            skipWhitespace()
            return value
        default:
            throw TextFormatDecodingError.malformedNumber
        }
    }

    /// Parse a signed integer literal at the current position.
    ///
    /// An optional leading `-` must be followed directly by a digit; the
    /// magnitude is then parsed with `nextUInt()`.
    ///
    /// - Returns: The parsed value.
    /// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
    ///   exhausted, the `-` is not followed by a digit, the magnitude is
    ///   malformed, or the value does not fit in an `Int64`.
    internal mutating func nextSInt() throws -> Int64 {
        guard p != end else {
            throw TextFormatDecodingError.malformedNumber
        }

        if p[0] != asciiMinus {
            // Non-negative: the magnitude must not exceed Int64.max.
            let magnitude = try nextUInt()
            guard magnitude <= UInt64(bitPattern: Int64.max) else {
                throw TextFormatDecodingError.malformedNumber
            }
            return Int64(bitPattern: magnitude)
        }

        p += 1  // consume '-'
        // character after '-' must be digit
        guard p != end, p[0] >= asciiZero, p[0] <= asciiNine else {
            throw TextFormatDecodingError.malformedNumber
        }
        let magnitude = try nextUInt()
        let minMagnitude: UInt64 = 0x8000_0000_0000_0000  // -Int64.min
        if magnitude > minMagnitude {
            // Too large negative number
            throw TextFormatDecodingError.malformedNumber
        }
        if magnitude == minMagnitude {
            // Special case for Int64.min, whose magnitude cannot be negated
            // as a positive Int64.
            return Int64.min
        }
        return -Int64(bitPattern: magnitude)
    }

    /// Parse a string value: one or more adjacent quoted segments (single or
    /// double quoted), which are decoded separately and concatenated.
    ///
    /// - Returns: The concatenated, unescaped text.
    /// - Throws: `TextFormatDecodingError.malformedText` if no quoted segment
    ///   starts at the current position or any segment fails to parse.
    internal mutating func nextStringValue() throws -> String {
        skipWhitespace()
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }
        let openingQuote = p[0]
        guard openingQuote == asciiSingleQuote || openingQuote == asciiDoubleQuote else {
            throw TextFormatDecodingError.malformedText
        }
        p += 1
        guard var text = parseStringSegment(terminator: openingQuote) else {
            throw TextFormatDecodingError.malformedText
        }

        // parseStringSegment() leaves us past any trailing whitespace, so a
        // quote here starts another segment of the same value.
        while p != end {
            let quote = p[0]
            guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
                break
            }
            p += 1
            guard let more = parseStringSegment(terminator: quote) else {
                throw TextFormatDecodingError.malformedText
            }
            text.append(more)
        }
        return text
    }

    /// Parse a bytes value.
    ///
    /// Protobuf Text Format allows a single bytes field to
    /// contain multiple quoted strings.  The values
    /// are separately decoded and then concatenated:
    ///  field1: "bytes" 'more bytes'
    ///        "and even more bytes"
    ///
    /// - Returns: The concatenated, decoded bytes.
    /// - Throws: `TextFormatDecodingError.malformedText` if no quoted segment
    ///   starts at the current position or any segment fails validation.
    internal mutating func nextBytesValue() throws -> Data {
        skipWhitespace()
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }
        var quote = p[0]
        guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
            throw TextFormatDecodingError.malformedText
        }

        var result = Data()
        while true {
            p += 1  // Consume the opening quote
            var sawBackslash = false
            // Validation leaves `p` at the start of the segment body.
            let n = try validateAndCountBytesFromString(terminator: quote, sawBackslash: &sawBackslash)
            if sawBackslash {
                // Escapes present: decode into a buffer of the exact size.
                var segment = Data(count: n)
                parseBytesFromString(terminator: quote, into: &segment)
                result.append(segment)
            } else {
                // No escapes: the body can be copied verbatim.
                result.append(Data(bytes: p, count: n))
                p += n + 1  // Skip string body + close quote
            }

            // If another quoted segment follows, go around again.
            skipWhitespace()
            guard p != end else {
                return result
            }
            quote = p[0]
            guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
                return result
            }
        }
    }

    // Tries to identify a sequence of UTF8 characters
    // that represent a numeric floating-point value.
    private mutating func tryParseFloatString() -> Double? {
        guard p != end else { return nil }
        let tokenStart = p
        var lead = p[0]

        // An optional leading '-' must be followed by at least one more byte.
        if lead == asciiMinus {
            p += 1
            if p == end {
                p = tokenStart
                return nil
            }
            lead = p[0]
        }

        // Check the first character after any sign.  Every rejection
        // rewinds `p` to where the token began so nothing is consumed.
        switch lead {
        case asciiOne...asciiNine:
            break
        case asciiZero:
            // A leading '0' may not be immediately followed by another digit.
            p += 1
            if p != end {
                let following = p[0]
                if following >= asciiZero && following <= asciiNine {
                    p = tokenStart
                    return nil
                }
            }
        case asciiPeriod:
            // A leading '.' is only acceptable when a digit follows it.
            p += 1
            guard p != end, p[0] >= asciiZero, p[0] <= asciiNine else {
                p = tokenStart
                return nil
            }
        default:
            p = tokenStart
            return nil
        }

        // Consume every byte that can appear in a float literal
        // (0...9, ., +, -, e, E).  An 'f'/'F' ends the literal; it is
        // not part of the text handed to the parser.
        var hasFloatSuffix = false
        scan: while p != end {
            switch p[0] {
            case asciiZero...asciiNine,
                asciiPeriod,
                asciiPlus,
                asciiMinus,
                asciiLowerE,
                asciiUpperE:
                p += 1
            case asciiLowerF, asciiUpperF:
                hasFloatSuffix = true
                break scan
            default:
                break scan
            }
        }

        // Whatever `doubleParser` makes of the scanned bytes is returned as-is.
        let value = doubleParser.utf8ToDouble(
            bytes: UnsafeRawBufferPointer(
                start: tokenStart,
                count: p - tokenStart
            ),
            finiteOnly: false
        )
        if hasFloatSuffix {
            // Step over the 'f'/'F' only after the literal has been parsed.
            p += 1
        }
        skipWhitespace()
        return value
    }

    // Skip specified characters if they all match
    private mutating func skipOptionalCharacters(bytes: [UInt8]) {
        // All-or-nothing match: remember the starting position so that a
        // partial match (or running out of input) consumes nothing.
        let rewindPoint = p
        for expected in bytes {
            guard p != end, p[0] == expected else {
                p = rewindPoint
                return
            }
            p += 1
        }
    }

    // Skip following keyword if it matches (case-insensitively)
    // the given keyword (specified as a series of bytes).
    private mutating func skipOptionalKeyword(bytes: [UInt8]) -> Bool {
        // `bytes` is compared against the lower-cased input, so the keyword
        // is expected to be given in lower case.  On any failure `p` is
        // restored to where it was on entry.
        let rewindPoint = p
        let caseOffset = asciiLowerA - asciiUpperA
        for expected in bytes {
            guard p != end else {
                p = rewindPoint
                return false
            }
            let raw = p[0]
            // Fold A...Z to a...z (Protobuf text keywords are case insensitive).
            let folded = (raw >= asciiUpperA && raw <= asciiUpperZ) ? raw + caseOffset : raw
            guard folded == expected else {
                p = rewindPoint
                return false
            }
            p += 1
        }

        // The keyword matched up to end of input.
        guard p != end else {
            return true
        }

        // A letter directly after the match means this is really a longer
        // identifier, not the keyword.
        let next = p[0]
        let continuesWithLetter =
            (next >= asciiUpperA && next <= asciiUpperZ)
            || (next >= asciiLowerA && next <= asciiLowerZ)
        if continuesWithLetter {
            p = rewindPoint
            return false
        }
        skipWhitespace()
        return true
    }

    // If the next token is the identifier "nan", return true.
    private mutating func skipOptionalNaN() -> Bool {
        let rewindPoint = p
        // "-nan" doesn't mean anything, but upstream handles it, so step
        // over any leading minus before checking for "nan".
        if p != end, p[0] == asciiMinus {
            p += 1
        }
        let nan = [asciiLowerN, asciiLowerA, asciiLowerN]
        guard skipOptionalKeyword(bytes: nan) else {
            // It wasn't "nan"; rewind in case we skipped a minus sign.
            p = rewindPoint
            return false
        }
        return true
    }

    // If the next token is a recognized spelling of "infinity",
    // return Float.infinity or -Float.infinity
    private mutating func skipOptionalInfinity() -> Float? {
        guard p != end else {
            return nil
        }
        let rewindPoint = p

        // An optional leading '-' selects negative infinity.
        let isNegative = p[0] == asciiMinus
        if isNegative {
            p += 1
        }

        // Both spellings are tried, short one first.  The short one cannot
        // swallow the front of "infinity" because a keyword match is
        // rejected when a letter follows it.
        let shortSpelling = [asciiLowerI, asciiLowerN, asciiLowerF]
        let longSpelling = [
            asciiLowerI, asciiLowerN, asciiLowerF, asciiLowerI,
            asciiLowerN, asciiLowerI, asciiLowerT, asciiLowerY,
        ]
        guard skipOptionalKeyword(bytes: shortSpelling) || skipOptionalKeyword(bytes: longSpelling) else {
            // Neither spelling matched; un-consume any minus sign.
            p = rewindPoint
            return nil
        }
        return isNegative ? -Float.infinity : Float.infinity
    }

    internal mutating func nextFloat() throws -> Float {
        // Try, in order: a numeric literal, "nan", then "inf"/"infinity".
        // If none of them matches, the token is not a valid number.
        if let parsed = tryParseFloatString() {
            return Float(parsed)
        }
        guard !skipOptionalNaN() else {
            return Float.nan
        }
        guard let infinity = skipOptionalInfinity() else {
            throw TextFormatDecodingError.malformedNumber
        }
        return infinity
    }

    internal mutating func nextDouble() throws -> Double {
        // Try, in order: a numeric literal, "nan", then "inf"/"infinity".
        // If none of them matches, the token is not a valid number.
        if let parsed = tryParseFloatString() {
            return parsed
        }
        guard !skipOptionalNaN() else {
            return Double.nan
        }
        guard let infinity = skipOptionalInfinity() else {
            throw TextFormatDecodingError.malformedNumber
        }
        // The infinity helper yields a Float; widen it for the caller.
        return Double(infinity)
    }

    internal mutating func nextBool() throws -> Bool {
        skipWhitespace()
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }

        // The first byte alone decides the value: 0/f/F are false and
        // 1/t/T are true.  For the letter forms the rest of the word
        // ("alse"/"rue", lower case only) is consumed if present.
        let first = p[0]
        p += 1
        let value: Bool
        switch first {
        case asciiZero:
            value = false
        case asciiOne:
            value = true
        case asciiLowerF, asciiUpperF:
            if p != end {
                skipOptionalCharacters(bytes: [asciiLowerA, asciiLowerL, asciiLowerS, asciiLowerE])
            }
            value = false
        case asciiLowerT, asciiUpperT:
            if p != end {
                skipOptionalCharacters(bytes: [asciiLowerR, asciiLowerU, asciiLowerE])
            }
            value = true
        default:
            throw TextFormatDecodingError.malformedText
        }

        // End of input is an acceptable end for the token.
        guard p != end else {
            return value
        }

        // Otherwise the token must be followed by whitespace, a comment
        // start, a separator, or a closing bracket; anything else means
        // the token was something longer than a boolean.
        switch p[0] {
        case asciiSpace,
            asciiTab,
            asciiNewLine,
            asciiCarriageReturn,
            asciiHash,
            asciiComma,
            asciiSemicolon,
            asciiCloseSquareBracket,
            asciiCloseCurlyBracket,
            asciiCloseAngleBracket:
            skipWhitespace()
            return value
        default:
            throw TextFormatDecodingError.malformedText
        }
    }

    /// Returns the next identifier if the next token begins with an ASCII
    /// letter, or nil (consuming nothing beyond leading whitespace) if it
    /// begins with anything else.
    ///
    /// Throws `malformedText` if the input ends before any token.
    internal mutating func nextOptionalEnumName() throws -> UnsafeRawBufferPointer? {
        skipWhitespace()
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }
        let first = p[0]
        let startsWithLetter =
            (first >= asciiLowerA && first <= asciiLowerZ)
            || (first >= asciiUpperA && first <= asciiUpperZ)
        guard startsWithLetter else {
            return nil
        }
        return parseUTF8Identifier()
    }

    /// Any URLs are syntactically (almost) identical to extension
    /// keys, so we share the code for those.
    internal mutating func nextOptionalAnyURL() throws -> String? {
        // Pure delegation: the bracketed-key parser already accepts '/'.
        let url = try nextOptionalExtensionKey()
        return url
    }

    /// Returns next extension key or nil if end-of-input or
    /// if next token is not an extension key.
    ///
    /// Throws an error if the next token starts with '[' but
    /// cannot be parsed as an extension key.
    ///
    /// Note: This accepts / characters to support Any URL parsing.
    /// Technically, Any URLs can contain / characters and extension
    /// key names cannot.  But in practice, accepting / characters for
    /// extension keys works fine, since the result just gets rejected
    /// when the key is looked up.
    internal mutating func nextOptionalExtensionKey() throws -> String? {
        skipWhitespace()
        // Only a '[' can begin an extension key; end of input or any
        // other byte means there is no key here.
        if p != end && p[0] == asciiOpenSquareBracket {
            return try parseExtensionKey()
        }
        return nil
    }

    /// Parse an [extension_field_name] from the input.  On entry `p` must
    /// point at the opening "[" (the caller has peeked at it but not yet
    /// consumed it); on success `p` is left after the closing "]" and any
    /// whitespace that follows it.
    /// This is also used for AnyURL, so we include "/".
    private mutating func parseExtensionKey() throws -> String {
        // The caller must have positioned `p` on the opening bracket.
        assert(p[0] == asciiOpenSquareBracket)
        p += 1
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }

        // The name itself must begin with an ASCII letter.
        let nameStart = p
        let first = p[0]
        let startsWithLetter =
            (first >= asciiLowerA && first <= asciiLowerZ)
            || (first >= asciiUpperA && first <= asciiUpperZ)
        guard startsWithLetter else {
            throw TextFormatDecodingError.malformedText
        }
        p += 1

        // Consume name characters until the closing bracket.  Running out
        // of input, or meeting any other byte, is an error.
        while true {
            guard p != end else {
                throw TextFormatDecodingError.malformedText
            }
            let ch = p[0]
            if ch == asciiCloseSquareBracket {
                break
            }
            switch ch {
            case asciiLowerA...asciiLowerZ,
                asciiUpperA...asciiUpperZ,
                asciiZero...asciiNine,
                asciiUnderscore,
                asciiPeriod,
                asciiForwardSlash:
                p += 1
            default:
                throw TextFormatDecodingError.malformedText
            }
        }

        // `p` now sits on the ']'; the name is everything before it.
        guard let extensionName = utf8ToString(bytes: nameStart, count: p - nameStart) else {
            throw TextFormatDecodingError.malformedText
        }
        p += 1  // Step over the ']'
        skipWhitespace()
        return extensionName
    }

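    /// Returns the text of the next key, or nil at end-of-input.
    ///
    /// Field names and field numbers are returned as written.  An extension
    /// key is returned wrapped in square brackets ("[ext.name]") when
    /// `allowExtensions` is true; otherwise a leading "[" throws
    /// `unknownField`.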
    internal mutating func nextKey(allowExtensions: Bool) throws -> String? {
        skipWhitespace()
        guard p != end else {
            return nil
        }
        switch p[0] {
        case asciiOpenSquareBracket:  // [
            guard allowExtensions else {
                throw TextFormatDecodingError.unknownField
            }
            // Re-wrap the parsed name in brackets for the caller.
            let extensionName = try parseExtensionKey()
            return "[\(extensionName)]"
        case asciiLowerA...asciiLowerZ,
            asciiUpperA...asciiUpperZ:  // a...z, A...Z
            return parseIdentifier()
        case asciiOne...asciiNine:  // 1...9 (field numbers are 123, not 0123)
            let numberStart = p
            p += 1
            while p != end {
                let digit = p[0]
                guard digit >= asciiZero && digit <= asciiNine else {
                    break
                }
                p += 1
                // Reject digit runs longer than `maxFieldNumLength`.
                if p - numberStart > maxFieldNumLength {
                    throw TextFormatDecodingError.malformedText
                }
            }
            // Capture the length before trailing whitespace moves `p`.
            let digitCount = p - numberStart
            skipWhitespace()
            // Safe, can't be invalid UTF-8 given the input (ASCII digits only).
            return utf8ToString(bytes: numberStart, count: digitCount)!
        default:
            throw TextFormatDecodingError.malformedText
        }
    }

    /// Parse a field name, look it up, and return the corresponding
    /// field number.
    ///
    /// returns nil at end-of-input
    ///
    /// Throws if field name cannot be parsed or if field name is
    /// unknown.
    ///
    /// This function accounts for as much as 2/3 of the total run
    /// time of the entire parse.
    internal mutating func nextFieldNumber(
        names: _NameMap,
        messageType: any Message.Type,
        terminator: UInt8?
    ) throws -> Int? {
        // Each pass either returns a known field number, returns nil at the
        // end of the message, throws, or skips one unknown/reserved field
        // (name and value) and loops to look at the next one.
        while true {
            skipWhitespace()
            if p == end {
                // End of input only ends the message cleanly when no closing
                // terminator was expected.
                if terminator == nil {
                    return nil
                } else {
                    // Never got the terminator.
                    throw TextFormatDecodingError.malformedText
                }
            }
            // Only read by the assert below: records that the field is being
            // skipped because it is reserved rather than because unknown
            // fields are being ignored.
            var isReserved = false
            let c = p[0]
            switch c {
            case asciiLowerA...asciiLowerZ,
                asciiUpperA...asciiUpperZ:  // a...z, A...Z
                // Field given by name.
                let key = parseUTF8Identifier()
                if let fieldNumber = names.number(forProtoName: key) {
                    return fieldNumber
                }
                // Not a known name: tolerated only if unknown fields are
                // ignored or the name is reserved.
                if !options.ignoreUnknownFields {
                    if names.isReserved(name: key) {
                        isReserved = true
                    } else {
                        throw TextFormatDecodingError.unknownField
                    }
                }
                // Unknown field name or reserved, break and skip
                break
            case asciiOpenSquareBracket:  // Start of an extension field
                let key = try parseExtensionKey()
                // With no extension map (`extensions` is nil) the lookup
                // yields nil and the key is treated as unknown.
                if let fieldNumber = extensions?.fieldNumberForProto(messageType: messageType, protoFieldName: key) {
                    return fieldNumber
                }
                if !options.ignoreUnknownExtensionFields {
                    throw TextFormatDecodingError.unknownField
                }
                // Unknown field name, break and skip
                break
            case asciiOne...asciiNine:  // 1-9 (field numbers are 123, not 0123)
                // Field given by number: accumulate the decimal digits.
                let start = p
                var fieldNum = Int(c) - Int(asciiZero)
                p += 1
                while p != end {
                    let c = p[0]
                    if c >= asciiZero && c <= asciiNine {
                        // Wrapping arithmetic; the length check below bounds
                        // how many digits can be accumulated.
                        fieldNum = fieldNum &* 10 &+ (Int(c) - Int(asciiZero))
                    } else {
                        break
                    }
                    p += 1
                    if p - start > maxFieldNumLength {
                        throw TextFormatDecodingError.malformedText
                    }
                }
                skipWhitespace()
                if names.names(for: fieldNum) != nil {
                    return fieldNum
                }
                if !options.ignoreUnknownFields {
                    // The digit count was bounded by `maxFieldNumLength` while
                    // parsing, so truncating to Int32 is treated as safe here.
                    if names.isReserved(number: Int32(truncatingIfNeeded: fieldNum)) {
                        isReserved = true
                    } else {
                        throw TextFormatDecodingError.unknownField
                    }
                }
                // Unknown field name or reserved, break and skip
                break
            default:
                // Anything else must be the closing terminator.  A nil
                // `terminator` never compares equal, so it falls through to
                // the throw.
                if c == terminator {
                    let _ = skipOptionalObjectEnd(c)
                    return nil
                }
                throw TextFormatDecodingError.malformedText
            }

            // Reaching here means the field was not recognized but is
            // allowed to be skipped.
            assert(options.ignoreUnknownFields || options.ignoreUnknownExtensionFields || isReserved)
            try skipUnknownFieldValue()
            // Skip any separator before looping around to try for another field.
            skipOptionalSeparator()
        }
    }

    // Helper to skip past an unknown field value, when called `p` will be pointing
    // at the first character after the unknown field name.
    internal mutating func skipUnknownFieldValue() throws {
        // This is modeled after the C++ text_format.cpp `ConsumeField()`
        //
        // The field's type is unknown, so it is guessed from the syntax:
        // - A non-message value is introduced by ":" and does not start with
        //   "{" or "<" (those open a message body).
        // - With no ":", or with "{" / "<" after the ":", the value has to
        //   be a message or the input is ill-formed.

        skipWhitespace()
        guard skipOptionalColon() else {
            // No ':' at all: only a message body is legal here.
            try skipUnknownMessageFieldValue()
            return
        }
        guard p != end else {
            // Nothing after the ':'?
            throw TextFormatDecodingError.malformedText
        }
        let first = p[0]
        if first == asciiOpenAngleBracket || first == asciiOpenCurlyBracket {
            try skipUnknownMessageFieldValue()
        } else {
            try skipUnknownPrimativeFieldValue()
        }
    }

    /// Helper to see if this could be the start of a hex or octal number so unknown field
    /// value parsing can decide how to parse/validate.
    private func mustParseNumberAsDecimal() -> Bool {
        // Peeks ahead without moving `p`.  `p` must not be at `end` on entry.
        //
        // NOTE: If we run out of characters or can't tell, just say it
        // doesn't have to be decimal and let the other code's error
        // handling deal with it.
        var cursor = p
        var ch = cursor[0]

        // Floats or decimals can have a leading '-'
        if ch == asciiMinus {
            cursor += 1
            guard cursor != end else { return false }
            ch = cursor[0]
        }

        // Only a leading '0' can force the integer path.  Anything else,
        // including "(-)." which is clearly a float, is treated as floating
        // point: even if it was a decimal, it might be too large for a
        // UInt64 but would still be valid for a float/double field.
        guard ch == asciiZero else {
            return false
        }

        cursor += 1
        guard cursor != end else {
            return true  // "(-)0[end]" : parse it as decimal
        }

        // "(-)0x"     : hex   - must parse as decimal
        // "(-)0[0-7]" : octal - must parse as decimal
        // "(-)0." and everything else : float
        let afterZero = cursor[0]
        return afterZero == asciiLowerX || (afterZero >= asciiZero && afterZero <= asciiSeven)
    }

    private mutating func skipUnknownPrimativeFieldValue(canBeList: Bool = true) throws {
        // This is modeled after the C++ text_format.cpp `SkipFieldValue()`
        //
        // `p` must not be at `end` on entry.  `canBeList` is false while
        // skipping the elements of a list, since lists cannot nest.
        let first = p[0]

        // Quoted value.  The field could be 'bytes', so it is skipped as
        // bytes: parsing it as a string might fail.
        if first == asciiSingleQuote || first == asciiDoubleQuote {
            _ = try nextBytesValue()
            return
        }

        // "[...]" list of values.
        if skipOptionalBeginArray() {
            guard canBeList else {
                // Have encountered an array as an element in an array, that isn't legal.
                throw TextFormatDecodingError.malformedText
            }
            if skipOptionalEndArray() {
                return  // Empty list.
            }
            while true {
                guard p != end else {
                    throw TextFormatDecodingError.malformedText
                }
                let elementStart = p[0]
                if elementStart == asciiOpenAngleBracket || elementStart == asciiOpenCurlyBracket {
                    try skipUnknownMessageFieldValue()
                } else {
                    try skipUnknownPrimativeFieldValue(canBeList: false)
                }
                if skipOptionalEndArray() {
                    return
                }
                // More elements must be separated by a comma.
                try skipRequiredComma()
            }
        }

        // Bare identifier: an enum name.
        // NOTE: This will also cover "true", "false" for booleans, "nan"/"inf" for floats.
        if try nextOptionalEnumName() != nil {
            skipWhitespace()  // `nextOptionalEnumName()` doesn't skip trailing whitespace
            return
        }

        // Numeric value.
        // NOTE: We don't need to special case "-nan"/"-inf", as they won't be forced
        // to parse as decimal, and `nextDouble()` already supports them.
        if !mustParseNumberAsDecimal() {
            _ = try nextDouble()
        } else if first == asciiMinus {
            _ = try nextSInt()
        } else {
            _ = try nextUInt()
        }
    }

    private mutating func skipUnknownMessageFieldValue() throws {
        // This is modeled after the C++ text_format.cpp `SkipFieldMessage()`

        // Consume the opener and learn which byte closes this message.
        let closer = try skipObjectStart()
        while true {
            if skipOptionalObjectEnd(closer) {
                return
            }
            guard p != end else {
                // Input ended before the closing terminator.
                throw TextFormatDecodingError.malformedText
            }
            // Each entry is a field name or extension name ("[ext.name]")
            // followed by its value and an optional separator.
            guard try nextKey(allowExtensions: true) != nil else {
                throw TextFormatDecodingError.malformedText
            }
            try skipUnknownFieldValue()
            skipOptionalSeparator()
        }
    }

    /// Skips leading whitespace, then consumes the byte `c` and any
    /// whitespace after it.  Throws `malformedText` if the next byte is
    /// not `c` or the input has ended.
    private mutating func skipRequiredCharacter(_ c: UInt8) throws {
        skipWhitespace()
        guard p != end, p[0] == c else {
            throw TextFormatDecodingError.malformedText
        }
        p += 1
        skipWhitespace()
    }

    /// Consumes a ',' (with surrounding whitespace); throws
    /// `malformedText` if the next token is anything else.
    internal mutating func skipRequiredComma() throws {
        let comma = asciiComma
        try skipRequiredCharacter(comma)
    }

    /// Consumes a ':' (with surrounding whitespace); throws
    /// `malformedText` if the next token is anything else.
    internal mutating func skipRequiredColon() throws {
        let colon = asciiColon
        try skipRequiredCharacter(colon)
    }

    /// If the byte at `p` is `c`, consumes it plus any following
    /// whitespace and returns true; otherwise consumes nothing and
    /// returns false.  Leading whitespace is NOT skipped here.
    private mutating func skipOptionalCharacter(_ c: UInt8) -> Bool {
        guard p != end, p[0] == c else {
            return false
        }
        p += 1
        skipWhitespace()
        return true
    }

    /// Consumes a ':' at the current position, if there is one.
    /// Returns whether it was consumed.
    internal mutating func skipOptionalColon() -> Bool {
        let consumed = skipOptionalCharacter(asciiColon)
        return consumed
    }

    /// Consumes a ']' at the current position, if there is one.
    /// Returns whether it was consumed.
    internal mutating func skipOptionalEndArray() -> Bool {
        let consumed = skipOptionalCharacter(asciiCloseSquareBracket)
        return consumed
    }

    /// Consumes a '[' at the current position, if there is one.
    /// Returns whether it was consumed.
    internal mutating func skipOptionalBeginArray() -> Bool {
        let consumed = skipOptionalCharacter(asciiOpenSquareBracket)
        return consumed
    }

    /// Consumes the object terminator `c` at the current position, if
    /// present, and on success also decrements the recursion depth.
    /// Returns whether the terminator was consumed.
    internal mutating func skipOptionalObjectEnd(_ c: UInt8) -> Bool {
        guard skipOptionalCharacter(c) else {
            return false
        }
        decrementRecursionDepth()
        return true
    }

    /// Consumes one ',' or ';' at the current position (plus any
    /// whitespace after it), if there is one.  Does nothing otherwise.
    internal mutating func skipOptionalSeparator() {
        guard p != end else {
            return
        }
        switch p[0] {
        case asciiComma, asciiSemicolon:
            p += 1
            skipWhitespace()
        default:
            break
        }
    }

    /// Returns the character that should end this field.
    /// E.g., if object starts with "{", returns "}"
    internal mutating func skipObjectStart() throws -> UInt8 {
        // The depth is bumped before looking at the input; this call may
        // itself throw.
        try incrementRecursionDepth()
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }

        // The byte is consumed (along with trailing whitespace) before it
        // is checked, so `p` has moved even when the throw below is taken.
        let opener = p[0]
        p += 1
        skipWhitespace()
        if opener == asciiOpenCurlyBracket {  // {
            return asciiCloseCurlyBracket  // }
        }
        if opener == asciiOpenAngleBracket {  // <
            return asciiCloseAngleBracket  // >
        }
        throw TextFormatDecodingError.malformedText
    }
}