ordered_code.cc 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. /*
  2. * Copyright 2017 Google
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "Firestore/Port/ordered_code.h"
  17. #include <assert.h>
  18. #include <absl/base/internal/endian.h>
  19. #include <absl/base/internal/unaligned_access.h>
  20. #include <absl/base/port.h>
  21. #include <leveldb/db.h> // For Slice
  22. #include "Firestore/Port/bits.h"
  23. #define UNALIGNED_LOAD32 ABSL_INTERNAL_UNALIGNED_LOAD32
  24. #define UNALIGNED_LOAD64 ABSL_INTERNAL_UNALIGNED_LOAD64
  25. #define UNALIGNED_STORE32 ABSL_INTERNAL_UNALIGNED_STORE32
  26. #define UNALIGNED_STORE64 ABSL_INTERNAL_UNALIGNED_STORE64
  27. // We encode a string in different ways depending on whether the item
  28. // should be in lexicographically increasing or decreasing order.
  29. //
  30. //
  31. // Lexicographically increasing order
  32. //
  33. // We want a string-to-string mapping F(x) such that for any two strings
  34. //
  35. // x < y => F(x) < F(y)
  36. //
  37. // In addition to the normal characters '\x00' through '\xff', we want to
  38. // encode a few extra symbols in strings:
  39. //
  40. // <sep> Separator between items
  41. // <infinity> Infinite string
  42. //
  43. // Therefore we need an alphabet with at least 258 symbols. Each
  44. // character '\1' through '\xfe' is mapped to itself. The other four are
  45. // encoded into two-letter sequences starting with '\0' and '\xff':
  46. //
  47. // <sep> encoded as => \0\1
  48. // \0 encoded as => \0\xff
  49. // \xff encoded as => \xff\x00
  50. // <infinity> encoded as => \xff\xff
  51. //
  52. // The remaining two-letter sequences starting with '\0' and '\xff' are
  53. // currently unused.
  54. //
  55. // F(<infinity>) is defined above. For any finite string x, F(x) is the
  56. // the encodings of x's characters followed by the encoding for <sep>. The
  57. // ordering of two finite strings is the same as the ordering of the
  58. // respective characters at the first position where they differ, which in
  59. // turn is the same as the ordering of the encodings of those two
  60. // characters. Moreover, for every finite string x, F(x) < F(<infinity>).
  61. namespace Firestore {
  62. using leveldb::Slice;
  63. static const char kEscape1 = '\000';
  64. static const char kNullCharacter = '\xff'; // Combined with kEscape1
  65. static const char kSeparator = '\001'; // Combined with kEscape1
  66. static const char kEscape2 = '\xff';
  67. static const char kInfinity = '\xff'; // Combined with kEscape2
  68. static const char kFFCharacter = '\000'; // Combined with kEscape2
  69. static const char kEscape1_Separator[2] = {kEscape1, kSeparator};
  70. // Append to "*dest" the "len" bytes starting from "*src".
  71. inline static void AppendBytes(std::string* dest, const char* src, size_t len) {
  72. dest->append(src, len);
  73. }
  74. inline bool IsSpecialByte(char c) { return ((unsigned char)(c + 1)) < 2; }
  75. // Returns 0 if one or more of the bytes in the specified uint32 value
  76. // are the special values 0 or 255, and returns 4 otherwise. The
  77. // result of this routine can be added to "p" to either advance past
  78. // the next 4 bytes if they do not contain a special byte, or to
  79. // remain on this set of four bytes if they contain the next special
  80. // byte occurrence.
  81. //
  82. // REQUIRES: v is the value of loading the next 4 bytes from "*p" (we
  83. // pass in v rather than loading it because in some cases, the client
  84. // may already have the value in a register: "p" is just used for
  85. // assertion checking).
  86. inline int AdvanceIfNoSpecialBytes(uint32_t v_32, const char* p) {
  87. assert(UNALIGNED_LOAD32(p) == v_32);
  88. // See comments in SkipToNextSpecialByte if you wish to
  89. // understand this expression (which checks for the occurrence
  90. // of the special byte values 0 or 255 in any of the bytes of v_32).
  91. if ((v_32 - 0x01010101u) & ~(v_32 + 0x01010101u) & 0x80808080u) {
  92. // Special byte is in p[0..3]
  93. assert(IsSpecialByte(p[0]) || IsSpecialByte(p[1]) || IsSpecialByte(p[2]) ||
  94. IsSpecialByte(p[3]));
  95. return 0;
  96. } else {
  97. assert(!IsSpecialByte(p[0]));
  98. assert(!IsSpecialByte(p[1]));
  99. assert(!IsSpecialByte(p[2]));
  100. assert(!IsSpecialByte(p[3]));
  101. return 4;
  102. }
  103. }
  104. // Return a pointer to the first byte in the range "[start..limit)"
  105. // whose value is 0 or 255 (kEscape1 or kEscape2). If no such byte
  106. // exists in the range, returns "limit".
  107. inline const char* SkipToNextSpecialByte(const char* start, const char* limit) {
  108. // If these constants were ever changed, this routine needs to change
  109. assert(kEscape1 == 0);
  110. assert((kEscape2 & 0xffu) == 255u);
  111. const char* p = start;
  112. while (p + 8 <= limit) {
  113. // Find out if any of the next 8 bytes are either 0 or 255 (our
  114. // two characters that require special handling). We do this using
  115. // the technique described in:
  116. //
  117. // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
  118. //
  119. // We use the test (x + 1) < 2 to check x = 0 or -1(255)
  120. //
  121. // If x is a byte value (0x00..0xff):
  122. // (x - 0x01) & 0x80 is true only when x = 0x81..0xff, 0x00
  123. // ~(x + 0x01) & 0x80 is true only when x = 0x00..0x7e, 0xff
  124. // The intersection of the above two sets is x = 0x00 or 0xff.
  125. // We can ignore carry between bytes because only x = 0x00 or 0xff
  126. // can cause carry in the expression -- and such x already makes the
  127. // result value non-zero.
  128. uint64_t v = UNALIGNED_LOAD64(p);
  129. bool hasZeroOr255Byte = (v - 0x0101010101010101ull) &
  130. ~(v + 0x0101010101010101ull) &
  131. 0x8080808080808080ull;
  132. if (!hasZeroOr255Byte) {
  133. // No special values in the next 8 bytes
  134. p += 8;
  135. } else {
  136. // We know the next 8 bytes have a special byte: find it
  137. #ifdef IS_LITTLE_ENDIAN
  138. uint32_t v_32 = static_cast<uint32_t>(v); // Low 32 bits of v
  139. #else
  140. uint32_t v_32 = UNALIGNED_LOAD32(p);
  141. #endif
  142. // Test 32 bits at once to see if special byte is in next 4 bytes
  143. // or the following 4 bytes
  144. p += AdvanceIfNoSpecialBytes(v_32, p);
  145. if (IsSpecialByte(p[0])) return p;
  146. if (IsSpecialByte(p[1])) return p + 1;
  147. if (IsSpecialByte(p[2])) return p + 2;
  148. assert(IsSpecialByte(p[3])); // Last byte must be the special one
  149. return p + 3;
  150. }
  151. }
  152. if (p + 4 <= limit) {
  153. uint32_t v_32 = UNALIGNED_LOAD32(p);
  154. p += AdvanceIfNoSpecialBytes(v_32, p);
  155. }
  156. while (p < limit && !IsSpecialByte(*p)) {
  157. p++;
  158. }
  159. return p;
  160. }
  161. // Expose SkipToNextSpecialByte for testing purposes
  162. const char* OrderedCode::TEST_SkipToNextSpecialByte(const char* start,
  163. const char* limit) {
  164. return SkipToNextSpecialByte(start, limit);
  165. }
  166. // Helper routine to encode "s" and append to "*dest", escaping special
  167. // characters.
  168. inline static void EncodeStringFragment(std::string* dest, Slice s) {
  169. const char* p = s.data();
  170. const char* limit = p + s.size();
  171. const char* copy_start = p;
  172. while (true) {
  173. p = SkipToNextSpecialByte(p, limit);
  174. if (p >= limit) break; // No more special characters that need escaping
  175. char c = *(p++);
  176. assert(IsSpecialByte(c));
  177. if (c == kEscape1) {
  178. AppendBytes(dest, copy_start, p - copy_start - 1);
  179. dest->push_back(kEscape1);
  180. dest->push_back(kNullCharacter);
  181. copy_start = p;
  182. } else {
  183. assert(c == kEscape2);
  184. AppendBytes(dest, copy_start, p - copy_start - 1);
  185. dest->push_back(kEscape2);
  186. dest->push_back(kFFCharacter);
  187. copy_start = p;
  188. }
  189. }
  190. if (p > copy_start) {
  191. AppendBytes(dest, copy_start, p - copy_start);
  192. }
  193. }
  194. void OrderedCode::WriteString(std::string* dest, Slice s) {
  195. EncodeStringFragment(dest, s);
  196. AppendBytes(dest, kEscape1_Separator, 2);
  197. }
  198. // Return number of bytes needed to encode the non-length portion
  199. // of val in ordered coding. Returns number in range [0,8].
  200. static inline unsigned int OrderedNumLength(uint64_t val) {
  201. const int lg = Bits::Log2Floor64(val); // -1 if val==0
  202. return static_cast<unsigned int>(lg + 1 + 7) / 8;
  203. }
  204. // Append n bytes from src to *dst.
  205. // REQUIRES: n <= 9
  206. // REQUIRES: src[0..8] are readable bytes (even if n is smaller)
  207. //
  208. // If we use string::append() instead of this routine, it increases the
  209. // runtime of WriteNumIncreasing from ~9ns to ~13ns.
  210. static inline void AppendUpto9(std::string* dst, const char* src,
  211. unsigned int n) {
  212. dst->append(src, 9); // Fixed-length append
  213. const size_t extra = 9 - n; // How many extra bytes we added
  214. dst->erase(dst->size() - extra, extra);
  215. }
  216. void OrderedCode::WriteNumIncreasing(std::string* dest, uint64_t val) {
  217. // Values are encoded with a single byte length prefix, followed
  218. // by the actual value in big-endian format with leading 0 bytes
  219. // dropped.
  220. // 8 bytes for value plus one byte for length. In addition, we have
  221. // 8 extra bytes at the end so that we can have a fixed-length append
  222. // call on *dest.
  223. char buf[17];
  224. UNALIGNED_STORE64(buf + 1, absl::ghtonll(val)); // buf[0] may be needed for length
  225. const unsigned int length = OrderedNumLength(val);
  226. char* start = buf + 9 - length - 1;
  227. *start = length;
  228. AppendUpto9(dest, start, length + 1);
  229. }
  230. inline static void WriteInfinityInternal(std::string* dest) {
  231. // Make an array so that we can just do one string operation for performance
  232. static const char buf[2] = {kEscape2, kInfinity};
  233. dest->append(buf, 2);
  234. }
  235. void OrderedCode::WriteInfinity(std::string* dest) {
  236. WriteInfinityInternal(dest);
  237. }
  238. void OrderedCode::WriteTrailingString(std::string* dest, Slice str) {
  239. dest->append(str.data(), str.size());
  240. }
  241. // Parse the encoding of a string previously encoded with or without
  242. // inversion. If parse succeeds, return true, consume encoding from
  243. // "*src", and if result != NULL append the decoded string to "*result".
  244. // Otherwise, return false and leave both undefined.
  245. inline static bool ReadStringInternal(Slice* src, std::string* result) {
  246. const char* start = src->data();
  247. const char* string_limit = src->data() + src->size();
  248. // We only scan up to "limit-2" since a valid string must end with
  249. // a two character terminator: 'kEscape1 kSeparator'
  250. const char* limit = string_limit - 1;
  251. const char* copy_start = start;
  252. while (true) {
  253. start = SkipToNextSpecialByte(start, limit);
  254. if (start >= limit) break; // No terminator sequence found
  255. const char c = *(start++);
  256. // If inversion is required, instead of inverting 'c', we invert the
  257. // character constants to which 'c' is compared. We get the same
  258. // behavior but save the runtime cost of inverting 'c'.
  259. assert(IsSpecialByte(c));
  260. if (c == kEscape1) {
  261. if (result) {
  262. AppendBytes(result, copy_start, start - copy_start - 1);
  263. }
  264. // kEscape1 kSeparator ends component
  265. // kEscape1 kNullCharacter represents '\0'
  266. const char next = *(start++);
  267. if (next == kSeparator) {
  268. src->remove_prefix(start - src->data());
  269. return true;
  270. } else if (next == kNullCharacter) {
  271. if (result) {
  272. *result += '\0';
  273. }
  274. } else {
  275. return false;
  276. }
  277. copy_start = start;
  278. } else {
  279. assert(c == kEscape2);
  280. if (result) {
  281. AppendBytes(result, copy_start, start - copy_start - 1);
  282. }
  283. // kEscape2 kFFCharacter represents '\xff'
  284. // kEscape2 kInfinity is an error
  285. const char next = *(start++);
  286. if (next == kFFCharacter) {
  287. if (result) {
  288. *result += '\xff';
  289. }
  290. } else {
  291. return false;
  292. }
  293. copy_start = start;
  294. }
  295. }
  296. return false;
  297. }
  298. bool OrderedCode::ReadString(Slice* src, std::string* result) {
  299. return ReadStringInternal(src, result);
  300. }
  301. bool OrderedCode::ReadNumIncreasing(Slice* src, uint64_t* result) {
  302. if (src->empty()) {
  303. return false; // Not enough bytes
  304. }
  305. // Decode length byte
  306. const int len = static_cast<unsigned char>((*src)[0]);
  307. // If len > 0 and src is longer than 1, the first byte of "payload"
  308. // must be non-zero (otherwise the encoding is not minimal).
  309. // In opt mode, we don't enforce that encodings must be minimal.
  310. assert(0 == len || src->size() == 1 || (*src)[1] != '\0');
  311. if (len + 1 > src->size() || len > 8) {
  312. return false; // Not enough bytes or too many bytes
  313. }
  314. if (result) {
  315. uint64_t tmp = 0;
  316. for (int i = 0; i < len; i++) {
  317. tmp <<= 8;
  318. tmp |= static_cast<unsigned char>((*src)[1 + i]);
  319. }
  320. *result = tmp;
  321. }
  322. src->remove_prefix(len + 1);
  323. return true;
  324. }
  325. inline static bool ReadInfinityInternal(Slice* src) {
  326. if (src->size() >= 2 && ((*src)[0] == kEscape2) && ((*src)[1] == kInfinity)) {
  327. src->remove_prefix(2);
  328. return true;
  329. } else {
  330. return false;
  331. }
  332. }
  333. bool OrderedCode::ReadInfinity(Slice* src) { return ReadInfinityInternal(src); }
  334. inline static bool ReadStringOrInfinityInternal(Slice* src, std::string* result,
  335. bool* inf) {
  336. if (ReadInfinityInternal(src)) {
  337. if (inf) *inf = true;
  338. return true;
  339. }
  340. // We don't use ReadStringInternal here because that would inline
  341. // the whole encoded string parsing code here. Depending on INVERT, only
  342. // one of the following two calls will be generated at compile time.
  343. bool success = OrderedCode::ReadString(src, result);
  344. if (success) {
  345. if (inf) *inf = false;
  346. return true;
  347. } else {
  348. return false;
  349. }
  350. }
  351. bool OrderedCode::ReadStringOrInfinity(Slice* src, std::string* result,
  352. bool* inf) {
  353. return ReadStringOrInfinityInternal(src, result, inf);
  354. }
  355. bool OrderedCode::ReadTrailingString(Slice* src, std::string* result) {
  356. if (result) result->assign(src->data(), src->size());
  357. src->remove_prefix(src->size());
  358. return true;
  359. }
  360. void OrderedCode::TEST_Corrupt(std::string* str, int k) {
  361. int seen_seps = 0;
  362. for (int i = 0; i < str->size() - 1; i++) {
  363. if ((*str)[i] == kEscape1 && (*str)[i + 1] == kSeparator) {
  364. seen_seps++;
  365. if (seen_seps == k) {
  366. (*str)[i + 1] = kSeparator + 1;
  367. return;
  368. }
  369. }
  370. }
  371. }
  372. // Signed number encoding/decoding /////////////////////////////////////
  373. //
  374. // The format is as follows:
  375. //
  376. // The first bit (the most significant bit of the first byte)
  377. // represents the sign, 0 if the number is negative and
  378. // 1 if the number is >= 0.
  379. //
  380. // Any unbroken sequence of successive bits with the same value as the sign
  381. // bit, up to 9 (the 8th and 9th are the most significant bits of the next
  382. // byte), are size bits that count the number of bytes after the first byte.
  383. // That is, the total length is between 1 and 10 bytes.
  384. //
  385. // The value occupies the bits after the sign bit and the "size bits"
  386. // till the end of the string, in network byte order. If the number
  387. // is negative, the bits are in 2-complement.
  388. //
  389. //
  390. // Example 1: number 0x424242 -> 4 byte big-endian hex string 0xf0424242:
  391. //
  392. // +---------------+---------------+---------------+---------------+
  393. // 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0
  394. // +---------------+---------------+---------------+---------------+
  395. // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
  396. // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
  397. // | | | | payload: the remaining bits after the sign and size bits
  398. // | | | | and the delimiter bit, the value is 0x424242
  399. // | | | |
  400. // | size bits: 3 successive bits with the same value as the sign bit
  401. // | (followed by a delimiter bit with the opposite value)
  402. // | mean that there are 3 bytes after the first byte, 4 total
  403. // |
  404. // sign bit: 1 means that the number is non-negative
  405. //
  406. // Example 2: negative number -0x800 -> 2 byte big-endian hex string 0x3800:
  407. //
  408. // +---------------+---------------+
  409. // 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0
  410. // +---------------+---------------+
  411. // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
  412. // | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
  413. // | | payload: the remaining bits after the sign and size bits and the
  414. // | | delimiter bit, 2-complement because of the negative sign,
  415. // | | value is ~0x7ff, represents the value -0x800
  416. // | |
  417. // | size bits: 1 bit with the same value as the sign bit
  418. // | (followed by a delimiter bit with the opposite value)
  419. // | means that there is 1 byte after the first byte, 2 total
  420. // |
  421. // sign bit: 0 means that the number is negative
  422. //
  423. //
  424. // Compared with the simpler unsigned format used for uint64_t numbers,
  425. // this format is more compact for small numbers, namely one byte encodes
  426. // numbers in the range [-64,64), two bytes cover the range [-2^13,2^13), etc.
  427. // In general, n bytes encode numbers in the range [-2^(n*7-1),2^(n*7-1)).
  428. // (The cross-over point for compactness of representation is 8 bytes,
  429. // where this format only covers the range [-2^55,2^55),
  430. // whereas an encoding with sign bit and length in the first byte and
  431. // payload in all following bytes would cover [-2^56,2^56).)
  432. static const int kMaxSigned64Length = 10;
  433. // This array maps encoding length to header bits in the first two bytes.
  434. static const char kLengthToHeaderBits[1 + kMaxSigned64Length][2] = {
  435. {0, 0}, {'\x80', 0}, {'\xc0', 0}, {'\xe0', 0},
  436. {'\xf0', 0}, {'\xf8', 0}, {'\xfc', 0}, {'\xfe', 0},
  437. {'\xff', 0}, {'\xff', '\x80'}, {'\xff', '\xc0'}};
  438. // This array maps encoding lengths to the header bits that overlap with
  439. // the payload and need fixing when reading.
  440. static const uint64_t kLengthToMask[1 + kMaxSigned64Length] = {
  441. 0ULL,
  442. 0x80ULL,
  443. 0xc000ULL,
  444. 0xe00000ULL,
  445. 0xf0000000ULL,
  446. 0xf800000000ULL,
  447. 0xfc0000000000ULL,
  448. 0xfe000000000000ULL,
  449. 0xff00000000000000ULL,
  450. 0x8000000000000000ULL,
  451. 0ULL};
  452. // This array maps the number of bits in a number to the encoding
  453. // length produced by WriteSignedNumIncreasing.
  454. // For positive numbers, the number of bits is 1 plus the most significant
  455. // bit position (the highest bit position in a positive int64_t is 63).
  456. // For a negative number n, we count the bits in ~n.
  457. // That is, length = kBitsToLength[Bits::Log2Floor64(n < 0 ? ~n : n) + 1].
  458. static const int8_t kBitsToLength[1 + 63] = {
  459. 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
  460. 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 7,
  461. 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10};
  462. // Calculates the encoding length in bytes of the signed number n.
  463. static inline int SignedEncodingLength(int64_t n) {
  464. return kBitsToLength[Bits::Log2Floor64(n < 0 ? ~n : n) + 1];
  465. }
  466. // Slightly faster version for n > 0.
  467. static inline int SignedEncodingLengthPositive(int64_t n) {
  468. return kBitsToLength[Bits::Log2FloorNonZero64(n) + 1];
  469. }
  470. void OrderedCode::WriteSignedNumIncreasing(std::string* dest, int64_t val) {
  471. const uint64_t x = val < 0 ? ~val : val;
  472. if (x < 64) { // fast path for encoding length == 1
  473. *dest += kLengthToHeaderBits[1][0] ^ val;
  474. return;
  475. }
  476. // buf = val in network byte order, sign extended to 10 bytes
  477. const char sign_byte = val < 0 ? '\xff' : '\0';
  478. char buf[10] = {
  479. sign_byte, sign_byte,
  480. };
  481. UNALIGNED_STORE64(buf + 2, absl::ghtonll(val));
  482. static_assert(sizeof(buf) == kMaxSigned64Length, "max length size mismatch");
  483. const int len = SignedEncodingLengthPositive(x);
  484. assert(len >= 2);
  485. char* const begin = buf + sizeof(buf) - len;
  486. begin[0] ^= kLengthToHeaderBits[len][0];
  487. begin[1] ^= kLengthToHeaderBits[len][1]; // ok because len >= 2
  488. dest->append(begin, len);
  489. }
  490. bool OrderedCode::ReadSignedNumIncreasing(Slice* src, int64_t* result) {
  491. if (src->empty()) return false;
  492. const uint64_t xor_mask = (!((*src)[0] & 0x80)) ? ~0ULL : 0ULL;
  493. const unsigned char first_byte = (*src)[0] ^ (xor_mask & 0xff);
  494. // now calculate and test length, and set x to raw (unmasked) result
  495. int len;
  496. uint64_t x;
  497. if (first_byte != 0xff) {
  498. len = 7 - Bits::Log2FloorNonZero(first_byte ^ 0xff);
  499. if (src->size() < len) return false;
  500. x = xor_mask; // sign extend using xor_mask
  501. for (int i = 0; i < len; ++i)
  502. x = (x << 8) | static_cast<unsigned char>((*src)[i]);
  503. } else {
  504. len = 8;
  505. if (src->size() < len) return false;
  506. const unsigned char second_byte = (*src)[1] ^ (xor_mask & 0xff);
  507. if (second_byte >= 0x80) {
  508. if (second_byte < 0xc0) {
  509. len = 9;
  510. } else {
  511. const unsigned char third_byte = (*src)[2] ^ (xor_mask & 0xff);
  512. if (second_byte == 0xc0 && third_byte < 0x80) {
  513. len = 10;
  514. } else {
  515. return false; // either len > 10 or len == 10 and #bits > 63
  516. }
  517. }
  518. if (src->size() < len) return false;
  519. }
  520. x = absl::gntohll(UNALIGNED_LOAD64(src->data() + len - 8));
  521. }
  522. x ^= kLengthToMask[len]; // remove spurious header bits
  523. assert(len == SignedEncodingLength(x));
  524. if (result) *result = x;
  525. src->remove_prefix(len);
  526. return true;
  527. }
  528. } // namespace Firestore