--
f28d30df5769bb832dec3ff36d2fcd2bcdf494a3 by Shaindel Schwartz <shaindel@google.com>: Internal change PiperOrigin-RevId: 201046831 -- 711715a78b7e53dfaafd4d7f08a74e76db22af88 by Mark Barolak <mbar@google.com>: Internal fix PiperOrigin-RevId: 201043684 -- 64b53edd6bf1fa48f74e7f5d33f00f80d5089147 by Shaindel Schwartz <shaindel@google.com>: Remove extra whitespace PiperOrigin-RevId: 201041989 -- 0bdd2a0b33657b688e4a04aeba9ebba47e4dc6ca by Shaindel Schwartz <shaindel@google.com>: Whitespace fix. PiperOrigin-RevId: 201034413 -- 3deb0ac296ef1b74c4789e114a8a8bf53253f26b by Shaindel Schwartz <shaindel@google.com>: Scrub build tags. No functional changes. PiperOrigin-RevId: 201032927 -- da75d0f8b73baa7e8f4e9a092bba546012ed3b71 by Alex Strelnikov <strel@google.com>: Internal change. PiperOrigin-RevId: 201026131 -- 6815d80caa19870d0c441b6b9816c68db41393a5 by Tom Manshreck <shreck@google.com>: Add documentation for our LTS snapshot branches PiperOrigin-RevId: 201025191 -- 64c3b02006f39e6a8127bbabf9ec947fb45b6504 by Greg Falcon <gfalcon@google.com>: Provide absl::from_chars for double and float types. This is a forward-compatible implementation of std::from_chars from C++17. This provides exact "round_to_nearest" conversions, and has some nice properties: * Works with string_view (it can convert numbers from non-NUL-terminated buffers) * Never allocates memory * Faster than the standard library strtod() in our toolchain * Uses integer math in its calculations, so is unaffected by floating point environment * Unaffected by C locale Also change SimpleAtod/SimpleAtoi to use this new API under the hood. PiperOrigin-RevId: 201003324 -- 542869258eb100779497c899103dc96aced52749 by Greg Falcon <gfalcon@google.com>: Internal change PiperOrigin-RevId: 200999200 -- 3aba192775c7f80e2cd7f221b0a73537823c54ea by Gennadiy Rozental <rogeeff@google.com>: Internal change PiperOrigin-RevId: 200947470 -- daf9b9feedd748d5364a4c06165b7cb7604d3e1e by Mark Barolak <mbar@google.com>: Add an absl:: qualification to a usage of base_internal::SchedulingMode outside of an absl:: namespace. PiperOrigin-RevId: 200748234 -- a8d265290a22d629f3d9bf9f872c204200bfe8c8 by Mark Barolak <mbar@google.com>: Add a missing namespace closing comment to optional.h. PiperOrigin-RevId: 200739934 -- f05af8ee1c6b864dad2df7c907d424209a3e3202 by Abseil Team <absl-team@google.com>: Internal change PiperOrigin-RevId: 200719115 GitOrigin-RevId: f28d30df5769bb832dec3ff36d2fcd2bcdf494a3 Change-Id: Ie4fa601078fd4aa57286372611f1d114fdec82c0
This commit is contained in:
parent
f44e1eed08
commit
bd40a41cc1
23 changed files with 4164 additions and 78 deletions
357
absl/strings/internal/charconv_bigint.cc
Normal file
357
absl/strings/internal/charconv_bigint.cc
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
// Copyright 2018 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/charconv_bigint.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
namespace {
|
||||
|
||||
// Table containing some large powers of 5, for fast computation.
|
||||
|
||||
// Constant step size for entries in the kLargePowersOfFive table. Each entry
|
||||
// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep
|
||||
// (or 5**27).
|
||||
//
|
||||
// In other words, the Nth entry in the table is 5**(27*N).
|
||||
//
|
||||
// 5**27 is the largest power of 5 that fits in 64 bits.
|
||||
constexpr int kLargePowerOfFiveStep = 27;
|
||||
|
||||
// The largest legal index into the kLargePowersOfFive table.
|
||||
//
|
||||
// In other words, the largest precomputed power of 5 is 5**(27*20).
|
||||
constexpr int kLargestPowerOfFiveIndex = 20;
|
||||
|
||||
// Table of powers of (5**27), up to (5**27)**20 == 5**540.
|
||||
//
|
||||
// Used to generate large powers of 5 while limiting the number of repeated
|
||||
// multiplications required.
|
||||
//
|
||||
// clang-format off
|
||||
const uint32_t kLargePowersOfFive[] = {
|
||||
// 5**27 (i=1), start=0, end=2
|
||||
0xfa10079dU, 0x6765c793U,
|
||||
// 5**54 (i=2), start=2, end=6
|
||||
0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U,
|
||||
// 5**81 (i=3), start=6, end=12
|
||||
0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U,
|
||||
// 5**108 (i=4), start=12, end=20
|
||||
0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU,
|
||||
0x20d3846fU, 0x06d00f73U,
|
||||
// 5**135 (i=5), start=20, end=30
|
||||
0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U,
|
||||
0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU,
|
||||
// 5**162 (i=6), start=30, end=42
|
||||
0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU,
|
||||
0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU,
|
||||
// 5**189 (i=7), start=42, end=56
|
||||
0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U,
|
||||
0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U,
|
||||
0x94151217U, 0x0072e9f7U,
|
||||
// 5**216 (i=8), start=56, end=72
|
||||
0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU,
|
||||
0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU,
|
||||
0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U,
|
||||
// 5**243 (i=9), start=72, end=90
|
||||
0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U,
|
||||
0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U,
|
||||
0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U,
|
||||
// 5**270 (i=10), start=90, end=110
|
||||
0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U,
|
||||
0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U,
|
||||
0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U,
|
||||
0x0d39e796U, 0x00079250U,
|
||||
// 5**297 (i=11), start=110, end=132
|
||||
0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U,
|
||||
0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U,
|
||||
0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U,
|
||||
0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U,
|
||||
// 5**324 (i=12), start=132, end=156
|
||||
0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U,
|
||||
0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU,
|
||||
0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U,
|
||||
0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U,
|
||||
// 5**351 (i=13), start=156, end=182
|
||||
0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU,
|
||||
0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U,
|
||||
0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U,
|
||||
0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU,
|
||||
0x859a4940U, 0x00007fb6U,
|
||||
// 5**378 (i=14), start=182, end=210
|
||||
0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U,
|
||||
0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU,
|
||||
0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU,
|
||||
0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U,
|
||||
0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U,
|
||||
// 5**405 (i=15), start=210, end=240
|
||||
0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U,
|
||||
0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U,
|
||||
0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U,
|
||||
0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU,
|
||||
0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U,
|
||||
// 5**432 (i=16), start=240, end=272
|
||||
0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU,
|
||||
0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U,
|
||||
0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U,
|
||||
0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU,
|
||||
0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U,
|
||||
0x3364ea62U, 0x0000086aU,
|
||||
// 5**459 (i=17), start=272, end=306
|
||||
0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU,
|
||||
0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U,
|
||||
0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U,
|
||||
0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U,
|
||||
0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU,
|
||||
0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U,
|
||||
// 5**486 (i=18), start=306, end=342
|
||||
0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU,
|
||||
0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U,
|
||||
0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU,
|
||||
0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U,
|
||||
0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU,
|
||||
0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU,
|
||||
// 5**513 (i=19), start=342, end=380
|
||||
0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U,
|
||||
0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU,
|
||||
0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU,
|
||||
0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U,
|
||||
0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U,
|
||||
0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU,
|
||||
0xf0046d27U, 0x0000008dU,
|
||||
// 5**540 (i=20), start=380, end=420
|
||||
0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU,
|
||||
0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U,
|
||||
0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U,
|
||||
0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U,
|
||||
0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U,
|
||||
0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU,
|
||||
0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U,
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// Returns a pointer to the big integer data for (5**27)**i. i must be
|
||||
// between 1 and 20, inclusive.
|
||||
const uint32_t* LargePowerOfFiveData(int i) {
|
||||
return kLargePowersOfFive + i * (i - 1);
|
||||
}
|
||||
|
||||
// Returns the size of the big integer data for (5**27)**i, in words. i must be
|
||||
// between 1 and 20, inclusive.
|
||||
int LargePowerOfFiveSize(int i) { return 2 * i; }
|
||||
} // namespace
|
||||
|
||||
const uint32_t kFiveToNth[14] = {
|
||||
1, 5, 25, 125, 625, 3125, 15625,
|
||||
78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125,
|
||||
};
|
||||
|
||||
const uint32_t kTenToNth[10] = {
|
||||
1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
|
||||
};
|
||||
|
||||
template <int max_words>
|
||||
int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp,
|
||||
int significant_digits) {
|
||||
SetToZero();
|
||||
assert(fp.type == FloatType::kNumber);
|
||||
|
||||
if (fp.subrange_begin == nullptr) {
|
||||
// We already exactly parsed the mantissa, so no more work is necessary.
|
||||
words_[0] = fp.mantissa & 0xffffffffu;
|
||||
words_[1] = fp.mantissa >> 32;
|
||||
if (words_[1]) {
|
||||
size_ = 2;
|
||||
} else if (words_[0]) {
|
||||
size_ = 1;
|
||||
}
|
||||
return fp.exponent;
|
||||
}
|
||||
int exponent_adjust =
|
||||
ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits);
|
||||
return fp.literal_exponent + exponent_adjust;
|
||||
}
|
||||
|
||||
template <int max_words>
|
||||
int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end,
|
||||
int significant_digits) {
|
||||
assert(significant_digits <= Digits10() + 1);
|
||||
SetToZero();
|
||||
|
||||
bool after_decimal_point = false;
|
||||
// Discard any leading zeroes before the decimal point
|
||||
while (begin < end && *begin == '0') {
|
||||
++begin;
|
||||
}
|
||||
int dropped_digits = 0;
|
||||
// Discard any trailing zeroes. These may or may not be after the decimal
|
||||
// point.
|
||||
while (begin < end && *std::prev(end) == '0') {
|
||||
--end;
|
||||
++dropped_digits;
|
||||
}
|
||||
if (begin < end && *std::prev(end) == '.') {
|
||||
// If the std::string ends in '.', either before or after dropping zeroes, then
|
||||
// drop the decimal point and look for more digits to drop.
|
||||
dropped_digits = 0;
|
||||
--end;
|
||||
while (begin < end && *std::prev(end) == '0') {
|
||||
--end;
|
||||
++dropped_digits;
|
||||
}
|
||||
} else if (dropped_digits) {
|
||||
// We dropped digits, and aren't sure if they're before or after the decimal
|
||||
// point. Figure that out now.
|
||||
const char* dp = std::find(begin, end, '.');
|
||||
if (dp != end) {
|
||||
// The dropped trailing digits were after the decimal point, so don't
|
||||
// count them.
|
||||
dropped_digits = 0;
|
||||
}
|
||||
}
|
||||
// Any non-fraction digits we dropped need to be accounted for in our exponent
|
||||
// adjustment.
|
||||
int exponent_adjust = dropped_digits;
|
||||
|
||||
uint32_t queued = 0;
|
||||
int digits_queued = 0;
|
||||
for (; begin != end && significant_digits > 0; ++begin) {
|
||||
if (*begin == '.') {
|
||||
after_decimal_point = true;
|
||||
continue;
|
||||
}
|
||||
if (after_decimal_point) {
|
||||
// For each fractional digit we emit in our parsed integer, adjust our
|
||||
// decimal exponent to compensate.
|
||||
--exponent_adjust;
|
||||
}
|
||||
int digit = (*begin - '0');
|
||||
--significant_digits;
|
||||
if (significant_digits == 0 && std::next(begin) != end &&
|
||||
(digit == 0 || digit == 5)) {
|
||||
// If this is the very last significant digit, but insignificant digits
|
||||
// remain, we know that the last of those remaining significant digits is
|
||||
// nonzero. (If it wasn't, we would have stripped it before we got here.)
|
||||
// So if this final digit is a 0 or 5, adjust it upward by 1.
|
||||
//
|
||||
// This adjustment is what allows incredibly large mantissas ending in
|
||||
// 500000...000000000001 to correctly round up, rather than to nearest.
|
||||
++digit;
|
||||
}
|
||||
queued = 10 * queued + digit;
|
||||
++digits_queued;
|
||||
if (digits_queued == kMaxSmallPowerOfTen) {
|
||||
MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]);
|
||||
AddWithCarry(0, queued);
|
||||
queued = digits_queued = 0;
|
||||
}
|
||||
}
|
||||
// Encode any remaining digits.
|
||||
if (digits_queued) {
|
||||
MultiplyBy(kTenToNth[digits_queued]);
|
||||
AddWithCarry(0, queued);
|
||||
}
|
||||
|
||||
// If any insignificant digits remain, we will drop them. But if we have not
|
||||
// yet read the decimal point, then we have to adjust the exponent to account
|
||||
// for the dropped digits.
|
||||
if (begin < end && !after_decimal_point) {
|
||||
// This call to std::find will result in a pointer either to the decimal
|
||||
// point, or to the end of our buffer if there was none.
|
||||
//
|
||||
// Either way, [begin, decimal_point) will contain the set of dropped digits
|
||||
// that require an exponent adjustment.
|
||||
const char* decimal_point = std::find(begin, end, '.');
|
||||
exponent_adjust += (decimal_point - begin);
|
||||
}
|
||||
return exponent_adjust;
|
||||
}
|
||||
|
||||
template <int max_words>
|
||||
/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth(
|
||||
int n) {
|
||||
BigUnsigned answer(1u);
|
||||
|
||||
// Seed from the table of large powers, if possible.
|
||||
bool first_pass = true;
|
||||
while (n >= kLargePowerOfFiveStep) {
|
||||
int big_power =
|
||||
std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
|
||||
if (first_pass) {
|
||||
// just copy, rather than multiplying by 1
|
||||
std::copy(
|
||||
LargePowerOfFiveData(big_power),
|
||||
LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power),
|
||||
answer.words_);
|
||||
answer.size_ = LargePowerOfFiveSize(big_power);
|
||||
first_pass = false;
|
||||
} else {
|
||||
answer.MultiplyBy(LargePowerOfFiveSize(big_power),
|
||||
LargePowerOfFiveData(big_power));
|
||||
}
|
||||
n -= kLargePowerOfFiveStep * big_power;
|
||||
}
|
||||
answer.MultiplyByFiveToTheNth(n);
|
||||
return answer;
|
||||
}
|
||||
|
||||
template <int max_words>
|
||||
void BigUnsigned<max_words>::MultiplyStep(int original_size,
|
||||
const uint32_t* other_words,
|
||||
int other_size, int step) {
|
||||
int this_i = std::min(original_size - 1, step);
|
||||
int other_i = step - this_i;
|
||||
|
||||
uint64_t this_word = 0;
|
||||
uint64_t carry = 0;
|
||||
for (; this_i >= 0 && other_i < other_size; --this_i, ++other_i) {
|
||||
uint64_t product = words_[this_i];
|
||||
product *= other_words[other_i];
|
||||
this_word += product;
|
||||
carry += (this_word >> 32);
|
||||
this_word &= 0xffffffff;
|
||||
}
|
||||
AddWithCarry(step + 1, carry);
|
||||
words_[step] = this_word & 0xffffffff;
|
||||
if (this_word > 0 && size_ <= step) {
|
||||
size_ = step + 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <int max_words>
|
||||
std::string BigUnsigned<max_words>::ToString() const {
|
||||
BigUnsigned<max_words> copy = *this;
|
||||
std::string result;
|
||||
// Build result in reverse order
|
||||
while (copy.size() > 0) {
|
||||
int next_digit = copy.DivMod<10>();
|
||||
result.push_back('0' + next_digit);
|
||||
}
|
||||
if (result.empty()) {
|
||||
result.push_back('0');
|
||||
}
|
||||
std::reverse(result.begin(), result.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
template class BigUnsigned<4>;
|
||||
template class BigUnsigned<84>;
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
426
absl/strings/internal/charconv_bigint.h
Normal file
426
absl/strings/internal/charconv_bigint.h
Normal file
|
|
@ -0,0 +1,426 @@
|
|||
// Copyright 2018 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
|
||||
#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "absl/strings/ascii.h"
|
||||
#include "absl/strings/internal/charconv_parse.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
// The largest power that 5 that can be raised to, and still fit in a uint32_t.
|
||||
constexpr int kMaxSmallPowerOfFive = 13;
|
||||
// The largest power that 10 that can be raised to, and still fit in a uint32_t.
|
||||
constexpr int kMaxSmallPowerOfTen = 9;
|
||||
|
||||
extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1];
|
||||
extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
|
||||
|
||||
// Large, fixed-width unsigned integer.
|
||||
//
|
||||
// Exact rounding for decimal-to-binary floating point conversion requires very
|
||||
// large integer math, but a design goal of absl::from_chars is to avoid
|
||||
// allocating memory. The integer precision needed for decimal-to-binary
|
||||
// conversions is large but bounded, so a huge fixed-width integer class
|
||||
// suffices.
|
||||
//
|
||||
// This is an intentionally limited big integer class. Only needed operations
|
||||
// are implemented. All storage lives in an array data member, and all
|
||||
// arithmetic is done in-place, to avoid requiring separate storage for operand
|
||||
// and result.
|
||||
//
|
||||
// This is an internal class. Some methods live in the .cc file, and are
|
||||
// instantiated only for the values of max_words we need.
|
||||
template <int max_words>
|
||||
class BigUnsigned {
|
||||
public:
|
||||
static_assert(max_words == 4 || max_words == 84,
|
||||
"unsupported max_words value");
|
||||
|
||||
BigUnsigned() : size_(0), words_{} {}
|
||||
explicit BigUnsigned(uint32_t v) : size_(v > 0 ? 1 : 0), words_{v} {}
|
||||
explicit BigUnsigned(uint64_t v)
|
||||
: size_(0),
|
||||
words_{static_cast<uint32_t>(v & 0xffffffff),
|
||||
static_cast<uint32_t>(v >> 32)} {
|
||||
if (words_[1]) {
|
||||
size_ = 2;
|
||||
} else if (words_[0]) {
|
||||
size_ = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Constructs a BigUnsigned from the given string_view containing a decimal
|
||||
// value. If the input std::string is not a decimal integer, constructs a 0
|
||||
// instead.
|
||||
explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
|
||||
// Check for valid input, returning a 0 otherwise. This is reasonable
|
||||
// behavior only because this constructor is for unit tests.
|
||||
if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
|
||||
sv.empty()) {
|
||||
return;
|
||||
}
|
||||
int exponent_adjust =
|
||||
ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
|
||||
if (exponent_adjust > 0) {
|
||||
MultiplyByTenToTheNth(exponent_adjust);
|
||||
}
|
||||
}
|
||||
|
||||
// Loads the mantissa value of a previously-parsed float.
|
||||
//
|
||||
// Returns the associated decimal exponent. The value of the parsed float is
|
||||
// exactly *this * 10**exponent.
|
||||
int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
|
||||
|
||||
// Returns the number of decimal digits of precision this type provides. All
|
||||
// numbers with this many decimal digits or fewer are representable by this
|
||||
// type.
|
||||
//
|
||||
// Analagous to std::numeric_limits<BigUnsigned>::digits10.
|
||||
static constexpr int Digits10() {
|
||||
// 9975007/1035508 is very slightly less than log10(2**32).
|
||||
return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
|
||||
}
|
||||
|
||||
// Shifts left by the given number of bits.
|
||||
void ShiftLeft(int count) {
|
||||
if (count > 0) {
|
||||
const int word_shift = count / 32;
|
||||
if (word_shift >= max_words) {
|
||||
SetToZero();
|
||||
return;
|
||||
}
|
||||
size_ = std::min(size_ + word_shift, max_words);
|
||||
count %= 32;
|
||||
if (count == 0) {
|
||||
std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
|
||||
} else {
|
||||
for (int i = std::min(size_, max_words - 1); i > word_shift; --i) {
|
||||
words_[i] = (words_[i - word_shift] << count) |
|
||||
(words_[i - word_shift - 1] >> (32 - count));
|
||||
}
|
||||
words_[word_shift] = words_[0] << count;
|
||||
// Grow size_ if necessary.
|
||||
if (size_ < max_words && words_[size_]) {
|
||||
++size_;
|
||||
}
|
||||
}
|
||||
std::fill(words_, words_ + word_shift, 0u);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Multiplies by v in-place.
|
||||
void MultiplyBy(uint32_t v) {
|
||||
if (size_ == 0 || v == 1) {
|
||||
return;
|
||||
}
|
||||
if (v == 0) {
|
||||
SetToZero();
|
||||
return;
|
||||
}
|
||||
const uint64_t factor = v;
|
||||
uint64_t window = 0;
|
||||
for (int i = 0; i < size_; ++i) {
|
||||
window += factor * words_[i];
|
||||
words_[i] = window & 0xffffffff;
|
||||
window >>= 32;
|
||||
}
|
||||
// If carry bits remain and there's space for them, grow size_.
|
||||
if (window && size_ < max_words) {
|
||||
words_[size_] = window & 0xffffffff;
|
||||
++size_;
|
||||
}
|
||||
}
|
||||
|
||||
void MultiplyBy(uint64_t v) {
|
||||
uint32_t words[2];
|
||||
words[0] = static_cast<uint32_t>(v);
|
||||
words[1] = static_cast<uint32_t>(v >> 32);
|
||||
if (words[1] == 0) {
|
||||
MultiplyBy(words[0]);
|
||||
} else {
|
||||
MultiplyBy(2, words);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiplies in place by 5 to the power of n. n must be non-negative.
|
||||
void MultiplyByFiveToTheNth(int n) {
|
||||
while (n >= kMaxSmallPowerOfFive) {
|
||||
MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
|
||||
n -= kMaxSmallPowerOfFive;
|
||||
}
|
||||
if (n > 0) {
|
||||
MultiplyBy(kFiveToNth[n]);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiplies in place by 10 to the power of n. n must be non-negative.
|
||||
void MultiplyByTenToTheNth(int n) {
|
||||
if (n > kMaxSmallPowerOfTen) {
|
||||
// For large n, raise to a power of 5, then shift left by the same amount.
|
||||
// (10**n == 5**n * 2**n.) This requires fewer multiplications overall.
|
||||
MultiplyByFiveToTheNth(n);
|
||||
ShiftLeft(n);
|
||||
} else if (n > 0) {
|
||||
// We can do this more quickly for very small N by using a single
|
||||
// multiplication.
|
||||
MultiplyBy(kTenToNth[n]);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the value of 5**n, for non-negative n. This implementation uses
|
||||
// a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
|
||||
// MultiplyByFiveToTheNth().
|
||||
static BigUnsigned FiveToTheNth(int n);
|
||||
|
||||
// Multiplies by another BigUnsigned, in-place.
|
||||
template <int M>
|
||||
void MultiplyBy(const BigUnsigned<M>& other) {
|
||||
MultiplyBy(other.size(), other.words());
|
||||
}
|
||||
|
||||
void SetToZero() {
|
||||
std::fill(words_, words_ + size_, 0u);
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
// Returns the value of the nth word of this BigUnsigned. This is
|
||||
// range-checked, and returns 0 on out-of-bounds accesses.
|
||||
uint32_t GetWord(int index) const {
|
||||
if (index < 0 || index >= size_) {
|
||||
return 0;
|
||||
}
|
||||
return words_[index];
|
||||
}
|
||||
|
||||
// Returns this integer as a decimal std::string. This is not used in the decimal-
|
||||
// to-binary conversion; it is intended to aid in testing.
|
||||
std::string ToString() const;
|
||||
|
||||
int size() const { return size_; }
|
||||
const uint32_t* words() const { return words_; }
|
||||
|
||||
private:
|
||||
// Reads the number between [begin, end), possibly containing a decimal point,
|
||||
// into this BigUnsigned.
|
||||
//
|
||||
// Callers are required to ensure [begin, end) contains a valid number, with
|
||||
// one or more decimal digits and at most one decimal point. This routine
|
||||
// will behave unpredictably if these preconditions are not met.
|
||||
//
|
||||
// Only the first `significant_digits` digits are read. Digits beyond this
|
||||
// limit are "sticky": If the final significant digit is 0 or 5, and if any
|
||||
// dropped digit is nonzero, then that final significant digit is adjusted up
|
||||
// to 1 or 6. This adjustment allows for precise rounding.
|
||||
//
|
||||
// Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
|
||||
// account for the decimal point and for dropped significant digits. After
|
||||
// this function returns,
|
||||
// actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
|
||||
int ReadDigits(const char* begin, const char* end, int significant_digits);
|
||||
|
||||
// Performs a step of big integer multiplication. This computes the full
|
||||
// (64-bit-wide) values that should be added at the given index (step), and
|
||||
// adds to that location in-place.
|
||||
//
|
||||
// Because our math all occurs in place, we must multiply starting from the
|
||||
// highest word working downward. (This is a bit more expensive due to the
|
||||
// extra carries involved.)
|
||||
//
|
||||
// This must be called in steps, for each word to be calculated, starting from
|
||||
// the high end and working down to 0. The first value of `step` should be
|
||||
// `std::min(original_size + other.size_ - 2, max_words - 1)`.
|
||||
// The reason for this expression is that multiplying the i'th word from one
|
||||
// multiplicand and the j'th word of another multiplicand creates a
|
||||
// two-word-wide value to be stored at the (i+j)'th element. The highest
|
||||
// word indices we will access are `original_size - 1` from this object, and
|
||||
// `other.size_ - 1` from our operand. Therefore,
|
||||
// `original_size + other.size_ - 2` is the first step we should calculate,
|
||||
// but limited on an upper bound by max_words.
|
||||
|
||||
// Working from high-to-low ensures that we do not overwrite the portions of
|
||||
// the initial value of *this which are still needed for later steps.
|
||||
//
|
||||
// Once called with step == 0, *this contains the result of the
|
||||
// multiplication.
|
||||
//
|
||||
// `original_size` is the size_ of *this before the first call to
|
||||
// MultiplyStep(). `other_words` and `other_size` are the contents of our
|
||||
// operand. `step` is the step to perform, as described above.
|
||||
void MultiplyStep(int original_size, const uint32_t* other_words,
|
||||
int other_size, int step);
|
||||
|
||||
void MultiplyBy(int other_size, const uint32_t* other_words) {
|
||||
const int original_size = size_;
|
||||
const int first_step =
|
||||
std::min(original_size + other_size - 2, max_words - 1);
|
||||
for (int step = first_step; step >= 0; --step) {
|
||||
MultiplyStep(original_size, other_words, other_size, step);
|
||||
}
|
||||
}
|
||||
|
||||
// Adds a 32-bit value to the index'th word, with carry.
|
||||
void AddWithCarry(int index, uint32_t value) {
|
||||
if (value) {
|
||||
while (index < max_words && value > 0) {
|
||||
words_[index] += value;
|
||||
// carry if we overflowed in this word:
|
||||
if (value > words_[index]) {
|
||||
value = 1;
|
||||
++index;
|
||||
} else {
|
||||
value = 0;
|
||||
}
|
||||
}
|
||||
size_ = std::min(max_words, std::max(index + 1, size_));
|
||||
}
|
||||
}
|
||||
|
||||
void AddWithCarry(int index, uint64_t value) {
|
||||
if (value && index < max_words) {
|
||||
uint32_t high = value >> 32;
|
||||
uint32_t low = value & 0xffffffff;
|
||||
words_[index] += low;
|
||||
if (words_[index] < low) {
|
||||
++high;
|
||||
if (high == 0) {
|
||||
// Carry from the low word caused our high word to overflow.
|
||||
// Short circuit here to do the right thing.
|
||||
AddWithCarry(index + 2, static_cast<uint32_t>(1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (high > 0) {
|
||||
AddWithCarry(index + 1, high);
|
||||
} else {
|
||||
// Normally 32-bit AddWithCarry() sets size_, but since we don't call
|
||||
// it when `high` is 0, do it ourselves here.
|
||||
size_ = std::min(max_words, std::max(index + 1, size_));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Divide this in place by a constant divisor. Returns the remainder of the
|
||||
// division.
|
||||
template <uint32_t divisor>
|
||||
uint32_t DivMod() {
|
||||
uint64_t accumulator = 0;
|
||||
for (int i = size_ - 1; i >= 0; --i) {
|
||||
accumulator <<= 32;
|
||||
accumulator += words_[i];
|
||||
// accumulator / divisor will never overflow an int32_t in this loop
|
||||
words_[i] = static_cast<uint32_t>(accumulator / divisor);
|
||||
accumulator = accumulator % divisor;
|
||||
}
|
||||
while (size_ > 0 && words_[size_ - 1] == 0) {
|
||||
--size_;
|
||||
}
|
||||
return static_cast<uint32_t>(accumulator);
|
||||
}
|
||||
|
||||
// The number of elements in words_ that may carry significant values.
|
||||
// All elements beyond this point are 0.
|
||||
//
|
||||
// When size_ is 0, this BigUnsigned stores the value 0.
|
||||
// When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is
|
||||
// nonzero. This can occur due to overflow truncation.
|
||||
// In particular, x.size_ != y.size_ does *not* imply x != y.
|
||||
int size_;
|
||||
uint32_t words_[max_words];
|
||||
};
|
||||
|
||||
// Compares two big integer instances.
|
||||
//
|
||||
// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
|
||||
template <int N, int M>
|
||||
int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
|
||||
int limit = std::max(lhs.size(), rhs.size());
|
||||
for (int i = limit - 1; i >= 0; --i) {
|
||||
const uint32_t lhs_word = lhs.GetWord(i);
|
||||
const uint32_t rhs_word = rhs.GetWord(i);
|
||||
if (lhs_word < rhs_word) {
|
||||
return -1;
|
||||
} else if (lhs_word > rhs_word) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <int N, int M>
|
||||
bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
|
||||
int limit = std::max(lhs.size(), rhs.size());
|
||||
for (int i = 0; i < limit; ++i) {
|
||||
if (lhs.GetWord(i) != rhs.GetWord(i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <int N, int M>
|
||||
bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
template <int N, int M>
|
||||
bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
|
||||
return Compare(lhs, rhs) == -1;
|
||||
}
|
||||
|
||||
template <int N, int M>
|
||||
bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
|
||||
return rhs < lhs;
|
||||
}
|
||||
template <int N, int M>
|
||||
bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
|
||||
return !(rhs < lhs);
|
||||
}
|
||||
template <int N, int M>
|
||||
bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
|
||||
return !(lhs < rhs);
|
||||
}
|
||||
|
||||
// Output operator for BigUnsigned, for testing purposes only.
|
||||
template <int N>
|
||||
std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
|
||||
return os << num.ToString();
|
||||
}
|
||||
|
||||
// Explicit instantiation declarations for the sizes of BigUnsigned that we
|
||||
// are using.
|
||||
//
|
||||
// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
|
||||
// still bigger than an int128, and 84 is a large value we will want to use
|
||||
// in the from_chars implementation.
|
||||
//
|
||||
// Comments justifying the use of 84 belong in the from_chars implementation,
|
||||
// and will be added in a follow-up CL.
|
||||
extern template class BigUnsigned<4>;
|
||||
extern template class BigUnsigned<84>;
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
|
||||
203
absl/strings/internal/charconv_bigint_test.cc
Normal file
203
absl/strings/internal/charconv_bigint_test.cc
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
// Copyright 2018 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/charconv_bigint.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
TEST(BigUnsigned, ShiftLeft) {
|
||||
{
|
||||
// Check that 3 * 2**100 is calculated correctly
|
||||
BigUnsigned<4> num(3u);
|
||||
num.ShiftLeft(100);
|
||||
EXPECT_EQ(num, BigUnsigned<4>("3802951800684688204490109616128"));
|
||||
}
|
||||
{
|
||||
// Test that overflow is truncated properly.
|
||||
// 15 is 4 bits long, and BigUnsigned<4> is a 128-bit bigint.
|
||||
// Shifting left by 125 bits should truncate off the high bit, so that
|
||||
// 15 << 125 == 7 << 125
|
||||
// after truncation.
|
||||
BigUnsigned<4> a(15u);
|
||||
BigUnsigned<4> b(7u);
|
||||
BigUnsigned<4> c(3u);
|
||||
a.ShiftLeft(125);
|
||||
b.ShiftLeft(125);
|
||||
c.ShiftLeft(125);
|
||||
EXPECT_EQ(a, b);
|
||||
EXPECT_NE(a, c);
|
||||
}
|
||||
{
|
||||
// Same test, larger bigint:
|
||||
BigUnsigned<84> a(15u);
|
||||
BigUnsigned<84> b(7u);
|
||||
BigUnsigned<84> c(3u);
|
||||
a.ShiftLeft(84 * 32 - 3);
|
||||
b.ShiftLeft(84 * 32 - 3);
|
||||
c.ShiftLeft(84 * 32 - 3);
|
||||
EXPECT_EQ(a, b);
|
||||
EXPECT_NE(a, c);
|
||||
}
|
||||
{
|
||||
// Check that incrementally shifting has the same result as doing it all at
|
||||
// once (attempting to capture corner cases.)
|
||||
const std::string seed = "1234567890123456789012345678901234567890";
|
||||
BigUnsigned<84> a(seed);
|
||||
for (int i = 1; i <= 84 * 32; ++i) {
|
||||
a.ShiftLeft(1);
|
||||
BigUnsigned<84> b(seed);
|
||||
b.ShiftLeft(i);
|
||||
EXPECT_EQ(a, b);
|
||||
}
|
||||
// And we should have fully rotated all bits off by now:
|
||||
EXPECT_EQ(a, BigUnsigned<84>(0u));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(BigUnsigned, MultiplyByUint32) {
|
||||
const BigUnsigned<84> factorial_100(
|
||||
"933262154439441526816992388562667004907159682643816214685929638952175999"
|
||||
"932299156089414639761565182862536979208272237582511852109168640000000000"
|
||||
"00000000000000");
|
||||
BigUnsigned<84> a(1u);
|
||||
for (uint32_t i = 1; i <= 100; ++i) {
|
||||
a.MultiplyBy(i);
|
||||
}
|
||||
EXPECT_EQ(a, BigUnsigned<84>(factorial_100));
|
||||
}
|
||||
|
||||
TEST(BigUnsigned, MultiplyByBigUnsigned) {
|
||||
{
|
||||
// Put the terms of factorial_200 into two bigints, and multiply them
|
||||
// together.
|
||||
const BigUnsigned<84> factorial_200(
|
||||
"7886578673647905035523632139321850622951359776871732632947425332443594"
|
||||
"4996340334292030428401198462390417721213891963883025764279024263710506"
|
||||
"1926624952829931113462857270763317237396988943922445621451664240254033"
|
||||
"2918641312274282948532775242424075739032403212574055795686602260319041"
|
||||
"7032406235170085879617892222278962370389737472000000000000000000000000"
|
||||
"0000000000000000000000000");
|
||||
BigUnsigned<84> evens(1u);
|
||||
BigUnsigned<84> odds(1u);
|
||||
for (uint32_t i = 1; i < 200; i += 2) {
|
||||
odds.MultiplyBy(i);
|
||||
evens.MultiplyBy(i + 1);
|
||||
}
|
||||
evens.MultiplyBy(odds);
|
||||
EXPECT_EQ(evens, factorial_200);
|
||||
}
|
||||
{
|
||||
// Multiply various powers of 10 together.
|
||||
for (int a = 0 ; a < 700; a += 25) {
|
||||
SCOPED_TRACE(a);
|
||||
BigUnsigned<84> a_value("3" + std::string(a, '0'));
|
||||
for (int b = 0; b < (700 - a); b += 25) {
|
||||
SCOPED_TRACE(b);
|
||||
BigUnsigned<84> b_value("2" + std::string(b, '0'));
|
||||
BigUnsigned<84> expected_product("6" + std::string(a + b, '0'));
|
||||
b_value.MultiplyBy(a_value);
|
||||
EXPECT_EQ(b_value, expected_product);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(BigUnsigned, MultiplyByOverflow) {
|
||||
{
|
||||
// Check that multiplcation overflow predictably truncates.
|
||||
|
||||
// A big int with all bits on.
|
||||
BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455");
|
||||
// Modulo 2**128, this is equal to -1. Therefore the square of this,
|
||||
// modulo 2**128, should be 1.
|
||||
all_bits_on.MultiplyBy(all_bits_on);
|
||||
EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u));
|
||||
}
|
||||
{
|
||||
// Try multiplying a large bigint by 2**50, and compare the result to
|
||||
// shifting.
|
||||
BigUnsigned<4> value_1("12345678901234567890123456789012345678");
|
||||
BigUnsigned<4> value_2("12345678901234567890123456789012345678");
|
||||
BigUnsigned<4> two_to_fiftieth(1u);
|
||||
two_to_fiftieth.ShiftLeft(50);
|
||||
|
||||
value_1.ShiftLeft(50);
|
||||
value_2.MultiplyBy(two_to_fiftieth);
|
||||
EXPECT_EQ(value_1, value_2);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(BigUnsigned, FiveToTheNth) {
|
||||
{
|
||||
// Sanity check that MultiplyByFiveToTheNth gives consistent answers, up to
|
||||
// and including overflow.
|
||||
for (int i = 0; i < 1160; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
BigUnsigned<84> value_1(123u);
|
||||
BigUnsigned<84> value_2(123u);
|
||||
value_1.MultiplyByFiveToTheNth(i);
|
||||
for (int j = 0; j < i; j++) {
|
||||
value_2.MultiplyBy(5u);
|
||||
}
|
||||
EXPECT_EQ(value_1, value_2);
|
||||
}
|
||||
}
|
||||
{
|
||||
// Check that the faster, table-lookup-based static method returns the same
|
||||
// result that multiplying in-place would return, up to and including
|
||||
// overflow.
|
||||
for (int i = 0; i < 1160; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
BigUnsigned<84> value_1(1u);
|
||||
value_1.MultiplyByFiveToTheNth(i);
|
||||
BigUnsigned<84> value_2 = BigUnsigned<84>::FiveToTheNth(i);
|
||||
EXPECT_EQ(value_1, value_2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(BigUnsigned, TenToTheNth) {
|
||||
{
|
||||
// Sanity check MultiplyByTenToTheNth.
|
||||
for (int i = 0; i < 800; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
BigUnsigned<84> value_1(123u);
|
||||
BigUnsigned<84> value_2(123u);
|
||||
value_1.MultiplyByTenToTheNth(i);
|
||||
for (int j = 0; j < i; j++) {
|
||||
value_2.MultiplyBy(10u);
|
||||
}
|
||||
EXPECT_EQ(value_1, value_2);
|
||||
}
|
||||
}
|
||||
{
|
||||
// Alternate testing approach, taking advantage of the decimal parser.
|
||||
for (int i = 0; i < 200; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
BigUnsigned<84> value_1(135u);
|
||||
value_1.MultiplyByTenToTheNth(i);
|
||||
BigUnsigned<84> value_2("135" + std::string(i, '0'));
|
||||
EXPECT_EQ(value_1, value_2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
496
absl/strings/internal/charconv_parse.cc
Normal file
496
absl/strings/internal/charconv_parse.cc
Normal file
|
|
@ -0,0 +1,496 @@
|
|||
// Copyright 2018 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/charconv_parse.h"
|
||||
#include "absl/strings/charconv.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
#include "absl/strings/internal/memutil.h"
|
||||
|
||||
namespace absl {
|
||||
namespace {
|
||||
|
||||
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
|
||||
// This number was chosen for multiple reasons.
|
||||
//
|
||||
// (a) First, for whatever integer type we choose to represent the mantissa, we
|
||||
// want to choose the largest possible number of decimal digits for that integer
|
||||
// type. We are using uint64_t, which can express any 19-digit unsigned
|
||||
// integer.
|
||||
//
|
||||
// (b) Second, we need to parse enough digits that the binary value of any
|
||||
// mantissa we capture has more bits of resolution than the mantissa
|
||||
// representation in the target float. Our algorithm requires at least 3 bits
|
||||
// of headway, but 19 decimal digits give a little more than that.
|
||||
//
|
||||
// The following static assertions verify the above comments:
|
||||
constexpr int kDecimalMantissaDigitsMax = 19;
|
||||
|
||||
static_assert(std::numeric_limits<uint64_t>::digits10 ==
|
||||
kDecimalMantissaDigitsMax,
|
||||
"(a) above");
|
||||
|
||||
// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
|
||||
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
|
||||
static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact");
|
||||
static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact");
|
||||
|
||||
// The lowest valued 19-digit decimal mantissa we can read still contains
|
||||
// sufficient information to reconstruct a binary mantissa.
|
||||
static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above");
|
||||
|
||||
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
|
||||
//
|
||||
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
|
||||
// to maximize the number of scanned hex digits to improve our conversion. What
|
||||
// is required is to scan two more bits than the mantissa can represent, so that
|
||||
// we always round correctly.
|
||||
//
|
||||
// (One extra bit does not suffice to perform correct rounding, since a number
|
||||
// exactly halfway between two representable floats has unique rounding rules,
|
||||
// so we need to differentiate between a "halfway between" number and a "closer
|
||||
// to the larger value" number.)
|
||||
constexpr int kHexadecimalMantissaDigitsMax = 15;
|
||||
|
||||
// The minimum number of significant bits that will be read from
|
||||
// kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since
|
||||
// the most significant digit can be a "1", which only contributes a single
|
||||
// significant bit.
|
||||
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
|
||||
4 * kHexadecimalMantissaDigitsMax - 3;
|
||||
|
||||
static_assert(kGuaranteedHexadecimalMantissaBitPrecision >
|
||||
std::numeric_limits<double>::digits + 2,
|
||||
"kHexadecimalMantissaDigitsMax too small");
|
||||
|
||||
// We also impose a limit on the number of significant digits we will read from
|
||||
// an exponent, to avoid having to deal with integer overflow. We use 9 for
|
||||
// this purpose.
|
||||
//
|
||||
// If we read a 9 digit exponent, the end result of the conversion will
|
||||
// necessarily be infinity or zero, depending on the sign of the exponent.
|
||||
// Therefore we can just drop extra digits on the floor without any extra
|
||||
// logic.
|
||||
constexpr int kDecimalExponentDigitsMax = 9;
|
||||
static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax,
|
||||
"int type too small");
|
||||
|
||||
// To avoid incredibly large inputs causing integer overflow for our exponent,
|
||||
// we impose an arbitrary but very large limit on the number of significant
|
||||
// digits we will accept. The implementation refuses to match a std::string with
|
||||
// more consecutive significant mantissa digits than this.
|
||||
constexpr int kDecimalDigitLimit = 50000000;
|
||||
|
||||
// Corresponding limit for hexadecimal digit inputs. This is one fourth the
|
||||
// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires
|
||||
// a binary exponent adjustment of 4.
|
||||
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;
|
||||
|
||||
// The largest exponent we can read is 999999999 (per
|
||||
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
|
||||
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
|
||||
// comfortably fits in an integer.
|
||||
//
|
||||
// We count kDecimalDigitLimit twice because there are independent limits for
|
||||
// numbers before and after the decimal point. (In the case where there are no
|
||||
// significant digits before the decimal point, there are independent limits for
|
||||
// post-decimal-point leading zeroes and for significant digits.)
|
||||
static_assert(999999999 + 2 * kDecimalDigitLimit <
|
||||
std::numeric_limits<int>::max(),
|
||||
"int type too small");
|
||||
static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) <
|
||||
std::numeric_limits<int>::max(),
|
||||
"int type too small");
|
||||
|
||||
// Returns true if the provided bitfield allows parsing an exponent value
|
||||
// (e.g., "1.5e100").
|
||||
bool AllowExponent(chars_format flags) {
|
||||
bool fixed = (flags & chars_format::fixed) == chars_format::fixed;
|
||||
bool scientific =
|
||||
(flags & chars_format::scientific) == chars_format::scientific;
|
||||
return scientific || !fixed;
|
||||
}
|
||||
|
||||
// Returns true if the provided bitfield requires an exponent value be present.
|
||||
bool RequireExponent(chars_format flags) {
|
||||
bool fixed = (flags & chars_format::fixed) == chars_format::fixed;
|
||||
bool scientific =
|
||||
(flags & chars_format::scientific) == chars_format::scientific;
|
||||
return scientific && !fixed;
|
||||
}
|
||||
|
||||
const int8_t kAsciiToInt[256] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1};
|
||||
|
||||
// Returns true if `ch` is a digit in the given base
|
||||
template <int base>
|
||||
bool IsDigit(char ch);
|
||||
|
||||
// Converts a valid `ch` to its digit value in the given base.
|
||||
template <int base>
|
||||
unsigned ToDigit(char ch);
|
||||
|
||||
// Returns true if `ch` is the exponent delimiter for the given base.
|
||||
template <int base>
|
||||
bool IsExponentCharacter(char ch);
|
||||
|
||||
// Returns the maximum number of significant digits we will read for a float
|
||||
// in the given base.
|
||||
template <int base>
|
||||
constexpr int MantissaDigitsMax();
|
||||
|
||||
// Returns the largest consecutive run of digits we will accept when parsing a
|
||||
// number in the given base.
|
||||
template <int base>
|
||||
constexpr int DigitLimit();
|
||||
|
||||
// Returns the amount the exponent must be adjusted by for each dropped digit.
|
||||
// (For decimal this is 1, since the digits are in base 10 and the exponent base
|
||||
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
|
||||
// the exponent base is 2.)
|
||||
template <int base>
|
||||
constexpr int DigitMagnitude();
|
||||
|
||||
template <>
|
||||
bool IsDigit<10>(char ch) {
|
||||
return ch >= '0' && ch <= '9';
|
||||
}
|
||||
template <>
|
||||
bool IsDigit<16>(char ch) {
|
||||
return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
unsigned ToDigit<10>(char ch) {
|
||||
return ch - '0';
|
||||
}
|
||||
template <>
|
||||
unsigned ToDigit<16>(char ch) {
|
||||
return kAsciiToInt[static_cast<unsigned char>(ch)];
|
||||
}
|
||||
|
||||
template <>
|
||||
bool IsExponentCharacter<10>(char ch) {
|
||||
return ch == 'e' || ch == 'E';
|
||||
}
|
||||
|
||||
template <>
|
||||
bool IsExponentCharacter<16>(char ch) {
|
||||
return ch == 'p' || ch == 'P';
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr int MantissaDigitsMax<10>() {
|
||||
return kDecimalMantissaDigitsMax;
|
||||
}
|
||||
template <>
|
||||
constexpr int MantissaDigitsMax<16>() {
|
||||
return kHexadecimalMantissaDigitsMax;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr int DigitLimit<10>() {
|
||||
return kDecimalDigitLimit;
|
||||
}
|
||||
template <>
|
||||
constexpr int DigitLimit<16>() {
|
||||
return kHexadecimalDigitLimit;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr int DigitMagnitude<10>() {
|
||||
return 1;
|
||||
}
|
||||
template <>
|
||||
constexpr int DigitMagnitude<16>() {
|
||||
return 4;
|
||||
}
|
||||
|
||||
// Reads decimal digits from [begin, end) into *out. Returns the number of
|
||||
// digits consumed.
|
||||
//
|
||||
// After max_digits has been read, keeps consuming characters, but no longer
|
||||
// adjusts *out. If a nonzero digit is dropped this way, *dropped_nonzero_digit
|
||||
// is set; otherwise, it is left unmodified.
|
||||
//
|
||||
// If no digits are matched, returns 0 and leaves *out unchanged.
|
||||
//
|
||||
// ConsumeDigits does not protect against overflow on *out; max_digits must
|
||||
// be chosen with respect to type T to avoid the possibility of overflow.
|
||||
template <int base, typename T>
|
||||
std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits,
|
||||
T* out, bool* dropped_nonzero_digit) {
|
||||
if (base == 10) {
|
||||
assert(max_digits <= std::numeric_limits<T>::digits10);
|
||||
} else if (base == 16) {
|
||||
assert(max_digits * 4 <= std::numeric_limits<T>::digits);
|
||||
}
|
||||
const char* const original_begin = begin;
|
||||
T accumulator = *out;
|
||||
const char* significant_digits_end =
|
||||
(end - begin > max_digits) ? begin + max_digits : end;
|
||||
while (begin < significant_digits_end && IsDigit<base>(*begin)) {
|
||||
// Do not guard against *out overflow; max_digits was chosen to avoid this.
|
||||
// Do assert against it, to detect problems in debug builds.
|
||||
auto digit = static_cast<T>(ToDigit<base>(*begin));
|
||||
assert(accumulator * base >= accumulator);
|
||||
accumulator *= base;
|
||||
assert(accumulator + digit >= accumulator);
|
||||
accumulator += digit;
|
||||
++begin;
|
||||
}
|
||||
bool dropped_nonzero = false;
|
||||
while (begin < end && IsDigit<base>(*begin)) {
|
||||
dropped_nonzero = dropped_nonzero || (*begin != '0');
|
||||
++begin;
|
||||
}
|
||||
if (dropped_nonzero && dropped_nonzero_digit != nullptr) {
|
||||
*dropped_nonzero_digit = true;
|
||||
}
|
||||
*out = accumulator;
|
||||
return begin - original_begin;
|
||||
}
|
||||
|
||||
// Returns true if `v` is one of the chars allowed inside parentheses following
|
||||
// a NaN.
|
||||
bool IsNanChar(char v) {
|
||||
return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') ||
|
||||
(v >= 'A' && v <= 'Z');
|
||||
}
|
||||
|
||||
// Checks the range [begin, end) for a strtod()-formatted infinity or NaN. If
|
||||
// one is found, sets `out` appropriately and returns true.
|
||||
bool ParseInfinityOrNan(const char* begin, const char* end,
|
||||
strings_internal::ParsedFloat* out) {
|
||||
if (end - begin < 3) {
|
||||
return false;
|
||||
}
|
||||
switch (*begin) {
|
||||
case 'i':
|
||||
case 'I': {
|
||||
// An infinity std::string consists of the characters "inf" or "infinity",
|
||||
// case insensitive.
|
||||
if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
|
||||
return false;
|
||||
}
|
||||
out->type = strings_internal::FloatType::kInfinity;
|
||||
if (end - begin >= 8 &&
|
||||
strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) {
|
||||
out->end = begin + 8;
|
||||
} else {
|
||||
out->end = begin + 3;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case 'n':
|
||||
case 'N': {
|
||||
// A NaN consists of the characters "nan", case insensitive, optionally
|
||||
// followed by a parenthesized sequence of zero or more alphanumeric
|
||||
// characters and/or underscores.
|
||||
if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
|
||||
return false;
|
||||
}
|
||||
out->type = strings_internal::FloatType::kNan;
|
||||
out->end = begin + 3;
|
||||
// NaN is allowed to be followed by a parenthesized std::string, consisting of
|
||||
// only the characters [a-zA-Z0-9_]. Match that if it's present.
|
||||
begin += 3;
|
||||
if (begin < end && *begin == '(') {
|
||||
const char* nan_begin = begin + 1;
|
||||
while (nan_begin < end && IsNanChar(*nan_begin)) {
|
||||
++nan_begin;
|
||||
}
|
||||
if (nan_begin < end && *nan_begin == ')') {
|
||||
// We found an extra NaN specifier range
|
||||
out->subrange_begin = begin + 1;
|
||||
out->subrange_end = nan_begin;
|
||||
out->end = nan_begin + 1;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace strings_internal {
|
||||
|
||||
template <int base>
|
||||
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
|
||||
chars_format format_flags) {
|
||||
strings_internal::ParsedFloat result;
|
||||
|
||||
// Exit early if we're given an empty range.
|
||||
if (begin == end) return result;
|
||||
|
||||
// Handle the infinity and NaN cases.
|
||||
if (ParseInfinityOrNan(begin, end, &result)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const char* const mantissa_begin = begin;
|
||||
while (begin < end && *begin == '0') {
|
||||
++begin; // skip leading zeros
|
||||
}
|
||||
uint64_t mantissa = 0;
|
||||
|
||||
int exponent_adjustment = 0;
|
||||
bool mantissa_is_inexact = false;
|
||||
std::size_t pre_decimal_digits = ConsumeDigits<base>(
|
||||
begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
|
||||
begin += pre_decimal_digits;
|
||||
int digits_left;
|
||||
if (pre_decimal_digits >= DigitLimit<base>()) {
|
||||
// refuse to parse pathological inputs
|
||||
return result;
|
||||
} else if (pre_decimal_digits > MantissaDigitsMax<base>()) {
|
||||
// We dropped some non-fraction digits on the floor. Adjust our exponent
|
||||
// to compensate.
|
||||
exponent_adjustment =
|
||||
static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>());
|
||||
digits_left = 0;
|
||||
} else {
|
||||
digits_left =
|
||||
static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits);
|
||||
}
|
||||
if (begin < end && *begin == '.') {
|
||||
++begin;
|
||||
if (mantissa == 0) {
|
||||
// If we haven't seen any nonzero digits yet, keep skipping zeros. We
|
||||
// have to adjust the exponent to reflect the changed place value.
|
||||
const char* begin_zeros = begin;
|
||||
while (begin < end && *begin == '0') {
|
||||
++begin;
|
||||
}
|
||||
std::size_t zeros_skipped = begin - begin_zeros;
|
||||
if (zeros_skipped >= DigitLimit<base>()) {
|
||||
// refuse to parse pathological inputs
|
||||
return result;
|
||||
}
|
||||
exponent_adjustment -= static_cast<int>(zeros_skipped);
|
||||
}
|
||||
std::size_t post_decimal_digits = ConsumeDigits<base>(
|
||||
begin, end, digits_left, &mantissa, &mantissa_is_inexact);
|
||||
begin += post_decimal_digits;
|
||||
|
||||
// Since `mantissa` is an integer, each significant digit we read after
|
||||
// the decimal point requires an adjustment to the exponent. "1.23e0" will
|
||||
// be stored as `mantissa` == 123 and `exponent` == -2 (that is,
|
||||
// "123e-2").
|
||||
if (post_decimal_digits >= DigitLimit<base>()) {
|
||||
// refuse to parse pathological inputs
|
||||
return result;
|
||||
} else if (post_decimal_digits > digits_left) {
|
||||
exponent_adjustment -= digits_left;
|
||||
} else {
|
||||
exponent_adjustment -= post_decimal_digits;
|
||||
}
|
||||
}
|
||||
// If we've found no mantissa whatsoever, this isn't a number.
|
||||
if (mantissa_begin == begin) {
|
||||
return result;
|
||||
}
|
||||
// A bare "." doesn't count as a mantissa either.
|
||||
if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (mantissa_is_inexact) {
|
||||
// We dropped significant digits on the floor. Handle this appropriately.
|
||||
if (base == 10) {
|
||||
// If we truncated significant decimal digits, store the full range of the
|
||||
// mantissa for future big integer math for exact rounding.
|
||||
result.subrange_begin = mantissa_begin;
|
||||
result.subrange_end = begin;
|
||||
} else if (base == 16) {
|
||||
// If we truncated hex digits, reflect this fact by setting the low
|
||||
// ("sticky") bit. This allows for correct rounding in all cases.
|
||||
mantissa |= 1;
|
||||
}
|
||||
}
|
||||
result.mantissa = mantissa;
|
||||
|
||||
const char* const exponent_begin = begin;
|
||||
result.literal_exponent = 0;
|
||||
bool found_exponent = false;
|
||||
if (AllowExponent(format_flags) && begin < end &&
|
||||
IsExponentCharacter<base>(*begin)) {
|
||||
bool negative_exponent = false;
|
||||
++begin;
|
||||
if (begin < end && *begin == '-') {
|
||||
negative_exponent = true;
|
||||
++begin;
|
||||
} else if (begin < end && *begin == '+') {
|
||||
++begin;
|
||||
}
|
||||
const char* const exponent_digits_begin = begin;
|
||||
// Exponent is always expressed in decimal, even for hexadecimal floats.
|
||||
begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
|
||||
&result.literal_exponent, nullptr);
|
||||
if (begin == exponent_digits_begin) {
|
||||
// there were no digits where we expected an exponent. We failed to read
|
||||
// an exponent and should not consume the 'e' after all. Rewind 'begin'.
|
||||
found_exponent = false;
|
||||
begin = exponent_begin;
|
||||
} else {
|
||||
found_exponent = true;
|
||||
if (negative_exponent) {
|
||||
result.literal_exponent = -result.literal_exponent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!found_exponent && RequireExponent(format_flags)) {
|
||||
// Provided flags required an exponent, but none was found. This results
|
||||
// in a failure to scan.
|
||||
return result;
|
||||
}
|
||||
|
||||
// Success!
|
||||
result.type = strings_internal::FloatType::kNumber;
|
||||
if (result.mantissa > 0) {
|
||||
result.exponent = result.literal_exponent +
|
||||
(DigitMagnitude<base>() * exponent_adjustment);
|
||||
} else {
|
||||
result.exponent = 0;
|
||||
}
|
||||
result.end = begin;
|
||||
return result;
|
||||
}
|
||||
|
||||
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
|
||||
chars_format format_flags);
|
||||
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
|
||||
chars_format format_flags);
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
96
absl/strings/internal/charconv_parse.h
Normal file
96
absl/strings/internal/charconv_parse.h
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright 2018 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
|
||||
#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "absl/strings/charconv.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
// Enum indicating whether a parsed float is a number or special value.
|
||||
enum class FloatType { kNumber, kInfinity, kNan };
|
||||
|
||||
// The decomposed parts of a parsed `float` or `double`.
|
||||
struct ParsedFloat {
|
||||
// Representation of the parsed mantissa, with the decimal point adjusted to
|
||||
// make it an integer.
|
||||
//
|
||||
// During decimal scanning, this contains 19 significant digits worth of
|
||||
// mantissa value. If digits beyond this point are found, they
|
||||
// are truncated, and if any of these dropped digits are nonzero, then
|
||||
// `mantissa` is inexact, and the full mantissa is stored in [subrange_begin,
|
||||
// subrange_end).
|
||||
//
|
||||
// During hexadecimal scanning, this contains 15 significant hex digits worth
|
||||
// of mantissa value. Digits beyond this point are sticky -- they are
|
||||
// truncated, but if any dropped digits are nonzero, the low bit of mantissa
|
||||
// will be set. (This allows for precise rounding, and avoids the need
|
||||
// to store the full mantissa in [subrange_begin, subrange_end).)
|
||||
uint64_t mantissa = 0;
|
||||
|
||||
// Floating point expontent. This reflects any decimal point adjustments and
|
||||
// any truncated digits from the mantissa. The absolute value of the parsed
|
||||
// number is represented by mantissa * (base ** exponent), where base==10 for
|
||||
// decimal floats, and base==2 for hexadecimal floats.
|
||||
int exponent = 0;
|
||||
|
||||
// The literal exponent value scanned from the input, or 0 if none was
|
||||
// present. This does not reflect any adjustments applied to mantissa.
|
||||
int literal_exponent = 0;
|
||||
|
||||
// The type of number scanned.
|
||||
FloatType type = FloatType::kNumber;
|
||||
|
||||
// When non-null, [subrange_begin, subrange_end) marks a range of characters
|
||||
// that require further processing. The meaning is dependent on float type.
|
||||
// If type == kNumber and this is set, this is a "wide input": the input
|
||||
// mantissa contained more than 19 digits. The range contains the full
|
||||
// mantissa. It plus `literal_exponent` need to be examined to find the best
|
||||
// floating point match.
|
||||
// If type == kNan and this is set, the range marks the contents of a
|
||||
// matched parenthesized character region after the NaN.
|
||||
const char* subrange_begin = nullptr;
|
||||
const char* subrange_end = nullptr;
|
||||
|
||||
// One-past-the-end of the successfully parsed region, or nullptr if no
|
||||
// matching pattern was found.
|
||||
const char* end = nullptr;
|
||||
};
|
||||
|
||||
// Read the floating point number in the provided range, and populate
|
||||
// ParsedFloat accordingly.
|
||||
//
|
||||
// format_flags is a bitmask value specifying what patterns this API will match.
|
||||
// `scientific` and `fixed` are honored per std::from_chars rules
|
||||
// ([utility.from.chars], C++17): if exactly one of these bits is set, then an
|
||||
// exponent is required, or dislallowed, respectively.
|
||||
//
|
||||
// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is
|
||||
// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat.
|
||||
template <int base>
|
||||
ParsedFloat ParseFloat(const char* begin, const char* end,
|
||||
absl::chars_format format_flags);
|
||||
|
||||
extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
|
||||
absl::chars_format format_flags);
|
||||
extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
|
||||
absl::chars_format format_flags);
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
|
||||
357
absl/strings/internal/charconv_parse_test.cc
Normal file
357
absl/strings/internal/charconv_parse_test.cc
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
// Copyright 2018 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/charconv_parse.h"
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "absl/base/internal/raw_logging.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
|
||||
using absl::chars_format;
|
||||
using absl::strings_internal::FloatType;
|
||||
using absl::strings_internal::ParsedFloat;
|
||||
using absl::strings_internal::ParseFloat;
|
||||
|
||||
namespace {
|
||||
|
||||
// Check that a given std::string input is parsed to the expected mantissa and
|
||||
// exponent.
|
||||
//
|
||||
// Input std::string `s` must contain a '$' character. It marks the end of the
|
||||
// characters that should be consumed by the match. It is stripped from the
|
||||
// input to ParseFloat.
|
||||
//
|
||||
// If input std::string `s` contains '[' and ']' characters, these mark the region
|
||||
// of characters that should be marked as the "subrange". For NaNs, this is
|
||||
// the location of the extended NaN std::string. For numbers, this is the location
|
||||
// of the full, over-large mantissa.
|
||||
template <int base>
|
||||
void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
|
||||
FloatType expected_type, uint64_t expected_mantissa,
|
||||
int expected_exponent,
|
||||
int expected_literal_exponent = -999) {
|
||||
SCOPED_TRACE(s);
|
||||
|
||||
int begin_subrange = -1;
|
||||
int end_subrange = -1;
|
||||
// If s contains '[' and ']', then strip these characters and set the subrange
|
||||
// indices appropriately.
|
||||
std::string::size_type open_bracket_pos = s.find('[');
|
||||
if (open_bracket_pos != std::string::npos) {
|
||||
begin_subrange = static_cast<int>(open_bracket_pos);
|
||||
s.replace(open_bracket_pos, 1, "");
|
||||
std::string::size_type close_bracket_pos = s.find(']');
|
||||
ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos,
|
||||
"Test input contains [ without matching ]");
|
||||
end_subrange = static_cast<int>(close_bracket_pos);
|
||||
s.replace(close_bracket_pos, 1, "");
|
||||
}
|
||||
const std::string::size_type expected_characters_matched = s.find('$');
|
||||
ABSL_RAW_CHECK(expected_characters_matched != std::string::npos,
|
||||
"Input std::string must contain $");
|
||||
s.replace(expected_characters_matched, 1, "");
|
||||
|
||||
ParsedFloat parsed =
|
||||
ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);
|
||||
|
||||
EXPECT_NE(parsed.end, nullptr);
|
||||
if (parsed.end == nullptr) {
|
||||
return; // The following tests are not useful if we fully failed to parse
|
||||
}
|
||||
EXPECT_EQ(parsed.type, expected_type);
|
||||
if (begin_subrange == -1) {
|
||||
EXPECT_EQ(parsed.subrange_begin, nullptr);
|
||||
EXPECT_EQ(parsed.subrange_end, nullptr);
|
||||
} else {
|
||||
EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange);
|
||||
EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange);
|
||||
}
|
||||
if (parsed.type == FloatType::kNumber) {
|
||||
EXPECT_EQ(parsed.mantissa, expected_mantissa);
|
||||
EXPECT_EQ(parsed.exponent, expected_exponent);
|
||||
if (expected_literal_exponent != -999) {
|
||||
EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent);
|
||||
}
|
||||
}
|
||||
auto characters_matched = static_cast<int>(parsed.end - s.data());
|
||||
EXPECT_EQ(characters_matched, expected_characters_matched);
|
||||
}
|
||||
|
||||
// Check that a given std::string input is parsed to the expected mantissa and
|
||||
// exponent.
|
||||
//
|
||||
// Input std::string `s` must contain a '$' character. It marks the end of the
|
||||
// characters that were consumed by the match.
|
||||
template <int base>
|
||||
void ExpectNumber(std::string s, absl::chars_format format_flags,
|
||||
uint64_t expected_mantissa, int expected_exponent,
|
||||
int expected_literal_exponent = -999) {
|
||||
ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber,
|
||||
expected_mantissa, expected_exponent,
|
||||
expected_literal_exponent);
|
||||
}
|
||||
|
||||
// Check that a given std::string input is parsed to the given special value.
|
||||
//
|
||||
// This tests against both number bases, since infinities and NaNs have
|
||||
// identical representations in both modes.
|
||||
void ExpectSpecial(const std::string& s, absl::chars_format format_flags,
|
||||
FloatType type) {
|
||||
ExpectParsedFloat<10>(s, format_flags, type, 0, 0);
|
||||
ExpectParsedFloat<16>(s, format_flags, type, 0, 0);
|
||||
}
|
||||
|
||||
// Check that a given input std::string is not matched by Float.
|
||||
template <int base>
|
||||
void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) {
|
||||
ParsedFloat parsed =
|
||||
ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);
|
||||
EXPECT_EQ(parsed.end, nullptr);
|
||||
}
|
||||
|
||||
TEST(ParseFloat, SimpleValue) {
|
||||
// Test that various forms of floating point numbers all parse correctly.
|
||||
ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3);
|
||||
|
||||
ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8);
|
||||
ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8);
|
||||
ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8);
|
||||
ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef,
|
||||
-8);
|
||||
ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8);
|
||||
ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef,
|
||||
-8);
|
||||
ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8);
|
||||
ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8);
|
||||
|
||||
// ExpectNumber does not attempt to drop trailing zeroes.
|
||||
ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900,
|
||||
-5);
|
||||
ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general,
|
||||
0x1234abcdef000, -20);
|
||||
|
||||
// Ensure non-matching characters after a number are ignored, even when they
|
||||
// look like potentially matching characters.
|
||||
ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789,
|
||||
-3);
|
||||
ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3);
|
||||
ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3);
|
||||
|
||||
ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef,
|
||||
-8);
|
||||
ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef,
|
||||
-8);
|
||||
ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef,
|
||||
-8);
|
||||
ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8);
|
||||
ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general,
|
||||
0x1234abcdef, -8);
|
||||
ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef,
|
||||
-8);
|
||||
ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8);
|
||||
ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8);
|
||||
|
||||
// Ensure we can read a full resolution mantissa without overflow.
|
||||
ExpectNumber<10>("9999999999999999999$", chars_format::general,
|
||||
9999999999999999999u, 0);
|
||||
ExpectNumber<16>("fffffffffffffff$", chars_format::general,
|
||||
0xfffffffffffffffu, 0);
|
||||
|
||||
// Check that zero is consistently read.
|
||||
ExpectNumber<10>("0$", chars_format::general, 0, 0);
|
||||
ExpectNumber<16>("0$", chars_format::general, 0, 0);
|
||||
ExpectNumber<10>("000000000000000000000000000000000000000$",
|
||||
chars_format::general, 0, 0);
|
||||
ExpectNumber<16>("000000000000000000000000000000000000000$",
|
||||
chars_format::general, 0, 0);
|
||||
ExpectNumber<10>("0000000000000000000000.000000000000000000$",
|
||||
chars_format::general, 0, 0);
|
||||
ExpectNumber<16>("0000000000000000000000.000000000000000000$",
|
||||
chars_format::general, 0, 0);
|
||||
ExpectNumber<10>("0.00000000000000000000000000000000e123456$",
|
||||
chars_format::general, 0, 0);
|
||||
ExpectNumber<16>("0.00000000000000000000000000000000p123456$",
|
||||
chars_format::general, 0, 0);
|
||||
}
|
||||
|
||||
TEST(ParseFloat, LargeDecimalMantissa) {
|
||||
// After 19 significant decimal digits in the mantissa, ParsedFloat will
|
||||
// truncate additional digits. We need to test that:
|
||||
// 1) the truncation to 19 digits happens
|
||||
// 2) the returned exponent reflects the dropped significant digits
|
||||
// 3) a correct literal_exponent is set
|
||||
//
|
||||
// If and only if a significant digit is found after 19 digits, then the
|
||||
// entirety of the mantissa in case the exact value is needed to make a
|
||||
// rounding decision. The [ and ] characters below denote where such a
|
||||
// subregion was marked by by ParseFloat. They are not part of the input.
|
||||
|
||||
// Mark a capture group only if a dropped digit is significant (nonzero).
|
||||
ExpectNumber<10>("100000000000000000000000000$", chars_format::general,
|
||||
1000000000000000000,
|
||||
/* adjusted exponent */ 8);
|
||||
|
||||
ExpectNumber<10>("123456789123456789100000000$", chars_format::general,
|
||||
1234567891234567891,
|
||||
/* adjusted exponent */ 8);
|
||||
|
||||
ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general,
|
||||
1234567891234567891,
|
||||
/* adjusted exponent */ 8,
|
||||
/* literal exponent */ 0);
|
||||
|
||||
ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general,
|
||||
1234567891234567891,
|
||||
/* adjusted exponent */ 8,
|
||||
/* literal exponent */ 0);
|
||||
|
||||
ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general,
|
||||
1234567891234567891,
|
||||
/* adjusted exponent */ 8,
|
||||
/* literal exponent */ 0);
|
||||
|
||||
// Leading zeroes should not count towards the 19 significant digit limit
|
||||
ExpectNumber<10>("[00000000123456789123456789123456789]$",
|
||||
chars_format::general, 1234567891234567891,
|
||||
/* adjusted exponent */ 8,
|
||||
/* literal exponent */ 0);
|
||||
|
||||
ExpectNumber<10>("00000000123456789123456789100000000$",
|
||||
chars_format::general, 1234567891234567891,
|
||||
/* adjusted exponent */ 8);
|
||||
|
||||
// Truncated digits after the decimal point should not cause a further
|
||||
// exponent adjustment.
|
||||
ExpectNumber<10>("1.234567891234567891e123$", chars_format::general,
|
||||
1234567891234567891, 105);
|
||||
ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general,
|
||||
1234567891234567891,
|
||||
/* adjusted exponent */ 105,
|
||||
/* literal exponent */ 123);
|
||||
|
||||
// Ensure we truncate, and not round. (The from_chars algorithm we use
|
||||
// depends on our guess missing low, if it misses, so we need the rounding
|
||||
// error to be downward.)
|
||||
ExpectNumber<10>("[1999999999999999999999]$", chars_format::general,
|
||||
1999999999999999999,
|
||||
/* adjusted exponent */ 3,
|
||||
/* literal exponent */ 0);
|
||||
}
|
||||
|
||||
TEST(ParseFloat, LargeHexadecimalMantissa) {
|
||||
// After 15 significant hex digits in the mantissa, ParsedFloat will treat
|
||||
// additional digits as sticky, We need to test that:
|
||||
// 1) The truncation to 15 digits happens
|
||||
// 2) The returned exponent reflects the dropped significant digits
|
||||
// 3) If a nonzero digit is dropped, the low bit of mantissa is set.
|
||||
|
||||
ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general,
|
||||
0x123456789abcdef, 60);
|
||||
|
||||
// Leading zeroes should not count towards the 15 significant digit limit
|
||||
ExpectNumber<16>("000000123456789abcdef123456789abcdef$",
|
||||
chars_format::general, 0x123456789abcdef, 60);
|
||||
|
||||
// Truncated digits after the radix point should not cause a further
|
||||
// exponent adjustment.
|
||||
ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general,
|
||||
0x123456789abcdef, 44);
|
||||
ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$",
|
||||
chars_format::general, 0x123456789abcdef, 44);
|
||||
|
||||
// test sticky digit behavior. The low bit should be set iff any dropped
|
||||
// digit is nonzero.
|
||||
ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general,
|
||||
0x123456789abcdef, 60);
|
||||
ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general,
|
||||
0x123456789abcdef, 60);
|
||||
ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general,
|
||||
0x123456789abcdee, 60);
|
||||
}
|
||||
|
||||
TEST(ParseFloat, ScientificVsFixed) {
|
||||
// In fixed mode, an exponent is never matched (but the remainder of the
|
||||
// number will be matched.)
|
||||
ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8);
|
||||
ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3);
|
||||
ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36);
|
||||
ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8);
|
||||
|
||||
// In scientific mode, numbers don't match *unless* they have an exponent.
|
||||
ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3);
|
||||
ExpectFailedParse<10>("-123456.789$", chars_format::scientific);
|
||||
ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef,
|
||||
-8);
|
||||
ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific);
|
||||
}
|
||||
|
||||
TEST(ParseFloat, Infinity) {
|
||||
ExpectFailedParse<10>("in", chars_format::general);
|
||||
ExpectFailedParse<16>("in", chars_format::general);
|
||||
ExpectFailedParse<10>("inx", chars_format::general);
|
||||
ExpectFailedParse<16>("inx", chars_format::general);
|
||||
ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity);
|
||||
ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity);
|
||||
ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity);
|
||||
ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity);
|
||||
ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity);
|
||||
ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity);
|
||||
}
|
||||
|
||||
TEST(ParseFloat, NaN) {
|
||||
ExpectFailedParse<10>("na", chars_format::general);
|
||||
ExpectFailedParse<16>("na", chars_format::general);
|
||||
ExpectFailedParse<10>("nah", chars_format::general);
|
||||
ExpectFailedParse<16>("nah", chars_format::general);
|
||||
ExpectSpecial("nan$", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("NaN$", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("nAn$", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("NAN$", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan);
|
||||
|
||||
// A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to
|
||||
// appear after an NaN. Check that this is allowed, and that the correct
|
||||
// characters are grouped.
|
||||
//
|
||||
// (The characters [ and ] in the pattern below delimit the expected matched
|
||||
// subgroup; they are not part of the input passed to ParseFloat.)
|
||||
ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan);
|
||||
ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan);
|
||||
// If the subgroup contains illegal characters, don't match it at all.
|
||||
ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan);
|
||||
// Also cope with a missing close paren.
|
||||
ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
Loading…
Add table
Add a link
Reference in a new issue