f28d30df5769bb832dec3ff36d2fcd2bcdf494a3 by Shaindel Schwartz <shaindel@google.com>:

Internal change

PiperOrigin-RevId: 201046831

--
711715a78b7e53dfaafd4d7f08a74e76db22af88 by Mark Barolak <mbar@google.com>:

Internal fix

PiperOrigin-RevId: 201043684

--
64b53edd6bf1fa48f74e7f5d33f00f80d5089147 by Shaindel Schwartz <shaindel@google.com>:

Remove extra whitespace

PiperOrigin-RevId: 201041989

--
0bdd2a0b33657b688e4a04aeba9ebba47e4dc6ca by Shaindel Schwartz <shaindel@google.com>:

Whitespace fix.

PiperOrigin-RevId: 201034413

--
3deb0ac296ef1b74c4789e114a8a8bf53253f26b by Shaindel Schwartz <shaindel@google.com>:

Scrub build tags. No functional changes.

PiperOrigin-RevId: 201032927

--
da75d0f8b73baa7e8f4e9a092bba546012ed3b71 by Alex Strelnikov <strel@google.com>:

Internal change.

PiperOrigin-RevId: 201026131

--
6815d80caa19870d0c441b6b9816c68db41393a5 by Tom Manshreck <shreck@google.com>:

Add documentation for our LTS snapshot branches

PiperOrigin-RevId: 201025191

--
64c3b02006f39e6a8127bbabf9ec947fb45b6504 by Greg Falcon <gfalcon@google.com>:

Provide absl::from_chars for double and float types.  This is a forward-compatible implementation of std::from_chars from C++17.

This provides exact "round_to_nearest" conversions, and has some nice properties:

* Works with string_view (it can convert numbers from non-NUL-terminated buffers)
* Never allocates memory
* Faster than the standard library strtod() in our toolchain
* Uses integer math in its calculations, so is unaffected by floating point environment
* Unaffected by C locale

Also change SimpleAtod/SimpleAtoi to use this new API under the hood.

PiperOrigin-RevId: 201003324

--
542869258eb100779497c899103dc96aced52749 by Greg Falcon <gfalcon@google.com>:

Internal change

PiperOrigin-RevId: 200999200

--
3aba192775c7f80e2cd7f221b0a73537823c54ea by Gennadiy Rozental <rogeeff@google.com>:

Internal change

PiperOrigin-RevId: 200947470

--
daf9b9feedd748d5364a4c06165b7cb7604d3e1e by Mark Barolak <mbar@google.com>:

Add an absl:: qualification to a usage of base_internal::SchedulingMode outside of an absl:: namespace.

PiperOrigin-RevId: 200748234

--
a8d265290a22d629f3d9bf9f872c204200bfe8c8 by Mark Barolak <mbar@google.com>:

Add a missing namespace closing comment to optional.h.

PiperOrigin-RevId: 200739934

--
f05af8ee1c6b864dad2df7c907d424209a3e3202 by Abseil Team <absl-team@google.com>:

Internal change

PiperOrigin-RevId: 200719115
GitOrigin-RevId: f28d30df5769bb832dec3ff36d2fcd2bcdf494a3
Change-Id: Ie4fa601078fd4aa57286372611f1d114fdec82c0
This commit is contained in:
Abseil Team 2018-06-18 13:18:53 -07:00 committed by Shaindel Schwartz
parent f44e1eed08
commit bd40a41cc1
23 changed files with 4164 additions and 78 deletions

View file

@ -0,0 +1,357 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <algorithm>
#include <cassert>
#include <string>
namespace absl {
namespace strings_internal {
namespace {
// The constants below describe kLargePowersOfFive, a table containing some
// large powers of 5, for fast computation.
//
// kLargePowerOfFiveStep is the constant step size for entries in the
// kLargePowersOfFive table. Each entry is larger than the previous entry by a
// factor of 5**kLargePowerOfFiveStep (or 5**27).
//
// In other words, the Nth entry in the table is 5**(27*N), with entries
// numbered starting at 1.
//
// 5**27 is the largest power of 5 that fits in 64 bits.
constexpr int kLargePowerOfFiveStep = 27;
// The largest legal index into the kLargePowersOfFive table.
//
// In other words, the largest precomputed power of 5 is 5**(27*20) == 5**540.
constexpr int kLargestPowerOfFiveIndex = 20;
// Table of powers of (5**27), up to (5**27)**20 == 5**540.
//
// Used to generate large powers of 5 while limiting the number of repeated
// multiplications required.
//
// Layout: the entries are stored back to back. Entry i (for i in [1, 20])
// occupies 2*i consecutive 32-bit words beginning at offset i*(i-1), as
// computed by LargePowerOfFiveSize() and LargePowerOfFiveData(). Within an
// entry the least significant word comes first, which is the same order
// BigUnsigned uses for its word array, so an entry can be copied directly.
//
// clang-format off
const uint32_t kLargePowersOfFive[] = {
// 5**27 (i=1), start=0, end=2
0xfa10079dU, 0x6765c793U,
// 5**54 (i=2), start=2, end=6
0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U,
// 5**81 (i=3), start=6, end=12
0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U,
// 5**108 (i=4), start=12, end=20
0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU,
0x20d3846fU, 0x06d00f73U,
// 5**135 (i=5), start=20, end=30
0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U,
0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU,
// 5**162 (i=6), start=30, end=42
0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU,
0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU,
// 5**189 (i=7), start=42, end=56
0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U,
0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U,
0x94151217U, 0x0072e9f7U,
// 5**216 (i=8), start=56, end=72
0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU,
0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU,
0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U,
// 5**243 (i=9), start=72, end=90
0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U,
0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U,
0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U,
// 5**270 (i=10), start=90, end=110
0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U,
0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U,
0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U,
0x0d39e796U, 0x00079250U,
// 5**297 (i=11), start=110, end=132
0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U,
0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U,
0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U,
0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U,
// 5**324 (i=12), start=132, end=156
0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U,
0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU,
0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U,
0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U,
// 5**351 (i=13), start=156, end=182
0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU,
0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U,
0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U,
0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU,
0x859a4940U, 0x00007fb6U,
// 5**378 (i=14), start=182, end=210
0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U,
0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU,
0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU,
0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U,
0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U,
// 5**405 (i=15), start=210, end=240
0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U,
0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U,
0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U,
0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU,
0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U,
// 5**432 (i=16), start=240, end=272
0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU,
0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U,
0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U,
0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU,
0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U,
0x3364ea62U, 0x0000086aU,
// 5**459 (i=17), start=272, end=306
0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU,
0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U,
0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U,
0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U,
0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU,
0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U,
// 5**486 (i=18), start=306, end=342
0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU,
0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U,
0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU,
0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U,
0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU,
0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU,
// 5**513 (i=19), start=342, end=380
0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U,
0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU,
0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU,
0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U,
0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U,
0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU,
0xf0046d27U, 0x0000008dU,
// 5**540 (i=20), start=380, end=420
0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU,
0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U,
0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U,
0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U,
0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U,
0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU,
0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U,
};
// clang-format on
// Locates the stored words of (5**27)**i inside kLargePowersOfFive.
//
// Table entry k spans 2*k words, so the entries that precede entry i take up
// 2 + 4 + ... + 2*(i-1) == i*(i-1) words in total. i must be between 1 and
// 20, inclusive.
const uint32_t* LargePowerOfFiveData(int i) {
  const int first_word = i * (i - 1);
  return &kLargePowersOfFive[first_word];
}
// Reports how many 32-bit words the table entry for (5**27)**i spans. i must
// be between 1 and 20, inclusive.
int LargePowerOfFiveSize(int i) {
  const int words_per_step = 2;
  return words_per_step * i;
}
} // namespace
// kFiveToNth[n] holds 5**n for n in [0, 13].
const uint32_t kFiveToNth[14] = {
1, 5, 25, 125, 625, 3125, 15625,
78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125,
};
// kTenToNth[n] holds 10**n for n in [0, 9].
const uint32_t kTenToNth[10] = {
1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
};
// Loads the mantissa of a previously parsed float into *this and returns the
// decimal exponent that goes with it.
//
// When fp carries a digit subrange, the digits are re-read from that subrange
// (keeping at most `significant_digits` of them). Otherwise the 64-bit
// mantissa stored in fp is used as-is.
template <int max_words>
int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp,
                                              int significant_digits) {
  SetToZero();
  assert(fp.type == FloatType::kNumber);

  if (fp.subrange_begin != nullptr) {
    const int adjustment =
        ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits);
    return fp.literal_exponent + adjustment;
  }

  // The mantissa was already parsed exactly, so just split it into two words.
  words_[0] = fp.mantissa & 0xffffffffu;
  words_[1] = fp.mantissa >> 32;
  if (words_[1]) {
    size_ = 2;
  } else if (words_[0]) {
    size_ = 1;
  }
  return fp.exponent;
}
// Parses the decimal number in [begin, end), which may contain a decimal
// point, into *this, reading at most `significant_digits` digits.
//
// Leading zeroes and trailing zeroes are stripped before any digit is read.
// If digits remain unread once the significant-digit budget is exhausted and
// the last digit read is 0 or 5, that digit is bumped up by one so that later
// rounding can tell the value is not exactly on that boundary.
//
// Returns the power-of-ten exponent adjustment that accounts for the position
// of the decimal point and for any dropped integer-part digits.
template <int max_words>
int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end,
int significant_digits) {
assert(significant_digits <= Digits10() + 1);
SetToZero();
bool after_decimal_point = false;
// Discard any leading zeroes before the decimal point
while (begin < end && *begin == '0') {
++begin;
}
int dropped_digits = 0;
// Discard any trailing zeroes. These may or may not be after the decimal
// point.
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
if (begin < end && *std::prev(end) == '.') {
// If the string ends in '.', either before or after dropping zeroes, then
// drop the decimal point and look for more digits to drop.
dropped_digits = 0;
--end;
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
} else if (dropped_digits) {
// We dropped digits, and aren't sure if they're before or after the decimal
// point. Figure that out now.
const char* dp = std::find(begin, end, '.');
if (dp != end) {
// The dropped trailing digits were after the decimal point, so don't
// count them.
dropped_digits = 0;
}
}
// Any non-fraction digits we dropped need to be accounted for in our exponent
// adjustment.
int exponent_adjust = dropped_digits;
// Digits are accumulated in `queued` and flushed into *this in batches of
// kMaxSmallPowerOfTen digits, so that each batch costs one multiplication.
uint32_t queued = 0;
int digits_queued = 0;
for (; begin != end && significant_digits > 0; ++begin) {
if (*begin == '.') {
after_decimal_point = true;
continue;
}
if (after_decimal_point) {
// For each fractional digit we emit in our parsed integer, adjust our
// decimal exponent to compensate.
--exponent_adjust;
}
int digit = (*begin - '0');
--significant_digits;
if (significant_digits == 0 && std::next(begin) != end &&
(digit == 0 || digit == 5)) {
// If this is the very last significant digit, but insignificant digits
// remain, we know that the last of those remaining significant digits is
// nonzero. (If it wasn't, we would have stripped it before we got here.)
// So if this final digit is a 0 or 5, adjust it upward by 1.
//
// This adjustment is what allows incredibly large mantissas ending in
// 500000...000000000001 to correctly round up, rather than to nearest.
++digit;
}
queued = 10 * queued + digit;
++digits_queued;
if (digits_queued == kMaxSmallPowerOfTen) {
MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]);
AddWithCarry(0, queued);
queued = digits_queued = 0;
}
}
// Encode any remaining digits.
if (digits_queued) {
MultiplyBy(kTenToNth[digits_queued]);
AddWithCarry(0, queued);
}
// If any insignificant digits remain, we will drop them. But if we have not
// yet read the decimal point, then we have to adjust the exponent to account
// for the dropped digits.
if (begin < end && !after_decimal_point) {
// This call to std::find will result in a pointer either to the decimal
// point, or to the end of our buffer if there was none.
//
// Either way, [begin, decimal_point) will contain the set of dropped digits
// that require an exponent adjustment.
const char* decimal_point = std::find(begin, end, '.');
exponent_adjust += (decimal_point - begin);
}
return exponent_adjust;
}
// Builds 5**n. Large chunks of the exponent are consumed using the
// precomputed kLargePowersOfFive entries (27 powers per table step, up to
// table index 20 at a time); whatever exponent is left over is applied with
// MultiplyByFiveToTheNth().
template <int max_words>
/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth(
    int n) {
  BigUnsigned result(1u);
  bool seeded = false;
  while (n >= kLargePowerOfFiveStep) {
    const int table_index =
        std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
    const uint32_t* const table_words = LargePowerOfFiveData(table_index);
    const int table_size = LargePowerOfFiveSize(table_index);
    if (seeded) {
      result.MultiplyBy(table_size, table_words);
    } else {
      // The running value is still 1, so copy the table entry instead of
      // multiplying by it.
      std::copy(table_words, table_words + table_size, result.words_);
      result.size_ = table_size;
      seeded = true;
    }
    n -= kLargePowerOfFiveStep * table_index;
  }
  result.MultiplyByFiveToTheNth(n);
  return result;
}
// Computes output word `step` of the in-place product of *this (whose
// pre-multiplication size is `original_size`) and the operand
// other_words[0 .. other_size).
//
// Every pair (i, j) with i + j == step contributes words_[i] * other_words[j].
// The low 32 bits of the running sum become words_[step]; everything that
// spills above 32 bits is collected and added in at word step + 1.
template <int max_words>
void BigUnsigned<max_words>::MultiplyStep(int original_size,
                                          const uint32_t* other_words,
                                          int other_size, int step) {
  int mine = std::min(original_size - 1, step);
  int theirs = step - mine;
  uint64_t low_word = 0;
  uint64_t overflow = 0;
  while (mine >= 0 && theirs < other_size) {
    const uint64_t product =
        static_cast<uint64_t>(words_[mine]) * other_words[theirs];
    low_word += product;
    overflow += (low_word >> 32);
    low_word &= 0xffffffff;
    --mine;
    ++theirs;
  }
  // Push the accumulated overflow into the already-computed higher words
  // before storing this word.
  AddWithCarry(step + 1, overflow);
  words_[step] = static_cast<uint32_t>(low_word & 0xffffffff);
  if (low_word > 0 && size_ <= step) {
    size_ = step + 1;
  }
}
// Renders this integer in decimal by repeatedly dividing a scratch copy by 10.
template <int max_words>
std::string BigUnsigned<max_words>::ToString() const {
  BigUnsigned<max_words> remaining = *this;
  std::string digits;
  // Digits are produced least significant first. Running the division at
  // least once yields the single digit '0' for a zero value.
  do {
    const int next_digit = remaining.DivMod<10>();
    digits.push_back('0' + next_digit);
  } while (remaining.size() > 0);
  return std::string(digits.rbegin(), digits.rend());
}
template class BigUnsigned<4>;
template class BigUnsigned<84>;
} // namespace strings_internal
} // namespace absl

View file

@ -0,0 +1,426 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/strings/ascii.h"
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/string_view.h"
namespace absl {
namespace strings_internal {
// The largest power to which 5 can be raised such that the result still fits
// in a uint32_t.
constexpr int kMaxSmallPowerOfFive = 13;
// The largest power to which 10 can be raised such that the result still fits
// in a uint32_t.
constexpr int kMaxSmallPowerOfTen = 9;
// Lookup tables of small powers, indexed by exponent; each has one entry per
// exponent from 0 up to the corresponding maximum above.
extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1];
extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
// Large, fixed-width unsigned integer.
//
// Exact rounding for decimal-to-binary floating point conversion requires very
// large integer math, but a design goal of absl::from_chars is to avoid
// allocating memory. The integer precision needed for decimal-to-binary
// conversions is large but bounded, so a huge fixed-width integer class
// suffices.
//
// This is an intentionally limited big integer class. Only needed operations
// are implemented. All storage lives in an array data member, and all
// arithmetic is done in-place, to avoid requiring separate storage for operand
// and result.
//
// This is an internal class. Some methods live in the .cc file, and are
// instantiated only for the values of max_words we need.
template <int max_words>
class BigUnsigned {
public:
static_assert(max_words == 4 || max_words == 84,
"unsupported max_words value");
// Constructs the value 0.
BigUnsigned() : size_(0), words_{} {}
// Constructs from a 32-bit value; size_ is 1 unless v is 0.
explicit BigUnsigned(uint32_t v) : size_(v == 0 ? 0 : 1), words_{v} {}
// Constructs from a 64-bit value, stored low word first. size_ counts the
// words up to and including the highest nonzero one.
explicit BigUnsigned(uint64_t v)
    : size_((v >> 32) != 0 ? 2 : (v != 0 ? 1 : 0)),
      words_{static_cast<uint32_t>(v & 0xffffffff),
             static_cast<uint32_t>(v >> 32)} {}
// Constructs a BigUnsigned from the given string_view containing a decimal
// value. If the input string is empty or contains anything other than decimal
// digits, constructs a 0 instead.
explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
  // Silently producing 0 on invalid input is reasonable behavior only because
  // this constructor is for unit tests.
  const auto first_non_digit =
      std::find_if_not(sv.begin(), sv.end(), ascii_isdigit);
  const bool is_decimal_integer = !sv.empty() && first_non_digit == sv.end();
  if (!is_decimal_integer) {
    return;
  }
  const char* const first = sv.data();
  const int exponent_adjust =
      ReadDigits(first, first + sv.size(), Digits10() + 1);
  // ReadDigits strips trailing zeroes and reports them as a positive
  // adjustment; multiply them back in.
  if (exponent_adjust > 0) {
    MultiplyByTenToTheNth(exponent_adjust);
  }
}
// Loads the mantissa value of a previously-parsed float.
//
// Returns the associated decimal exponent. The value of the parsed float is
// exactly *this * 10**exponent.
int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
// Returns the number of decimal digits of precision this type provides. All
// numbers with this many decimal digits or fewer are representable by this
// type.
//
// Analogous to std::numeric_limits<BigUnsigned>::digits10.
static constexpr int Digits10() {
  // Each 32-bit word contributes log10(2**32) decimal digits; the fraction
  // 9975007/1035508 is very slightly less than that value.
  return static_cast<int>(static_cast<uint64_t>(max_words) * 9975007 /
                          1035508);
}
// Shifts left by the given number of bits, in place. Bits shifted past the
// top of the word array are discarded. Non-positive counts do nothing.
void ShiftLeft(int count) {
  if (count <= 0) {
    return;
  }
  const int whole_words = count / 32;
  if (whole_words >= max_words) {
    // Every bit is shifted out.
    SetToZero();
    return;
  }
  const int bits = count % 32;
  size_ = std::min(size_ + whole_words, max_words);
  if (bits == 0) {
    // Pure word move.
    std::copy_backward(words_, words_ + size_ - whole_words, words_ + size_);
  } else {
    // Work from the top down so each source word is read before it is
    // overwritten.
    const int top = std::min(size_, max_words - 1);
    for (int dst = top; dst > whole_words; --dst) {
      const int src = dst - whole_words;
      words_[dst] = (words_[src] << bits) | (words_[src - 1] >> (32 - bits));
    }
    words_[whole_words] = words_[0] << bits;
    // Grow size_ if the shift spilled into one more word.
    if (size_ < max_words && words_[size_]) {
      ++size_;
    }
  }
  // The vacated low words become zero.
  std::fill(words_, words_ + whole_words, 0u);
}
// Multiplies *this by the 32-bit value v, in place. A final carry that does
// not fit in max_words is discarded.
void MultiplyBy(uint32_t v) {
  if (size_ == 0 || v == 1) {
    return;
  }
  if (v == 0) {
    SetToZero();
    return;
  }
  const uint64_t multiplier = v;
  uint64_t carry = 0;
  for (int i = 0; i < size_; ++i) {
    const uint64_t partial = carry + multiplier * words_[i];
    words_[i] = static_cast<uint32_t>(partial & 0xffffffff);
    carry = partial >> 32;
  }
  // If carry bits remain and there's space for them, grow size_.
  if (carry != 0 && size_ < max_words) {
    words_[size_] = static_cast<uint32_t>(carry & 0xffffffff);
    ++size_;
  }
}
// Multiplies *this by the 64-bit value v, in place. Values that fit in 32 bits
// take the single-word path; larger ones go through the two-word multiply.
void MultiplyBy(uint64_t v) {
  const uint32_t low = static_cast<uint32_t>(v);
  const uint32_t high = static_cast<uint32_t>(v >> 32);
  if (high == 0) {
    MultiplyBy(low);
    return;
  }
  const uint32_t operand[2] = {low, high};
  MultiplyBy(2, operand);
}
// Multiplies in place by 5**n. n must be non-negative.
//
// The exponent is consumed in chunks of kMaxSmallPowerOfFive, the largest
// power of 5 available in kFiveToNth, followed by one multiplication for the
// remainder.
void MultiplyByFiveToTheNth(int n) {
  for (; n >= kMaxSmallPowerOfFive; n -= kMaxSmallPowerOfFive) {
    MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
  }
  if (n > 0) {
    MultiplyBy(kFiveToNth[n]);
  }
}
// Multiplies in place by 10**n. n must be non-negative; n == 0 is a no-op.
void MultiplyByTenToTheNth(int n) {
  if (n <= 0) {
    return;
  }
  if (n <= kMaxSmallPowerOfTen) {
    // Small enough for a single table-driven multiplication.
    MultiplyBy(kTenToNth[n]);
    return;
  }
  // For large n use 10**n == 5**n * 2**n: multiply by the power of 5, then
  // shift left by n bits. This requires fewer multiplications overall.
  MultiplyByFiveToTheNth(n);
  ShiftLeft(n);
}
// Returns the value of 5**n, for non-negative n. This implementation uses
// a lookup table, and is faster than seeding a BigUnsigned with 1 and calling
// MultiplyByFiveToTheNth().
static BigUnsigned FiveToTheNth(int n);
// Multiplies by another BigUnsigned, in-place.
template <int M>
void MultiplyBy(const BigUnsigned<M>& other) {
MultiplyBy(other.size(), other.words());
}
// Resets this value to 0 by clearing the words currently in use.
void SetToZero() {
  std::fill_n(words_, size_, 0u);
  size_ = 0;
}
// Returns the word at `index`. The access is range-checked: any index outside
// [0, size_) reads as 0.
uint32_t GetWord(int index) const {
  const bool in_range = index >= 0 && index < size_;
  return in_range ? words_[index] : 0;
}
// Returns this integer as a decimal string. This is not used in the
// decimal-to-binary conversion; it is intended to aid in testing.
std::string ToString() const;
// Returns the number of words that may carry significant values.
int size() const { return size_; }
// Returns a pointer to the underlying word array (index 0 is the least
// significant word).
const uint32_t* words() const { return words_; }
private:
// Reads the number between [begin, end), possibly containing a decimal point,
// into this BigUnsigned.
//
// Callers are required to ensure [begin, end) contains a valid number, with
// one or more decimal digits and at most one decimal point. This routine
// will behave unpredictably if these preconditions are not met.
//
// Only the first `significant_digits` digits are read. Digits beyond this
// limit are "sticky": If the final significant digit is 0 or 5, and if any
// dropped digit is nonzero, then that final significant digit is adjusted up
// to 1 or 6. This adjustment allows for precise rounding.
//
// Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
// account for the decimal point and for dropped significant digits. After
// this function returns,
// actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
int ReadDigits(const char* begin, const char* end, int significant_digits);
// Performs a step of big integer multiplication. This computes the full
// (64-bit-wide) values that should be added at the given index (step), and
// adds to that location in-place.
//
// Because our math all occurs in place, we must multiply starting from the
// highest word working downward. (This is a bit more expensive due to the
// extra carries involved.)
//
// This must be called in steps, for each word to be calculated, starting from
// the high end and working down to 0. The first value of `step` should be
// `std::min(original_size + other.size_ - 2, max_words - 1)`.
// The reason for this expression is that multiplying the i'th word from one
// multiplicand and the j'th word of another multiplicand creates a
// two-word-wide value to be stored at the (i+j)'th element. The highest
// word indices we will access are `original_size - 1` from this object, and
// `other.size_ - 1` from our operand. Therefore,
// `original_size + other.size_ - 2` is the first step we should calculate,
// but limited on an upper bound by max_words.
// Working from high-to-low ensures that we do not overwrite the portions of
// the initial value of *this which are still needed for later steps.
//
// Once called with step == 0, *this contains the result of the
// multiplication.
//
// `original_size` is the size_ of *this before the first call to
// MultiplyStep(). `other_words` and `other_size` are the contents of our
// operand. `step` is the step to perform, as described above.
void MultiplyStep(int original_size, const uint32_t* other_words,
int other_size, int step);
void MultiplyBy(int other_size, const uint32_t* other_words) {
const int original_size = size_;
const int first_step =
std::min(original_size + other_size - 2, max_words - 1);
for (int step = first_step; step >= 0; --step) {
MultiplyStep(original_size, other_words, other_size, step);
}
}
// Adds a 32-bit value to the index'th word, rippling a carry of 1 into higher
// words for as long as the addition wraps around. A carry that would land at
// or beyond max_words is dropped.
void AddWithCarry(int index, uint32_t value) {
  if (value == 0) {
    return;
  }
  while (index < max_words) {
    words_[index] += value;
    if (words_[index] >= value) {
      // No wraparound in this word, so there is nothing left to carry.
      break;
    }
    // The word wrapped: carry one into the next word up.
    value = 1;
    ++index;
  }
  size_ = std::min(max_words, std::max(index + 1, size_));
}
// Adds a 64-bit value at the index'th word: the low half goes into word
// `index`, and the high half (plus any carry out of the low half) goes into
// the words above it. Does nothing if value is 0 or index is out of range.
void AddWithCarry(int index, uint64_t value) {
  if (value == 0 || index >= max_words) {
    return;
  }
  uint32_t upper = value >> 32;
  const uint32_t lower = value & 0xffffffff;
  words_[index] += lower;
  const bool low_word_wrapped = words_[index] < lower;
  if (low_word_wrapped) {
    ++upper;
    if (upper == 0) {
      // Carry from the low word caused our high word to overflow.
      // Short circuit here to do the right thing: the net effect is a carry
      // of 1 into the word two places up.
      AddWithCarry(index + 2, static_cast<uint32_t>(1));
      return;
    }
  }
  if (upper == 0) {
    // Normally 32-bit AddWithCarry() sets size_, but since we don't call
    // it when the high half is 0, do it ourselves here.
    size_ = std::min(max_words, std::max(index + 1, size_));
    return;
  }
  AddWithCarry(index + 1, upper);
}
// Divides this in place by a constant divisor, using schoolbook long division
// from the most significant word down. Returns the remainder of the division.
template <uint32_t divisor>
uint32_t DivMod() {
  uint64_t remainder = 0;
  for (int i = size_; i-- > 0;) {
    const uint64_t dividend = (remainder << 32) + words_[i];
    // remainder < divisor on entry, so dividend / divisor always fits in a
    // uint32_t here.
    words_[i] = static_cast<uint32_t>(dividend / divisor);
    remainder = dividend % divisor;
  }
  // Drop high words that the division turned into zero.
  while (size_ > 0 && words_[size_ - 1] == 0) {
    --size_;
  }
  return static_cast<uint32_t>(remainder);
}
// The number of elements in words_ that may carry significant values.
// All elements beyond this point are 0.
//
// When size_ is 0, this BigUnsigned stores the value 0.
// When size_ is nonzero, it is *not* guaranteed that words_[size_ - 1] is
// nonzero. This can occur due to overflow truncation.
// In particular, x.size_ != y.size_ does *not* imply x != y.
int size_;
uint32_t words_[max_words];
};
// Three-way comparison of two big integers, which may differ in width.
//
// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
template <int N, int M>
int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  // Scan from the most significant word down; the first differing word
  // decides. GetWord() reads as 0 past the end of the shorter operand.
  for (int i = std::max(lhs.size(), rhs.size()); i-- > 0;) {
    const uint32_t left = lhs.GetWord(i);
    const uint32_t right = rhs.GetWord(i);
    if (left != right) {
      return left < right ? -1 : 1;
    }
  }
  return 0;
}
// Two big integers are equal when every word matches, with words past the end
// of the shorter operand reading as 0.
template <int N, int M>
bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int limit = std::max(lhs.size(), rhs.size());
  int i = 0;
  while (i < limit && lhs.GetWord(i) == rhs.GetWord(i)) {
    ++i;
  }
  return i == limit;
}
// Remaining relational operators, expressed through operator== and Compare().
template <int N, int M>
bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const bool equal = (lhs == rhs);
  return !equal;
}
template <int N, int M>
bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  return Compare(lhs, rhs) < 0;
}
template <int N, int M>
bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  return Compare(rhs, lhs) < 0;
}
template <int N, int M>
bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  return !(Compare(rhs, lhs) < 0);
}
template <int N, int M>
bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  return !(Compare(lhs, rhs) < 0);
}
// Streams the decimal representation of `num`. For testing purposes only.
template <int N>
std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
  os << num.ToString();
  return os;
}
// Explicit instantiation declarations for the sizes of BigUnsigned that we
// are using.
//
// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
// still bigger than an int128, and 84 is a large value we will want to use
// in the from_chars implementation.
//
// Comments justifying the use of 84 belong in the from_chars implementation,
// and will be added in a follow-up CL.
extern template class BigUnsigned<4>;
extern template class BigUnsigned<84>;
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_

View file

@ -0,0 +1,203 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <string>
#include "gtest/gtest.h"
namespace absl {
namespace strings_internal {
TEST(BigUnsigned, ShiftLeft) {
  {
    // 3 * 2**100 must be computed exactly.
    BigUnsigned<4> three(3u);
    three.ShiftLeft(100);
    EXPECT_EQ(three, BigUnsigned<4>("3802951800684688204490109616128"));
  }
  {
    // Overflow must truncate. BigUnsigned<4> holds 128 bits and 15 is four
    // bits wide, so shifting by 125 pushes its top bit off the end:
    //   15 << 125 == 7 << 125
    // after truncation, while 3 << 125 stays distinct.
    const int shift = 125;
    BigUnsigned<4> fifteen(15u);
    BigUnsigned<4> seven(7u);
    BigUnsigned<4> three(3u);
    fifteen.ShiftLeft(shift);
    seven.ShiftLeft(shift);
    three.ShiftLeft(shift);
    EXPECT_EQ(fifteen, seven);
    EXPECT_NE(fifteen, three);
  }
  {
    // The same truncation check on the larger bigint.
    const int shift = 84 * 32 - 3;
    BigUnsigned<84> fifteen(15u);
    BigUnsigned<84> seven(7u);
    BigUnsigned<84> three(3u);
    fifteen.ShiftLeft(shift);
    seven.ShiftLeft(shift);
    three.ShiftLeft(shift);
    EXPECT_EQ(fifteen, seven);
    EXPECT_NE(fifteen, three);
  }
  {
    // Shifting one bit at a time must agree with a single shift by the same
    // total (attempting to capture corner cases).
    const std::string seed = "1234567890123456789012345678901234567890";
    BigUnsigned<84> stepwise(seed);
    for (int total = 1; total <= 84 * 32; ++total) {
      stepwise.ShiftLeft(1);
      BigUnsigned<84> at_once(seed);
      at_once.ShiftLeft(total);
      EXPECT_EQ(stepwise, at_once);
    }
    // By now every bit has been shifted off the end.
    EXPECT_EQ(stepwise, BigUnsigned<84>(0u));
  }
}
// Builds 100! by repeated uint32_t multiplication and compares it with the
// decimal literal.
TEST(BigUnsigned, MultiplyByUint32) {
  const BigUnsigned<84> factorial_100(
      "933262154439441526816992388562667004907159682643816214685929638952175999"
      "932299156089414639761565182862536979208272237582511852109168640000000000"
      "00000000000000");
  BigUnsigned<84> product(1u);
  uint32_t factor = 1;
  while (factor <= 100) {
    product.MultiplyBy(factor);
    ++factor;
  }
  EXPECT_EQ(product, BigUnsigned<84>(factorial_100));
}
// Exercises bigint-by-bigint multiplication against values whose decimal
// representation is known in advance.
TEST(BigUnsigned, MultiplyByBigUnsigned) {
  {
    // Split the factors of 200! into the odd ones and the even ones, build a
    // bigint from each group, and multiply the two groups together.
    const BigUnsigned<84> factorial_200(
        "7886578673647905035523632139321850622951359776871732632947425332443594"
        "4996340334292030428401198462390417721213891963883025764279024263710506"
        "1926624952829931113462857270763317237396988943922445621451664240254033"
        "2918641312274282948532775242424075739032403212574055795686602260319041"
        "7032406235170085879617892222278962370389737472000000000000000000000000"
        "0000000000000000000000000");
    BigUnsigned<84> even_product(1u);
    BigUnsigned<84> odd_product(1u);
    for (uint32_t odd = 1; odd < 200; odd += 2) {
      const uint32_t even = odd + 1;
      odd_product.MultiplyBy(odd);
      even_product.MultiplyBy(even);
    }
    even_product.MultiplyBy(odd_product);
    EXPECT_EQ(even_product, factorial_200);
  }
  {
    // (3 * 10**m) * (2 * 10**n) must equal 6 * 10**(m + n); the operands and
    // the expected product are all produced by the decimal string constructor.
    for (int lhs_zeros = 0; lhs_zeros < 700; lhs_zeros += 25) {
      SCOPED_TRACE(lhs_zeros);
      BigUnsigned<84> lhs("3" + std::string(lhs_zeros, '0'));
      const int rhs_zeros_limit = 700 - lhs_zeros;
      for (int rhs_zeros = 0; rhs_zeros < rhs_zeros_limit; rhs_zeros += 25) {
        SCOPED_TRACE(rhs_zeros);
        BigUnsigned<84> rhs("2" + std::string(rhs_zeros, '0'));
        BigUnsigned<84> expected_product(
            "6" + std::string(lhs_zeros + rhs_zeros, '0'));
        rhs.MultiplyBy(lhs);
        EXPECT_EQ(rhs, expected_product);
      }
    }
  }
}
// Checks that a product too large for the bigint is truncated predictably.
TEST(BigUnsigned, MultiplyByOverflow) {
  {
    // Multiplication overflow must wrap modulo 2**128 for BigUnsigned<4>.
    // The literal below is 2**128 - 1 (every bit set), which is congruent to
    // -1 modulo 2**128, so its square must come out as 1.
    BigUnsigned<4> minus_one("340282366920938463463374607431768211455");
    minus_one.MultiplyBy(minus_one);
    EXPECT_EQ(minus_one, BigUnsigned<4>(1u));
  }
  {
    // Multiplying a large value by 2**50 must give the same (truncated)
    // result as shifting it left by 50 bits.
    constexpr int kShift = 50;
    BigUnsigned<4> via_shift("12345678901234567890123456789012345678");
    BigUnsigned<4> via_multiply("12345678901234567890123456789012345678");
    BigUnsigned<4> power_of_two(1u);
    power_of_two.ShiftLeft(kShift);
    via_shift.ShiftLeft(kShift);
    via_multiply.MultiplyBy(power_of_two);
    EXPECT_EQ(via_shift, via_multiply);
  }
}
// Cross-checks the two power-of-five helpers against simpler computations.
TEST(BigUnsigned, FiveToTheNth) {
  constexpr int kPowerLimit = 1160;
  {
    // MultiplyByFiveToTheNth(n) must match n successive multiplications by 5,
    // up to and including overflow.
    for (int power = 0; power < kPowerLimit; ++power) {
      SCOPED_TRACE(power);
      BigUnsigned<84> in_one_call(123u);
      BigUnsigned<84> step_by_step(123u);
      in_one_call.MultiplyByFiveToTheNth(power);
      for (int step = 0; step < power; ++step) {
        step_by_step.MultiplyBy(5u);
      }
      EXPECT_EQ(in_one_call, step_by_step);
    }
  }
  {
    // The faster, table-lookup-based static FiveToTheNth(n) must return the
    // same value as multiplying 1 in place, up to and including overflow.
    for (int power = 0; power < kPowerLimit; ++power) {
      SCOPED_TRACE(power);
      BigUnsigned<84> in_place(1u);
      in_place.MultiplyByFiveToTheNth(power);
      BigUnsigned<84> from_static = BigUnsigned<84>::FiveToTheNth(power);
      EXPECT_EQ(in_place, from_static);
    }
  }
}
// Cross-checks MultiplyByTenToTheNth against repeated multiplication and
// against the decimal string constructor.
TEST(BigUnsigned, TenToTheNth) {
  {
    // MultiplyByTenToTheNth(n) must match n successive multiplications by 10.
    for (int power = 0; power < 800; ++power) {
      SCOPED_TRACE(power);
      BigUnsigned<84> in_one_call(123u);
      BigUnsigned<84> step_by_step(123u);
      in_one_call.MultiplyByTenToTheNth(power);
      for (int step = 0; step < power; ++step) {
        step_by_step.MultiplyBy(10u);
      }
      EXPECT_EQ(in_one_call, step_by_step);
    }
  }
  {
    // Second approach: appending n zeros to the decimal text of 135 and
    // parsing it must give the same value as 135 * 10**n.
    for (int power = 0; power < 200; ++power) {
      SCOPED_TRACE(power);
      BigUnsigned<84> multiplied(135u);
      multiplied.MultiplyByTenToTheNth(power);
      BigUnsigned<84> parsed("135" + std::string(power, '0'));
      EXPECT_EQ(multiplied, parsed);
    }
  }
}
} // namespace strings_internal
} // namespace absl

View file

@ -0,0 +1,496 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/charconv.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include "absl/strings/internal/memutil.h"
namespace absl {
namespace {
// Compile-time limits used by the parser below, together with the static
// assertions that justify each chosen value.
//
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
// This number was chosen for multiple reasons.
//
// (a) First, for whatever integer type we choose to represent the mantissa, we
// want to choose the largest possible number of decimal digits for that integer
// type. We are using uint64_t, which can express any 19-digit unsigned
// integer.
//
// (b) Second, we need to parse enough digits that the binary value of any
// mantissa we capture has more bits of resolution than the mantissa
// representation in the target float. Our algorithm requires at least 3 bits
// of headway, but 19 decimal digits give a little more than that.
//
// The following static assertions verify the above comments:
constexpr int kDecimalMantissaDigitsMax = 19;
static_assert(std::numeric_limits<uint64_t>::digits10 ==
kDecimalMantissaDigitsMax,
"(a) above");
// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact");
static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact");
// The lowest valued 19-digit decimal mantissa we can read still contains
// sufficient information to reconstruct a binary mantissa.
static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above");
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
//
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
// to maximize the number of scanned hex digits to improve our conversion. What
// is required is to scan two more bits than the mantissa can represent, so that
// we always round correctly.
//
// (One extra bit does not suffice to perform correct rounding, since a number
// exactly halfway between two representable floats has unique rounding rules,
// so we need to differentiate between a "halfway between" number and a "closer
// to the larger value" number.)
constexpr int kHexadecimalMantissaDigitsMax = 15;
// The minimum number of significant bits that will be read from
// kHexadecimalMantissaDigitsMax hex digits. We must subtract three, since
// the most significant digit can be a "1", which only contributes a single
// significant bit.
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
4 * kHexadecimalMantissaDigitsMax - 3;
static_assert(kGuaranteedHexadecimalMantissaBitPrecision >
std::numeric_limits<double>::digits + 2,
"kHexadecimalMantissaDigitsMax too small");
// We also impose a limit on the number of significant digits we will read from
// an exponent, to avoid having to deal with integer overflow. We use 9 for
// this purpose.
//
// If we read a 9 digit exponent, the end result of the conversion will
// necessarily be infinity or zero, depending on the sign of the exponent.
// Therefore we can just drop extra digits on the floor without any extra
// logic.
constexpr int kDecimalExponentDigitsMax = 9;
static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax,
"int type too small");
// To avoid incredibly large inputs causing integer overflow for our exponent,
// we impose an arbitrary but very large limit on the number of significant
// digits we will accept. The implementation refuses to match a string with
// more consecutive significant mantissa digits than this.
constexpr int kDecimalDigitLimit = 50000000;
// Corresponding limit for hexadecimal digit inputs. This is one fourth the
// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires
// a binary exponent adjustment of 4.
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;
// The largest exponent we can read is 999999999 (per
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
// comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point. (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
static_assert(999999999 + 2 * kDecimalDigitLimit <
std::numeric_limits<int>::max(),
"int type too small");
static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) <
std::numeric_limits<int>::max(),
"int type too small");
// Reports whether `flags` permits an exponent suffix (e.g., "1.5e100").
//
// An exponent is accepted whenever the `scientific` bit is set, and also when
// the `fixed` bit is not set; it is rejected only when `fixed` is set without
// `scientific`.
bool AllowExponent(chars_format flags) {
  const bool has_scientific =
      (flags & chars_format::scientific) == chars_format::scientific;
  if (has_scientific) {
    return true;
  }
  const bool has_fixed = (flags & chars_format::fixed) == chars_format::fixed;
  return !has_fixed;
}
// Reports whether `flags` makes an exponent suffix mandatory.
//
// That is the case only when the `scientific` bit is set and the `fixed` bit
// is not.
bool RequireExponent(chars_format flags) {
  const bool has_fixed = (flags & chars_format::fixed) == chars_format::fixed;
  if (has_fixed) {
    return false;
  }
  return (flags & chars_format::scientific) == chars_format::scientific;
}
// Lookup table from a character code (used as an unsigned index) to the value
// of that character as a hexadecimal digit, or -1 when the character is not a
// hexadecimal digit.  Upper- and lower-case letters map to the same values.
//
// The table is laid out sixteen entries per row, so row R holds the character
// codes 0xR0 through 0xRf.
const int8_t kAsciiToInt[256] = {
    // 0x00 - 0x0f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x30 - 0x3f: '0' through '9'
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4f: 'A' through 'F'
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x60 - 0x6f: 'a' through 'f'
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x80 - 0x8f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x90 - 0x9f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xa0 - 0xaf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xb0 - 0xbf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xc0 - 0xcf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xd0 - 0xdf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xe0 - 0xef
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xf0 - 0xff
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
// Per-base helper templates.  Only the primary templates are declared here;
// explicit specializations for base 10 and base 16 follow in this file.

// Returns true if `ch` is a digit in the given base
template <int base>
bool IsDigit(char ch);
// Converts a valid `ch` to its digit value in the given base.
// The result is only meaningful when IsDigit<base>(ch) is true.
template <int base>
unsigned ToDigit(char ch);
// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);
// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();
// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();
// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();
// Decimal digits are exactly the characters '0' through '9'.
template <>
bool IsDigit<10>(char ch) {
  return '0' <= ch && ch <= '9';
}

// A character is a hexadecimal digit when kAsciiToInt holds a non-negative
// value for it.
template <>
bool IsDigit<16>(char ch) {
  const int8_t value = kAsciiToInt[static_cast<unsigned char>(ch)];
  return !(value < 0);
}

// Value of a decimal digit character, as its offset from '0'.
template <>
unsigned ToDigit<10>(char ch) {
  return static_cast<unsigned>(ch - '0');
}

// Value of a hexadecimal digit character, taken from kAsciiToInt.
template <>
unsigned ToDigit<16>(char ch) {
  const int8_t value = kAsciiToInt[static_cast<unsigned char>(ch)];
  return static_cast<unsigned>(value);
}

// Decimal floats introduce their exponent with 'e' or 'E'.
template <>
bool IsExponentCharacter<10>(char ch) {
  switch (ch) {
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}

// Hexadecimal floats introduce their exponent with 'p' or 'P'.
template <>
bool IsExponentCharacter<16>(char ch) {
  switch (ch) {
    case 'p':
    case 'P':
      return true;
    default:
      return false;
  }
}

// Number of significant mantissa digits accumulated for decimal input.
template <>
constexpr int MantissaDigitsMax<10>() {
  return kDecimalMantissaDigitsMax;
}

// Number of significant mantissa digits accumulated for hexadecimal input.
template <>
constexpr int MantissaDigitsMax<16>() {
  return kHexadecimalMantissaDigitsMax;
}

// Longest run of decimal digits the parser accepts.
template <>
constexpr int DigitLimit<10>() {
  return kDecimalDigitLimit;
}

// Longest run of hexadecimal digits the parser accepts.
template <>
constexpr int DigitLimit<16>() {
  return kHexadecimalDigitLimit;
}

// One dropped decimal digit shifts the (base 10) exponent by one.
template <>
constexpr int DigitMagnitude<10>() {
  return 1;
}

// One dropped hexadecimal digit shifts the (base 2) exponent by four.
template <>
constexpr int DigitMagnitude<16>() {
  return 4;
}
// Reads digits of the given `base` from [begin, end) and appends them to
// *out.  Returns the number of characters consumed.
//
// When *out is zero on entry, leading '0' characters are consumed (and
// counted in the return value) without counting against `max_digits`: they
// cannot cause overflow and carry no value.  Without this, an input such as
// the exponent "0000000001" would use up the whole digit budget on zeros and
// the trailing "1" would be dropped.
//
// After max_digits has been read, keeps consuming characters, but no longer
// adjusts *out.  If a nonzero digit is dropped this way, *dropped_nonzero_digit
// is set; otherwise, it is left unmodified.  `dropped_nonzero_digit` may be
// null, in which case dropped digits are not reported.
//
// If no digits are matched, returns 0 and leaves *out unchanged.
//
// ConsumeDigits does not protect against overflow on *out; max_digits must
// be chosen with respect to type T to avoid the possibility of overflow.
template <int base, typename T>
std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits,
                          T* out, bool* dropped_nonzero_digit) {
  if (base == 10) {
    assert(max_digits <= std::numeric_limits<T>::digits10);
  } else if (base == 16) {
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
  }
  const char* const original_begin = begin;

  // Skip leading zeros, but only if nothing has been accumulated so far.
  // Once *out is nonzero a '0' is a significant digit and must be counted.
  if (*out == 0) {
    while (begin < end && *begin == '0') {
      ++begin;
    }
  }

  T accumulator = *out;
  const char* significant_digits_end =
      (end - begin > max_digits) ? begin + max_digits : end;
  while (begin < significant_digits_end && IsDigit<base>(*begin)) {
    // Do not guard against *out overflow; max_digits was chosen to avoid this.
    // Do assert against it, to detect problems in debug builds.
    auto digit = static_cast<T>(ToDigit<base>(*begin));
    assert(accumulator * base >= accumulator);
    accumulator *= base;
    assert(accumulator + digit >= accumulator);
    accumulator += digit;
    ++begin;
  }

  // Swallow the remaining digits, remembering whether any of them mattered.
  bool dropped_nonzero = false;
  while (begin < end && IsDigit<base>(*begin)) {
    dropped_nonzero = dropped_nonzero || (*begin != '0');
    ++begin;
  }
  if (dropped_nonzero && dropped_nonzero_digit != nullptr) {
    *dropped_nonzero_digit = true;
  }
  *out = accumulator;
  return static_cast<std::size_t>(begin - original_begin);
}
// Reports whether `v` may appear inside the parentheses that can follow a
// NaN: an underscore, a decimal digit, or an upper- or lower-case letter.
bool IsNanChar(char v) {
  if (v == '_') {
    return true;
  }
  if ('0' <= v && v <= '9') {
    return true;
  }
  if ('a' <= v && v <= 'z') {
    return true;
  }
  return 'A' <= v && v <= 'Z';
}
// Looks for a strtod()-style infinity or NaN at the start of [begin, end).
//
// On a match, fills in out->type and out->end (and, for a NaN followed by a
// parenthesized sequence, out->subrange_begin / out->subrange_end) and returns
// true.  Returns false, leaving *out untouched, when neither is found.
bool ParseInfinityOrNan(const char* begin, const char* end,
                        strings_internal::ParsedFloat* out) {
  // Both "inf" and "nan" are three characters long.
  if (end - begin < 3) {
    return false;
  }

  const char first = *begin;

  if (first == 'i' || first == 'I') {
    // An infinity is spelled "inf" or "infinity", case insensitive.
    if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kInfinity;
    const bool is_long_form =
        end - begin >= 8 &&
        strings_internal::memcasecmp(begin + 3, "inity", 5) == 0;
    out->end = begin + (is_long_form ? 8 : 3);
    return true;
  }

  if (first == 'n' || first == 'N') {
    // A NaN is spelled "nan", case insensitive, optionally followed by a
    // parenthesized run of zero or more characters from [a-zA-Z0-9_].
    if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kNan;
    const char* const after_nan = begin + 3;
    out->end = after_nan;
    if (after_nan < end && *after_nan == '(') {
      const char* const payload_begin = after_nan + 1;
      const char* cursor = payload_begin;
      while (cursor < end && IsNanChar(*cursor)) {
        ++cursor;
      }
      // Only a properly closed sequence is consumed; otherwise the match
      // stops right after "nan".
      if (cursor < end && *cursor == ')') {
        out->subrange_begin = payload_begin;
        out->subrange_end = cursor;
        out->end = cursor + 1;
      }
    }
    return true;
  }

  return false;
}
} // namespace
namespace strings_internal {
// Parses the floating point number at the start of [begin, end) in the given
// `base` (10 or 16) and returns its decomposed form.
//
// On failure the returned ParsedFloat has `end == nullptr`.  On success `end`
// points one past the last character consumed, `mantissa` holds up to
// MantissaDigitsMax<base>() significant digits, and `exponent` combines the
// literal exponent with the adjustment needed to treat `mantissa` as an
// integer.  `format_flags` controls whether an exponent is allowed and/or
// required (see AllowExponent and RequireExponent).
//
// Digit counts are kept as std::size_t until they have been checked against
// DigitLimit<base>(); only then are they converted to int, so no comparison
// mixes signed and unsigned operands and no narrowing happens implicitly.
template <int base>
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
                                         chars_format format_flags) {
  strings_internal::ParsedFloat result;

  // Exit early if we're given an empty range.
  if (begin == end) return result;

  // Handle the infinity and NaN cases.
  if (ParseInfinityOrNan(begin, end, &result)) {
    return result;
  }

  constexpr std::size_t kDigitLimit =
      static_cast<std::size_t>(DigitLimit<base>());
  constexpr std::size_t kMantissaDigitsMax =
      static_cast<std::size_t>(MantissaDigitsMax<base>());

  const char* const mantissa_begin = begin;
  while (begin < end && *begin == '0') {
    ++begin;  // skip leading zeros
  }
  uint64_t mantissa = 0;

  int exponent_adjustment = 0;
  bool mantissa_is_inexact = false;
  const std::size_t pre_decimal_digits = ConsumeDigits<base>(
      begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
  begin += pre_decimal_digits;
  int digits_left;
  if (pre_decimal_digits >= kDigitLimit) {
    // refuse to parse pathological inputs
    return result;
  } else if (pre_decimal_digits > kMantissaDigitsMax) {
    // We dropped some non-fraction digits on the floor.  Adjust our exponent
    // to compensate.
    exponent_adjustment =
        static_cast<int>(pre_decimal_digits - kMantissaDigitsMax);
    digits_left = 0;
  } else {
    digits_left = static_cast<int>(kMantissaDigitsMax - pre_decimal_digits);
  }

  if (begin < end && *begin == '.') {
    ++begin;
    if (mantissa == 0) {
      // If we haven't seen any nonzero digits yet, keep skipping zeros.  We
      // have to adjust the exponent to reflect the changed place value.
      const char* begin_zeros = begin;
      while (begin < end && *begin == '0') {
        ++begin;
      }
      const std::size_t zeros_skipped =
          static_cast<std::size_t>(begin - begin_zeros);
      if (zeros_skipped >= kDigitLimit) {
        // refuse to parse pathological inputs
        return result;
      }
      exponent_adjustment -= static_cast<int>(zeros_skipped);
    }
    const std::size_t post_decimal_digits = ConsumeDigits<base>(
        begin, end, digits_left, &mantissa, &mantissa_is_inexact);
    begin += post_decimal_digits;

    // Since `mantissa` is an integer, each significant digit we read after
    // the decimal point requires an adjustment to the exponent. "1.23e0" will
    // be stored as `mantissa` == 123 and `exponent` == -2 (that is,
    // "123e-2").
    if (post_decimal_digits >= kDigitLimit) {
      // refuse to parse pathological inputs
      return result;
    } else if (post_decimal_digits > static_cast<std::size_t>(digits_left)) {
      // Only the first `digits_left` fractional digits made it into
      // `mantissa`; the rest were dropped and need no adjustment.
      exponent_adjustment -= digits_left;
    } else {
      exponent_adjustment -= static_cast<int>(post_decimal_digits);
    }
  }

  // If we've found no mantissa whatsoever, this isn't a number.
  if (mantissa_begin == begin) {
    return result;
  }
  // A bare "." doesn't count as a mantissa either.
  if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
    return result;
  }

  if (mantissa_is_inexact) {
    // We dropped significant digits on the floor.  Handle this appropriately.
    if (base == 10) {
      // If we truncated significant decimal digits, store the full range of the
      // mantissa for future big integer math for exact rounding.
      result.subrange_begin = mantissa_begin;
      result.subrange_end = begin;
    } else if (base == 16) {
      // If we truncated hex digits, reflect this fact by setting the low
      // ("sticky") bit.  This allows for correct rounding in all cases.
      mantissa |= 1;
    }
  }
  result.mantissa = mantissa;

  const char* const exponent_begin = begin;
  result.literal_exponent = 0;
  bool found_exponent = false;
  if (AllowExponent(format_flags) && begin < end &&
      IsExponentCharacter<base>(*begin)) {
    bool negative_exponent = false;
    ++begin;
    if (begin < end && *begin == '-') {
      negative_exponent = true;
      ++begin;
    } else if (begin < end && *begin == '+') {
      ++begin;
    }
    const char* const exponent_digits_begin = begin;
    // Exponent is always expressed in decimal, even for hexadecimal floats.
    begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
                               &result.literal_exponent, nullptr);
    if (begin == exponent_digits_begin) {
      // There were no digits where we expected an exponent.  We failed to read
      // an exponent and should not consume the 'e' after all.  Rewind 'begin'.
      // (`found_exponent` is still false here.)
      begin = exponent_begin;
    } else {
      found_exponent = true;
      if (negative_exponent) {
        result.literal_exponent = -result.literal_exponent;
      }
    }
  }

  if (!found_exponent && RequireExponent(format_flags)) {
    // Provided flags required an exponent, but none was found.  This results
    // in a failure to scan.
    return result;
  }

  // Success!
  result.type = strings_internal::FloatType::kNumber;
  if (result.mantissa > 0) {
    result.exponent = result.literal_exponent +
                      (DigitMagnitude<base>() * exponent_adjustment);
  } else {
    // A zero mantissa is reported with a zero exponent regardless of input.
    result.exponent = 0;
  }
  result.end = begin;
  return result;
}

// Explicit instantiations for the two supported bases.
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
                                    chars_format format_flags);
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
                                    chars_format format_flags);
} // namespace strings_internal
} // namespace absl

View file

@ -0,0 +1,96 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#include <cstdint>
#include "absl/strings/charconv.h"
namespace absl {
namespace strings_internal {
// Classifies the value a parse produced: an ordinary number, or one of the
// special values infinity and NaN.
enum class FloatType {
  kNumber,
  kInfinity,
  kNan,
};
// The decomposed parts of a parsed `float` or `double`.
//
// A default-constructed ParsedFloat has `end == nullptr`, which is how a
// failed parse is reported.
struct ParsedFloat {
// Representation of the parsed mantissa, with the decimal point adjusted to
// make it an integer.
//
// During decimal scanning, this contains 19 significant digits worth of
// mantissa value. If digits beyond this point are found, they
// are truncated, and if any of these dropped digits are nonzero, then
// `mantissa` is inexact, and the full mantissa is stored in [subrange_begin,
// subrange_end).
//
// During hexadecimal scanning, this contains 15 significant hex digits worth
// of mantissa value. Digits beyond this point are sticky -- they are
// truncated, but if any dropped digits are nonzero, the low bit of mantissa
// will be set. (This allows for precise rounding, and avoids the need
// to store the full mantissa in [subrange_begin, subrange_end).)
uint64_t mantissa = 0;
// Floating point exponent. This reflects any decimal point adjustments and
// any truncated digits from the mantissa. The absolute value of the parsed
// number is represented by mantissa * (base ** exponent), where base==10 for
// decimal floats, and base==2 for hexadecimal floats.
int exponent = 0;
// The literal exponent value scanned from the input, or 0 if none was
// present. This does not reflect any adjustments applied to mantissa.
int literal_exponent = 0;
// The type of number scanned.
FloatType type = FloatType::kNumber;
// When non-null, [subrange_begin, subrange_end) marks a range of characters
// that require further processing. The meaning is dependent on float type.
// If type == kNumber and this is set, this is a "wide input": the input
// mantissa contained more than 19 digits. The range contains the full
// mantissa. It plus `literal_exponent` need to be examined to find the best
// floating point match.
// If type == kNan and this is set, the range marks the contents of a
// matched parenthesized character region after the NaN.
const char* subrange_begin = nullptr;
const char* subrange_end = nullptr;
// One-past-the-end of the successfully parsed region, or nullptr if no
// matching pattern was found.
const char* end = nullptr;
};
// Read the floating point number in the provided range, and populate
// ParsedFloat accordingly.
//
// format_flags is a bitmask value specifying what patterns this API will match.
// `scientific` and `fixed` are honored per std::from_chars rules
// ([utility.from.chars], C++17): if exactly one of these bits is set, then an
// exponent is required, or disallowed, respectively.
//
// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is
// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat.
template <int base>
ParsedFloat ParseFloat(const char* begin, const char* end,
absl::chars_format format_flags);
// Only the base 10 and base 16 instantiations are declared here as extern
// templates.
extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
absl::chars_format format_flags);
extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
absl::chars_format format_flags);
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_

View file

@ -0,0 +1,357 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include <string>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/str_cat.h"
using absl::chars_format;
using absl::strings_internal::FloatType;
using absl::strings_internal::ParsedFloat;
using absl::strings_internal::ParseFloat;
namespace {
// Checks that the input string `s` is parsed by ParseFloat<base> to the
// expected type, mantissa and exponent.
//
// `s` must contain a '$' character.  It marks the end of the characters that
// should be consumed by the match, and is stripped from the input before it
// is handed to ParseFloat.
//
// If `s` contains '[' and ']' characters, these mark the region of characters
// that should be reported as the "subrange", and are stripped as well.  For
// NaNs, this is the location of the extended NaN string.  For numbers, this is
// the location of the full, over-large mantissa.  The bracket positions are
// recorded before '$' is removed, so the brackets must come before the '$'.
//
// `expected_mantissa` and `expected_exponent` are only compared when the
// parse yields FloatType::kNumber.  `expected_literal_exponent` is only
// compared when it differs from the sentinel default of -999.
template <int base>
void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
                       FloatType expected_type, uint64_t expected_mantissa,
                       int expected_exponent,
                       int expected_literal_exponent = -999) {
  SCOPED_TRACE(s);

  int begin_subrange = -1;
  int end_subrange = -1;
  // If s contains '[' and ']', then strip these characters and set the subrange
  // indices appropriately.
  const std::string::size_type open_bracket_pos = s.find('[');
  if (open_bracket_pos != std::string::npos) {
    begin_subrange = static_cast<int>(open_bracket_pos);
    s.erase(open_bracket_pos, 1);
    // Searched for after '[' is gone, so this index is already relative to the
    // stripped string.
    const std::string::size_type close_bracket_pos = s.find(']');
    ABSL_RAW_CHECK(close_bracket_pos != std::string::npos,
                   "Test input contains [ without matching ]");
    end_subrange = static_cast<int>(close_bracket_pos);
    s.erase(close_bracket_pos, 1);
  }

  const std::string::size_type expected_characters_matched = s.find('$');
  ABSL_RAW_CHECK(expected_characters_matched != std::string::npos,
                 "Input std::string must contain $");
  s.erase(expected_characters_matched, 1);

  ParsedFloat parsed =
      ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);

  EXPECT_NE(parsed.end, nullptr);
  if (parsed.end == nullptr) {
    return;  // The following tests are not useful if we fully failed to parse
  }
  EXPECT_EQ(parsed.type, expected_type);
  if (begin_subrange == -1) {
    EXPECT_EQ(parsed.subrange_begin, nullptr);
    EXPECT_EQ(parsed.subrange_end, nullptr);
  } else {
    EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange);
    EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange);
  }
  if (parsed.type == FloatType::kNumber) {
    EXPECT_EQ(parsed.mantissa, expected_mantissa);
    EXPECT_EQ(parsed.exponent, expected_exponent);
    if (expected_literal_exponent != -999) {
      EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent);
    }
  }
  // Compare as ints on both sides so the assertion does not mix a signed
  // value with the unsigned std::string::size_type.
  const int characters_matched = static_cast<int>(parsed.end - s.data());
  EXPECT_EQ(characters_matched, static_cast<int>(expected_characters_matched));
}
// Check that a given std::string input is parsed to the expected mantissa and
// exponent.
//
// Input std::string `s` must contain a '$' character. It marks the end of the
// characters that were consumed by the match.
template <int base>
void ExpectNumber(std::string s, absl::chars_format format_flags,
uint64_t expected_mantissa, int expected_exponent,
int expected_literal_exponent = -999) {
ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber,
expected_mantissa, expected_exponent,
expected_literal_exponent);
}
// Checks that the input string `s` parses to the special value `type`.
//
// The check runs against both number bases, since infinities and NaNs have
// identical representations in both modes.
void ExpectSpecial(const std::string& s, absl::chars_format format_flags,
                   FloatType type) {
  // ExpectParsedFloat only compares these when the parse yields kNumber.
  const uint64_t placeholder_mantissa = 0;
  const int placeholder_exponent = 0;
  ExpectParsedFloat<10>(s, format_flags, type, placeholder_mantissa,
                        placeholder_exponent);
  ExpectParsedFloat<16>(s, format_flags, type, placeholder_mantissa,
                        placeholder_exponent);
}
// Checks that the input `s` is not matched by ParseFloat<base>, i.e. that the
// returned ParsedFloat has a null `end`.
template <int base>
void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) {
  const char* const first = s.data();
  const char* const last = first + s.size();
  ParsedFloat parsed = ParseFloat<base>(first, last, format_flags);
  EXPECT_EQ(parsed.end, nullptr);
}
// Parses a range of ordinary decimal and hexadecimal inputs and checks the
// resulting mantissa/exponent pair.  In every input the '$' marks where the
// match is expected to end; it is removed before parsing.
TEST(ParseFloat, SimpleValue) {
// Test that various forms of floating point numbers all parse correctly.
// Each decimal input below denotes the same value, 123456789e-3.
ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3);
ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3);
// Hexadecimal counterparts; here the exponent is a power of two.
ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8);
// ExpectNumber does not attempt to drop trailing zeroes.
ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900,
-5);
ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general,
0x1234abcdef000, -20);
// Ensure non-matching characters after a number are ignored, even when they
// look like potentially matching characters.
ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789,
-3);
ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general,
0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8);
// Ensure we can read a full resolution mantissa without overflow.
// (19 decimal digits and 15 hexadecimal digits, respectively.)
ExpectNumber<10>("9999999999999999999$", chars_format::general,
9999999999999999999u, 0);
ExpectNumber<16>("fffffffffffffff$", chars_format::general,
0xfffffffffffffffu, 0);
// Check that zero is consistently read.
// A zero mantissa is always expected with a zero exponent, even when the
// input carries an explicit exponent.
ExpectNumber<10>("0$", chars_format::general, 0, 0);
ExpectNumber<16>("0$", chars_format::general, 0, 0);
ExpectNumber<10>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0.00000000000000000000000000000000e123456$",
chars_format::general, 0, 0);
ExpectNumber<16>("0.00000000000000000000000000000000p123456$",
chars_format::general, 0, 0);
}
TEST(ParseFloat, LargeDecimalMantissa) {
  // ParsedFloat truncates a decimal mantissa after 19 significant digits.
  // These cases verify that:
  //   1) the mantissa is cut down to its first 19 digits,
  //   2) the returned exponent accounts for the dropped significant digits,
  //   3) the literal exponent is reported correctly.
  //
  // If and only if a significant (nonzero) digit is dropped, the entire
  // mantissa is marked as a subrange, in case the exact value is needed to
  // make a rounding decision.  The [ and ] characters in the inputs below show
  // where ParseFloat is expected to mark that subrange; they are not part of
  // the text being parsed.
  const auto kGeneral = chars_format::general;
  // Leading 19 digits shared by the "123456789123456789..." inputs below.
  constexpr auto kKeptDigits = 1234567891234567891;

  // Every dropped digit is zero, so no subrange is marked.
  ExpectNumber<10>("100000000000000000000000000$", kGeneral,
                   1000000000000000000, /* adjusted exponent */ 8);
  ExpectNumber<10>("123456789123456789100000000$", kGeneral, kKeptDigits,
                   /* adjusted exponent */ 8);

  // At least one dropped digit is nonzero, so the whole mantissa is marked.
  ExpectNumber<10>("[123456789123456789123456789]$", kGeneral, kKeptDigits,
                   /* adjusted exponent */ 8, /* literal exponent */ 0);
  ExpectNumber<10>("[123456789123456789100000009]$", kGeneral, kKeptDigits,
                   /* adjusted exponent */ 8, /* literal exponent */ 0);
  ExpectNumber<10>("[123456789123456789120000000]$", kGeneral, kKeptDigits,
                   /* adjusted exponent */ 8, /* literal exponent */ 0);

  // Leading zeroes do not use up any of the 19 significant digits.
  ExpectNumber<10>("[00000000123456789123456789123456789]$", kGeneral,
                   kKeptDigits,
                   /* adjusted exponent */ 8, /* literal exponent */ 0);
  ExpectNumber<10>("00000000123456789123456789100000000$", kGeneral,
                   kKeptDigits, /* adjusted exponent */ 8);

  // Digits truncated after the decimal point must not adjust the exponent
  // any further.
  ExpectNumber<10>("1.234567891234567891e123$", kGeneral, kKeptDigits, 105);
  ExpectNumber<10>("[1.23456789123456789123456789]e123$", kGeneral,
                   kKeptDigits,
                   /* adjusted exponent */ 105, /* literal exponent */ 123);

  // Excess digits are truncated, never rounded.  (The from_chars algorithm in
  // use relies on its guess erring low whenever it errs, so the error
  // introduced here has to be downward.)
  ExpectNumber<10>("[1999999999999999999999]$", kGeneral, 1999999999999999999,
                   /* adjusted exponent */ 3, /* literal exponent */ 0);
}
TEST(ParseFloat, LargeHexadecimalMantissa) {
  // ParsedFloat keeps 15 significant hex digits of mantissa and treats any
  // further digits as sticky.  These cases verify that:
  //   1) the mantissa is truncated to 15 digits,
  //   2) the returned exponent accounts for the dropped significant digits,
  //   3) the low bit of the mantissa is set when a nonzero digit is dropped.
  const auto kGeneral = chars_format::general;
  constexpr auto kOddMantissa = 0x123456789abcdef;
  constexpr auto kEvenMantissa = 0x123456789abcdee;

  ExpectNumber<16>("123456789abcdef123456789abcdef$", kGeneral, kOddMantissa,
                   60);

  // Leading zeroes do not use up any of the 15 significant digits.
  ExpectNumber<16>("000000123456789abcdef123456789abcdef$", kGeneral,
                   kOddMantissa, 60);

  // Digits truncated after the radix point must not adjust the exponent any
  // further.
  ExpectNumber<16>("1.23456789abcdefp100$", kGeneral, kOddMantissa, 44);
  ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$", kGeneral,
                   kOddMantissa, 44);

  // Sticky digit behavior: the low bit is set if and only if some dropped
  // digit is nonzero.  The kept digits here end in an even digit so that the
  // sticky bit is observable.
  ExpectNumber<16>("123456789abcdee123456789abcdee$", kGeneral,
                   kEvenMantissa | 1, 60);
  ExpectNumber<16>("123456789abcdee000000000000001$", kGeneral,
                   kEvenMantissa | 1, 60);
  ExpectNumber<16>("123456789abcdee000000000000000$", kGeneral, kEvenMantissa,
                   60);
}
TEST(ParseFloat, ScientificVsFixed) {
  // Checks how the fixed and scientific format flags change what ParseFloat
  // matches, for both decimal and hexadecimal input.

  // In fixed mode, an exponent is never matched (but the remainder of the
  // number will be matched.)
  ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8);
  ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3);
  ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36);
  ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8);

  // In scientific mode, numbers don't match *unless* they have an exponent.
  ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3);
  // Unsigned decimal input with no exponent: the decimal counterpart of the
  // hexadecimal rejection below, so a failure here can only come from the
  // missing exponent.
  ExpectFailedParse<10>("123456.789$", chars_format::scientific);
  // NOTE(review): this signed spelling may be rejected because of the leading
  // '-' rather than the missing exponent -- confirm whether ParseFloat is
  // meant to accept a sign.  Kept to preserve the existing coverage.
  ExpectFailedParse<10>("-123456.789$", chars_format::scientific);
  ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef,
                   -8);
  ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific);
}
TEST(ParseFloat, Infinity) {
  // Checks which spellings are recognized as infinity under the general
  // format.
  const auto kGeneral = chars_format::general;

  // Inputs that stop short of, or diverge from, "inf" fail to parse in both
  // bases.
  const char* const kRejected[] = {"in", "inx"};
  for (const char* text : kRejected) {
    ExpectFailedParse<10>(text, kGeneral);
    ExpectFailedParse<16>(text, kGeneral);
  }

  // "inf" and "infinity" are accepted in any letter case.
  // NOTE(review): the $ appears to mark where the match is expected to end,
  // with any text after it left unconsumed -- confirm against ExpectSpecial.
  const char* const kAccepted[] = {
      "inf$", "Inf$", "INF$", "inf$inite", "iNfInItY$", "infinity$!!!",
  };
  for (const char* text : kAccepted) {
    ExpectSpecial(text, kGeneral, FloatType::kInfinity);
  }
}
TEST(ParseFloat, NaN) {
  // Checks which spellings are recognized as NaN under the general format,
  // including the optional parenthesized suffix.
  const auto expect_nan = [](const char* text) {
    ExpectSpecial(text, chars_format::general, FloatType::kNan);
  };

  // Inputs that stop short of, or diverge from, "nan" fail to parse in both
  // bases.
  const char* const kRejected[] = {"na", "nah"};
  for (const char* text : kRejected) {
    ExpectFailedParse<10>(text, chars_format::general);
    ExpectFailedParse<16>(text, chars_format::general);
  }

  // "nan" is accepted in any letter case.
  expect_nan("nan$");
  expect_nan("NaN$");
  expect_nan("nAn$");
  expect_nan("NAN$");
  expect_nan("NaN$aNaNaNaNaBatman!");

  // An NaN may be followed by a parenthesized sequence of the characters
  // [a-zA-Z0-9_].  These cases check that the sequence is accepted and that
  // the right characters are grouped.
  //
  // (The [ and ] characters in the patterns below delimit the expected matched
  // subgroup; they are not part of the input passed to ParseFloat.)
  expect_nan("nan([0xabcdef])$");
  expect_nan("nan([0xabcdef])$...");
  expect_nan("nan([0xabcdef])$)...");
  expect_nan("nan([])$");
  expect_nan("nan([aAzZ09_])$");

  // A subgroup containing an illegal character is not matched at all.
  expect_nan("nan$(bad-char)");
  // Neither is a subgroup with no closing parenthesis.
  expect_nan("nan$(0xabcdef");
}
} // namespace