Export of internal Abseil changes.

--
babbb6421068af3831870fd5995444437ace6769 by Derek Mauro <dmauro@google.com>:

Rollback of:

Make raw_hash_set_test less flaky.

Split the timing loop into chunks so that we are less suceptible to
antogantistic processes.

PiperOrigin-RevId: 243854490

--
a2711f17a712f6d09799bf32363d67526737b486 by CJ Johnson <johnsoncj@google.com>:

Relocates IsAtLeastForwardIterator to internal/inlined_vector.h

PiperOrigin-RevId: 243846090

--
6c14cdbeb9a61022c27f8957654f930d8abf2fc1 by Matt Kulukundis <kfm@google.com>:

Make raw_hash_set_test less flaky.

Split the timing loop into chunks so that we are less suceptible to
antogantistic processes.

PiperOrigin-RevId: 243824289

--
ee6072a6b6e0ac653622524ceb09db3b9e870f96 by Samuel Benzaquen <sbenza@google.com>:

Improve format parser performance.
Replace the main switch with a lookup in the existing tag table.
Improve the ABI of ConsumeUnboundConversion a little.

PiperOrigin-RevId: 243824112

--
24b9e6476dfa4be8d644359eab8ac6816492f187 by Abseil Team <absl-team@google.com>:

Fix DR numbers: 3800 ? 3080, 3801 ? 3081.

PiperOrigin-RevId: 243804213

--
0660404074707e197684f07cc0bffe4a9c35cd2f by Abseil Team <absl-team@google.com>:

Internal change.

PiperOrigin-RevId: 243757359

--
ba0f5bb9b8584d75c4ffc44ff3cb8c691796ffc6 by Xiaoyi Zhang <zhangxy@google.com>:

Consolidate ABSL_INTERNAL_UNALIGNED_* implementation into memcpy.
The compiler should be good enough to optimize these operations.
See https://github.com/abseil/abseil-cpp/issues/269 for background.

PiperOrigin-RevId: 243323941

--
00853a8756548df7217513c562d604b4ee5c6ab9 by Eric Fiselier <ericwf@google.com>:

Reexport memory.h from optional.h for compatibility between libc++ and
libstdc++.

PiperOrigin-RevId: 243313425
GitOrigin-RevId: babbb6421068af3831870fd5995444437ace6769
Change-Id: Ic53c127ad857a431ad60c98b27cc585fed50a3e3
This commit is contained in:
Abseil Team 2019-04-16 12:11:35 -07:00 committed by Alex Strelnikov
parent a02f62f456
commit d902eb869b
10 changed files with 186 additions and 328 deletions

View file

@ -9,16 +9,11 @@ namespace absl {
namespace str_format_internal {
namespace {
template <typename T, size_t N>
size_t ArraySize(T (&)[N]) {
return N;
}
class FormatBindTest : public ::testing::Test {
public:
bool Extract(const char *s, UnboundConversion *props, int *next) const {
absl::string_view src = s;
return ConsumeUnboundConversion(&src, props, next) && src.empty();
return ConsumeUnboundConversion(s, s + strlen(s), props, next) ==
s + strlen(s);
}
};

View file

@ -15,6 +15,44 @@
namespace absl {
namespace str_format_internal {
using CC = ConversionChar::Id;
using LM = LengthMod::Id;
ABSL_CONST_INIT const ConvTag kTags[256] = {
{}, {}, {}, {}, {}, {}, {}, {}, // 00-07
{}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
{}, {}, {}, {}, {}, {}, {}, {}, // 10-17
{}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
{}, {}, {}, {}, {}, {}, {}, {}, // 20-27
{}, {}, {}, {}, {}, {}, {}, {}, // 28-2f
{}, {}, {}, {}, {}, {}, {}, {}, // 30-37
{}, {}, {}, {}, {}, {}, {}, {}, // 38-3f
{}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG
{}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
{}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW
CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
{}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
{}, {}, {}, {}, {}, {}, {}, {}, // 80-87
{}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
{}, {}, {}, {}, {}, {}, {}, {}, // 90-97
{}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
{}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
{}, {}, {}, {}, {}, {}, {}, {}, // a8-af
{}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
{}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
{}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
{}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
{}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
{}, {}, {}, {}, {}, {}, {}, {}, // d8-df
{}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
{}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
{}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
{}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
};
namespace {
bool CheckFastPathSetting(const UnboundConversion& conv) {
@ -36,60 +74,17 @@ bool CheckFastPathSetting(const UnboundConversion& conv) {
return should_be_basic == conv.flags.basic;
}
// Keep a single table for all the conversion chars and length modifiers.
// We invert the length modifiers to make them negative so that we can easily
// test for them.
// Everything else is `none`, which is a negative constant.
using CC = ConversionChar::Id;
using LM = LengthMod::Id;
static constexpr std::int8_t none = -128;
static constexpr std::int8_t kIds[] = {
none, none, none, none, none, none, none, none, // 00-07
none, none, none, none, none, none, none, none, // 08-0f
none, none, none, none, none, none, none, none, // 10-17
none, none, none, none, none, none, none, none, // 18-1f
none, none, none, none, none, none, none, none, // 20-27
none, none, none, none, none, none, none, none, // 28-2f
none, none, none, none, none, none, none, none, // 30-37
none, none, none, none, none, none, none, none, // 38-3f
none, CC::A, none, CC::C, none, CC::E, CC::F, CC::G, // @ABCDEFG
none, none, none, none, ~LM::L, none, none, none, // HIJKLMNO
none, none, none, CC::S, none, none, none, none, // PQRSTUVW
CC::X, none, none, none, none, none, none, none, // XYZ[\]^_
none, CC::a, none, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
~LM::h, CC::i, ~LM::j, none, ~LM::l, none, CC::n, CC::o, // hijklmno
CC::p, ~LM::q, none, CC::s, ~LM::t, CC::u, none, none, // pqrstuvw
CC::x, none, ~LM::z, none, none, none, none, none, // xyz{|}~!
none, none, none, none, none, none, none, none, // 80-87
none, none, none, none, none, none, none, none, // 88-8f
none, none, none, none, none, none, none, none, // 90-97
none, none, none, none, none, none, none, none, // 98-9f
none, none, none, none, none, none, none, none, // a0-a7
none, none, none, none, none, none, none, none, // a8-af
none, none, none, none, none, none, none, none, // b0-b7
none, none, none, none, none, none, none, none, // b8-bf
none, none, none, none, none, none, none, none, // c0-c7
none, none, none, none, none, none, none, none, // c8-cf
none, none, none, none, none, none, none, none, // d0-d7
none, none, none, none, none, none, none, none, // d8-df
none, none, none, none, none, none, none, none, // e0-e7
none, none, none, none, none, none, none, none, // e8-ef
none, none, none, none, none, none, none, none, // f0-f7
none, none, none, none, none, none, none, none, // f8-ff
};
template <bool is_positional>
bool ConsumeConversion(string_view *src, UnboundConversion *conv,
int *next_arg) {
const char *pos = src->data();
const char *const end = pos + src->size();
const char *ConsumeConversion(const char *pos, const char *const end,
UnboundConversion *conv, int *next_arg) {
const char* const original_pos = pos;
char c;
// Read the next char into `c` and update `pos`. Returns false if there are
// no more chars to read.
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
do { \
if (ABSL_PREDICT_FALSE(pos == end)) return false; \
c = *pos++; \
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
do { \
if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
c = *pos++; \
} while (0)
const auto parse_digits = [&] {
@ -111,10 +106,10 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv,
if (is_positional) {
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
conv->arg_position = parse_digits();
assert(conv->arg_position > 0);
if (ABSL_PREDICT_FALSE(c != '$')) return false;
if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
}
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
@ -129,10 +124,9 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv,
conv->flags.basic = false;
for (; c <= '0';) {
// FIXME: We might be able to speed this up reusing the kIds lookup table
// from above.
// It might require changing Flags to be a plain integer where we can |= a
// value.
// FIXME: We might be able to speed this up reusing the lookup table from
// above. It might require changing Flags to be a plain integer where we
// can |= a value.
switch (c) {
case '-':
conv->flags.left = true;
@ -160,20 +154,20 @@ flags_done:
if (c >= '0') {
int maybe_width = parse_digits();
if (!is_positional && c == '$') {
if (ABSL_PREDICT_FALSE(*next_arg != 0)) return false;
if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
// Positional conversion.
*next_arg = -1;
conv->flags = Flags();
conv->flags.basic = true;
return ConsumeConversion<true>(src, conv, next_arg);
return ConsumeConversion<true>(original_pos, end, conv, next_arg);
}
conv->width.set_value(maybe_width);
} else if (c == '*') {
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (is_positional) {
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
conv->width.set_from_arg(parse_digits());
if (ABSL_PREDICT_FALSE(c != '$')) return false;
if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
} else {
conv->width.set_from_arg(++*next_arg);
@ -188,9 +182,9 @@ flags_done:
} else if (c == '*') {
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (is_positional) {
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
conv->precision.set_from_arg(parse_digits());
if (c != '$') return false;
if (c != '$') return nullptr;
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
} else {
conv->precision.set_from_arg(++*next_arg);
@ -201,14 +195,14 @@ flags_done:
}
}
std::int8_t id = kIds[static_cast<unsigned char>(c)];
auto tag = GetTagForChar(c);
if (id < 0) {
if (ABSL_PREDICT_FALSE(id == none)) return false;
if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
// It is a length modifier.
using str_format_internal::LengthMod;
LengthMod length_mod = LengthMod::FromId(static_cast<LM>(~id));
LengthMod length_mod = tag.as_length();
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (c == 'h' && length_mod.id() == LengthMod::h) {
conv->length_mod = LengthMod::FromId(LengthMod::hh);
@ -219,25 +213,24 @@ flags_done:
} else {
conv->length_mod = length_mod;
}
id = kIds[static_cast<unsigned char>(c)];
if (ABSL_PREDICT_FALSE(id < 0)) return false;
tag = GetTagForChar(c);
if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
}
assert(CheckFastPathSetting(*conv));
(void)(&CheckFastPathSetting);
conv->conv = ConversionChar::FromId(static_cast<CC>(id));
conv->conv = tag.as_conv();
if (!is_positional) conv->arg_position = ++*next_arg;
*src = string_view(pos, end - pos);
return true;
return pos;
}
} // namespace
bool ConsumeUnboundConversion(string_view *src, UnboundConversion *conv,
int *next_arg) {
if (*next_arg < 0) return ConsumeConversion<true>(src, conv, next_arg);
return ConsumeConversion<false>(src, conv, next_arg);
const char *ConsumeUnboundConversion(const char *p, const char *end,
UnboundConversion *conv, int *next_arg) {
if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
return ConsumeConversion<false>(p, end, conv, next_arg);
}
struct ParsedFormatBase::ParsedFormatConsumer {

View file

@ -63,17 +63,45 @@ struct UnboundConversion {
ConversionChar conv;
};
// Consume conversion spec prefix (not including '%') of '*src' if valid.
// Consume conversion spec prefix (not including '%') of [p, end) if valid.
// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
// If valid, the front of src is advanced such that src becomes the
// part following the conversion spec, and the spec part is broken down and
// returned in 'conv'.
// If invalid, returns false and leaves 'src' unmodified.
// For example:
// Given "d9", returns "d", and leaves src="9",
// Given "!", returns "" and leaves src="!".
bool ConsumeUnboundConversion(string_view* src, UnboundConversion* conv,
int* next_arg);
// If valid, it returns the first character following the conversion spec,
// and the spec part is broken down and returned in 'conv'.
// If invalid, returns nullptr.
const char* ConsumeUnboundConversion(const char* p, const char* end,
UnboundConversion* conv, int* next_arg);
// Helper tag class for the table below.
// It allows fast `char -> ConversionChar/LengthMod` checking and conversions.
class ConvTag {
public:
constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT
// We invert the length modifiers to make them negative so that we can easily
// test for them.
constexpr ConvTag(LengthMod::Id id) : tag_(~id) {} // NOLINT
// Everything else is -128, which is negative to make is_conv() simpler.
constexpr ConvTag() : tag_(-128) {}
bool is_conv() const { return tag_ >= 0; }
bool is_length() const { return tag_ < 0 && tag_ != -128; }
ConversionChar as_conv() const {
assert(is_conv());
return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_));
}
LengthMod as_length() const {
assert(is_length());
return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_));
}
private:
std::int8_t tag_;
};
extern const ConvTag kTags[256];
// Keep a single table for all the conversion chars and length modifiers.
inline ConvTag GetTagForChar(char c) {
return kTags[static_cast<unsigned char>(c)];
}
// Parse the format string provided in 'src' and pass the identified items into
// 'consumer'.
@ -88,51 +116,53 @@ bool ConsumeUnboundConversion(string_view* src, UnboundConversion* conv,
template <typename Consumer>
bool ParseFormatString(string_view src, Consumer consumer) {
int next_arg = 0;
while (!src.empty()) {
const char* percent =
static_cast<const char*>(memchr(src.data(), '%', src.size()));
const char* p = src.data();
const char* const end = p + src.size();
while (p != end) {
const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
if (!percent) {
// We found the last substring.
return consumer.Append(src);
return consumer.Append(string_view(p, end - p));
}
// We found a percent, so push the text run then process the percent.
size_t percent_loc = percent - src.data();
if (!consumer.Append(string_view(src.data(), percent_loc))) return false;
if (percent + 1 >= src.data() + src.size()) return false;
UnboundConversion conv;
switch (percent[1]) {
case '%':
if (!consumer.Append("%")) return false;
src.remove_prefix(percent_loc + 2);
continue;
#define PARSER_CASE(ch) \
case #ch[0]: \
src.remove_prefix(percent_loc + 2); \
conv.conv = ConversionChar::FromId(ConversionChar::ch); \
conv.arg_position = ++next_arg; \
break;
ABSL_CONVERSION_CHARS_EXPAND_(PARSER_CASE, );
#undef PARSER_CASE
default:
src.remove_prefix(percent_loc + 1);
if (!ConsumeUnboundConversion(&src, &conv, &next_arg)) return false;
break;
}
if (next_arg == 0) {
// This indicates an error in the format std::string.
// The only way to get next_arg == 0 is to have a positional argument
// first which sets next_arg to -1 and then a non-positional argument
// which does ++next_arg.
// Checking here seems to be the cheapeast place to do it.
if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
return false;
}
if (!consumer.ConvertOne(
conv, string_view(percent + 1, src.data() - (percent + 1)))) {
return false;
if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
auto tag = GetTagForChar(percent[1]);
if (tag.is_conv()) {
if (ABSL_PREDICT_FALSE(next_arg < 0)) {
// This indicates an error in the format std::string.
// The only way to get `next_arg < 0` here is to have a positional
// argument first which sets next_arg to -1 and then a non-positional
// argument.
return false;
}
p = percent + 2;
// Keep this case separate from the one below.
// ConvertOne is more efficient when the compiler can see that the `basic`
// flag is set.
UnboundConversion conv;
conv.conv = tag.as_conv();
conv.arg_position = ++next_arg;
if (ABSL_PREDICT_FALSE(
!consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
return false;
}
} else if (percent[1] != '%') {
UnboundConversion conv;
p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
conv, string_view(percent + 1, p - (percent + 1))))) {
return false;
}
} else {
if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
p = percent + 2;
continue;
}
}
return true;

View file

@ -1,6 +1,8 @@
#include "absl/strings/internal/str_format/parser.h"
#include <string.h>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/macros.h"
@ -9,6 +11,8 @@ namespace str_format_internal {
namespace {
using testing::Pair;
TEST(LengthModTest, Names) {
struct Expectation {
int line;
@ -63,20 +67,21 @@ TEST(ConversionCharTest, Names) {
class ConsumeUnboundConversionTest : public ::testing::Test {
public:
typedef UnboundConversion Props;
string_view Consume(string_view* src) {
std::pair<string_view, string_view> Consume(string_view src) {
int next = 0;
const char* prev_begin = src->data();
o = UnboundConversion(); // refresh
ConsumeUnboundConversion(src, &o, &next);
return {prev_begin, static_cast<size_t>(src->data() - prev_begin)};
const char* p = ConsumeUnboundConversion(
src.data(), src.data() + src.size(), &o, &next);
if (!p) return {{}, src};
return {string_view(src.data(), p - src.data()),
string_view(p, src.data() + src.size() - p)};
}
bool Run(const char *fmt, bool force_positional = false) {
string_view src = fmt;
int next = force_positional ? -1 : 0;
o = UnboundConversion(); // refresh
return ConsumeUnboundConversion(&src, &o, &next) && src.empty();
return ConsumeUnboundConversion(fmt, fmt + strlen(fmt), &o, &next) ==
fmt + strlen(fmt);
}
UnboundConversion o;
};
@ -104,11 +109,7 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) {
};
for (const auto& e : kExpect) {
SCOPED_TRACE(e.line);
string_view src = e.src;
EXPECT_EQ(e.src, src);
string_view out = Consume(&src);
EXPECT_EQ(e.out, out);
EXPECT_EQ(e.src_post, src);
EXPECT_THAT(Consume(e.src), Pair(e.out, e.src_post));
}
}