Export of internal Abseil changes.

-- babbb6421068af3831870fd5995444437ace6769 by Derek Mauro <dmauro@google.com>: Rollback of: Make raw_hash_set_test less flaky. Split the timing loop into chunks so that we are less suceptible to antogantistic processes. PiperOrigin-RevId: 243854490 -- a2711f17a712f6d09799bf32363d67526737b486 by CJ Johnson <johnsoncj@google.com>: Relocates IsAtLeastForwardIterator to internal/inlined_vector.h PiperOrigin-RevId: 243846090 -- 6c14cdbeb9a61022c27f8957654f930d8abf2fc1 by Matt Kulukundis <kfm@google.com>: Make raw_hash_set_test less flaky. Split the timing loop into chunks so that we are less suceptible to antogantistic processes. PiperOrigin-RevId: 243824289 -- ee6072a6b6e0ac653622524ceb09db3b9e870f96 by Samuel Benzaquen <sbenza@google.com>: Improve format parser performance. Replace the main switch with a lookup in the existing tag table. Improve the ABI of ConsumeUnboundConversion a little. PiperOrigin-RevId: 243824112 -- 24b9e6476dfa4be8d644359eab8ac6816492f187 by Abseil Team <absl-team@google.com>: Fix DR numbers: 3800 ? 3080, 3801 ? 3081. PiperOrigin-RevId: 243804213 -- 0660404074707e197684f07cc0bffe4a9c35cd2f by Abseil Team <absl-team@google.com>: Internal change. PiperOrigin-RevId: 243757359 -- ba0f5bb9b8584d75c4ffc44ff3cb8c691796ffc6 by Xiaoyi Zhang <zhangxy@google.com>: Consolidate ABSL_INTERNAL_UNALIGNED_* implementation into memcpy. The compiler should be good enough to optimize these operations. See https://github.com/abseil/abseil-cpp/issues/269 for background. PiperOrigin-RevId: 243323941 -- 00853a8756548df7217513c562d604b4ee5c6ab9 by Eric Fiselier <ericwf@google.com>: Reexport memory.h from optional.h for compatibility between libc++ and libstdc++. PiperOrigin-RevId: 243313425 GitOrigin-RevId: babbb6421068af3831870fd5995444437ace6769 Change-Id: Ic53c127ad857a431ad60c98b27cc585fed50a3e3
2019-04-16 12:11:35 -07:00 · 2019-04-16 12:11:35 -07:00 · d902eb869b
commit d902eb869b
parent a02f62f456
10 changed files with 186 additions and 328 deletions
--- a/absl/strings/internal/str_format/bind_test.cc
+++ b/absl/strings/internal/str_format/bind_test.cc
@ -9,16 +9,11 @@ namespace absl {
 namespace str_format_internal {
 namespace {

-template <typename T, size_t N>
-size_t ArraySize(T (&)[N]) {
-  return N;
-}
-
 class FormatBindTest : public ::testing::Test {
 public:
  bool Extract(const char *s, UnboundConversion *props, int *next) const {
-    absl::string_view src = s;
-    return ConsumeUnboundConversion(&src, props, next) && src.empty();
+    return ConsumeUnboundConversion(s, s + strlen(s), props, next) ==
+           s + strlen(s);
  }
 };

--- a/absl/strings/internal/str_format/parser.cc
+++ b/absl/strings/internal/str_format/parser.cc
@ -15,6 +15,44 @@

 namespace absl {
 namespace str_format_internal {
+
+using CC = ConversionChar::Id;
+using LM = LengthMod::Id;
+ABSL_CONST_INIT const ConvTag kTags[256] = {
+    {},    {},    {},    {},    {},    {},    {},    {},     // 00-07
+    {},    {},    {},    {},    {},    {},    {},    {},     // 08-0f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 10-17
+    {},    {},    {},    {},    {},    {},    {},    {},     // 18-1f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 20-27
+    {},    {},    {},    {},    {},    {},    {},    {},     // 28-2f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 30-37
+    {},    {},    {},    {},    {},    {},    {},    {},     // 38-3f
+    {},    CC::A, {},    CC::C, {},    CC::E, CC::F, CC::G,  // @ABCDEFG
+    {},    {},    {},    {},    LM::L, {},    {},    {},     // HIJKLMNO
+    {},    {},    {},    CC::S, {},    {},    {},    {},     // PQRSTUVW
+    CC::X, {},    {},    {},    {},    {},    {},    {},     // XYZ[\]^_
+    {},    CC::a, {},    CC::c, CC::d, CC::e, CC::f, CC::g,  // `abcdefg
+    LM::h, CC::i, LM::j, {},    LM::l, {},    CC::n, CC::o,  // hijklmno
+    CC::p, LM::q, {},    CC::s, LM::t, CC::u, {},    {},     // pqrstuvw
+    CC::x, {},    LM::z, {},    {},    {},    {},    {},     // xyz{|}!
+    {},    {},    {},    {},    {},    {},    {},    {},     // 80-87
+    {},    {},    {},    {},    {},    {},    {},    {},     // 88-8f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 90-97
+    {},    {},    {},    {},    {},    {},    {},    {},     // 98-9f
+    {},    {},    {},    {},    {},    {},    {},    {},     // a0-a7
+    {},    {},    {},    {},    {},    {},    {},    {},     // a8-af
+    {},    {},    {},    {},    {},    {},    {},    {},     // b0-b7
+    {},    {},    {},    {},    {},    {},    {},    {},     // b8-bf
+    {},    {},    {},    {},    {},    {},    {},    {},     // c0-c7
+    {},    {},    {},    {},    {},    {},    {},    {},     // c8-cf
+    {},    {},    {},    {},    {},    {},    {},    {},     // d0-d7
+    {},    {},    {},    {},    {},    {},    {},    {},     // d8-df
+    {},    {},    {},    {},    {},    {},    {},    {},     // e0-e7
+    {},    {},    {},    {},    {},    {},    {},    {},     // e8-ef
+    {},    {},    {},    {},    {},    {},    {},    {},     // f0-f7
+    {},    {},    {},    {},    {},    {},    {},    {},     // f8-ff
+};
+
 namespace {

 bool CheckFastPathSetting(const UnboundConversion& conv) {
@ -36,60 +74,17 @@ bool CheckFastPathSetting(const UnboundConversion& conv) {
  return should_be_basic == conv.flags.basic;
 }

-// Keep a single table for all the conversion chars and length modifiers.
-// We invert the length modifiers to make them negative so that we can easily
-// test for them.
-// Everything else is `none`, which is a negative constant.
-using CC = ConversionChar::Id;
-using LM = LengthMod::Id;
-static constexpr std::int8_t none = -128;
-static constexpr std::int8_t kIds[] = {
-    none,   none,   none,   none,  none,   none,  none,  none,   // 00-07
-    none,   none,   none,   none,  none,   none,  none,  none,   // 08-0f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 10-17
-    none,   none,   none,   none,  none,   none,  none,  none,   // 18-1f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 20-27
-    none,   none,   none,   none,  none,   none,  none,  none,   // 28-2f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 30-37
-    none,   none,   none,   none,  none,   none,  none,  none,   // 38-3f
-    none,   CC::A,  none,   CC::C, none,   CC::E, CC::F, CC::G,  // @ABCDEFG
-    none,   none,   none,   none,  ~LM::L, none,  none,  none,   // HIJKLMNO
-    none,   none,   none,   CC::S, none,   none,  none,  none,   // PQRSTUVW
-    CC::X,  none,   none,   none,  none,   none,  none,  none,   // XYZ[\]^_
-    none,   CC::a,  none,   CC::c, CC::d,  CC::e, CC::f, CC::g,  // `abcdefg
-    ~LM::h, CC::i,  ~LM::j, none,  ~LM::l, none,  CC::n, CC::o,  // hijklmno
-    CC::p,  ~LM::q, none,   CC::s, ~LM::t, CC::u, none,  none,   // pqrstuvw
-    CC::x,  none,   ~LM::z, none,  none,   none,  none,  none,   // xyz{|}~!
-    none,   none,   none,   none,  none,   none,  none,  none,   // 80-87
-    none,   none,   none,   none,  none,   none,  none,  none,   // 88-8f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 90-97
-    none,   none,   none,   none,  none,   none,  none,  none,   // 98-9f
-    none,   none,   none,   none,  none,   none,  none,  none,   // a0-a7
-    none,   none,   none,   none,  none,   none,  none,  none,   // a8-af
-    none,   none,   none,   none,  none,   none,  none,  none,   // b0-b7
-    none,   none,   none,   none,  none,   none,  none,  none,   // b8-bf
-    none,   none,   none,   none,  none,   none,  none,  none,   // c0-c7
-    none,   none,   none,   none,  none,   none,  none,  none,   // c8-cf
-    none,   none,   none,   none,  none,   none,  none,  none,   // d0-d7
-    none,   none,   none,   none,  none,   none,  none,  none,   // d8-df
-    none,   none,   none,   none,  none,   none,  none,  none,   // e0-e7
-    none,   none,   none,   none,  none,   none,  none,  none,   // e8-ef
-    none,   none,   none,   none,  none,   none,  none,  none,   // f0-f7
-    none,   none,   none,   none,  none,   none,  none,  none,   // f8-ff
-};
-
 template <bool is_positional>
-bool ConsumeConversion(string_view *src, UnboundConversion *conv,
-                       int *next_arg) {
-  const char *pos = src->data();
-  const char *const end = pos + src->size();
+const char *ConsumeConversion(const char *pos, const char *const end,
+                              UnboundConversion *conv, int *next_arg) {
+  const char* const original_pos = pos;
  char c;
  // Read the next char into `c` and update `pos`. Returns false if there are
  // no more chars to read.
-#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()        \
-  do {                                                \
-    if (ABSL_PREDICT_FALSE(pos == end)) return false; \
-    c = *pos++;                                       \
+#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
+  do {                                                  \
+    if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
+    c = *pos++;                                         \
  } while (0)

  const auto parse_digits = [&] {
@ -111,10 +106,10 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv,

  if (is_positional) {
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
-    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
+    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
    conv->arg_position = parse_digits();
    assert(conv->arg_position > 0);
-    if (ABSL_PREDICT_FALSE(c != '$')) return false;
+    if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
  }

  ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
@ -129,10 +124,9 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv,
    conv->flags.basic = false;

    for (; c <= '0';) {
-      // FIXME: We might be able to speed this up reusing the kIds lookup table
-      // from above.
-      // It might require changing Flags to be a plain integer where we can |= a
-      // value.
+      // FIXME: We might be able to speed this up reusing the lookup table from
+      // above. It might require changing Flags to be a plain integer where we
+      // can |= a value.
      switch (c) {
        case '-':
          conv->flags.left = true;
@ -160,20 +154,20 @@ flags_done:
      if (c >= '0') {
        int maybe_width = parse_digits();
        if (!is_positional && c == '$') {
-          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return false;
+          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
          // Positional conversion.
          *next_arg = -1;
          conv->flags = Flags();
          conv->flags.basic = true;
-          return ConsumeConversion<true>(src, conv, next_arg);
+          return ConsumeConversion<true>(original_pos, end, conv, next_arg);
        }
        conv->width.set_value(maybe_width);
      } else if (c == '*') {
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
-          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
+          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->width.set_from_arg(parse_digits());
-          if (ABSL_PREDICT_FALSE(c != '$')) return false;
+          if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->width.set_from_arg(++*next_arg);
@ -188,9 +182,9 @@ flags_done:
      } else if (c == '*') {
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
-          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
+          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->precision.set_from_arg(parse_digits());
-          if (c != '$') return false;
+          if (c != '$') return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->precision.set_from_arg(++*next_arg);
@ -201,14 +195,14 @@ flags_done:
    }
  }

-  std::int8_t id = kIds[static_cast<unsigned char>(c)];
+  auto tag = GetTagForChar(c);

-  if (id < 0) {
-    if (ABSL_PREDICT_FALSE(id == none)) return false;
+  if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
+    if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;

    // It is a length modifier.
    using str_format_internal::LengthMod;
-    LengthMod length_mod = LengthMod::FromId(static_cast<LM>(~id));
+    LengthMod length_mod = tag.as_length();
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    if (c == 'h' && length_mod.id() == LengthMod::h) {
      conv->length_mod = LengthMod::FromId(LengthMod::hh);
@ -219,25 +213,24 @@ flags_done:
    } else {
      conv->length_mod = length_mod;
    }
-    id = kIds[static_cast<unsigned char>(c)];
-    if (ABSL_PREDICT_FALSE(id < 0)) return false;
+    tag = GetTagForChar(c);
+    if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
  }

  assert(CheckFastPathSetting(*conv));
  (void)(&CheckFastPathSetting);

-  conv->conv = ConversionChar::FromId(static_cast<CC>(id));
+  conv->conv = tag.as_conv();
  if (!is_positional) conv->arg_position = ++*next_arg;
-  *src = string_view(pos, end - pos);
-  return true;
+  return pos;
 }

 }  // namespace

-bool ConsumeUnboundConversion(string_view *src, UnboundConversion *conv,
-                              int *next_arg) {
-  if (*next_arg < 0) return ConsumeConversion<true>(src, conv, next_arg);
-  return ConsumeConversion<false>(src, conv, next_arg);
+const char *ConsumeUnboundConversion(const char *p, const char *end,
+                                     UnboundConversion *conv, int *next_arg) {
+  if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
+  return ConsumeConversion<false>(p, end, conv, next_arg);
 }

 struct ParsedFormatBase::ParsedFormatConsumer {
--- a/absl/strings/internal/str_format/parser.h
+++ b/absl/strings/internal/str_format/parser.h
@ -63,17 +63,45 @@ struct UnboundConversion {
  ConversionChar conv;
 };

-// Consume conversion spec prefix (not including '%') of '*src' if valid.
+// Consume conversion spec prefix (not including '%') of [p, end) if valid.
 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
-// If valid, the front of src is advanced such that src becomes the
-// part following the conversion spec, and the spec part is broken down and
-// returned in 'conv'.
-// If invalid, returns false and leaves 'src' unmodified.
-// For example:
-//   Given "d9", returns "d", and leaves src="9",
-//   Given "!", returns "" and leaves src="!".
-bool ConsumeUnboundConversion(string_view* src, UnboundConversion* conv,
-                              int* next_arg);
+// If valid, it returns the first character following the conversion spec,
+// and the spec part is broken down and returned in 'conv'.
+// If invalid, returns nullptr.
+const char* ConsumeUnboundConversion(const char* p, const char* end,
+                                     UnboundConversion* conv, int* next_arg);
+
+// Helper tag class for the table below.
+// It allows fast `char -> ConversionChar/LengthMod` checking and conversions.
+class ConvTag {
+ public:
+  constexpr ConvTag(ConversionChar::Id id) : tag_(id) {}  // NOLINT
+  // We invert the length modifiers to make them negative so that we can easily
+  // test for them.
+  constexpr ConvTag(LengthMod::Id id) : tag_(~id) {}  // NOLINT
+  // Everything else is -128, which is negative to make is_conv() simpler.
+  constexpr ConvTag() : tag_(-128) {}
+
+  bool is_conv() const { return tag_ >= 0; }
+  bool is_length() const { return tag_ < 0 && tag_ != -128; }
+  ConversionChar as_conv() const {
+    assert(is_conv());
+    return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_));
+  }
+  LengthMod as_length() const {
+    assert(is_length());
+    return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_));
+  }
+
+ private:
+  std::int8_t tag_;
+};
+
+extern const ConvTag kTags[256];
+// Keep a single table for all the conversion chars and length modifiers.
+inline ConvTag GetTagForChar(char c) {
+  return kTags[static_cast<unsigned char>(c)];
+}

 // Parse the format string provided in 'src' and pass the identified items into
 // 'consumer'.
@ -88,51 +116,53 @@ bool ConsumeUnboundConversion(string_view* src, UnboundConversion* conv,
 template <typename Consumer>
 bool ParseFormatString(string_view src, Consumer consumer) {
  int next_arg = 0;
-  while (!src.empty()) {
-    const char* percent =
-        static_cast<const char*>(memchr(src.data(), '%', src.size()));
+  const char* p = src.data();
+  const char* const end = p + src.size();
+  while (p != end) {
+    const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
    if (!percent) {
      // We found the last substring.
-      return consumer.Append(src);
+      return consumer.Append(string_view(p, end - p));
    }
    // We found a percent, so push the text run then process the percent.
-    size_t percent_loc = percent - src.data();
-    if (!consumer.Append(string_view(src.data(), percent_loc))) return false;
-    if (percent + 1 >= src.data() + src.size()) return false;
-
-    UnboundConversion conv;
-
-    switch (percent[1]) {
-      case '%':
-        if (!consumer.Append("%")) return false;
-        src.remove_prefix(percent_loc + 2);
-        continue;
-
-#define PARSER_CASE(ch)                                     \
-  case #ch[0]:                                              \
-    src.remove_prefix(percent_loc + 2);                     \
-    conv.conv = ConversionChar::FromId(ConversionChar::ch); \
-    conv.arg_position = ++next_arg;                         \
-    break;
-        ABSL_CONVERSION_CHARS_EXPAND_(PARSER_CASE, );
-#undef PARSER_CASE
-
-      default:
-        src.remove_prefix(percent_loc + 1);
-        if (!ConsumeUnboundConversion(&src, &conv, &next_arg)) return false;
-        break;
-    }
-    if (next_arg == 0) {
-      // This indicates an error in the format std::string.
-      // The only way to get next_arg == 0 is to have a positional argument
-      // first which sets next_arg to -1 and then a non-positional argument
-      // which does ++next_arg.
-      // Checking here seems to be the cheapeast place to do it.
+    if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
      return false;
    }
-    if (!consumer.ConvertOne(
-            conv, string_view(percent + 1, src.data() - (percent + 1)))) {
-      return false;
+    if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
+
+    auto tag = GetTagForChar(percent[1]);
+    if (tag.is_conv()) {
+      if (ABSL_PREDICT_FALSE(next_arg < 0)) {
+        // This indicates an error in the format std::string.
+        // The only way to get `next_arg < 0` here is to have a positional
+        // argument first which sets next_arg to -1 and then a non-positional
+        // argument.
+        return false;
+      }
+      p = percent + 2;
+
+      // Keep this case separate from the one below.
+      // ConvertOne is more efficient when the compiler can see that the `basic`
+      // flag is set.
+      UnboundConversion conv;
+      conv.conv = tag.as_conv();
+      conv.arg_position = ++next_arg;
+      if (ABSL_PREDICT_FALSE(
+              !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
+        return false;
+      }
+    } else if (percent[1] != '%') {
+      UnboundConversion conv;
+      p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
+      if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
+      if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
+          conv, string_view(percent + 1, p - (percent + 1))))) {
+        return false;
+      }
+    } else {
+      if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
+      p = percent + 2;
+      continue;
    }
  }
  return true;
--- a/absl/strings/internal/str_format/parser_test.cc
+++ b/absl/strings/internal/str_format/parser_test.cc
@ -1,6 +1,8 @@
 #include "absl/strings/internal/str_format/parser.h"

 #include <string.h>
+
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "absl/base/macros.h"

@ -9,6 +11,8 @@ namespace str_format_internal {

 namespace {

+using testing::Pair;
+
 TEST(LengthModTest, Names) {
  struct Expectation {
    int line;
@ -63,20 +67,21 @@ TEST(ConversionCharTest, Names) {

 class ConsumeUnboundConversionTest : public ::testing::Test {
 public:
-  typedef UnboundConversion Props;
-  string_view Consume(string_view* src) {
+  std::pair<string_view, string_view> Consume(string_view src) {
    int next = 0;
-    const char* prev_begin = src->data();
    o = UnboundConversion();  // refresh
-    ConsumeUnboundConversion(src, &o, &next);
-    return {prev_begin, static_cast<size_t>(src->data() - prev_begin)};
+    const char* p = ConsumeUnboundConversion(
+        src.data(), src.data() + src.size(), &o, &next);
+    if (!p) return {{}, src};
+    return {string_view(src.data(), p - src.data()),
+            string_view(p, src.data() + src.size() - p)};
  }

  bool Run(const char *fmt, bool force_positional = false) {
-    string_view src = fmt;
    int next = force_positional ? -1 : 0;
    o = UnboundConversion();  // refresh
-    return ConsumeUnboundConversion(&src, &o, &next) && src.empty();
+    return ConsumeUnboundConversion(fmt, fmt + strlen(fmt), &o, &next) ==
+           fmt + strlen(fmt);
  }
  UnboundConversion o;
 };
@ -104,11 +109,7 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) {
  };
  for (const auto& e : kExpect) {
    SCOPED_TRACE(e.line);
-    string_view src = e.src;
-    EXPECT_EQ(e.src, src);
-    string_view out = Consume(&src);
-    EXPECT_EQ(e.out, out);
-    EXPECT_EQ(e.src_post, src);
+    EXPECT_THAT(Consume(e.src), Pair(e.out, e.src_post));
  }
 }