Initial Commit
This commit is contained in:
commit
c2e7548296
238 changed files with 65475 additions and 0 deletions
154
absl/strings/internal/char_map.h
Normal file
154
absl/strings/internal/char_map.h
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Character Map Class
|
||||
//
|
||||
// A fast, bit-vector map for 8-bit unsigned characters.
|
||||
// This class is useful for non-character purposes as well.
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
|
||||
#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/base/port.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
class Charmap {
|
||||
public:
|
||||
constexpr Charmap() : m_() {}
|
||||
|
||||
// Initializes with a given char*. Note that NUL is not treated as
|
||||
// a terminator, but rather a char to be flicked.
|
||||
Charmap(const char* str, int len) : m_() {
|
||||
while (len--) SetChar(*str++);
|
||||
}
|
||||
|
||||
// Initializes with a given char*. NUL is treated as a terminator
|
||||
// and will not be in the charmap.
|
||||
explicit Charmap(const char* str) : m_() {
|
||||
while (*str) SetChar(*str++);
|
||||
}
|
||||
|
||||
constexpr bool contains(unsigned char c) const {
|
||||
return (m_[c / 64] >> (c % 64)) & 0x1;
|
||||
}
|
||||
|
||||
// Returns true if and only if a character exists in both maps.
|
||||
bool IntersectsWith(const Charmap& c) const {
|
||||
for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) {
|
||||
if ((m_[i] & c.m_[i]) != 0) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsZero() const {
|
||||
for (uint64_t c : m_) {
|
||||
if (c != 0) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Containing only a single specified char.
|
||||
static constexpr Charmap Char(char x) {
|
||||
return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
|
||||
CharMaskForWord(x, 2), CharMaskForWord(x, 3));
|
||||
}
|
||||
|
||||
// Containing all the chars in the C-std::string 's'.
|
||||
// Note that this is expensively recursive because of the C++11 constexpr
|
||||
// formulation. Use only in constexpr initializers.
|
||||
static constexpr Charmap FromString(const char* s) {
|
||||
return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1));
|
||||
}
|
||||
|
||||
// Containing all the chars in the closed interval [lo,hi].
|
||||
static constexpr Charmap Range(char lo, char hi) {
|
||||
return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
|
||||
RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
|
||||
}
|
||||
|
||||
friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
|
||||
return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
|
||||
a.m_[3] & b.m_[3]);
|
||||
}
|
||||
|
||||
friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
|
||||
return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
|
||||
a.m_[3] | b.m_[3]);
|
||||
}
|
||||
|
||||
friend constexpr Charmap operator~(const Charmap& a) {
|
||||
return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
|
||||
}
|
||||
|
||||
private:
|
||||
constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
|
||||
: m_{b0, b1, b2, b3} {}
|
||||
|
||||
static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi,
|
||||
uint64_t word) {
|
||||
return OpenRangeFromZeroForWord(hi + 1, word) &
|
||||
~OpenRangeFromZeroForWord(lo, word);
|
||||
}
|
||||
|
||||
// All the chars in the specified word of the range [0, upper).
|
||||
static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
|
||||
uint64_t word) {
|
||||
return (upper <= 64 * word)
|
||||
? 0
|
||||
: (upper >= 64 * (word + 1))
|
||||
? ~static_cast<uint64_t>(0)
|
||||
: (~static_cast<uint64_t>(0) >> (64 - upper % 64));
|
||||
}
|
||||
|
||||
static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) {
|
||||
return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0;
|
||||
}
|
||||
|
||||
private:
|
||||
void SetChar(unsigned char c) {
|
||||
m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
|
||||
}
|
||||
|
||||
uint64_t m_[4];
|
||||
};
|
||||
|
||||
// Mirror the char-classifying predicates in <cctype>
|
||||
constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }
|
||||
constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }
|
||||
constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }
|
||||
constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); }
|
||||
constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); }
|
||||
constexpr Charmap XDigitCharmap() {
|
||||
return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f');
|
||||
}
|
||||
constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }
|
||||
constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }
|
||||
constexpr Charmap CntrlCharmap() {
|
||||
return Charmap::Range(0, 0x7f) & ~PrintCharmap();
|
||||
}
|
||||
constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }
|
||||
constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); }
|
||||
constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); }
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
|
||||
172
absl/strings/internal/char_map_test.cc
Normal file
172
absl/strings/internal/char_map_test.cc
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/char_map.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cctype>
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr absl::strings_internal::Charmap everything_map =
|
||||
~absl::strings_internal::Charmap();
|
||||
constexpr absl::strings_internal::Charmap nothing_map{};
|
||||
|
||||
TEST(Charmap, AllTests) {
|
||||
const absl::strings_internal::Charmap also_nothing_map("", 0);
|
||||
ASSERT_TRUE(everything_map.contains('\0'));
|
||||
ASSERT_TRUE(!nothing_map.contains('\0'));
|
||||
ASSERT_TRUE(!also_nothing_map.contains('\0'));
|
||||
for (unsigned char ch = 1; ch != 0; ++ch) {
|
||||
ASSERT_TRUE(everything_map.contains(ch));
|
||||
ASSERT_TRUE(!nothing_map.contains(ch));
|
||||
ASSERT_TRUE(!also_nothing_map.contains(ch));
|
||||
}
|
||||
|
||||
const absl::strings_internal::Charmap symbols("&@#@^!@?", 5);
|
||||
ASSERT_TRUE(symbols.contains('&'));
|
||||
ASSERT_TRUE(symbols.contains('@'));
|
||||
ASSERT_TRUE(symbols.contains('#'));
|
||||
ASSERT_TRUE(symbols.contains('^'));
|
||||
ASSERT_TRUE(!symbols.contains('!'));
|
||||
ASSERT_TRUE(!symbols.contains('?'));
|
||||
int cnt = 0;
|
||||
for (unsigned char ch = 1; ch != 0; ++ch)
|
||||
cnt += symbols.contains(ch);
|
||||
ASSERT_EQ(cnt, 4);
|
||||
|
||||
const absl::strings_internal::Charmap lets("^abcde", 3);
|
||||
const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10);
|
||||
const absl::strings_internal::Charmap lets3("fghij\0klmnop");
|
||||
ASSERT_TRUE(lets2.contains('k'));
|
||||
ASSERT_TRUE(!lets3.contains('k'));
|
||||
|
||||
ASSERT_TRUE(symbols.IntersectsWith(lets));
|
||||
ASSERT_TRUE(!lets2.IntersectsWith(lets));
|
||||
ASSERT_TRUE(lets.IntersectsWith(symbols));
|
||||
ASSERT_TRUE(!lets.IntersectsWith(lets2));
|
||||
|
||||
ASSERT_TRUE(nothing_map.IsZero());
|
||||
ASSERT_TRUE(!lets.IsZero());
|
||||
}
|
||||
|
||||
namespace {
|
||||
std::string Members(const absl::strings_internal::Charmap& m) {
|
||||
std::string r;
|
||||
for (size_t i = 0; i < 256; ++i)
|
||||
if (m.contains(i)) r.push_back(i);
|
||||
return r;
|
||||
}
|
||||
|
||||
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
|
||||
// Don't depend on lo<hi. Just increment until lo==hi.
|
||||
std::string s;
|
||||
while (true) {
|
||||
s.push_back(lo);
|
||||
if (lo == hi) break;
|
||||
++lo;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(Charmap, Constexpr) {
|
||||
constexpr absl::strings_internal::Charmap kEmpty = nothing_map;
|
||||
EXPECT_THAT(Members(kEmpty), "");
|
||||
constexpr absl::strings_internal::Charmap kA =
|
||||
absl::strings_internal::Charmap::Char('A');
|
||||
EXPECT_THAT(Members(kA), "A");
|
||||
constexpr absl::strings_internal::Charmap kAZ =
|
||||
absl::strings_internal::Charmap::Range('A', 'Z');
|
||||
EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
||||
constexpr absl::strings_internal::Charmap kIdentifier =
|
||||
absl::strings_internal::Charmap::Range('0', '9') |
|
||||
absl::strings_internal::Charmap::Range('A', 'Z') |
|
||||
absl::strings_internal::Charmap::Range('a', 'z') |
|
||||
absl::strings_internal::Charmap::Char('_');
|
||||
EXPECT_THAT(Members(kIdentifier),
|
||||
"0123456789"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"_"
|
||||
"abcdefghijklmnopqrstuvwxyz");
|
||||
constexpr absl::strings_internal::Charmap kAll = everything_map;
|
||||
for (size_t i = 0; i < 256; ++i) {
|
||||
EXPECT_TRUE(kAll.contains(i)) << i;
|
||||
}
|
||||
constexpr absl::strings_internal::Charmap kHello =
|
||||
absl::strings_internal::Charmap::FromString("Hello, world!");
|
||||
EXPECT_THAT(Members(kHello), " !,Hdelorw");
|
||||
|
||||
// test negation and intersection
|
||||
constexpr absl::strings_internal::Charmap kABC =
|
||||
absl::strings_internal::Charmap::Range('A', 'Z') &
|
||||
~absl::strings_internal::Charmap::Range('D', 'Z');
|
||||
EXPECT_THAT(Members(kABC), "ABC");
|
||||
}
|
||||
|
||||
TEST(Charmap, Range) {
|
||||
// Exhaustive testing takes too long, so test some of the boundaries that
|
||||
// are perhaps going to cause trouble.
|
||||
std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
|
||||
16, 17, 30, 31, 32, 33, 63, 64, 65,
|
||||
127, 128, 129, 223, 224, 225, 254, 255};
|
||||
for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
|
||||
SCOPED_TRACE(*lo);
|
||||
for (auto hi = lo; hi != poi.end(); ++hi) {
|
||||
SCOPED_TRACE(*hi);
|
||||
EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)),
|
||||
ClosedRangeString(*lo, *hi));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool AsBool(int x) { return static_cast<bool>(x); }
|
||||
|
||||
TEST(CharmapCtype, Match) {
|
||||
for (int c = 0; c < 256; ++c) {
|
||||
SCOPED_TRACE(c);
|
||||
SCOPED_TRACE(static_cast<char>(c));
|
||||
EXPECT_EQ(AsBool(std::isupper(c)),
|
||||
absl::strings_internal::UpperCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::islower(c)),
|
||||
absl::strings_internal::LowerCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isdigit(c)),
|
||||
absl::strings_internal::DigitCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isalpha(c)),
|
||||
absl::strings_internal::AlphaCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isalnum(c)),
|
||||
absl::strings_internal::AlnumCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isxdigit(c)),
|
||||
absl::strings_internal::XDigitCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isprint(c)),
|
||||
absl::strings_internal::PrintCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isspace(c)),
|
||||
absl::strings_internal::SpaceCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::iscntrl(c)),
|
||||
absl::strings_internal::CntrlCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isblank(c)),
|
||||
absl::strings_internal::BlankCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::isgraph(c)),
|
||||
absl::strings_internal::GraphCharmap().contains(c));
|
||||
EXPECT_EQ(AsBool(std::ispunct(c)),
|
||||
absl::strings_internal::PunctCharmap().contains(c));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
113
absl/strings/internal/escaping_test_common.inc
Normal file
113
absl/strings/internal/escaping_test_common.inc
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// This test contains common things needed by both escaping_test.cc and
|
||||
// escaping_benchmark.cc.
|
||||
|
||||
namespace {
|
||||
|
||||
struct {
|
||||
absl::string_view plaintext;
|
||||
absl::string_view cyphertext;
|
||||
} const base64_strings[] = {
|
||||
// Some google quotes
|
||||
// Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
|
||||
// (Note that we're testing the websafe encoding, though, so if
|
||||
// you add messages, be sure to run "tr -- '+/' '-_'" on the output)
|
||||
{ "I was always good at math and science, and I never realized "
|
||||
"that was unusual or somehow undesirable. So one of the things "
|
||||
"I care a lot about is helping to remove that stigma, "
|
||||
"to show girls that you can be feminine, you can like the things "
|
||||
"that girls like, but you can also be really good at technology. "
|
||||
"You can be really good at building things."
|
||||
" - Marissa Meyer, Newsweek, 2010-12-22" "\n",
|
||||
|
||||
"SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
|
||||
"bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
|
||||
"ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
|
||||
"YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
|
||||
"b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
|
||||
"a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
|
||||
"c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
|
||||
"ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
|
||||
"ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },
|
||||
|
||||
{ "Typical first year for a new cluster: "
|
||||
"~0.5 overheating "
|
||||
"~1 PDU failure "
|
||||
"~1 rack-move "
|
||||
"~1 network rewiring "
|
||||
"~20 rack failures "
|
||||
"~5 racks go wonky "
|
||||
"~8 network maintenances "
|
||||
"~12 router reloads "
|
||||
"~3 router failures "
|
||||
"~dozens of minor 30-second blips for dns "
|
||||
"~1000 individual machine failures "
|
||||
"~thousands of hard drive failures "
|
||||
"slow disks, bad memory, misconfigured machines, flaky machines, etc."
|
||||
" - Jeff Dean, The Joys of Real Hardware" "\n",
|
||||
|
||||
"VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
|
||||
"ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
|
||||
"b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
|
||||
"bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
|
||||
"cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
|
||||
"bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
|
||||
"bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
|
||||
"ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
|
||||
"YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
|
||||
"ZWFsIEhhcmR3YXJlCg" },
|
||||
|
||||
{ "I'm the head of the webspam team at Google. "
|
||||
"That means that if you type your name into Google and get porn back, "
|
||||
"it's my fault. Unless you're a porn star, in which case porn is a "
|
||||
"completely reasonable response."
|
||||
" - Matt Cutts, Google Plus" "\n",
|
||||
|
||||
"SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
|
||||
"VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
|
||||
"b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
|
||||
"IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
|
||||
"Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
|
||||
"IEdvb2dsZSBQbHVzCg" },
|
||||
|
||||
{ "It will still be a long time before machines approach human intelligence. "
|
||||
"But luckily, machines don't actually have to be intelligent; "
|
||||
"they just have to fake it. Access to a wealth of information, "
|
||||
"combined with a rudimentary decision-making capacity, "
|
||||
"can often be almost as useful. Of course, the results are better yet "
|
||||
"when coupled with intelligence. A reference librarian with access to "
|
||||
"a good search engine is a formidable tool."
|
||||
" - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n",
|
||||
|
||||
"SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
|
||||
"YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
|
||||
"aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
|
||||
"dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
|
||||
"IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
|
||||
"ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
|
||||
"IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
|
||||
"IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
|
||||
"bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
|
||||
"Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
|
||||
"biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
|
||||
"NAo" },
|
||||
|
||||
// Degenerate edge case
|
||||
{ "",
|
||||
"" },
|
||||
};
|
||||
|
||||
} // namespace
|
||||
215
absl/strings/internal/fastmem.h
Normal file
215
absl/strings/internal/fastmem.h
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Fast memory copying and comparison routines.
|
||||
// strings::fastmemcmp_inlined() replaces memcmp()
|
||||
// strings::memcpy_inlined() replaces memcpy()
|
||||
// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0
|
||||
//
|
||||
// strings::*_inlined() routines are inline versions of the
|
||||
// routines exported by this module. Sometimes using the inlined
|
||||
// versions is faster. Measure before using the inlined versions.
|
||||
//
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_
|
||||
#define ABSL_STRINGS_INTERNAL_FASTMEM_H_
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include "absl/base/internal/unaligned_access.h"
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/base/port.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
// Return true if the n bytes at a equal the n bytes at b.
|
||||
// The regions are allowed to overlap.
|
||||
//
|
||||
// The performance is similar to the performance of memcmp(), but faster for
|
||||
// moderately-sized inputs, or inputs that share a common prefix and differ
|
||||
// somewhere in their last 8 bytes. Further optimizations can be added later
|
||||
// if it makes sense to do so. Alternatively, if the compiler & runtime improve
|
||||
// to eliminate the need for this, we can remove it.
|
||||
inline bool memeq(const char* a, const char* b, size_t n) {
|
||||
size_t n_rounded_down = n & ~static_cast<size_t>(7);
|
||||
if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7
|
||||
return memcmp(a, b, n) == 0;
|
||||
}
|
||||
// n >= 8
|
||||
{
|
||||
uint64_t u =
|
||||
ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
|
||||
uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^
|
||||
ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8);
|
||||
if ((u | v) != 0) { // The first or last 8 bytes differ.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// The next line forces n to be a multiple of 8.
|
||||
n = n_rounded_down;
|
||||
if (n >= 80) {
|
||||
// In 2013 or later, this should be fast on long strings.
|
||||
return memcmp(a, b, n) == 0;
|
||||
}
|
||||
// Now force n to be a multiple of 16. Arguably, a "switch" would be smart
|
||||
// here, but there's a difficult-to-evaluate code size vs. speed issue. The
|
||||
// current approach often re-compares some bytes (worst case is if n initially
|
||||
// was 16, 32, 48, or 64), but is fairly short.
|
||||
size_t e = n & 8;
|
||||
a += e;
|
||||
b += e;
|
||||
n -= e;
|
||||
// n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks.
|
||||
while (n > 0) {
|
||||
#ifdef __SSE4_1__
|
||||
__m128i u =
|
||||
_mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b)));
|
||||
if (!_mm_test_all_zeros(u, u)) {
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
uint64_t x =
|
||||
ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
|
||||
uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^
|
||||
ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8);
|
||||
if ((x | y) != 0) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
a += 16;
|
||||
b += 16;
|
||||
n -= 16;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) {
|
||||
const unsigned char* pa = static_cast<const unsigned char*>(va);
|
||||
const unsigned char* pb = static_cast<const unsigned char*>(vb);
|
||||
switch (n) {
|
||||
default:
|
||||
return memcmp(va, vb, n);
|
||||
case 7:
|
||||
if (*pa != *pb) return *pa < *pb ? -1 : +1;
|
||||
++pa;
|
||||
++pb;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
case 6:
|
||||
if (*pa != *pb) return *pa < *pb ? -1 : +1;
|
||||
++pa;
|
||||
++pb;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
case 5:
|
||||
if (*pa != *pb) return *pa < *pb ? -1 : +1;
|
||||
++pa;
|
||||
++pb;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
case 4:
|
||||
if (*pa != *pb) return *pa < *pb ? -1 : +1;
|
||||
++pa;
|
||||
++pb;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
case 3:
|
||||
if (*pa != *pb) return *pa < *pb ? -1 : +1;
|
||||
++pa;
|
||||
++pb;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
case 2:
|
||||
if (*pa != *pb) return *pa < *pb ? -1 : +1;
|
||||
++pa;
|
||||
++pb;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
case 1:
|
||||
if (*pa != *pb) return *pa < *pb ? -1 : +1;
|
||||
ABSL_FALLTHROUGH_INTENDED;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// The standard memcpy operation is slow for variable small sizes.
|
||||
// This implementation inlines the optimal realization for sizes 1 to 16.
|
||||
// To avoid code bloat don't use it in case of not performance-critical spots,
|
||||
// nor when you don't expect very frequent values of size <= 16.
|
||||
inline void memcpy_inlined(char* dst, const char* src, size_t size) {
|
||||
// Compiler inlines code with minimal amount of data movement when third
|
||||
// parameter of memcpy is a constant.
|
||||
switch (size) {
|
||||
case 1:
|
||||
memcpy(dst, src, 1);
|
||||
break;
|
||||
case 2:
|
||||
memcpy(dst, src, 2);
|
||||
break;
|
||||
case 3:
|
||||
memcpy(dst, src, 3);
|
||||
break;
|
||||
case 4:
|
||||
memcpy(dst, src, 4);
|
||||
break;
|
||||
case 5:
|
||||
memcpy(dst, src, 5);
|
||||
break;
|
||||
case 6:
|
||||
memcpy(dst, src, 6);
|
||||
break;
|
||||
case 7:
|
||||
memcpy(dst, src, 7);
|
||||
break;
|
||||
case 8:
|
||||
memcpy(dst, src, 8);
|
||||
break;
|
||||
case 9:
|
||||
memcpy(dst, src, 9);
|
||||
break;
|
||||
case 10:
|
||||
memcpy(dst, src, 10);
|
||||
break;
|
||||
case 11:
|
||||
memcpy(dst, src, 11);
|
||||
break;
|
||||
case 12:
|
||||
memcpy(dst, src, 12);
|
||||
break;
|
||||
case 13:
|
||||
memcpy(dst, src, 13);
|
||||
break;
|
||||
case 14:
|
||||
memcpy(dst, src, 14);
|
||||
break;
|
||||
case 15:
|
||||
memcpy(dst, src, 15);
|
||||
break;
|
||||
case 16:
|
||||
memcpy(dst, src, 16);
|
||||
break;
|
||||
default:
|
||||
memcpy(dst, src, size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_
|
||||
453
absl/strings/internal/fastmem_test.cc
Normal file
453
absl/strings/internal/fastmem_test.cc
Normal file
|
|
@ -0,0 +1,453 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/fastmem.h"
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
#include "base/init_google.h"
|
||||
#include "base/logging.h"
|
||||
#include "testing/base/public/benchmark.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using RandomEngine = std::minstd_rand0;
|
||||
|
||||
void VerifyResults(const int r1, const int r2, const std::string& a,
|
||||
const std::string& b) {
|
||||
CHECK_EQ(a.size(), b.size());
|
||||
if (r1 == 0) {
|
||||
EXPECT_EQ(r2, 0) << a << " " << b;
|
||||
} else if (r1 > 0) {
|
||||
EXPECT_GT(r2, 0) << a << " " << b;
|
||||
} else {
|
||||
EXPECT_LT(r2, 0) << a << " " << b;
|
||||
}
|
||||
if ((r1 == 0) == (r2 == 0)) {
|
||||
EXPECT_EQ(r1 == 0,
|
||||
absl::strings_internal::memeq(a.data(), b.data(), a.size()))
|
||||
<< r1 << " " << a << " " << b;
|
||||
}
|
||||
}
|
||||
|
||||
// Check correctness against glibc's memcmp implementation
|
||||
void CheckSingle(const std::string& a, const std::string& b) {
|
||||
CHECK_EQ(a.size(), b.size());
|
||||
const int r1 = memcmp(a.data(), b.data(), a.size());
|
||||
const int r2 =
|
||||
absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), a.size());
|
||||
VerifyResults(r1, r2, a, b);
|
||||
}
|
||||
|
||||
void GenerateString(size_t len, std::string* s) {
|
||||
s->clear();
|
||||
for (int i = 0; i < len; i++) {
|
||||
*s += ('a' + (i % 26));
|
||||
}
|
||||
}
|
||||
|
||||
void CheckCompare(const std::string& a, const std::string& b) {
|
||||
CheckSingle(a, b);
|
||||
for (int common = 0; common <= 32; common++) {
|
||||
std::string extra;
|
||||
GenerateString(common, &extra);
|
||||
CheckSingle(extra + a, extra + b);
|
||||
CheckSingle(a + extra, b + extra);
|
||||
for (char c1 = 'a'; c1 <= 'c'; c1++) {
|
||||
for (char c2 = 'a'; c2 <= 'c'; c2++) {
|
||||
CheckSingle(extra + c1 + a, extra + c2 + b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(FastCompare, Misc) {
|
||||
CheckCompare("", "");
|
||||
|
||||
CheckCompare("a", "a");
|
||||
CheckCompare("ab", "ab");
|
||||
CheckCompare("abc", "abc");
|
||||
CheckCompare("abcd", "abcd");
|
||||
CheckCompare("abcde", "abcde");
|
||||
|
||||
CheckCompare("a", "x");
|
||||
CheckCompare("ab", "xb");
|
||||
CheckCompare("abc", "xbc");
|
||||
CheckCompare("abcd", "xbcd");
|
||||
CheckCompare("abcde", "xbcde");
|
||||
|
||||
CheckCompare("x", "a");
|
||||
CheckCompare("xb", "ab");
|
||||
CheckCompare("xbc", "abc");
|
||||
CheckCompare("xbcd", "abcd");
|
||||
CheckCompare("xbcde", "abcde");
|
||||
|
||||
CheckCompare("a", "x");
|
||||
CheckCompare("ab", "ax");
|
||||
CheckCompare("abc", "abx");
|
||||
CheckCompare("abcd", "abcx");
|
||||
CheckCompare("abcde", "abcdx");
|
||||
|
||||
CheckCompare("x", "a");
|
||||
CheckCompare("ax", "ab");
|
||||
CheckCompare("abx", "abc");
|
||||
CheckCompare("abcx", "abcd");
|
||||
CheckCompare("abcdx", "abcde");
|
||||
|
||||
for (int len = 0; len < 1000; len++) {
|
||||
std::string p(len, 'z');
|
||||
CheckCompare(p + "x", p + "a");
|
||||
CheckCompare(p + "ax", p + "ab");
|
||||
CheckCompare(p + "abx", p + "abc");
|
||||
CheckCompare(p + "abcx", p + "abcd");
|
||||
CheckCompare(p + "abcdx", p + "abcde");
|
||||
}
|
||||
}
|
||||
|
||||
TEST(FastCompare, TrailingByte) {
|
||||
for (int i = 0; i < 256; i++) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
std::string a(1, i);
|
||||
std::string b(1, j);
|
||||
CheckSingle(a, b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check correctness of memcpy_inlined.
|
||||
void CheckSingleMemcpyInlined(const std::string& a) {
|
||||
std::unique_ptr<char[]> destination(new char[a.size() + 2]);
|
||||
destination[0] = 'x';
|
||||
destination[a.size() + 1] = 'x';
|
||||
absl::strings_internal::memcpy_inlined(destination.get() + 1, a.data(),
|
||||
a.size());
|
||||
CHECK_EQ('x', destination[0]);
|
||||
CHECK_EQ('x', destination[a.size() + 1]);
|
||||
CHECK_EQ(0, memcmp(a.data(), destination.get() + 1, a.size()));
|
||||
}
|
||||
|
||||
TEST(MemCpyInlined, Misc) {
|
||||
CheckSingleMemcpyInlined("");
|
||||
CheckSingleMemcpyInlined("0");
|
||||
CheckSingleMemcpyInlined("012");
|
||||
CheckSingleMemcpyInlined("0123");
|
||||
CheckSingleMemcpyInlined("01234");
|
||||
CheckSingleMemcpyInlined("012345");
|
||||
CheckSingleMemcpyInlined("0123456");
|
||||
CheckSingleMemcpyInlined("01234567");
|
||||
CheckSingleMemcpyInlined("012345678");
|
||||
CheckSingleMemcpyInlined("0123456789");
|
||||
CheckSingleMemcpyInlined("0123456789a");
|
||||
CheckSingleMemcpyInlined("0123456789ab");
|
||||
CheckSingleMemcpyInlined("0123456789abc");
|
||||
CheckSingleMemcpyInlined("0123456789abcd");
|
||||
CheckSingleMemcpyInlined("0123456789abcde");
|
||||
CheckSingleMemcpyInlined("0123456789abcdef");
|
||||
CheckSingleMemcpyInlined("0123456789abcdefg");
|
||||
}
|
||||
|
||||
template <typename Function>
|
||||
inline void CopyLoop(benchmark::State& state, int size, Function func) {
|
||||
char* src = new char[size];
|
||||
char* dst = new char[size];
|
||||
memset(src, 'x', size);
|
||||
memset(dst, 'y', size);
|
||||
for (auto _ : state) {
|
||||
func(dst, src, size);
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
|
||||
CHECK_EQ(dst[0], 'x');
|
||||
delete[] src;
|
||||
delete[] dst;
|
||||
}
|
||||
|
||||
void BM_memcpy(benchmark::State& state) {
|
||||
CopyLoop(state, state.range(0), memcpy);
|
||||
}
|
||||
BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
|
||||
|
||||
void BM_memcpy_inlined(benchmark::State& state) {
|
||||
CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined);
|
||||
}
|
||||
BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20);
|
||||
|
||||
// unaligned memcpy
|
||||
void BM_unaligned_memcpy(benchmark::State& state) {
|
||||
const int n = state.range(0);
|
||||
const int kMaxOffset = 32;
|
||||
char* src = new char[n + kMaxOffset];
|
||||
char* dst = new char[n + kMaxOffset];
|
||||
memset(src, 'x', n + kMaxOffset);
|
||||
int r = 0, i = 0;
|
||||
for (auto _ : state) {
|
||||
memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n);
|
||||
r += dst[0];
|
||||
++i;
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
|
||||
delete[] src;
|
||||
delete[] dst;
|
||||
benchmark::DoNotOptimize(r);
|
||||
}
|
||||
BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
|
||||
|
||||
// memmove worst case: heavy overlap, but not always by the same amount.
|
||||
// Also, the source and destination will often be unaligned.
|
||||
void BM_memmove_worst_case(benchmark::State& state) {
|
||||
const int n = state.range(0);
|
||||
const int32_t kDeterministicSeed = 301;
|
||||
const int kMaxOffset = 32;
|
||||
char* src = new char[n + kMaxOffset];
|
||||
memset(src, 'x', n + kMaxOffset);
|
||||
size_t offsets[64];
|
||||
RandomEngine rng(kDeterministicSeed);
|
||||
std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset);
|
||||
for (size_t& offset : offsets) {
|
||||
offset = random_to_max_offset(rng);
|
||||
}
|
||||
int r = 0, i = 0;
|
||||
for (auto _ : state) {
|
||||
memmove(src + offsets[i], src + offsets[i + 1], n);
|
||||
r += src[0];
|
||||
i = (i + 2) % arraysize(offsets);
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
|
||||
delete[] src;
|
||||
benchmark::DoNotOptimize(r);
|
||||
}
|
||||
BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20);
|
||||
|
||||
// memmove cache-friendly: aligned and overlapping with 4k
|
||||
// between the source and destination addresses.
|
||||
void BM_memmove_cache_friendly(benchmark::State& state) {
|
||||
const int n = state.range(0);
|
||||
char* src = new char[n + 4096];
|
||||
memset(src, 'x', n);
|
||||
int r = 0;
|
||||
while (state.KeepRunningBatch(2)) { // count each memmove as an iteration
|
||||
memmove(src + 4096, src, n);
|
||||
memmove(src, src + 4096, n);
|
||||
r += src[0];
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
|
||||
delete[] src;
|
||||
benchmark::DoNotOptimize(r);
|
||||
}
|
||||
BENCHMARK(BM_memmove_cache_friendly)
|
||||
->Arg(5 * 1024)
|
||||
->Arg(10 * 1024)
|
||||
->Range(16 << 10, 8 << 20);
|
||||
|
||||
// memmove best(?) case: aligned and non-overlapping.
|
||||
void BM_memmove_aligned_non_overlapping(benchmark::State& state) {
|
||||
CopyLoop(state, state.range(0), memmove);
|
||||
}
|
||||
BENCHMARK(BM_memmove_aligned_non_overlapping)
|
||||
->DenseRange(1, 18)
|
||||
->Range(32, 8 << 20);
|
||||
|
||||
// memset speed
|
||||
void BM_memset(benchmark::State& state) {
|
||||
const int n = state.range(0);
|
||||
char* dst = new char[n];
|
||||
int r = 0;
|
||||
for (auto _ : state) {
|
||||
memset(dst, 'x', n);
|
||||
r += dst[0];
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
|
||||
delete[] dst;
|
||||
benchmark::DoNotOptimize(r);
|
||||
}
|
||||
BENCHMARK(BM_memset)->Range(8, 4096 << 10);
|
||||
|
||||
// Bandwidth (vectorization?) test: the ideal generated code will be limited
|
||||
// by memory bandwidth. Even so-so generated code will max out memory bandwidth
|
||||
// on some machines.
|
||||
void BM_membandwidth(benchmark::State& state) {
|
||||
const int n = state.range(0);
|
||||
CHECK_EQ(n % 32, 0); // We will read 32 bytes per iter.
|
||||
char* dst = new char[n];
|
||||
int r = 0;
|
||||
for (auto _ : state) {
|
||||
const uint32_t* p = reinterpret_cast<uint32_t*>(dst);
|
||||
const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n);
|
||||
uint32_t x = 0;
|
||||
while (p < limit) {
|
||||
x += p[0];
|
||||
x += p[1];
|
||||
x += p[2];
|
||||
x += p[3];
|
||||
x += p[4];
|
||||
x += p[5];
|
||||
x += p[6];
|
||||
x += p[7];
|
||||
p += 8;
|
||||
}
|
||||
r += x;
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
|
||||
delete[] dst;
|
||||
benchmark::DoNotOptimize(r);
|
||||
}
|
||||
BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10);
|
||||
|
||||
// Helper for benchmarks. Repeatedly compares two strings that are
|
||||
// either equal or different only in one character. If test_equal_strings
|
||||
// is false then position_to_modify determines where the difference will be.
|
||||
template <typename Function>
|
||||
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop(
|
||||
benchmark::State& state, bool test_equal_strings,
|
||||
std::string::size_type position_to_modify, int size, Function func) {
|
||||
const int kIterMult = 4; // Iteration multiplier for better timing resolution
|
||||
CHECK_GT(size, 0);
|
||||
const bool position_to_modify_is_valid =
|
||||
position_to_modify != std::string::npos && position_to_modify < size;
|
||||
CHECK_NE(position_to_modify_is_valid, test_equal_strings);
|
||||
if (!position_to_modify_is_valid) {
|
||||
position_to_modify = 0;
|
||||
}
|
||||
std::string sa(size, 'a');
|
||||
std::string sb = sa;
|
||||
char last = sa[size - 1];
|
||||
int num = 0;
|
||||
for (auto _ : state) {
|
||||
for (int i = 0; i < kIterMult; ++i) {
|
||||
sb[position_to_modify] = test_equal_strings ? last : last ^ 1;
|
||||
num += func(sa, sb);
|
||||
}
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
|
||||
benchmark::DoNotOptimize(num);
|
||||
}
|
||||
|
||||
// Helper for benchmarks. Repeatedly compares two memory regions that are
|
||||
// either equal or different only in their final character.
|
||||
template <typename Function>
|
||||
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state,
|
||||
bool test_equal_strings,
|
||||
int size, Function func) {
|
||||
const int kIterMult = 4; // Iteration multiplier for better timing resolution
|
||||
CHECK_GT(size, 0);
|
||||
char* data = static_cast<char*>(malloc(size * 2));
|
||||
memset(data, 'a', size * 2);
|
||||
char* a = data;
|
||||
char* b = data + size;
|
||||
char last = a[size - 1];
|
||||
int num = 0;
|
||||
for (auto _ : state) {
|
||||
for (int i = 0; i < kIterMult; ++i) {
|
||||
b[size - 1] = test_equal_strings ? last : last ^ 1;
|
||||
num += func(a, b, size);
|
||||
}
|
||||
}
|
||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
|
||||
benchmark::DoNotOptimize(num);
|
||||
free(data);
|
||||
}
|
||||
|
||||
void BM_memcmp(benchmark::State& state) {
|
||||
CompareLoop(state, false, state.range(0), memcmp);
|
||||
}
|
||||
BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20);
|
||||
|
||||
void BM_fastmemcmp_inlined(benchmark::State& state) {
|
||||
CompareLoop(state, false, state.range(0),
|
||||
absl::strings_internal::fastmemcmp_inlined);
|
||||
}
|
||||
BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20);
|
||||
|
||||
void BM_memeq(benchmark::State& state) {
|
||||
CompareLoop(state, false, state.range(0), absl::strings_internal::memeq);
|
||||
}
|
||||
BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20);
|
||||
|
||||
void BM_memeq_equal(benchmark::State& state) {
|
||||
CompareLoop(state, true, state.range(0), absl::strings_internal::memeq);
|
||||
}
|
||||
BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20);
|
||||
|
||||
bool StringLess(const std::string& x, const std::string& y) { return x < y; }
|
||||
bool StringEqual(const std::string& x, const std::string& y) { return x == y; }
|
||||
bool StdEqual(const std::string& x, const std::string& y) {
|
||||
return x.size() == y.size() &&
|
||||
std::equal(x.data(), x.data() + x.size(), y.data());
|
||||
}
|
||||
|
||||
// Benchmark for x < y, where x and y are strings that differ in only their
|
||||
// final char. That should be more-or-less the worst case for <.
|
||||
void BM_string_less(benchmark::State& state) {
|
||||
StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
|
||||
StringLess);
|
||||
}
|
||||
BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20);
|
||||
|
||||
// Benchmark for x < y, where x and y are strings that differ in only their
|
||||
// first char. That should be more-or-less the best case for <.
|
||||
void BM_string_less_easy(benchmark::State& state) {
|
||||
StringCompareLoop(state, false, 0, state.range(0), StringLess);
|
||||
}
|
||||
BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20);
|
||||
|
||||
void BM_string_equal(benchmark::State& state) {
|
||||
StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
|
||||
StringEqual);
|
||||
}
|
||||
BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
|
||||
|
||||
void BM_string_equal_equal(benchmark::State& state) {
|
||||
StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual);
|
||||
}
|
||||
BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
|
||||
|
||||
void BM_std_equal(benchmark::State& state) {
|
||||
StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual);
|
||||
}
|
||||
BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
|
||||
|
||||
void BM_std_equal_equal(benchmark::State& state) {
|
||||
StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual);
|
||||
}
|
||||
BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
|
||||
|
||||
void BM_string_equal_unequal_lengths(benchmark::State& state) {
|
||||
const int size = state.range(0);
|
||||
std::string a(size, 'a');
|
||||
std::string b(size + 1, 'a');
|
||||
int count = 0;
|
||||
for (auto _ : state) {
|
||||
b[size - 1] = 'a';
|
||||
count += (a == b);
|
||||
}
|
||||
benchmark::DoNotOptimize(count);
|
||||
}
|
||||
BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
|
||||
|
||||
void BM_stdstring_equal_unequal_lengths(benchmark::State& state) {
|
||||
const int size = state.range(0);
|
||||
std::string a(size, 'a');
|
||||
std::string b(size + 1, 'a');
|
||||
int count = 0;
|
||||
for (auto _ : state) {
|
||||
b[size - 1] = 'a';
|
||||
count += (a == b);
|
||||
}
|
||||
benchmark::DoNotOptimize(count);
|
||||
}
|
||||
BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
|
||||
|
||||
} // namespace
|
||||
110
absl/strings/internal/memutil.cc
Normal file
110
absl/strings/internal/memutil.cc
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/memutil.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
int memcasecmp(const char* s1, const char* s2, size_t len) {
|
||||
const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1);
|
||||
const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2);
|
||||
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
const int diff =
|
||||
int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} -
|
||||
int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))};
|
||||
if (diff != 0) return diff;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
char* memdup(const char* s, size_t slen) {
|
||||
void* copy;
|
||||
if ((copy = malloc(slen)) == nullptr) return nullptr;
|
||||
memcpy(copy, s, slen);
|
||||
return reinterpret_cast<char*>(copy);
|
||||
}
|
||||
|
||||
char* memrchr(const char* s, int c, size_t slen) {
|
||||
for (const char* e = s + slen - 1; e >= s; e--) {
|
||||
if (*e == c) return const_cast<char*>(e);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
size_t memspn(const char* s, size_t slen, const char* accept) {
|
||||
const char* p = s;
|
||||
const char* spanp;
|
||||
char c, sc;
|
||||
|
||||
cont:
|
||||
c = *p++;
|
||||
if (slen-- == 0) return p - 1 - s;
|
||||
for (spanp = accept; (sc = *spanp++) != '\0';)
|
||||
if (sc == c) goto cont;
|
||||
return p - 1 - s;
|
||||
}
|
||||
|
||||
size_t memcspn(const char* s, size_t slen, const char* reject) {
|
||||
const char* p = s;
|
||||
const char* spanp;
|
||||
char c, sc;
|
||||
|
||||
while (slen-- != 0) {
|
||||
c = *p++;
|
||||
for (spanp = reject; (sc = *spanp++) != '\0';)
|
||||
if (sc == c) return p - 1 - s;
|
||||
}
|
||||
return p - s;
|
||||
}
|
||||
|
||||
char* mempbrk(const char* s, size_t slen, const char* accept) {
|
||||
const char* scanp;
|
||||
int sc;
|
||||
|
||||
for (; slen; ++s, --slen) {
|
||||
for (scanp = accept; (sc = *scanp++) != '\0';)
|
||||
if (sc == *s) return const_cast<char*>(s);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// This is significantly faster for case-sensitive matches with very
|
||||
// few possible matches. See unit test for benchmarks.
|
||||
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
|
||||
size_t neelen) {
|
||||
if (0 == neelen) {
|
||||
return phaystack; // even if haylen is 0
|
||||
}
|
||||
if (haylen < neelen) return nullptr;
|
||||
|
||||
const char* match;
|
||||
const char* hayend = phaystack + haylen - neelen + 1;
|
||||
// A static cast is used here to work around the fact that memchr returns
|
||||
// a void* on Posix-compliant systems and const void* on Windows.
|
||||
while ((match = static_cast<const char*>(
|
||||
memchr(phaystack, pneedle[0], hayend - phaystack)))) {
|
||||
if (memcmp(match, pneedle, neelen) == 0)
|
||||
return match;
|
||||
else
|
||||
phaystack = match + 1;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
146
absl/strings/internal/memutil.h
Normal file
146
absl/strings/internal/memutil.h
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
//
|
||||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
// These routines provide mem versions of standard C std::string routines,
|
||||
// such as strpbrk. They function exactly the same as the str versions,
|
||||
// so if you wonder what they are, replace the word "mem" by
|
||||
// "str" and check out the man page. I could return void*, as the
|
||||
// strutil.h mem*() routines tend to do, but I return char* instead
|
||||
// since this is by far the most common way these functions are called.
|
||||
//
|
||||
// The difference between the mem and str versions is the mem version
|
||||
// takes a pointer and a length, rather than a '\0'-terminated std::string.
|
||||
// The memcase* routines defined here assume the locale is "C"
|
||||
// (they use absl::ascii_tolower instead of tolower).
|
||||
//
|
||||
// These routines are based on the BSD library.
|
||||
//
|
||||
// Here's a list of routines from std::string.h, and their mem analogues.
|
||||
// Functions in lowercase are defined in std::string.h; those in UPPERCASE
|
||||
// are defined here:
|
||||
//
|
||||
// strlen --
|
||||
// strcat strncat MEMCAT
|
||||
// strcpy strncpy memcpy
|
||||
// -- memccpy (very cool function, btw)
|
||||
// -- memmove
|
||||
// -- memset
|
||||
// strcmp strncmp memcmp
|
||||
// strcasecmp strncasecmp MEMCASECMP
|
||||
// strchr memchr
|
||||
// strcoll --
|
||||
// strxfrm --
|
||||
// strdup strndup MEMDUP
|
||||
// strrchr MEMRCHR
|
||||
// strspn MEMSPN
|
||||
// strcspn MEMCSPN
|
||||
// strpbrk MEMPBRK
|
||||
// strstr MEMSTR MEMMEM
|
||||
// (g)strcasestr MEMCASESTR MEMCASEMEM
|
||||
// strtok --
|
||||
// strprefix MEMPREFIX (strprefix is from strutil.h)
|
||||
// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
|
||||
// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
|
||||
// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
|
||||
// -- MEMIS
|
||||
// -- MEMCASEIS
|
||||
// strcount MEMCOUNT (strcount is from strutil.h)
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
|
||||
#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
|
||||
#include "absl/base/port.h" // disable some warnings on Windows
|
||||
#include "absl/strings/ascii.h" // for absl::ascii_tolower
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
inline char* memcat(char* dest, size_t destlen, const char* src,
|
||||
size_t srclen) {
|
||||
return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen));
|
||||
}
|
||||
|
||||
int memcasecmp(const char* s1, const char* s2, size_t len);
|
||||
char* memdup(const char* s, size_t slen);
|
||||
char* memrchr(const char* s, int c, size_t slen);
|
||||
size_t memspn(const char* s, size_t slen, const char* accept);
|
||||
size_t memcspn(const char* s, size_t slen, const char* reject);
|
||||
char* mempbrk(const char* s, size_t slen, const char* accept);
|
||||
|
||||
// This is for internal use only. Don't call this directly
|
||||
template <bool case_sensitive>
|
||||
const char* int_memmatch(const char* haystack, size_t haylen,
|
||||
const char* needle, size_t neelen) {
|
||||
if (0 == neelen) {
|
||||
return haystack; // even if haylen is 0
|
||||
}
|
||||
const char* hayend = haystack + haylen;
|
||||
const char* needlestart = needle;
|
||||
const char* needleend = needlestart + neelen;
|
||||
|
||||
for (; haystack < hayend; ++haystack) {
|
||||
char hay = case_sensitive
|
||||
? *haystack
|
||||
: absl::ascii_tolower(static_cast<unsigned char>(*haystack));
|
||||
char nee = case_sensitive
|
||||
? *needle
|
||||
: absl::ascii_tolower(static_cast<unsigned char>(*needle));
|
||||
if (hay == nee) {
|
||||
if (++needle == needleend) {
|
||||
return haystack + 1 - neelen;
|
||||
}
|
||||
} else if (needle != needlestart) {
|
||||
// must back up haystack in case a prefix matched (find "aab" in "aaab")
|
||||
haystack -= needle - needlestart; // for loop will advance one more
|
||||
needle = needlestart;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// These are the guys you can call directly
|
||||
inline const char* memstr(const char* phaystack, size_t haylen,
|
||||
const char* pneedle) {
|
||||
return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle));
|
||||
}
|
||||
|
||||
inline const char* memcasestr(const char* phaystack, size_t haylen,
|
||||
const char* pneedle) {
|
||||
return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle));
|
||||
}
|
||||
|
||||
inline const char* memmem(const char* phaystack, size_t haylen,
|
||||
const char* pneedle, size_t needlelen) {
|
||||
return int_memmatch<true>(phaystack, haylen, pneedle, needlelen);
|
||||
}
|
||||
|
||||
inline const char* memcasemem(const char* phaystack, size_t haylen,
|
||||
const char* pneedle, size_t needlelen) {
|
||||
return int_memmatch<false>(phaystack, haylen, pneedle, needlelen);
|
||||
}
|
||||
|
||||
// This is significantly faster for case-sensitive matches with very
|
||||
// few possible matches. See unit test for benchmarks.
|
||||
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
|
||||
size_t neelen);
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_
|
||||
180
absl/strings/internal/memutil_test.cc
Normal file
180
absl/strings/internal/memutil_test.cc
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Unit test for memutil.cc
|
||||
|
||||
#include "absl/strings/internal/memutil.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "absl/strings/ascii.h"
|
||||
|
||||
namespace {
|
||||
|
||||
static char* memcasechr(const char* s, int c, size_t slen) {
|
||||
c = absl::ascii_tolower(c);
|
||||
for (; slen; ++s, --slen) {
|
||||
if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static const char* memcasematch(const char* phaystack, size_t haylen,
|
||||
const char* pneedle, size_t neelen) {
|
||||
if (0 == neelen) {
|
||||
return phaystack; // even if haylen is 0
|
||||
}
|
||||
if (haylen < neelen) return nullptr;
|
||||
|
||||
const char* match;
|
||||
const char* hayend = phaystack + haylen - neelen + 1;
|
||||
while ((match = static_cast<char*>(
|
||||
memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
|
||||
if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
|
||||
return match;
|
||||
else
|
||||
phaystack = match + 1;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
TEST(MemUtilTest, AllTests) {
|
||||
// check memutil functions
|
||||
char a[1000];
|
||||
absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
|
||||
absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
|
||||
|
||||
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
|
||||
sizeof("hello there") - 1),
|
||||
0);
|
||||
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
|
||||
sizeof("hello there") - 1),
|
||||
-1);
|
||||
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
|
||||
sizeof("hello there") - 2),
|
||||
0);
|
||||
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
|
||||
|
||||
char* p = absl::strings_internal::memdup("hello", 5);
|
||||
free(p);
|
||||
|
||||
p = absl::strings_internal::memrchr("hello there", 'e',
|
||||
sizeof("hello there") - 1);
|
||||
EXPECT_TRUE(p && p[-1] == 'r');
|
||||
p = absl::strings_internal::memrchr("hello there", 'e',
|
||||
sizeof("hello there") - 2);
|
||||
EXPECT_TRUE(p && p[-1] == 'h');
|
||||
p = absl::strings_internal::memrchr("hello there", 'u',
|
||||
sizeof("hello there") - 1);
|
||||
EXPECT_TRUE(p == nullptr);
|
||||
|
||||
int len = absl::strings_internal::memspn("hello there",
|
||||
sizeof("hello there") - 1, "hole");
|
||||
EXPECT_EQ(len, sizeof("hello") - 1);
|
||||
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
|
||||
"u");
|
||||
EXPECT_EQ(len, 0);
|
||||
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
|
||||
"");
|
||||
EXPECT_EQ(len, 0);
|
||||
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
|
||||
"trole h");
|
||||
EXPECT_EQ(len, sizeof("hello there") - 1);
|
||||
len = absl::strings_internal::memspn("hello there!",
|
||||
sizeof("hello there!") - 1, "trole h");
|
||||
EXPECT_EQ(len, sizeof("hello there") - 1);
|
||||
len = absl::strings_internal::memspn("hello there!",
|
||||
sizeof("hello there!") - 2, "trole h!");
|
||||
EXPECT_EQ(len, sizeof("hello there!") - 2);
|
||||
|
||||
len = absl::strings_internal::memcspn("hello there",
|
||||
sizeof("hello there") - 1, "leho");
|
||||
EXPECT_EQ(len, 0);
|
||||
len = absl::strings_internal::memcspn("hello there",
|
||||
sizeof("hello there") - 1, "u");
|
||||
EXPECT_EQ(len, sizeof("hello there") - 1);
|
||||
len = absl::strings_internal::memcspn("hello there",
|
||||
sizeof("hello there") - 1, "");
|
||||
EXPECT_EQ(len, sizeof("hello there") - 1);
|
||||
len = absl::strings_internal::memcspn("hello there",
|
||||
sizeof("hello there") - 1, " ");
|
||||
EXPECT_EQ(len, 5);
|
||||
|
||||
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
|
||||
"leho");
|
||||
EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
|
||||
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
|
||||
"nu");
|
||||
EXPECT_TRUE(p == nullptr);
|
||||
p = absl::strings_internal::mempbrk("hello there!",
|
||||
sizeof("hello there!") - 2, "!");
|
||||
EXPECT_TRUE(p == nullptr);
|
||||
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
|
||||
" t ");
|
||||
EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
|
||||
|
||||
{
|
||||
const char kHaystack[] = "0123456789";
|
||||
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
|
||||
kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
|
||||
kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
|
||||
kHaystack + 7);
|
||||
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
|
||||
kHaystack + 9);
|
||||
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
|
||||
nullptr);
|
||||
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
|
||||
nullptr);
|
||||
}
|
||||
{
|
||||
const char kHaystack[] = "aBcDeFgHiJ";
|
||||
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
|
||||
kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
|
||||
kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
|
||||
kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
|
||||
kHaystack + 7);
|
||||
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
|
||||
kHaystack + 9);
|
||||
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
|
||||
nullptr);
|
||||
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
|
||||
nullptr);
|
||||
}
|
||||
{
|
||||
const char kHaystack[] = "0123456789";
|
||||
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
|
||||
kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
|
||||
kHaystack);
|
||||
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
|
||||
kHaystack + 7);
|
||||
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
|
||||
kHaystack + 9);
|
||||
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
|
||||
nullptr);
|
||||
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
|
||||
nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
166
absl/strings/internal/numbers_test_common.inc
Normal file
166
absl/strings/internal/numbers_test_common.inc
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// This file contains common things needed by numbers_test.cc,
|
||||
// numbers_legacy_test.cc and numbers_benchmark.cc.
|
||||
|
||||
namespace {
|
||||
|
||||
// Previously documented minimum buffer sizes for Fast*ToBuffer functions.
|
||||
// NOTE(edk): These should be deleted and uses replaced with kFastToBufferSize
|
||||
// once existing code has been fixed to use kFastToBufferSize.
|
||||
enum {
|
||||
kFastInt32ToBufferSize = 12,
|
||||
kFastInt64ToBufferSize = 22,
|
||||
kFastUInt32ToBufferSize = 12,
|
||||
kFastUInt64ToBufferSize = 22
|
||||
};
|
||||
|
||||
template <typename IntType>
|
||||
bool Itoa(IntType value, int base, std::string* destination) {
|
||||
destination->clear();
|
||||
if (base <= 1 || base > 36) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (value == 0) {
|
||||
destination->push_back('0');
|
||||
return true;
|
||||
}
|
||||
|
||||
bool negative = value < 0;
|
||||
while (value != 0) {
|
||||
const IntType next_value = value / base;
|
||||
// Can't use std::abs here because of problems when IntType is unsigned.
|
||||
int remainder = value > next_value * base ? value - next_value * base
|
||||
: next_value * base - value;
|
||||
char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10;
|
||||
destination->insert(0, 1, c);
|
||||
value = next_value;
|
||||
}
|
||||
|
||||
if (negative) {
|
||||
destination->insert(0, 1, '-');
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
struct uint32_test_case {
|
||||
const char* str;
|
||||
bool expect_ok;
|
||||
int base; // base to pass to the conversion function
|
||||
uint32_t expected;
|
||||
} const strtouint32_test_cases[] = {
|
||||
{"0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
|
||||
{"0x34234324", true, 16, 0x34234324},
|
||||
{"34234324", true, 16, 0x34234324},
|
||||
{"0", true, 16, 0},
|
||||
{" \t\n 0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
|
||||
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
|
||||
{" \t\n 72717222", true, 8, 072717222},
|
||||
{" \t\n 072717222", true, 8, 072717222},
|
||||
{" \t\n 072717228", false, 8, 07271722},
|
||||
{"0", true, 0, 0},
|
||||
|
||||
// Base-10 version.
|
||||
{"34234324", true, 0, 34234324},
|
||||
{"4294967295", true, 0, std::numeric_limits<uint32_t>::max()},
|
||||
{"34234324 \n\t", true, 10, 34234324},
|
||||
|
||||
// Unusual base
|
||||
{"0", true, 3, 0},
|
||||
{"2", true, 3, 2},
|
||||
{"11", true, 3, 4},
|
||||
|
||||
// Invalid uints.
|
||||
{"", false, 0, 0},
|
||||
{" ", false, 0, 0},
|
||||
{"abc", false, 0, 0}, // would be valid hex, but prefix is missing
|
||||
{"34234324a", false, 0, 34234324},
|
||||
{"34234.3", false, 0, 34234},
|
||||
{"-1", false, 0, 0},
|
||||
{" -123", false, 0, 0},
|
||||
{" \t\n -123", false, 0, 0},
|
||||
|
||||
// Out of bounds.
|
||||
{"4294967296", false, 0, std::numeric_limits<uint32_t>::max()},
|
||||
{"0x100000000", false, 0, std::numeric_limits<uint32_t>::max()},
|
||||
{nullptr, false, 0, 0},
|
||||
};
|
||||
|
||||
struct uint64_test_case {
|
||||
const char* str;
|
||||
bool expect_ok;
|
||||
int base;
|
||||
uint64_t expected;
|
||||
} const strtouint64_test_cases[] = {
|
||||
{"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
|
||||
{"3423432448783446", true, 16, int64_t{0x3423432448783446}},
|
||||
|
||||
{"0", true, 16, 0},
|
||||
{"000", true, 0, 0},
|
||||
{"0", true, 0, 0},
|
||||
{" \t\n 0xffffffffffffffff", true, 16,
|
||||
std::numeric_limits<uint64_t>::max()},
|
||||
|
||||
{"012345670123456701234", true, 8, int64_t{012345670123456701234}},
|
||||
{"12345670123456701234", true, 8, int64_t{012345670123456701234}},
|
||||
|
||||
{"12845670123456701234", false, 8, 0},
|
||||
|
||||
// Base-10 version.
|
||||
{"34234324487834466", true, 0, int64_t{34234324487834466}},
|
||||
|
||||
{" \t\n 18446744073709551615", true, 0,
|
||||
std::numeric_limits<uint64_t>::max()},
|
||||
|
||||
{"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
|
||||
|
||||
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
|
||||
|
||||
// Unusual base
|
||||
{"0", true, 3, 0},
|
||||
{"2", true, 3, 2},
|
||||
{"11", true, 3, 4},
|
||||
|
||||
{"0", true, 0, 0},
|
||||
|
||||
// Invalid uints.
|
||||
{"", false, 0, 0},
|
||||
{" ", false, 0, 0},
|
||||
{"abc", false, 0, 0},
|
||||
{"34234324487834466a", false, 0, 0},
|
||||
{"34234487834466.3", false, 0, 0},
|
||||
{"-1", false, 0, 0},
|
||||
{" -123", false, 0, 0},
|
||||
{" \t\n -123", false, 0, 0},
|
||||
|
||||
// Out of bounds.
|
||||
{"18446744073709551616", false, 10, 0},
|
||||
{"18446744073709551616", false, 0, 0},
|
||||
{"0x10000000000000000", false, 16, std::numeric_limits<uint64_t>::max()},
|
||||
{"0X10000000000000000", false, 16,
|
||||
std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
|
||||
{"0x10000000000000000", false, 0, std::numeric_limits<uint64_t>::max()},
|
||||
{"0X10000000000000000", false, 0,
|
||||
std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
|
||||
|
||||
{"0x1234", true, 16, 0x1234},
|
||||
|
||||
// Base-10 std::string version.
|
||||
{"1234", true, 0, 1234},
|
||||
{nullptr, false, 0, 0},
|
||||
};
|
||||
|
||||
} // namespace
|
||||
97
absl/strings/internal/ostringstream.h
Normal file
97
absl/strings/internal/ostringstream.h
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
|
||||
#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <ostream>
|
||||
#include <streambuf>
|
||||
#include <string>
|
||||
|
||||
#include "absl/base/port.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
// The same as std::ostringstream but appends to a user-specified std::string,
|
||||
// and is faster. It is ~70% faster to create, ~50% faster to write to, and
|
||||
// completely free to extract the result std::string.
|
||||
//
|
||||
// std::string s;
|
||||
// OStringStream strm(&s);
|
||||
// strm << 42 << ' ' << 3.14; // appends to `s`
|
||||
//
|
||||
// The stream object doesn't have to be named. Starting from C++11 operator<<
|
||||
// works with rvalues of std::ostream.
|
||||
//
|
||||
// std::string s;
|
||||
// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s`
|
||||
//
|
||||
// OStringStream is faster to create than std::ostringstream but it's still
|
||||
// relatively slow. Avoid creating multiple streams where a single stream will
|
||||
// do.
|
||||
//
|
||||
// Creates unnecessary instances of OStringStream: slow.
|
||||
//
|
||||
// std::string s;
|
||||
// OStringStream(&s) << 42;
|
||||
// OStringStream(&s) << ' ';
|
||||
// OStringStream(&s) << 3.14;
|
||||
//
|
||||
// Creates a single instance of OStringStream and reuses it: fast.
|
||||
//
|
||||
// std::string s;
|
||||
// OStringStream strm(&s);
|
||||
// strm << 42;
|
||||
// strm << ' ';
|
||||
// strm << 3.14;
|
||||
//
|
||||
// Note: flush() has no effect. No reason to call it.
|
||||
class OStringStream : private std::basic_streambuf<char>, public std::ostream {
|
||||
public:
|
||||
// The argument can be null, in which case you'll need to call str(p) with a
|
||||
// non-null argument before you can write to the stream.
|
||||
//
|
||||
// The destructor of OStringStream doesn't use the std::string. It's OK to destroy
|
||||
// the std::string before the stream.
|
||||
explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {}
|
||||
|
||||
std::string* str() { return s_; }
|
||||
const std::string* str() const { return s_; }
|
||||
void str(std::string* s) { s_ = s; }
|
||||
|
||||
private:
|
||||
using Buf = std::basic_streambuf<char>;
|
||||
|
||||
Buf::int_type overflow(int c = Buf::traits_type::eof()) override {
|
||||
assert(s_);
|
||||
if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof()))
|
||||
s_->push_back(static_cast<char>(c));
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::streamsize xsputn(const char* s, std::streamsize n) override {
|
||||
assert(s_);
|
||||
s_->append(s, n);
|
||||
return n;
|
||||
}
|
||||
|
||||
std::string* s_;
|
||||
};
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
|
||||
103
absl/strings/internal/ostringstream_test.cc
Normal file
103
absl/strings/internal/ostringstream_test.cc
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/ostringstream.h"
|
||||
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
TEST(OStringStream, IsOStream) {
|
||||
static_assert(
|
||||
std::is_base_of<std::ostream, absl::strings_internal::OStringStream>(),
|
||||
"");
|
||||
}
|
||||
|
||||
TEST(OStringStream, ConstructDestroy) {
|
||||
{
|
||||
absl::strings_internal::OStringStream strm(nullptr);
|
||||
EXPECT_EQ(nullptr, strm.str());
|
||||
}
|
||||
{
|
||||
std::string s = "abc";
|
||||
{
|
||||
absl::strings_internal::OStringStream strm(&s);
|
||||
EXPECT_EQ(&s, strm.str());
|
||||
}
|
||||
EXPECT_EQ("abc", s);
|
||||
}
|
||||
{
|
||||
std::unique_ptr<std::string> s(new std::string);
|
||||
absl::strings_internal::OStringStream strm(s.get());
|
||||
s.reset();
|
||||
}
|
||||
}
|
||||
|
||||
TEST(OStringStream, Str) {
|
||||
std::string s1;
|
||||
absl::strings_internal::OStringStream strm(&s1);
|
||||
const absl::strings_internal::OStringStream& c_strm(strm);
|
||||
|
||||
static_assert(std::is_same<decltype(strm.str()), std::string*>(), "");
|
||||
static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), "");
|
||||
|
||||
EXPECT_EQ(&s1, strm.str());
|
||||
EXPECT_EQ(&s1, c_strm.str());
|
||||
|
||||
strm.str(&s1);
|
||||
EXPECT_EQ(&s1, strm.str());
|
||||
EXPECT_EQ(&s1, c_strm.str());
|
||||
|
||||
std::string s2;
|
||||
strm.str(&s2);
|
||||
EXPECT_EQ(&s2, strm.str());
|
||||
EXPECT_EQ(&s2, c_strm.str());
|
||||
|
||||
strm.str(nullptr);
|
||||
EXPECT_EQ(nullptr, strm.str());
|
||||
EXPECT_EQ(nullptr, c_strm.str());
|
||||
}
|
||||
|
||||
TEST(OStreamStream, WriteToLValue) {
|
||||
std::string s = "abc";
|
||||
{
|
||||
absl::strings_internal::OStringStream strm(&s);
|
||||
EXPECT_EQ("abc", s);
|
||||
strm << "";
|
||||
EXPECT_EQ("abc", s);
|
||||
strm << 42;
|
||||
EXPECT_EQ("abc42", s);
|
||||
strm << 'x' << 'y';
|
||||
EXPECT_EQ("abc42xy", s);
|
||||
}
|
||||
EXPECT_EQ("abc42xy", s);
|
||||
}
|
||||
|
||||
TEST(OStreamStream, WriteToRValue) {
|
||||
std::string s = "abc";
|
||||
absl::strings_internal::OStringStream(&s) << "";
|
||||
EXPECT_EQ("abc", s);
|
||||
absl::strings_internal::OStringStream(&s) << 42;
|
||||
EXPECT_EQ("abc42", s);
|
||||
absl::strings_internal::OStringStream(&s) << 'x' << 'y';
|
||||
EXPECT_EQ("abc42xy", s);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
69
absl/strings/internal/resize_uninitialized.h
Normal file
69
absl/strings/internal/resize_uninitialized.h
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
//
|
||||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
|
||||
#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/base/port.h"
|
||||
#include "absl/meta/type_traits.h" // for void_t
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
// Is a subclass of true_type or false_type, depending on whether or not
|
||||
// T has a resize_uninitialized member.
|
||||
template <typename T, typename = void>
|
||||
struct HasResizeUninitialized : std::false_type {};
|
||||
template <typename T>
|
||||
struct HasResizeUninitialized<
|
||||
T, absl::void_t<decltype(std::declval<T>().resize_uninitialized(237))>>
|
||||
: std::true_type {};
|
||||
|
||||
template <typename string_type>
|
||||
void ResizeUninit(string_type* s, size_t new_size, std::true_type) {
|
||||
s->resize_uninitialized(new_size);
|
||||
}
|
||||
template <typename string_type>
|
||||
void ResizeUninit(string_type* s, size_t new_size, std::false_type) {
|
||||
s->resize(new_size);
|
||||
}
|
||||
|
||||
// Returns true if the std::string implementation supports a resize where
|
||||
// the new characters added to the std::string are left untouched.
|
||||
//
|
||||
// (A better name might be "STLStringSupportsUninitializedResize", alluding to
|
||||
// the previous function.)
|
||||
template <typename string_type>
|
||||
inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
|
||||
return HasResizeUninitialized<string_type>();
|
||||
}
|
||||
|
||||
// Like str->resize(new_size), except any new characters added to "*str" as a
|
||||
// result of resizing may be left uninitialized, rather than being filled with
|
||||
// '0' bytes. Typically used when code is then going to overwrite the backing
|
||||
// store of the std::string with known data. Uses a Google extension to std::string.
|
||||
template <typename string_type, typename = void>
|
||||
inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
|
||||
ResizeUninit(s, new_size, HasResizeUninitialized<string_type>());
|
||||
}
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
|
||||
68
absl/strings/internal/resize_uninitialized_test.cc
Normal file
68
absl/strings/internal/resize_uninitialized_test.cc
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/resize_uninitialized.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
int resize_call_count = 0;
|
||||
|
||||
struct resizable_string {
|
||||
void resize(size_t) { resize_call_count += 1; }
|
||||
};
|
||||
|
||||
int resize_uninitialized_call_count = 0;
|
||||
|
||||
struct resize_uninitializable_string {
|
||||
void resize(size_t) { resize_call_count += 1; }
|
||||
void resize_uninitialized(size_t) { resize_uninitialized_call_count += 1; }
|
||||
};
|
||||
|
||||
TEST(ResizeUninit, WithAndWithout) {
|
||||
resize_call_count = 0;
|
||||
resize_uninitialized_call_count = 0;
|
||||
{
|
||||
resizable_string rs;
|
||||
|
||||
EXPECT_EQ(resize_call_count, 0);
|
||||
EXPECT_EQ(resize_uninitialized_call_count, 0);
|
||||
EXPECT_FALSE(
|
||||
absl::strings_internal::STLStringSupportsNontrashingResize(&rs));
|
||||
EXPECT_EQ(resize_call_count, 0);
|
||||
EXPECT_EQ(resize_uninitialized_call_count, 0);
|
||||
absl::strings_internal::STLStringResizeUninitialized(&rs, 237);
|
||||
EXPECT_EQ(resize_call_count, 1);
|
||||
EXPECT_EQ(resize_uninitialized_call_count, 0);
|
||||
}
|
||||
|
||||
resize_call_count = 0;
|
||||
resize_uninitialized_call_count = 0;
|
||||
{
|
||||
resize_uninitializable_string rus;
|
||||
|
||||
EXPECT_EQ(resize_call_count, 0);
|
||||
EXPECT_EQ(resize_uninitialized_call_count, 0);
|
||||
EXPECT_TRUE(
|
||||
absl::strings_internal::STLStringSupportsNontrashingResize(&rus));
|
||||
EXPECT_EQ(resize_call_count, 0);
|
||||
EXPECT_EQ(resize_uninitialized_call_count, 0);
|
||||
absl::strings_internal::STLStringResizeUninitialized(&rus, 237);
|
||||
EXPECT_EQ(resize_call_count, 0);
|
||||
EXPECT_EQ(resize_uninitialized_call_count, 1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
314
absl/strings/internal/str_join_internal.h
Normal file
314
absl/strings/internal/str_join_internal.h
Normal file
|
|
@ -0,0 +1,314 @@
|
|||
//
|
||||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
// This file declares INTERNAL parts of the Join API that are inlined/templated
|
||||
// or otherwise need to be available at compile time. The main abstractions
|
||||
// defined in this file are:
|
||||
//
|
||||
// - A handful of default Formatters
|
||||
// - JoinAlgorithm() overloads
|
||||
// - JoinRange() overloads
|
||||
// - JoinTuple()
|
||||
//
|
||||
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
|
||||
// absl/strings/str_join.h
|
||||
//
|
||||
// IWYU pragma: private, include "absl/strings/str_join.h"
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
|
||||
#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/strings/internal/ostringstream.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
//
|
||||
// Formatter objects
|
||||
//
|
||||
// The following are implementation classes for standard Formatter objects. The
|
||||
// factory functions that users will call to create and use these formatters are
|
||||
// defined and documented in strings/join.h.
|
||||
//
|
||||
|
||||
// The default formatter. Converts alpha-numeric types to strings.
|
||||
struct AlphaNumFormatterImpl {
|
||||
// This template is needed in order to support passing in a dereferenced
|
||||
// vector<bool>::iterator
|
||||
template <typename T>
|
||||
void operator()(std::string* out, const T& t) const {
|
||||
StrAppend(out, AlphaNum(t));
|
||||
}
|
||||
|
||||
void operator()(std::string* out, const AlphaNum& t) const {
|
||||
StrAppend(out, t);
|
||||
}
|
||||
};
|
||||
|
||||
// A type that's used to overload the JoinAlgorithm() function (defined below)
|
||||
// for ranges that do not require additional formatting (e.g., a range of
|
||||
// strings).
|
||||
|
||||
struct NoFormatter : public AlphaNumFormatterImpl {};
|
||||
|
||||
// Formats types to strings using the << operator.
|
||||
class StreamFormatterImpl {
|
||||
public:
|
||||
// The method isn't const because it mutates state. Making it const will
|
||||
// render StreamFormatterImpl thread-hostile.
|
||||
template <typename T>
|
||||
void operator()(std::string* out, const T& t) {
|
||||
// The stream is created lazily to avoid paying the relatively high cost
|
||||
// of its construction when joining an empty range.
|
||||
if (strm_) {
|
||||
strm_->clear(); // clear the bad, fail and eof bits in case they were set
|
||||
strm_->str(out);
|
||||
} else {
|
||||
strm_.reset(new strings_internal::OStringStream(out));
|
||||
}
|
||||
*strm_ << t;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<strings_internal::OStringStream> strm_;
|
||||
};
|
||||
|
||||
// Formats a std::pair<>. The 'first' member is formatted using f1_ and the
|
||||
// 'second' member is formatted using f2_. sep_ is the separator.
|
||||
template <typename F1, typename F2>
|
||||
class PairFormatterImpl {
|
||||
public:
|
||||
PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2)
|
||||
: f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}
|
||||
|
||||
template <typename T>
|
||||
void operator()(std::string* out, const T& p) {
|
||||
f1_(out, p.first);
|
||||
out->append(sep_);
|
||||
f2_(out, p.second);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void operator()(std::string* out, const T& p) const {
|
||||
f1_(out, p.first);
|
||||
out->append(sep_);
|
||||
f2_(out, p.second);
|
||||
}
|
||||
|
||||
private:
|
||||
F1 f1_;
|
||||
std::string sep_;
|
||||
F2 f2_;
|
||||
};
|
||||
|
||||
// Wraps another formatter and dereferences the argument to operator() then
|
||||
// passes the dereferenced argument to the wrapped formatter. This can be
|
||||
// useful, for example, to join a std::vector<int*>.
|
||||
template <typename Formatter>
|
||||
class DereferenceFormatterImpl {
|
||||
public:
|
||||
DereferenceFormatterImpl() : f_() {}
|
||||
explicit DereferenceFormatterImpl(Formatter&& f)
|
||||
: f_(std::forward<Formatter>(f)) {}
|
||||
|
||||
template <typename T>
|
||||
void operator()(std::string* out, const T& t) {
|
||||
f_(out, *t);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void operator()(std::string* out, const T& t) const {
|
||||
f_(out, *t);
|
||||
}
|
||||
|
||||
private:
|
||||
Formatter f_;
|
||||
};
|
||||
|
||||
// DefaultFormatter<T> is a traits class that selects a default Formatter to use
|
||||
// for the given type T. The ::Type member names the Formatter to use. This is
|
||||
// used by the strings::Join() functions that do NOT take a Formatter argument,
|
||||
// in which case a default Formatter must be chosen.
|
||||
//
|
||||
// AlphaNumFormatterImpl is the default in the base template, followed by
|
||||
// specializations for other types.
|
||||
template <typename ValueType>
|
||||
struct DefaultFormatter {
|
||||
typedef AlphaNumFormatterImpl Type;
|
||||
};
|
||||
template <>
|
||||
struct DefaultFormatter<const char*> {
|
||||
typedef AlphaNumFormatterImpl Type;
|
||||
};
|
||||
template <>
|
||||
struct DefaultFormatter<char*> {
|
||||
typedef AlphaNumFormatterImpl Type;
|
||||
};
|
||||
template <>
|
||||
struct DefaultFormatter<std::string> {
|
||||
typedef NoFormatter Type;
|
||||
};
|
||||
template <>
|
||||
struct DefaultFormatter<absl::string_view> {
|
||||
typedef NoFormatter Type;
|
||||
};
|
||||
template <typename ValueType>
|
||||
struct DefaultFormatter<ValueType*> {
|
||||
typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>
|
||||
Type;
|
||||
};
|
||||
|
||||
template <typename ValueType>
|
||||
struct DefaultFormatter<std::unique_ptr<ValueType>>
|
||||
: public DefaultFormatter<ValueType*> {};
|
||||
|
||||
//
|
||||
// JoinAlgorithm() functions
|
||||
//
|
||||
|
||||
// The main joining algorithm. This simply joins the elements in the given
|
||||
// iterator range, each separated by the given separator, into an output std::string,
|
||||
// and formats each element using the provided Formatter object.
|
||||
template <typename Iterator, typename Formatter>
|
||||
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
|
||||
Formatter&& f) {
|
||||
std::string result;
|
||||
absl::string_view sep("");
|
||||
for (Iterator it = start; it != end; ++it) {
|
||||
result.append(sep.data(), sep.size());
|
||||
f(&result, *it);
|
||||
sep = s;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// No-op placeholder for input iterators which can not be iterated over.
|
||||
template <typename Iterator>
|
||||
size_t GetResultSize(Iterator, Iterator, size_t, std::input_iterator_tag) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Calculates space to reserve, if the iterator supports multiple passes.
|
||||
template <typename Iterator>
|
||||
size_t GetResultSize(Iterator it, Iterator end, size_t separator_size,
|
||||
std::forward_iterator_tag) {
|
||||
assert(it != end);
|
||||
size_t length = it->size();
|
||||
while (++it != end) {
|
||||
length += separator_size;
|
||||
length += it->size();
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
// A joining algorithm that's optimized for an iterator range of std::string-like
|
||||
// objects that do not need any additional formatting. This is to optimize the
|
||||
// common case of joining, say, a std::vector<std::string> or a
|
||||
// std::vector<absl::string_view>.
|
||||
//
|
||||
// This is an overload of the previous JoinAlgorithm() function. Here the
|
||||
// Formatter argument is of type NoFormatter. Since NoFormatter is an internal
|
||||
// type, this overload is only invoked when strings::Join() is called with a
|
||||
// range of std::string-like objects (e.g., std::string, absl::string_view), and an
|
||||
// explicit Formatter argument was NOT specified.
|
||||
//
|
||||
// The optimization is that the needed space will be reserved in the output
|
||||
// std::string to avoid the need to resize while appending. To do this, the iterator
|
||||
// range will be traversed twice: once to calculate the total needed size, and
|
||||
// then again to copy the elements and delimiters to the output std::string.
|
||||
template <typename Iterator>
|
||||
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
|
||||
NoFormatter) {
|
||||
std::string result;
|
||||
if (start != end) {
|
||||
typename std::iterator_traits<Iterator>::iterator_category iterator_tag;
|
||||
result.reserve(GetResultSize(start, end, s.size(), iterator_tag));
|
||||
|
||||
// Joins strings
|
||||
absl::string_view sep("", 0);
|
||||
for (Iterator it = start; it != end; ++it) {
|
||||
result.append(sep.data(), sep.size());
|
||||
result.append(it->data(), it->size());
|
||||
sep = s;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// JoinTupleLoop implements a loop over the elements of a std::tuple, which
|
||||
// are heterogeneous. The primary template matches the tuple interior case. It
|
||||
// continues the iteration after appending a separator (for nonzero indices)
|
||||
// and formatting an element of the tuple. The specialization for the I=N case
|
||||
// matches the end-of-tuple, and terminates the iteration.
|
||||
template <size_t I, size_t N>
|
||||
struct JoinTupleLoop {
|
||||
template <typename Tup, typename Formatter>
|
||||
void operator()(std::string* out, const Tup& tup, absl::string_view sep,
|
||||
Formatter&& fmt) {
|
||||
if (I > 0) out->append(sep.data(), sep.size());
|
||||
fmt(out, std::get<I>(tup));
|
||||
JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt);
|
||||
}
|
||||
};
|
||||
template <size_t N>
|
||||
struct JoinTupleLoop<N, N> {
|
||||
template <typename Tup, typename Formatter>
|
||||
void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {}
|
||||
};
|
||||
|
||||
template <typename... T, typename Formatter>
|
||||
std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep,
|
||||
Formatter&& fmt) {
|
||||
std::string result;
|
||||
JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename Iterator>
|
||||
std::string JoinRange(Iterator first, Iterator last, absl::string_view separator) {
|
||||
// No formatter was explicitly given, so a default must be chosen.
|
||||
typedef typename std::iterator_traits<Iterator>::value_type ValueType;
|
||||
typedef typename DefaultFormatter<ValueType>::Type Formatter;
|
||||
return JoinAlgorithm(first, last, separator, Formatter());
|
||||
}
|
||||
|
||||
template <typename Range, typename Formatter>
|
||||
std::string JoinRange(const Range& range, absl::string_view separator,
|
||||
Formatter&& fmt) {
|
||||
using std::begin;
|
||||
using std::end;
|
||||
return JoinAlgorithm(begin(range), end(range), separator, fmt);
|
||||
}
|
||||
|
||||
template <typename Range>
|
||||
std::string JoinRange(const Range& range, absl::string_view separator) {
|
||||
using std::begin;
|
||||
using std::end;
|
||||
return JoinRange(begin(range), end(range), separator);
|
||||
}
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
|
||||
439
absl/strings/internal/str_split_internal.h
Normal file
439
absl/strings/internal/str_split_internal.h
Normal file
|
|
@ -0,0 +1,439 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
// This file declares INTERNAL parts of the Split API that are inline/templated
|
||||
// or otherwise need to be available at compile time. The main abstractions
|
||||
// defined in here are
|
||||
//
|
||||
// - ConvertibleToStringView
|
||||
// - SplitIterator<>
|
||||
// - Splitter<>
|
||||
//
|
||||
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
|
||||
// absl/strings/str_split.h.
|
||||
//
|
||||
// IWYU pragma: private, include "absl/strings/str_split.h"
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
|
||||
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
#include <glibcxx_debug_traits.h>
|
||||
#endif // _GLIBCXX_DEBUG
|
||||
|
||||
#include <array>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/base/port.h"
|
||||
#include "absl/meta/type_traits.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
using ::glibcxx_debug_traits::IsStrictlyDebugWrapperBase;
|
||||
#else // _GLIBCXX_DEBUG
|
||||
template <typename T> struct IsStrictlyDebugWrapperBase : std::false_type {};
|
||||
#endif // _GLIBCXX_DEBUG
|
||||
|
||||
// This class is implicitly constructible from everything that absl::string_view
|
||||
// is implicitly constructible from. If it's constructed from a temporary
|
||||
// std::string, the data is moved into a data member so its lifetime matches that of
|
||||
// the ConvertibleToStringView instance.
|
||||
class ConvertibleToStringView {
|
||||
public:
|
||||
ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
|
||||
: value_(s) {}
|
||||
ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
|
||||
ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
|
||||
: value_(s) {}
|
||||
ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
|
||||
: value_(s) {}
|
||||
|
||||
// Matches rvalue strings and moves their data to a member.
|
||||
ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit)
|
||||
: copy_(std::move(s)), value_(copy_) {}
|
||||
|
||||
ConvertibleToStringView(const ConvertibleToStringView& other)
|
||||
: copy_(other.copy_),
|
||||
value_(other.IsSelfReferential() ? copy_ : other.value_) {}
|
||||
|
||||
ConvertibleToStringView(ConvertibleToStringView&& other) {
|
||||
StealMembers(std::move(other));
|
||||
}
|
||||
|
||||
ConvertibleToStringView& operator=(ConvertibleToStringView other) {
|
||||
StealMembers(std::move(other));
|
||||
return *this;
|
||||
}
|
||||
|
||||
absl::string_view value() const { return value_; }
|
||||
|
||||
private:
|
||||
// Returns true if ctsp's value refers to its internal copy_ member.
|
||||
bool IsSelfReferential() const { return value_.data() == copy_.data(); }
|
||||
|
||||
void StealMembers(ConvertibleToStringView&& other) {
|
||||
if (other.IsSelfReferential()) {
|
||||
copy_ = std::move(other.copy_);
|
||||
value_ = copy_;
|
||||
other.value_ = other.copy_;
|
||||
} else {
|
||||
value_ = other.value_;
|
||||
}
|
||||
}
|
||||
|
||||
// Holds the data moved from temporary std::string arguments. Declared first so
|
||||
// that 'value' can refer to 'copy_'.
|
||||
std::string copy_;
|
||||
absl::string_view value_;
|
||||
};
|
||||
|
||||
// An iterator that enumerates the parts of a std::string from a Splitter. The text
|
||||
// to be split, the Delimiter, and the Predicate are all taken from the given
|
||||
// Splitter object. Iterators may only be compared if they refer to the same
|
||||
// Splitter instance.
|
||||
//
|
||||
// This class is NOT part of the public splitting API.
|
||||
template <typename Splitter>
|
||||
class SplitIterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = absl::string_view;
|
||||
using difference_type = ptrdiff_t;
|
||||
using pointer = const value_type*;
|
||||
using reference = const value_type&;
|
||||
|
||||
enum State { kInitState, kLastState, kEndState };
|
||||
SplitIterator(State state, const Splitter* splitter)
|
||||
: pos_(0),
|
||||
state_(state),
|
||||
splitter_(splitter),
|
||||
delimiter_(splitter->delimiter()),
|
||||
predicate_(splitter->predicate()) {
|
||||
// Hack to maintain backward compatibility. This one block makes it so an
|
||||
// empty absl::string_view whose .data() happens to be nullptr behaves
|
||||
// *differently* from an otherwise empty absl::string_view whose .data() is
|
||||
// not nullptr. This is an undesirable difference in general, but this
|
||||
// behavior is maintained to avoid breaking existing code that happens to
|
||||
// depend on this old behavior/bug. Perhaps it will be fixed one day. The
|
||||
// difference in behavior is as follows:
|
||||
// Split(absl::string_view(""), '-'); // {""}
|
||||
// Split(absl::string_view(), '-'); // {}
|
||||
if (splitter_->text().data() == nullptr) {
|
||||
state_ = kEndState;
|
||||
pos_ = splitter_->text().size();
|
||||
return;
|
||||
}
|
||||
|
||||
if (state_ == kEndState) {
|
||||
pos_ = splitter_->text().size();
|
||||
} else {
|
||||
++(*this);
|
||||
}
|
||||
}
|
||||
|
||||
bool at_end() const { return state_ == kEndState; }
|
||||
|
||||
reference operator*() const { return curr_; }
|
||||
pointer operator->() const { return &curr_; }
|
||||
|
||||
SplitIterator& operator++() {
|
||||
do {
|
||||
if (state_ == kLastState) {
|
||||
state_ = kEndState;
|
||||
return *this;
|
||||
}
|
||||
const absl::string_view text = splitter_->text();
|
||||
const absl::string_view d = delimiter_.Find(text, pos_);
|
||||
if (d.data() == text.end()) state_ = kLastState;
|
||||
curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
|
||||
pos_ += curr_.size() + d.size();
|
||||
} while (!predicate_(curr_));
|
||||
return *this;
|
||||
}
|
||||
|
||||
SplitIterator operator++(int) {
|
||||
SplitIterator old(*this);
|
||||
++(*this);
|
||||
return old;
|
||||
}
|
||||
|
||||
friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
|
||||
return a.state_ == b.state_ && a.pos_ == b.pos_;
|
||||
}
|
||||
|
||||
friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t pos_;
|
||||
State state_;
|
||||
absl::string_view curr_;
|
||||
const Splitter* splitter_;
|
||||
typename Splitter::DelimiterType delimiter_;
|
||||
typename Splitter::PredicateType predicate_;
|
||||
};
|
||||
|
||||
// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
|
||||
template <typename T, typename = void>
|
||||
struct HasMappedType : std::false_type {};
|
||||
template <typename T>
|
||||
struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
|
||||
: std::true_type {};
|
||||
|
||||
// HasValueType<T>::value is true iff there exists a type T::value_type.
|
||||
template <typename T, typename = void>
|
||||
struct HasValueType : std::false_type {};
|
||||
template <typename T>
|
||||
struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
|
||||
};
|
||||
|
||||
// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
|
||||
template <typename T, typename = void>
|
||||
struct HasConstIterator : std::false_type {};
|
||||
template <typename T>
|
||||
struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
|
||||
: std::true_type {};
|
||||
|
||||
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
|
||||
// details below in Splitter<> where this is used.
|
||||
std::false_type IsInitializerListDispatch(...); // default: No
|
||||
template <typename T>
|
||||
std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
|
||||
template <typename T>
|
||||
struct IsInitializerList
|
||||
: decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
|
||||
|
||||
// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
|
||||
// is true for type 'C'.
|
||||
//
|
||||
// Restricts conversion to container-like types (by testing for the presence of
|
||||
// a const_iterator member type) and also to disable conversion to an
|
||||
// std::initializer_list (which also has a const_iterator). Otherwise, code
|
||||
// compiled in C++11 will get an error due to ambiguous conversion paths (in
|
||||
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
|
||||
// or an std::initializer_list<T>).
|
||||
template <typename C>
|
||||
struct SplitterIsConvertibleTo
|
||||
: std::enable_if<
|
||||
!IsStrictlyDebugWrapperBase<C>::value &&
|
||||
!IsInitializerList<C>::value &&
|
||||
HasValueType<C>::value &&
|
||||
HasConstIterator<C>::value> {};
|
||||
|
||||
// This class implements the range that is returned by absl::StrSplit(). This
|
||||
// class has templated conversion operators that allow it to be implicitly
|
||||
// converted to a variety of types that the caller may have specified on the
|
||||
// left-hand side of an assignment.
|
||||
//
|
||||
// The main interface for interacting with this class is through its implicit
|
||||
// conversion operators. However, this class may also be used like a container
|
||||
// in that it has .begin() and .end() member functions. It may also be used
|
||||
// within a range-for loop.
|
||||
//
|
||||
// Output containers can be collections of any type that is constructible from
|
||||
// an absl::string_view.
|
||||
//
|
||||
// An Predicate functor may be supplied. This predicate will be used to filter
|
||||
// the split strings: only strings for which the predicate returns true will be
|
||||
// kept. A Predicate object is any unary functor that takes an absl::string_view
|
||||
// and returns bool.
|
||||
template <typename Delimiter, typename Predicate>
|
||||
class Splitter {
|
||||
public:
|
||||
using DelimiterType = Delimiter;
|
||||
using PredicateType = Predicate;
|
||||
using const_iterator = strings_internal::SplitIterator<Splitter>;
|
||||
using value_type = typename std::iterator_traits<const_iterator>::value_type;
|
||||
|
||||
Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
|
||||
: text_(std::move(input_text)),
|
||||
delimiter_(std::move(d)),
|
||||
predicate_(std::move(p)) {}
|
||||
|
||||
absl::string_view text() const { return text_.value(); }
|
||||
const Delimiter& delimiter() const { return delimiter_; }
|
||||
const Predicate& predicate() const { return predicate_; }
|
||||
|
||||
// Range functions that iterate the split substrings as absl::string_view
|
||||
// objects. These methods enable a Splitter to be used in a range-based for
|
||||
// loop.
|
||||
const_iterator begin() const { return {const_iterator::kInitState, this}; }
|
||||
const_iterator end() const { return {const_iterator::kEndState, this}; }
|
||||
|
||||
// An implicit conversion operator that is restricted to only those containers
|
||||
// that the splitter is convertible to.
|
||||
template <typename Container,
|
||||
typename OnlyIf = typename SplitterIsConvertibleTo<Container>::type>
|
||||
operator Container() const { // NOLINT(runtime/explicit)
|
||||
return ConvertToContainer<Container, typename Container::value_type,
|
||||
HasMappedType<Container>::value>()(*this);
|
||||
}
|
||||
|
||||
// Returns a pair with its .first and .second members set to the first two
|
||||
// strings returned by the begin() iterator. Either/both of .first and .second
|
||||
// will be constructed with empty strings if the iterator doesn't have a
|
||||
// corresponding value.
|
||||
template <typename First, typename Second>
|
||||
operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
|
||||
absl::string_view first, second;
|
||||
auto it = begin();
|
||||
if (it != end()) {
|
||||
first = *it;
|
||||
if (++it != end()) {
|
||||
second = *it;
|
||||
}
|
||||
}
|
||||
return {First(first), Second(second)};
|
||||
}
|
||||
|
||||
private:
|
||||
// ConvertToContainer is a functor converting a Splitter to the requested
|
||||
// Container of ValueType. It is specialized below to optimize splitting to
|
||||
// certain combinations of Container and ValueType.
|
||||
//
|
||||
// This base template handles the generic case of storing the split results in
|
||||
// the requested non-map-like container and converting the split substrings to
|
||||
// the requested type.
|
||||
template <typename Container, typename ValueType, bool is_map = false>
|
||||
struct ConvertToContainer {
|
||||
Container operator()(const Splitter& splitter) const {
|
||||
Container c;
|
||||
auto it = std::inserter(c, c.end());
|
||||
for (const auto sp : splitter) {
|
||||
*it++ = ValueType(sp);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
// Partial specialization for a std::vector<absl::string_view>.
|
||||
//
|
||||
// Optimized for the common case of splitting to a
|
||||
// std::vector<absl::string_view>. In this case we first split the results to
|
||||
// a small array of absl::string_view on the stack, to reduce reallocations.
|
||||
template <typename A>
|
||||
struct ConvertToContainer<std::vector<absl::string_view, A>,
|
||||
absl::string_view, false> {
|
||||
std::vector<absl::string_view, A> operator()(
|
||||
const Splitter& splitter) const {
|
||||
struct raw_view {
|
||||
const char* data;
|
||||
size_t size;
|
||||
operator absl::string_view() const { // NOLINT(runtime/explicit)
|
||||
return {data, size};
|
||||
}
|
||||
};
|
||||
std::vector<absl::string_view, A> v;
|
||||
std::array<raw_view, 16> ar;
|
||||
for (auto it = splitter.begin(); !it.at_end();) {
|
||||
size_t index = 0;
|
||||
do {
|
||||
ar[index].data = it->data();
|
||||
ar[index].size = it->size();
|
||||
++it;
|
||||
} while (++index != ar.size() && !it.at_end());
|
||||
v.insert(v.end(), ar.begin(), ar.begin() + index);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
// Partial specialization for a std::vector<std::string>.
|
||||
//
|
||||
// Optimized for the common case of splitting to a std::vector<std::string>. In
|
||||
// this case we first split the results to a std::vector<absl::string_view> so
|
||||
// the returned std::vector<std::string> can have space reserved to avoid std::string
|
||||
// moves.
|
||||
template <typename A>
|
||||
struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
|
||||
std::vector<std::string, A> operator()(const Splitter& splitter) const {
|
||||
const std::vector<absl::string_view> v = splitter;
|
||||
return std::vector<std::string, A>(v.begin(), v.end());
|
||||
}
|
||||
};
|
||||
|
||||
// Partial specialization for containers of pairs (e.g., maps).
|
||||
//
|
||||
// The algorithm is to insert a new pair into the map for each even-numbered
|
||||
// item, with the even-numbered item as the key with a default-constructed
|
||||
// value. Each odd-numbered item will then be assigned to the last pair's
|
||||
// value.
|
||||
template <typename Container, typename First, typename Second>
|
||||
struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
|
||||
Container operator()(const Splitter& splitter) const {
|
||||
Container m;
|
||||
typename Container::iterator it;
|
||||
bool insert = true;
|
||||
for (const auto sp : splitter) {
|
||||
if (insert) {
|
||||
it = Inserter<Container>::Insert(&m, First(sp), Second());
|
||||
} else {
|
||||
it->second = Second(sp);
|
||||
}
|
||||
insert = !insert;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
// Inserts the key and value into the given map, returning an iterator to
|
||||
// the inserted item. Specialized for std::map and std::multimap to use
|
||||
// emplace() and adapt emplace()'s return value.
|
||||
template <typename Map>
|
||||
struct Inserter {
|
||||
using M = Map;
|
||||
template <typename... Args>
|
||||
static typename M::iterator Insert(M* m, Args&&... args) {
|
||||
return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
struct Inserter<std::map<Ts...>> {
|
||||
using M = std::map<Ts...>;
|
||||
template <typename... Args>
|
||||
static typename M::iterator Insert(M* m, Args&&... args) {
|
||||
return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
struct Inserter<std::multimap<Ts...>> {
|
||||
using M = std::multimap<Ts...>;
|
||||
template <typename... Args>
|
||||
static typename M::iterator Insert(M* m, Args&&... args) {
|
||||
return m->emplace(std::make_pair(std::forward<Args>(args)...));
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
ConvertibleToStringView text_;
|
||||
Delimiter delimiter_;
|
||||
Predicate predicate_;
|
||||
};
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
|
||||
51
absl/strings/internal/utf8.cc
Normal file
51
absl/strings/internal/utf8.cc
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// UTF8 utilities, implemented to reduce dependencies.
|
||||
|
||||
#include "absl/strings/internal/utf8.h"
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) {
|
||||
if (utf8_char <= 0x7F) {
|
||||
*buffer = static_cast<char>(utf8_char);
|
||||
return 1;
|
||||
} else if (utf8_char <= 0x7FF) {
|
||||
buffer[1] = 0x80 | (utf8_char & 0x3F);
|
||||
utf8_char >>= 6;
|
||||
buffer[0] = 0xC0 | utf8_char;
|
||||
return 2;
|
||||
} else if (utf8_char <= 0xFFFF) {
|
||||
buffer[2] = 0x80 | (utf8_char & 0x3F);
|
||||
utf8_char >>= 6;
|
||||
buffer[1] = 0x80 | (utf8_char & 0x3F);
|
||||
utf8_char >>= 6;
|
||||
buffer[0] = 0xE0 | utf8_char;
|
||||
return 3;
|
||||
} else {
|
||||
buffer[3] = 0x80 | (utf8_char & 0x3F);
|
||||
utf8_char >>= 6;
|
||||
buffer[2] = 0x80 | (utf8_char & 0x3F);
|
||||
utf8_char >>= 6;
|
||||
buffer[1] = 0x80 | (utf8_char & 0x3F);
|
||||
utf8_char >>= 6;
|
||||
buffer[0] = 0xF0 | utf8_char;
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
52
absl/strings/internal/utf8.h
Normal file
52
absl/strings/internal/utf8.h
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// UTF8 utilities, implemented to reduce dependencies.
|
||||
//
|
||||
// If you need Unicode specific processing (for example being aware of
|
||||
// Unicode character boundaries, or knowledge of Unicode casing rules,
|
||||
// or various forms of equivalence and normalization), take a look at
|
||||
// files in i18n/utf8.
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_
|
||||
#define ABSL_STRINGS_INTERNAL_UTF8_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
namespace absl {
|
||||
namespace strings_internal {
|
||||
|
||||
// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes
|
||||
// out the UTF-8 encoding into buffer, and returns the number of chars
|
||||
// it wrote.
|
||||
//
|
||||
// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings
|
||||
// are:
|
||||
// 00 - 7F : 0xxxxxxx
|
||||
// 80 - 7FF : 110xxxxx 10xxxxxx
|
||||
// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx
|
||||
// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
//
|
||||
// Values greater than 0x10FFFF are not supported and may or may not write
|
||||
// characters into buffer, however never will more than kMaxEncodedUTF8Size
|
||||
// bytes be written, regardless of the value of utf8_char.
|
||||
enum { kMaxEncodedUTF8Size = 4 };
|
||||
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char);
|
||||
|
||||
} // namespace strings_internal
|
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_UTF8_H_
|
||||
58
absl/strings/internal/utf8_test.cc
Normal file
58
absl/strings/internal/utf8_test.cc
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
// Copyright 2017 The Abseil Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/utf8.h"
|
||||
|
||||
#include <cctype>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdint>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
TEST(EncodeUTF8Char, BasicFunction) {
|
||||
std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"},
|
||||
{0x00A3, u8"\u00A3"},
|
||||
{0x00010000, u8"\U00010000"},
|
||||
{0x0000FFFF, u8"\U0000FFFF"},
|
||||
{0x0010FFFD, u8"\U0010FFFD"}};
|
||||
for (auto &test : tests) {
|
||||
char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'};
|
||||
char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'};
|
||||
char *buf0_written =
|
||||
&buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)];
|
||||
char *buf1_written =
|
||||
&buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)];
|
||||
int apparent_length = 7;
|
||||
while (buf0[apparent_length - 1] == '\x00' &&
|
||||
buf1[apparent_length - 1] == '\xFF') {
|
||||
if (--apparent_length == 0) break;
|
||||
}
|
||||
EXPECT_EQ(apparent_length, buf0_written - buf0);
|
||||
EXPECT_EQ(apparent_length, buf1_written - buf1);
|
||||
EXPECT_EQ(apparent_length, test.second.length());
|
||||
EXPECT_EQ(std::string(buf0, apparent_length), test.second);
|
||||
EXPECT_EQ(std::string(buf1, apparent_length), test.second);
|
||||
}
|
||||
char buf[32] = "Don't Tread On Me";
|
||||
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000),
|
||||
absl::strings_internal::kMaxEncodedUTF8Size);
|
||||
char buf2[32] = "Negative is invalid but sane";
|
||||
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1),
|
||||
absl::strings_internal::kMaxEncodedUTF8Size);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
Loading…
Add table
Add a link
Reference in a new issue