Initial Commit

This commit is contained in:
misterg 2017-09-19 16:54:40 -04:00
commit c2e7548296
238 changed files with 65475 additions and 0 deletions

293
absl/strings/BUILD.bazel Normal file
View file

@ -0,0 +1,293 @@
#
# Copyright 2017 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# -*- mode: python; -*-
# Libraries in this low-level package may not depend on libraries in packages
# that are not low level. For more information, including how to submit
# changes to this file, see http://www/eng/howto/build-monitors.html
load(
"//absl:test_dependencies.bzl",
"GUNIT_MAIN_DEPS_SELECTOR",
"GUNIT_DEPS_SELECTOR",
)
load(
"//absl:copts.bzl",
"ABSL_DEFAULT_COPTS",
"ABSL_TEST_COPTS",
"ABSL_EXCEPTIONS_FLAG",
)
# Package-wide defaults: targets in this package are publicly visible unless
# they override it, and the "parse_headers" and "header_modules" features are
# requested for the whole package.
package(
default_visibility = ["//visibility:public"],
features = [
"parse_headers",
"header_modules",
],
)
# License declaration for this package.
licenses(["notice"]) # Apache 2.0
# Main string utilities library. The headers in `hdrs` form the public
# interface; the .cc files and the internal/ headers listed in `srcs` are
# implementation details that are not exported to dependents.
cc_library(
name = "strings",
srcs = [
"ascii.cc",
"escaping.cc",
"internal/memutil.cc",
"internal/memutil.h",
"internal/str_join_internal.h",
"internal/str_split_internal.h",
"match.cc",
"numbers.cc",
"str_cat.cc",
"str_replace.cc",
"str_split.cc",
"string_view.cc",
"substitute.cc",
],
hdrs = [
"ascii.h",
"escaping.h",
"match.h",
"numbers.h",
"str_cat.h",
"str_join.h",
"str_replace.h",
"str_split.h",
"string_view.h",
"strip.h",
"substitute.h",
],
copts = ABSL_DEFAULT_COPTS,
deps = [
":internal",
"//absl/base",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/base:throw_delegate",
"//absl/memory",
"//absl/meta:type_traits",
"//absl/numeric:int128",
],
)
# Helper library of internal/ headers (plus the UTF-8 implementation) that
# :strings depends on. It is also used directly by several tests below.
cc_library(
name = "internal",
srcs = [
"internal/utf8.cc",
],
hdrs = [
"internal/char_map.h",
"internal/fastmem.h",
"internal/ostringstream.h",
"internal/resize_uninitialized.h",
"internal/utf8.h",
],
copts = ABSL_DEFAULT_COPTS,
deps = [
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/meta:type_traits",
],
)
# Builds match_test.cc against :strings plus the gtest "main" dependencies
# chosen by GUNIT_MAIN_DEPS_SELECTOR.
cc_test(
name = "match_test",
size = "small",
srcs = ["match_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [":strings"] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds escaping_test.cc together with the .inc file listed alongside it in
# srcs.
cc_test(
name = "escaping_test",
size = "small",
srcs = [
"escaping_test.cc",
"internal/escaping_test_common.inc",
],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:core_headers",
"//absl/container:fixed_array",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds ascii_test.cc, the tests for the ASCII helpers declared in ascii.h.
cc_test(
name = "ascii_test",
size = "small",
srcs = ["ascii_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:core_headers",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds internal/memutil_test.cc. internal/memutil.h is listed in srcs here
# because :strings only carries that header in its own srcs (not hdrs), so it
# is not exported to dependents.
cc_test(
name = "memutil_test",
size = "small",
srcs = [
"internal/memutil.h",
"internal/memutil_test.cc",
],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:core_headers",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds internal/utf8_test.cc; depends on :internal, which exports
# internal/utf8.h.
cc_test(
name = "utf8_test",
size = "small",
srcs = [
"internal/utf8_test.cc",
],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
":internal",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds string_view_test.cc. This is the only test in this file that appends
# ABSL_EXCEPTIONS_FLAG to its copts.
# NOTE(review): presumably the flag enables C++ exceptions for this test --
# confirm against //absl:copts.bzl.
cc_test(
name = "string_view_test",
size = "small",
srcs = ["string_view_test.cc"],
copts = ABSL_TEST_COPTS + ABSL_EXCEPTIONS_FLAG,
deps = [
":strings",
"//absl/base:core_headers",
"//absl/base:config",
"//absl/base:dynamic_annotations",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds substitute_test.cc against :strings.
cc_test(
name = "substitute_test",
size = "small",
srcs = ["substitute_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:core_headers",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds str_replace_test.cc against :strings.
cc_test(
name = "str_replace_test",
size = "small",
srcs = ["str_replace_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds str_split_test.cc against :strings. Unlike most tests in this file,
# no explicit `size` is set, so Bazel's default test size applies.
cc_test(
name = "str_split_test",
srcs = ["str_split_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:core_headers",
"//absl/base:dynamic_annotations",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds internal/ostringstream_test.cc; depends only on :internal, which
# exports internal/ostringstream.h.
cc_test(
name = "ostringstream_test",
size = "small",
srcs = ["internal/ostringstream_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":internal",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds internal/resize_uninitialized_test.cc. The header under test is
# compiled in directly through srcs; this target does not depend on
# :internal, even though :internal also exports that header.
cc_test(
name = "resize_uninitialized_test",
size = "small",
srcs = [
"internal/resize_uninitialized.h",
"internal/resize_uninitialized_test.cc",
],
copts = ABSL_TEST_COPTS,
deps = [
"//absl/base:core_headers",
"//absl/meta:type_traits",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds str_join_test.cc against :strings.
cc_test(
name = "str_join_test",
size = "small",
srcs = ["str_join_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:core_headers",
"//absl/memory",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds str_cat_test.cc against :strings.
cc_test(
name = "str_cat_test",
size = "small",
srcs = ["str_cat_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:core_headers",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds numbers_test.cc together with the .inc file listed alongside it in
# srcs.
cc_test(
name = "numbers_test",
size = "small",
srcs = [
"internal/numbers_test_common.inc",
"numbers_test.cc",
],
copts = ABSL_TEST_COPTS,
# NOTE(review): the meaning of this tag is not visible from this file --
# confirm what consumes it before changing or removing it.
tags = [
"no_test_loonix",
],
deps = [
":strings",
"//absl/base",
"//absl/base:core_headers",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds strip_test.cc against :strings.
cc_test(
name = "strip_test",
size = "small",
srcs = ["strip_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [":strings"] + select(GUNIT_MAIN_DEPS_SELECTOR),
)
# Builds internal/char_map_test.cc; depends only on :internal, which exports
# internal/char_map.h. Unlike most tests in this file, no explicit `size` is
# set, so Bazel's default test size applies.
cc_test(
name = "char_map_test",
srcs = ["internal/char_map_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":internal",
] + select(GUNIT_MAIN_DEPS_SELECTOR),
)

87
absl/strings/README.md Normal file
View file

@ -0,0 +1,87 @@
# ABSL Strings
This directory contains packages related to string operations and string
alternatives (such as character-agnostic byte manipulation packages).
## Library Listing
Two library targets are available within this directory:
* **strings** (`//absl/strings:strings`) provides classes and
utility functions for manipulating and comparing strings, converting other
types (such as integers) into strings, or evaluating strings for other usages
(such as tokenization).
* **cord** (`//absl/strings:cord`) provides classes and utility
functions for manipulating `Cord` elements. A `Cord` is a sequence of
characters that internally uses a tree structure to store its data,
avoiding the need for long regions of contiguous memory, and allows memory
sharing, substring copy-on-write, and a host of other advanced string
features.
## Strings Library File Listing
The following header files are directly included within the
`absl::strings` library.
## Alternate string-like Classes
* `bytestream.h`
<br/>Abstraction of a string for I/O
* `string_view.h`
<br/>Pointer to part or all of another string
## Formatting and Parsing
* `numbers.h`
<br/>Converter between strings and numbers. Prefer `str_cat.h` for converting
numbers to strings
## Operations on Characters
* `ascii_ctype.h`
<br/>Char classifiers like &lt;ctype.h&gt; but faster
* `charset.h`
<br/>Bitmap from unsigned char -&gt; bool
## Operations on Strings
* `case.h`
<br/>Case-changers
* `escaping.h`
<br/>Escapers and unescapers
* `str_join.h`
<br/>Joiner functions using a delimiter
* `str_split.h`
<br/>Split functions
* `str_cat.h`
<br/>Concatenators and appenders
* `string_view_utils.h`
<br/>Utility functions for strings
* `strip.h`
<br/>Character removal functions
* `substitute.h`
<br/>Printf-like typesafe formatter
## Miscellaneous
* `util.h`
<br/>Grab bag of useful string functions
## Cord Library File Listing
The following header files are directly included within the
`absl::strings::cord` library:
## The `Cord` Class
* `cord.h`
<br/>A string built from a tree of shareable nodes
## Operations on Cords
* `cord_cat.h`
<br/>Concatenator functions for cords
* `cord_util.h`
<br/>Utility functions for cords

198
absl/strings/ascii.cc Normal file
View file

@ -0,0 +1,198 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
namespace absl {
namespace ascii_internal {
// TODO(mbar) Move Python code for generation of table to BUILD and link here.
// NOTE: The kPropertyBits table used within this code was generated by
// Python code of the following form. (Bit 0x02 is currently unused and
// available.)
//
// def Hex2(n):
// return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
// def IsPunct(ch):
// return (ord(ch) >= 32 and ord(ch) < 127 and
// not ch.isspace() and not ch.isalnum())
// def IsBlank(ch):
// return ch in ' \t'
// def IsCntrl(ch):
// return ord(ch) < 32 or ord(ch) == 127
// def IsXDigit(ch):
// return ch.isdigit() or ch.lower() in 'abcdef'
// for i in range(128):
// ch = chr(i)
// mask = ((ch.isalpha() and 0x01 or 0) |
// (ch.isalnum() and 0x04 or 0) |
// (ch.isspace() and 0x08 or 0) |
// (IsPunct(ch) and 0x10 or 0) |
// (IsBlank(ch) and 0x20 or 0) |
// (IsCntrl(ch) and 0x40 or 0) |
// (IsXDigit(ch) and 0x80 or 0))
// print Hex2(mask) + ',',
// if i % 16 == 7:
// print ' //', Hex2(i & 0x78)
// elif i % 16 == 15:
// print
// clang-format off
// Array of bitfields holding character information. Each bit value corresponds
// to a particular character feature. For readability, and because the value
// of these bits is tightly coupled to this implementation, the individual bits
// are not named. Note that bitfields for all characters above ASCII 127 are
// zero-initialized.
//
// The table is indexed by the character's unsigned char value. Per the
// generator script in the comment above, the bits mean:
//   0x01 alpha, 0x04 alnum, 0x08 space, 0x10 punct,
//   0x20 blank, 0x40 cntrl, 0x80 xdigit   (0x02 is unused)
// Only the first 128 entries are spelled out; the remaining 128 are left to
// aggregate initialization and are therefore zero.
const unsigned char kPropertyBits[256] = {
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00
0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30
0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50
0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70
0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,
};
// Lookup table backing the ascii_tolower() function, indexed by the
// character's unsigned char value. Entries for 'A' through 'Z' hold the
// corresponding lower-case letter; every other entry holds the index itself,
// so those characters map to themselves (including values above 127).
const char kToLower[256] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
// Lookup table backing the ascii_toupper() function, indexed by the
// character's unsigned char value. Entries for 'a' through 'z' hold the
// corresponding upper-case letter; every other entry holds the index itself,
// so those characters map to themselves (including values above 127).
const char kToUpper[256] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
// clang-format on
} // namespace ascii_internal
// Lower-cases `*s` in place, one byte at a time, via absl::ascii_tolower().
// `s` is dereferenced without a null check.
void AsciiStrToLower(std::string* s) {
  for (std::string::iterator it = s->begin(), last = s->end(); it != last;
       ++it) {
    *it = absl::ascii_tolower(static_cast<unsigned char>(*it));
  }
}
// Upper-cases `*s` in place, one byte at a time, via absl::ascii_toupper().
// `s` is dereferenced without a null check.
void AsciiStrToUpper(std::string* s) {
  for (std::string::iterator it = s->begin(), last = s->end(); it != last;
       ++it) {
    *it = absl::ascii_toupper(static_cast<unsigned char>(*it));
  }
}
// Rewrites `*str` in place so that leading and trailing ASCII whitespace is
// gone and every internal run of whitespace is reduced to a single character
// (the last character of the run). A string that is empty or all whitespace
// becomes empty.
void RemoveExtraAsciiWhitespace(std::string* str) {
  const auto trimmed = StripAsciiWhitespace(*str);
  if (trimmed.empty()) {
    str->clear();
    return;
  }

  // `trimmed` is a view into `*str`, so the compaction below reads from and
  // writes to the same buffer. The write cursor never gets ahead of the read
  // cursor, so no unread input is overwritten.
  const auto buffer_start = &(*str)[0];
  auto write_pos = buffer_start;
  const auto read_end = trimmed.end();
  bool previous_was_space = false;
  for (auto read_pos = trimmed.begin(); read_pos < read_end; ++read_pos) {
    const bool current_is_space = absl::ascii_isspace(*read_pos);
    // Inside a whitespace run, step back so this character replaces the
    // previously written one: only the last whitespace of a run survives.
    if (previous_was_space && current_is_space) --write_pos;
    previous_was_space = current_is_space;
    *write_pos = *read_pos;
    ++write_pos;
  }

  // Drop everything after the compacted prefix.
  str->erase(write_pos - buffer_start);
}
} // namespace absl

239
absl/strings/ascii.h Normal file
View file

@ -0,0 +1,239 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: ascii.h
// -----------------------------------------------------------------------------
//
// This package contains functions operating on characters and strings
// restricted to standard ASCII. These include character classification
// functions analogous to those found in the ANSI C Standard Library <ctype.h>
// header file.
//
// C++ implementations provide <ctype.h> functionality based on their
// C environment locale. In general, reliance on such a locale is not ideal, as
// the locale standard is problematic (and may not return invariant information
// for the same character set, for example). These `ascii_*()` functions are
// hard-wired for standard ASCII, much faster, and guaranteed to behave
// consistently. They will never be overloaded, nor will their function
// signature change.
//
// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
// `ascii_isxdigit()`
// Analogous to the <ctype.h> functions with similar names, these
// functions take an unsigned char and return a bool, based on whether the
// character matches the condition specified.
//
// If the input character has a numerical value greater than 127, these
// functions return `false`.
//
// `ascii_tolower()`, `ascii_toupper()`
// Analogous to the <ctype.h> functions with similar names, these functions
// take an unsigned char and return a char.
//
// If the input character is not an ASCII {lower,upper}-case letter (including
// numerical values greater than 127) then the functions return the same value
// as the input character.
#ifndef ABSL_STRINGS_ASCII_H_
#define ABSL_STRINGS_ASCII_H_
#include <algorithm>
#include <string>
#include "absl/base/attributes.h"
#include "absl/strings/string_view.h"
namespace absl {
// Lookup tables used by the inline functions in this header. Each table has
// 256 entries and is indexed with the unsigned char value of a character.
namespace ascii_internal {
// Declaration for an array of bitfields holding character information.
// The ascii_is*() functions below test individual bits of an entry.
extern const unsigned char kPropertyBits[256];
// Declaration for the array of characters to upper-case characters.
// Read by ascii_toupper().
extern const char kToUpper[256];
// Declaration for the array of characters to lower-case characters.
// Read by ascii_tolower().
extern const char kToLower[256];
} // namespace ascii_internal
// ascii_isalpha()
//
// Returns true if `c` is an alphabetic character, i.e. if bit 0x01 is set in
// the character's property-table entry.
inline bool ascii_isalpha(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x01) != 0;
}
// ascii_isalnum()
//
// Returns true if `c` is an alphanumeric character, i.e. if bit 0x04 is set
// in the character's property-table entry.
inline bool ascii_isalnum(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x04) != 0;
}
// ascii_isspace()
//
// Returns true if `c` is a whitespace character (space, tab, vertical tab,
// formfeed, linefeed, or carriage return), i.e. if bit 0x08 is set in the
// character's property-table entry.
inline bool ascii_isspace(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x08) != 0;
}
// ascii_ispunct()
//
// Returns true if `c` is a punctuation character, i.e. if bit 0x10 is set in
// the character's property-table entry.
inline bool ascii_ispunct(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x10) != 0;
}
// ascii_isblank()
//
// Returns true if `c` is a blank character (tab or space), i.e. if bit 0x20
// is set in the character's property-table entry.
inline bool ascii_isblank(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x20) != 0;
}
// ascii_iscntrl()
//
// Returns true if `c` is a control character, i.e. if bit 0x40 is set in the
// character's property-table entry.
inline bool ascii_iscntrl(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x40) != 0;
}
// ascii_isxdigit()
//
// Returns true if `c` is a hexadecimal digit character (i.e. {0-9}, {A-F} or
// {a-f}), i.e. if bit 0x80 is set in the character's property-table entry.
inline bool ascii_isxdigit(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x80) != 0;
}
// ascii_isdigit()
//
// Returns true if `c` is a decimal digit character, i.e. lies in the range
// '0' through '9' inclusive.
inline bool ascii_isdigit(unsigned char c) {
  if (c < '0') return false;
  return c <= '9';
}
// ascii_isprint()
//
// Returns true if `c` is printable: any value from 32 (the space character)
// through 126 inclusive.
inline bool ascii_isprint(unsigned char c) {
  return !(c < 32 || c >= 127);
}
// ascii_isgraph()
//
// Returns true if `c` has a graphical representation: any value from 33
// through 126 inclusive (printable characters other than space).
inline bool ascii_isgraph(unsigned char c) {
  return !(c <= 32 || c >= 127);
}
// ascii_isupper()
//
// Returns true if `c` is an uppercase letter, i.e. lies in the range 'A'
// through 'Z' inclusive.
inline bool ascii_isupper(unsigned char c) {
  if (c < 'A') return false;
  return c <= 'Z';
}
// ascii_islower()
//
// Returns true if `c` is a lowercase letter, i.e. lies in the range 'a'
// through 'z' inclusive.
inline bool ascii_islower(unsigned char c) {
  if (c < 'a') return false;
  return c <= 'z';
}
// ascii_isascii()
//
// Returns true if `c` is an ASCII character, i.e. its value is at most 127.
inline bool ascii_isascii(unsigned char c) {
  return c <= 0x7F;
}
// ascii_tolower()
//
// Maps `c` through the lower-casing lookup table: an uppercase ASCII letter
// yields its lowercase form, and any other value (including values > 127)
// is returned unchanged.
inline char ascii_tolower(unsigned char c) {
  const char lowered = ascii_internal::kToLower[c];
  return lowered;
}
// Converts the characters in `s` to lowercase, changing the contents of `s`.
// `s` is modified in place; the implementation dereferences it without a
// null check, so it must point to a valid string.
void AsciiStrToLower(std::string* s);
// Returns a new std::string holding a lowercase copy of the characters viewed
// by `s`. The input is not modified.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
  std::string lowered(s);
  // Reuse the in-place overload on the private copy.
  absl::AsciiStrToLower(&lowered);
  return lowered;
}
// ascii_toupper()
//
// Maps `c` through the upper-casing lookup table: a lowercase ASCII letter
// yields its uppercase form, and any other value (including values > 127)
// is returned unchanged.
inline char ascii_toupper(unsigned char c) {
  const char uppered = ascii_internal::kToUpper[c];
  return uppered;
}
// Converts the characters in `s` to uppercase, changing the contents of `s`.
// `s` is modified in place; the implementation dereferences it without a
// null check, so it must point to a valid string.
void AsciiStrToUpper(std::string* s);
// Returns a new std::string holding an uppercase copy of the characters
// viewed by `s`. The input is not modified.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
  std::string uppered(s);
  // Reuse the in-place overload on the private copy.
  absl::AsciiStrToUpper(&uppered);
  return uppered;
}
// Returns the suffix of `str` that starts at its first non-whitespace
// character. The result views the same underlying data as `str`; it is empty
// when `str` is empty or consists only of whitespace.
ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
    absl::string_view str) {
  const auto first_kept =
      std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
  const auto kept_length = str.end() - first_kept;
  return absl::string_view(first_kept, kept_length);
}
// Erases, in place, every whitespace character that precedes the first
// non-whitespace character of `*str`.
inline void StripLeadingAsciiWhitespace(std::string* str) {
  const auto start = str->begin();
  const auto first_kept =
      std::find_if_not(start, str->end(), absl::ascii_isspace);
  str->erase(start, first_kept);
}
// Returns the prefix of `str` that ends at its last non-whitespace character.
// The result views the same underlying data as `str`; it is empty when `str`
// is empty or consists only of whitespace.
ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
    absl::string_view str) {
  // Scan backwards for the last character to keep.
  const auto last_kept =
      std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
  // Distance from that reverse position to rend() is the kept prefix length.
  const auto kept_length = str.rend() - last_kept;
  return absl::string_view(str.begin(), kept_length);
}
// Erases, in place, every whitespace character that follows the last
// non-whitespace character of `*str`.
inline void StripTrailingAsciiWhitespace(std::string* str) {
  // Scan backwards for the last character to keep.
  const auto last_kept =
      std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
  // Distance from that reverse position to rend() is the kept prefix length;
  // everything from that index onward is erased.
  const auto kept_length = str->rend() - last_kept;
  str->erase(kept_length);
}
// Returns the part of `str` that remains after removing whitespace from both
// its beginning and its end. The result views the same underlying data.
ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
    absl::string_view str) {
  const absl::string_view without_leading = StripLeadingAsciiWhitespace(str);
  return StripTrailingAsciiWhitespace(without_leading);
}
// Strips in place whitespace from both ends of the given std::string
inline void StripAsciiWhitespace(std::string* str) {
StripTrailingAsciiWhitespace(str);
StripLeadingAsciiWhitespace(str);
}
// Removes leading, trailing, and consecutive internal whitespace.
// The string is modified in place. Each internal run of whitespace is reduced
// to one character (the implementation keeps the last character of the run).
void RemoveExtraAsciiWhitespace(std::string*);
} // namespace absl
#endif // ABSL_STRINGS_ASCII_H_

View file

@ -0,0 +1,66 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_ASCII_CTYPE_H_
#define ABSL_STRINGS_ASCII_CTYPE_H_
#include "absl/strings/ascii.h"
// Global-namespace forwarding wrapper for absl::ascii_isalpha().
inline bool ascii_isalpha(unsigned char c) {
return absl::ascii_isalpha(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isalnum().
inline bool ascii_isalnum(unsigned char c) {
return absl::ascii_isalnum(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isspace().
inline bool ascii_isspace(unsigned char c) {
return absl::ascii_isspace(c);
}
// Global-namespace forwarding wrapper for absl::ascii_ispunct().
inline bool ascii_ispunct(unsigned char c) {
return absl::ascii_ispunct(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isblank().
inline bool ascii_isblank(unsigned char c) {
return absl::ascii_isblank(c);
}
// Global-namespace forwarding wrapper for absl::ascii_iscntrl().
inline bool ascii_iscntrl(unsigned char c) {
return absl::ascii_iscntrl(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isxdigit().
inline bool ascii_isxdigit(unsigned char c) {
return absl::ascii_isxdigit(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isdigit().
inline bool ascii_isdigit(unsigned char c) {
return absl::ascii_isdigit(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isprint().
inline bool ascii_isprint(unsigned char c) {
return absl::ascii_isprint(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isgraph().
inline bool ascii_isgraph(unsigned char c) {
return absl::ascii_isgraph(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isupper().
inline bool ascii_isupper(unsigned char c) {
return absl::ascii_isupper(c);
}
// Global-namespace forwarding wrapper for absl::ascii_islower().
inline bool ascii_islower(unsigned char c) {
return absl::ascii_islower(c);
}
// Global-namespace forwarding wrapper for absl::ascii_isascii().
inline bool ascii_isascii(unsigned char c) {
return absl::ascii_isascii(c);
}
// Global-namespace forwarding wrapper for absl::ascii_tolower().
inline char ascii_tolower(unsigned char c) {
return absl::ascii_tolower(c);
}
// Global-namespace forwarding wrapper for absl::ascii_toupper().
inline char ascii_toupper(unsigned char c) {
return absl::ascii_toupper(c);
}
#endif // ABSL_STRINGS_ASCII_CTYPE_H_

354
absl/strings/ascii_test.cc Normal file
View file

@ -0,0 +1,354 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
#include <cctype>
#include <clocale>
#include <cstring>
#include <string>
#include "gtest/gtest.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace {
// Checks every absl::ascii_is*() classifier against an expectation computed
// directly from the character code, for all 256 unsigned char values, and
// then checks that passing the same byte as a signed char (negative for
// values above 127) gives the same answer.
TEST(AsciiIsFoo, All) {
  for (int i = 0; i < 256; i++) {
    // Expectations are derived from `i` alone so that a bug in one classifier
    // cannot mask a bug in another.
    const bool is_upper = i >= 'A' && i <= 'Z';
    const bool is_lower = i >= 'a' && i <= 'z';
    const bool is_alpha = is_upper || is_lower;
    const bool is_digit = i >= '0' && i <= '9';
    const bool is_alnum = is_alpha || is_digit;
    // strchr() also matches the terminating NUL, so exclude 0 explicitly.
    const bool is_space = i != '\0' && strchr(" \r\n\t\v\f", i) != nullptr;
    const bool is_print = i >= 32 && i < 127;
    const bool is_punct = is_print && !is_space && !is_alnum;
    const bool is_blank = i == ' ' || i == '\t';
    const bool is_cntrl = i < 32 || i == 127;
    const bool is_xdigit =
        is_digit || (i >= 'A' && i <= 'F') || (i >= 'a' && i <= 'f');
    const bool is_graph = i > 32 && i < 127;
    const bool is_ascii = i < 128;

    // EXPECT_EQ reports both the expected and the actual value on failure.
    EXPECT_EQ(is_alpha, absl::ascii_isalpha(i)) << ": failed on " << i;
    EXPECT_EQ(is_digit, absl::ascii_isdigit(i)) << ": failed on " << i;
    EXPECT_EQ(is_alnum, absl::ascii_isalnum(i)) << ": failed on " << i;
    EXPECT_EQ(is_space, absl::ascii_isspace(i)) << ": failed on " << i;
    EXPECT_EQ(is_print, absl::ascii_isprint(i)) << ": failed on " << i;
    EXPECT_EQ(is_punct, absl::ascii_ispunct(i)) << ": failed on " << i;
    EXPECT_EQ(is_blank, absl::ascii_isblank(i)) << ": failed on " << i;
    EXPECT_EQ(is_cntrl, absl::ascii_iscntrl(i)) << ": failed on " << i;
    EXPECT_EQ(is_xdigit, absl::ascii_isxdigit(i)) << ": failed on " << i;
    EXPECT_EQ(is_graph, absl::ascii_isgraph(i)) << ": failed on " << i;
    EXPECT_EQ(is_upper, absl::ascii_isupper(i)) << ": failed on " << i;
    EXPECT_EQ(is_lower, absl::ascii_islower(i)) << ": failed on " << i;
    EXPECT_EQ(is_ascii, absl::ascii_isascii(i)) << ": failed on " << i;
  }

  // The official is* functions don't accept negative signed chars, but
  // our absl::ascii_is* functions do.
  for (int i = 0; i < 256; i++) {
    signed char sc = static_cast<signed char>(static_cast<unsigned char>(i));
    EXPECT_EQ(absl::ascii_isalpha(i), absl::ascii_isalpha(sc)) << i;
    EXPECT_EQ(absl::ascii_isdigit(i), absl::ascii_isdigit(sc)) << i;
    EXPECT_EQ(absl::ascii_isalnum(i), absl::ascii_isalnum(sc)) << i;
    EXPECT_EQ(absl::ascii_isspace(i), absl::ascii_isspace(sc)) << i;
    EXPECT_EQ(absl::ascii_ispunct(i), absl::ascii_ispunct(sc)) << i;
    EXPECT_EQ(absl::ascii_isblank(i), absl::ascii_isblank(sc)) << i;
    EXPECT_EQ(absl::ascii_iscntrl(i), absl::ascii_iscntrl(sc)) << i;
    EXPECT_EQ(absl::ascii_isxdigit(i), absl::ascii_isxdigit(sc)) << i;
    EXPECT_EQ(absl::ascii_isprint(i), absl::ascii_isprint(sc)) << i;
    EXPECT_EQ(absl::ascii_isgraph(i), absl::ascii_isgraph(sc)) << i;
    EXPECT_EQ(absl::ascii_isupper(i), absl::ascii_isupper(sc)) << i;
    EXPECT_EQ(absl::ascii_islower(i), absl::ascii_islower(sc)) << i;
    EXPECT_EQ(absl::ascii_isascii(i), absl::ascii_isascii(sc)) << i;
  }
}
// Compares each absl::ascii_is*() classifier with the matching <cctype>
// function for all 256 unsigned char values while LC_CTYPE is set to "C".
TEST(AsciiIsFoo, SameAsIsFoo) {
  // Remember the active LC_CTYPE locale, then force "C" for the comparison.
  // It should already be "C", but this guards against a changed environment.
  const std::string saved_locale = setlocale(LC_CTYPE, nullptr);
  ASSERT_TRUE(setlocale(LC_CTYPE, "C"));

  for (int ch = 0; ch != 256; ++ch) {
    // The C functions return an int; only zero/non-zero is meaningful.
    EXPECT_EQ(isalpha(ch) != 0, absl::ascii_isalpha(ch)) << ch;
    EXPECT_EQ(isdigit(ch) != 0, absl::ascii_isdigit(ch)) << ch;
    EXPECT_EQ(isalnum(ch) != 0, absl::ascii_isalnum(ch)) << ch;
    EXPECT_EQ(isspace(ch) != 0, absl::ascii_isspace(ch)) << ch;
    EXPECT_EQ(ispunct(ch) != 0, absl::ascii_ispunct(ch)) << ch;
    EXPECT_EQ(isblank(ch) != 0, absl::ascii_isblank(ch)) << ch;
    EXPECT_EQ(iscntrl(ch) != 0, absl::ascii_iscntrl(ch)) << ch;
    EXPECT_EQ(isxdigit(ch) != 0, absl::ascii_isxdigit(ch)) << ch;
    EXPECT_EQ(isprint(ch) != 0, absl::ascii_isprint(ch)) << ch;
    EXPECT_EQ(isgraph(ch) != 0, absl::ascii_isgraph(ch)) << ch;
    EXPECT_EQ(isupper(ch) != 0, absl::ascii_isupper(ch)) << ch;
    EXPECT_EQ(islower(ch) != 0, absl::ascii_islower(ch)) << ch;
    EXPECT_EQ(isascii(ch) != 0, absl::ascii_isascii(ch)) << ch;
  }

  // Put the original locale back.
  ASSERT_TRUE(setlocale(LC_CTYPE, saved_locale.c_str()));
}
// Checks absl::ascii_toupper()/ascii_tolower() for every value in [0, 255]:
// against the arithmetic definition, against the C library in the "C" locale,
// and for agreement between plain and signed-char arguments.
TEST(AsciiToFoo, All) {
  // Switch LC_CTYPE to "C" for the duration of the test. It is expected to be
  // "C" already; the previous setting is remembered so it can be put back.
  const std::string saved_locale = setlocale(LC_CTYPE, nullptr);
  ASSERT_TRUE(setlocale(LC_CTYPE, "C"));

  for (int c = 0; c != 256; ++c) {
    // Lowercase letters map onto the matching uppercase letter; every other
    // value must come back unchanged.
    if (absl::ascii_islower(c)) {
      EXPECT_EQ(absl::ascii_toupper(c), 'A' + (c - 'a')) << c;
    } else {
      EXPECT_EQ(absl::ascii_toupper(c), static_cast<char>(c)) << c;
    }

    // And the mirror image for ascii_tolower().
    if (absl::ascii_isupper(c)) {
      EXPECT_EQ(absl::ascii_tolower(c), 'a' + (c - 'A')) << c;
    } else {
      EXPECT_EQ(absl::ascii_tolower(c), static_cast<char>(c)) << c;
    }

    // Agreement with the C library is only guaranteed in the "C" locale.
    EXPECT_EQ(static_cast<char>(tolower(c)), absl::ascii_tolower(c)) << c;
    EXPECT_EQ(static_cast<char>(toupper(c)), absl::ascii_toupper(c)) << c;

    // Unlike the standard to* functions, the absl::ascii_to* functions accept
    // negative signed chars, so feed them the same byte as a signed char.
    const signed char as_signed =
        static_cast<signed char>(static_cast<unsigned char>(c));
    EXPECT_EQ(absl::ascii_tolower(c), absl::ascii_tolower(as_signed)) << c;
    EXPECT_EQ(absl::ascii_toupper(c), absl::ascii_toupper(as_signed)) << c;
  }

  // Put the caller's locale back.
  ASSERT_TRUE(setlocale(LC_CTYPE, saved_locale.c_str()));
}
// AsciiStrToLower() must accept a char array, a std::string and a string_view,
// and ascii_tolower() must be usable as a per-character transform.
TEST(AsciiStrTo, Lower) {
  const char c_array[] = "ABCDEF";
  const std::string owned("GHIJKL");
  const std::string backing("MNOPQR");
  const absl::string_view view(backing);

  EXPECT_EQ("abcdef", absl::AsciiStrToLower(c_array));
  EXPECT_EQ("ghijkl", absl::AsciiStrToLower(owned));
  EXPECT_EQ("mnopqr", absl::AsciiStrToLower(view));

  // Lower-case a writable buffer in place, stopping before the terminator.
  char scratch[] = "Mutable";
  char* const first = scratch;
  char* const last = first + strlen(scratch);
  std::transform(first, last, first, absl::ascii_tolower);
  EXPECT_STREQ("mutable", scratch);
}
// AsciiStrToUpper() must accept a char array, a std::string and a string_view,
// and ascii_toupper() must be usable as a per-character transform.
TEST(AsciiStrTo, Upper) {
  const char c_array[] = "abcdef";
  const std::string owned("ghijkl");
  const std::string backing("mnopqr");
  const absl::string_view view(backing);

  EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(c_array));
  EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(owned));
  EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(view));

  // Upper-case a writable buffer in place, stopping before the terminator.
  char scratch[] = "Mutable";
  char* const first = scratch;
  char* const last = first + strlen(scratch);
  std::transform(first, last, first, absl::ascii_toupper);
  EXPECT_STREQ("MUTABLE", scratch);
}
// The string_view overload of StripLeadingAsciiWhitespace() returns the input
// minus any leading whitespace; trailing whitespace is left alone.
TEST(StripLeadingAsciiWhitespace, FromStringView) {
  const absl::string_view no_whitespace = "foo";
  const absl::string_view leading_only = "\t \n\f\r\n\vfoo";
  const absl::string_view leading_and_trailing = "\t \n\f\r\n\vfoo foo\n ";
  const absl::string_view all_whitespace = "\t \n\f\r\v\n\t \n\f\r\v\n";

  EXPECT_EQ(absl::string_view{},
            absl::StripLeadingAsciiWhitespace(absl::string_view{}));
  EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace(no_whitespace));
  EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace(leading_only));
  EXPECT_EQ("foo foo\n ",
            absl::StripLeadingAsciiWhitespace(leading_and_trailing));
  EXPECT_EQ(absl::string_view{},
            absl::StripLeadingAsciiWhitespace(all_whitespace));
}
// The std::string* overload of StripLeadingAsciiWhitespace() edits the string
// in place; one string object is reused across all cases.
TEST(StripLeadingAsciiWhitespace, InPlace) {
  std::string text;

  // A default-constructed string stays empty.
  absl::StripLeadingAsciiWhitespace(&text);
  EXPECT_EQ("", text);

  // Loads `input` into `text`, strips it in place and hands back the result.
  const auto strip_in_place = [&text](const char* input) -> const std::string& {
    text = input;
    absl::StripLeadingAsciiWhitespace(&text);
    return text;
  };

  EXPECT_EQ("foo", strip_in_place("foo"));
  EXPECT_EQ("foo", strip_in_place("\t \n\f\r\n\vfoo"));
  EXPECT_EQ("foo foo\n ", strip_in_place("\t \n\f\r\n\vfoo foo\n "));
  EXPECT_EQ(absl::string_view{},
            strip_in_place("\t \n\f\r\v\n\t \n\f\r\v\n"));
}
// The string_view overload of StripTrailingAsciiWhitespace() returns the input
// minus any trailing whitespace; leading whitespace is left alone.
TEST(StripTrailingAsciiWhitespace, FromStringView) {
  const absl::string_view no_whitespace = "foo";
  const absl::string_view trailing_only = "foo\t \n\f\r\n\v";
  const absl::string_view leading_and_trailing = " \nfoo foo\t \n\f\r\n\v";
  const absl::string_view all_whitespace = "\t \n\f\r\v\n\t \n\f\r\v\n";

  EXPECT_EQ(absl::string_view{},
            absl::StripTrailingAsciiWhitespace(absl::string_view{}));
  EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace(no_whitespace));
  EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace(trailing_only));
  EXPECT_EQ(" \nfoo foo",
            absl::StripTrailingAsciiWhitespace(leading_and_trailing));
  EXPECT_EQ(absl::string_view{},
            absl::StripTrailingAsciiWhitespace(all_whitespace));
}
// The std::string* overload of StripTrailingAsciiWhitespace() edits the string
// in place; one string object is reused across all cases.
TEST(StripTrailingAsciiWhitespace, InPlace) {
  std::string text;

  // A default-constructed string stays empty.
  absl::StripTrailingAsciiWhitespace(&text);
  EXPECT_EQ("", text);

  // Loads `input` into `text`, strips it in place and hands back the result.
  const auto strip_in_place = [&text](const char* input) -> const std::string& {
    text = input;
    absl::StripTrailingAsciiWhitespace(&text);
    return text;
  };

  EXPECT_EQ("foo", strip_in_place("foo"));
  EXPECT_EQ("foo", strip_in_place("foo\t \n\f\r\n\v"));
  EXPECT_EQ(" \nfoo foo", strip_in_place(" \nfoo foo\t \n\f\r\n\v"));
  EXPECT_EQ(absl::string_view{},
            strip_in_place("\t \n\f\r\v\n\t \n\f\r\v\n"));
}
// The string_view overload of StripAsciiWhitespace() returns the input minus
// whitespace at both ends; whitespace in the middle is left alone.
TEST(StripAsciiWhitespace, FromStringView) {
  const absl::string_view no_whitespace = "foo";
  const absl::string_view padded_word = "\t \n\f\r\n\vfoo\t \n\f\r\n\v";
  const absl::string_view padded_words = "\t \n\f\r\n\vfoo foo\t \n\f\r\n\v";
  const absl::string_view all_whitespace = "\t \n\f\r\v\n\t \n\f\r\v\n";

  EXPECT_EQ(absl::string_view{},
            absl::StripAsciiWhitespace(absl::string_view{}));
  EXPECT_EQ("foo", absl::StripAsciiWhitespace(no_whitespace));
  EXPECT_EQ("foo", absl::StripAsciiWhitespace(padded_word));
  EXPECT_EQ("foo foo", absl::StripAsciiWhitespace(padded_words));
  EXPECT_EQ(absl::string_view{}, absl::StripAsciiWhitespace(all_whitespace));
}
// The std::string* overload of StripAsciiWhitespace() edits the string in
// place; one string object is reused across all cases.
TEST(StripAsciiWhitespace, InPlace) {
  std::string text;

  // A default-constructed string stays empty.
  absl::StripAsciiWhitespace(&text);
  EXPECT_EQ("", text);

  // Loads `input` into `text`, strips it in place and hands back the result.
  const auto strip_in_place = [&text](const char* input) -> const std::string& {
    text = input;
    absl::StripAsciiWhitespace(&text);
    return text;
  };

  EXPECT_EQ("foo", strip_in_place("foo"));
  EXPECT_EQ("foo", strip_in_place("\t \n\f\r\n\vfoo\t \n\f\r\n\v"));
  EXPECT_EQ("foo foo", strip_in_place("\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"));
  EXPECT_EQ(absl::string_view{},
            strip_in_place("\t \n\f\r\v\n\t \n\f\r\v\n"));
}
// Runs RemoveExtraAsciiWhitespace() over a set of inputs and compares each
// in-place result with its expected text.
TEST(RemoveExtraAsciiWhitespace, InPlace) {
  // Each input sits next to its expected output so the pairs cannot drift
  // out of step.
  struct Case {
    const char* input;
    const char* expected;
  };
  const Case cases[] = {
      {"No extra space", "No extra space"},
      {" Leading whitespace", "Leading whitespace"},
      {"Trailing whitespace ", "Trailing whitespace"},
      {" Leading and trailing ", "Leading and trailing"},
      {" Whitespace \t in\v middle ", "Whitespace in middle"},
      {"'Eeeeep! \n Newlines!\n", "'Eeeeep! Newlines!"},
      {"nospaces", "nospaces"},
      {"", ""},
      {"\n\t a\t\n\nb \t\n", "a\nb"},
  };

  for (const Case& tc : cases) {
    std::string s(tc.input);
    absl::RemoveExtraAsciiWhitespace(&s);
    EXPECT_EQ(tc.expected, s);
  }
}
} // namespace

1093
absl/strings/escaping.cc Normal file

File diff suppressed because it is too large Load diff

158
absl/strings/escaping.h Normal file
View file

@ -0,0 +1,158 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: escaping.h
// -----------------------------------------------------------------------------
//
// This header file contains string utilities involved in escaping and
// unescaping strings in various ways.
//
#ifndef ABSL_STRINGS_ESCAPING_H_
#define ABSL_STRINGS_ESCAPING_H_
#include <cstddef>
#include <string>
#include <vector>
#include "absl/base/macros.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
namespace absl {
// CUnescape()
//
// Unescapes a `source` string and copies it into `dest`, rewriting C-style
// escape sequences (http://en.cppreference.com/w/cpp/language/escape) into
// their proper code point equivalents, returning `true` if successful.
//
// The following unescape sequences can be handled:
//
//   * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents
//   * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must
//     resolve to a single byte or an error will occur. E.g. values greater
//     than 0xff will produce an error.
//   * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary
//     number of following digits are allowed, the unescaped value must resolve
//     to a single byte or an error will occur. E.g. '\x0045' is equivalent to
//     '\x45', but '\x1234' will produce an error.
//   * Unicode escape sequences ('\unnnn' for exactly four hex digits or
//     '\Unnnnnnnn' for exactly eight hex digits), which will be encoded in
//     UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and
//     0x99).
//
// If any errors are encountered, this function returns `false` and stores the
// first encountered error in `error`. To disable error reporting, set `error`
// to `nullptr` or use the overload with no error reporting below.
//
// Example:
//
//   std::string s = "foo\\rbar\\nbaz\\t";
//   std::string unescaped_s;
//   if (!absl::CUnescape(s, &unescaped_s)) {
//     ...  // handle the malformed escape sequence
//   }
//   EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t");
bool CUnescape(absl::string_view source, std::string* dest, std::string* error);
// Overload of `CUnescape()` with no error reporting: forwards to the
// three-argument overload with `error` set to `nullptr`.
inline bool CUnescape(absl::string_view source, std::string* dest) {
return CUnescape(source, dest, nullptr);
}
// CEscape()
//
// Escapes a `src` string using C-style escape sequences
// (http://en.cppreference.com/w/cpp/language/escape), escaping other
// non-printable/non-whitespace bytes as octal sequences (e.g. "\377").
//
// Example:
//
//   std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
//   std::string escaped_s = absl::CEscape(s);
//   EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n");
std::string CEscape(absl::string_view src);
// CHexEscape()
//
// Escapes a `src` string using C-style escape sequences, escaping
// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g.
// "\xFF").
//
// Example:
//
//   std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
//   std::string escaped_s = absl::CHexEscape(s);
//   EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n");
std::string CHexEscape(absl::string_view src);
// Utf8SafeCEscape()
//
// Escapes a `src` string using C-style escape sequences, escaping bytes as
// octal sequences, and passing through UTF-8 characters without conversion.
// I.e., when encountering any bytes with their high bit set, this function
// will not escape those values, whether or not they are valid UTF-8.
std::string Utf8SafeCEscape(absl::string_view src);
// Utf8SafeCHexEscape()
//
// Escapes a `src` string using C-style escape sequences, escaping bytes as
// hexadecimal sequences, and passing through UTF-8 characters without
// conversion.
std::string Utf8SafeCHexEscape(absl::string_view src);
// Base64Unescape()
//
// Converts a `src` string encoded in Base64 to its binary equivalent, writing
// it to a `dest` buffer and returning `true` on success. If `src` contains
// invalid characters, `dest` is cleared and the function returns `false`.
bool Base64Unescape(absl::string_view src, std::string* dest);
// WebSafeBase64Unescape(absl::string_view, std::string*)
//
// Converts a `src` string encoded in Base64 to its binary equivalent, writing
// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'.
// If `src` contains invalid characters, `dest` is cleared and the function
// returns `false`.
bool WebSafeBase64Unescape(absl::string_view src, std::string* dest);
// Base64Escape()
//
// Encodes a `src` string into a `dest` buffer using base64 encoding, with
// padding characters. This function conforms with RFC 4648 section 4 (base64).
void Base64Escape(absl::string_view src, std::string* dest);
// WebSafeBase64Escape()
//
// Encodes a `src` string into a `dest` buffer using '-' instead of '+' and
// '_' instead of '/', and without padding. This function conforms with RFC 4648
// section 5 (base64url).
void WebSafeBase64Escape(absl::string_view src, std::string* dest);
// HexStringToBytes()
//
// Converts an ASCII hex string into bytes, returning binary data of length
// `from.size()/2`.
std::string HexStringToBytes(absl::string_view from);
// BytesToHexString()
//
// Converts binary data into an ASCII text string, returning a string of size
// `2*from.size()`.
std::string BytesToHexString(absl::string_view from);
} // namespace absl
#endif // ABSL_STRINGS_ESCAPING_H_

View file

@ -0,0 +1,638 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/escaping.h"
#include <array>
#include <cstdio>
#include <cstring>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/macros.h"
#include "absl/container/fixed_array.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/internal/escaping_test_common.inc"
namespace {
// One escaping test case: a string in its escaped spelling next to the raw
// text it stands for. The tests below brace-initialize it in the order
// {escaped, unescaped}, so the member order must not change.
struct epair {
// Escaped spelling (what the *Escape functions are expected to produce).
std::string escaped;
// Raw text (what is fed to the *Escape functions / expected from CUnescape).
std::string unescaped;
};
// Round-trip test: whatever any of the four escaping functions produces must
// be turned back into the original bytes by CUnescape().
TEST(CEscape, EscapeAndUnescape) {
  const std::string inputs[] = {
      std::string("foo\nxx\r\b\0023"),
      std::string(""),
      std::string("abc"),
      std::string("\1chad_rules"),
      std::string("\1arnar_drools"),
      std::string("xxxx\r\t'\"\\"),
      std::string("\0xx\0", 4),
      std::string("\x01\x31"),
      std::string("abc\xb\x42\141bc"),
      std::string("123\1\x31\x32\x33"),
      std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
      std::string("\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
  };

  // Run every input through each escaping flavour in turn: octal, hex, and
  // the two UTF-8-preserving variants.
  using Escaper = std::string (*)(absl::string_view);
  const Escaper escapers[] = {
      absl::CEscape,
      absl::CHexEscape,
      absl::Utf8SafeCEscape,
      absl::Utf8SafeCHexEscape,
  };
  for (const Escaper escape : escapers) {
    for (const std::string& original : inputs) {
      const std::string escaped = escape(original);

      // Unescape into a separate destination string.
      std::string round_tripped;
      EXPECT_TRUE(absl::CUnescape(escaped, &round_tripped));
      EXPECT_EQ(round_tripped, original);

      // Unescape with the source string doubling as the destination.
      std::string in_place = escaped;
      EXPECT_TRUE(absl::CUnescape(in_place, &in_place));
      ASSERT_EQ(in_place, original);
    }
  }

  // Exhaustively check that every possible two-byte string survives a
  // CHexEscape() / CUnescape() round trip.
  for (int first = 0; first < 256; ++first) {
    for (int second = 0; second < 256; ++second) {
      const char bytes[2] = {static_cast<char>(first),
                             static_cast<char>(second)};
      const std::string s(bytes, 2);
      const std::string escaped = absl::CHexEscape(s);
      std::string unescaped;
      EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
      EXPECT_EQ(s, unescaped);
    }
  }
}
// Checks the exact output of each of the four escaping functions against
// hand-written expectations.
TEST(CEscape, BasicEscaping) {
  // CEscape(): non-printable bytes come out as octal escapes.
  const epair oct_values[] = {
      {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
      {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
       "'full of \"sound\" and \"fury\"'"},
      {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
      {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"},
  };
  for (const epair& tc : oct_values) {
    EXPECT_EQ(absl::CEscape(tc.unescaped), tc.escaped);
  }

  // CHexEscape(): non-printable bytes come out as hex escapes.
  const epair hex_values[] = {
      {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
      {"I\\\'ve just seen a \\\"face\\\"",
       "I've just seen a \"face\""},
      {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
      {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"},
  };
  for (const epair& tc : hex_values) {
    EXPECT_EQ(absl::CHexEscape(tc.unescaped), tc.escaped);
  }

  // Utf8SafeCEscape(): octal escapes, bytes with the high bit set untouched.
  const epair utf8_oct_values[] = {
      {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
       "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
      {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
       "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
      {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
       "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"},
  };
  for (const epair& tc : utf8_oct_values) {
    EXPECT_EQ(absl::Utf8SafeCEscape(tc.unescaped), tc.escaped);
  }

  // Utf8SafeCHexEscape(): hex escapes, bytes with the high bit set untouched.
  const epair utf8_hex_values[] = {
      {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
       "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
      {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
       "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
      {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
       "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"},
  };
  for (const epair& tc : utf8_hex_values) {
    EXPECT_EQ(absl::Utf8SafeCHexEscape(tc.unescaped), tc.escaped);
  }
}
// Exercises CUnescape() on Unicode escapes that must decode to UTF-8 and on
// malformed or out-of-range escapes that must be rejected with an error.
TEST(Unescape, BasicFunction) {
  // Well-formed \u / \U escapes and the UTF-8 bytes they must decode to.
  const epair valid_cases[] = {
      {"\\u0030", "0"},
      {"\\u00A3", "\xC2\xA3"},
      {"\\u22FD", "\xE2\x8B\xBD"},
      {"\\U00010000", "\xF0\x90\x80\x80"},
      {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"},
  };
  for (const epair& tc : valid_cases) {
    std::string decoded;
    EXPECT_TRUE(absl::CUnescape(tc.escaped, &decoded));
    EXPECT_EQ(decoded, tc.unescaped);
  }

  // Inputs CUnescape() must refuse, filling in a non-empty error message.
  const std::string invalid_cases[] = {
      "\\u1",       // too short
      "\\U1",       // too short
      "\\Uffffff",  // only six of the eight hex digits \U requires
      "\\777",      // exceeds 0xff
      "\\xABCD",    // exceeds 0xff
  };
  for (const std::string& escaped : invalid_cases) {
    std::string error;
    std::string decoded;
    EXPECT_FALSE(absl::CUnescape(escaped, &decoded, &error));
    EXPECT_FALSE(error.empty());
  }
}
// Fixture shared by the CUnescape NUL-handling tests. It declares escaped
// input strings that contain several NUL escapes and provides the string
// that the tests pass to CUnescape() as the destination.
class CUnescapeTest : public testing::Test {
protected:
// Escaped inputs mixing NUL escapes with newlines, one per escape notation
// (octal, hex, Unicode).
static const char kStringWithMultipleOctalNulls[];
static const char kStringWithMultipleHexNulls[];
static const char kStringWithMultipleUnicodeNulls[];
// Destination handed to CUnescape() by each test.
std::string result_string_;
};
// Escaped text containing octal NUL escapes of one, two and three digits,
// interleaved with newlines and a literal '0' character.
const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
"\\0\\n" // null escape \0 plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\00\\12" // null escape \00 plus octal newline code
"\\000"; // null escape \000
// This has the same ingredients as kStringWithMultipleOctalNulls
// but with \x hex escapes instead of octal escapes.
const char CUnescapeTest::kStringWithMultipleHexNulls[] =
"\\x0\\n" // null escape \x0 plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\x00\\xa" // null escape \x00 plus hex newline code
"\\x000"; // null escape \x000
// Escaped text containing the short and the long form of the Unicode NUL
// escape, separated by newlines and a literal '0' character.
const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] =
"\\u0000\\n" // short-form (4-digit) null escape plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\U00000000"; // long-form (8-digit) null escape
// A one-digit octal escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharOctalNull) {
  const std::string escaped = "\\0";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A two-digit octal escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharOctalNull) {
  const std::string escaped = "\\00";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A three-digit octal escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharOctalNull) {
  const std::string escaped = "\\000";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A one-digit hex escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharHexNull) {
  const std::string escaped = "\\x0";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A two-digit hex escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharHexNull) {
  const std::string escaped = "\\x00";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A three-digit hex escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharHexNull) {
  const std::string escaped = "\\x000";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// The four-digit Unicode escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) {
  const std::string escaped = "\\u0000";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// The eight-digit Unicode escape of zero decodes to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) {
  const std::string escaped = "\\U00000000";
  const std::string single_nul("\0", 1);  // explicit length keeps the NUL
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// Several octal NUL escapes mixed with newlines must all be decoded.
TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) {
  const std::string escaped(kStringWithMultipleOctalNulls);
  // Every escape, newline and NUL alike, has to come out as the raw
  // character. The explicit length keeps the embedded NULs in the expectation.
  const std::string expected("\0\n"
                             "0\n"
                             "\0\n"
                             "\0", 7);
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
// Several hex NUL escapes mixed with newlines must all be decoded.
TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) {
  const std::string escaped(kStringWithMultipleHexNulls);
  // The explicit length keeps the embedded NULs in the expectation.
  const std::string expected("\0\n"
                             "0\n"
                             "\0\n"
                             "\0", 7);
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
// Short- and long-form Unicode NUL escapes mixed with newlines must all be
// decoded.
TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) {
  const std::string escaped(kStringWithMultipleUnicodeNulls);
  // The explicit length keeps the embedded NULs in the expectation.
  const std::string expected("\0\n"
                             "0\n"
                             "\0", 5);
  EXPECT_TRUE(absl::CUnescape(escaped, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
// Table of (plaintext, expected standard Base64 encoding) pairs consumed by
// TEST(Base64, EscapeAndUnescape). Plaintexts that contain NUL bytes are
// written with an explicit length so the embedded zeros are kept.
static struct {
absl::string_view plaintext;
absl::string_view cyphertext;
} const base64_tests[] = {
// Empty string.
{{"", 0}, {"", 0}},
{{nullptr, 0},
{"", 0}}, // if length is zero, plaintext ptr must be ignored!
// Basic bit patterns;
// values obtained with "echo -n '...' | uuencode -m test"
{{"\000", 1}, "AA=="},
{{"\001", 1}, "AQ=="},
{{"\002", 1}, "Ag=="},
{{"\004", 1}, "BA=="},
{{"\010", 1}, "CA=="},
{{"\020", 1}, "EA=="},
{{"\040", 1}, "IA=="},
{{"\100", 1}, "QA=="},
{{"\200", 1}, "gA=="},
{{"\377", 1}, "/w=="},
{{"\376", 1}, "/g=="},
{{"\375", 1}, "/Q=="},
{{"\373", 1}, "+w=="},
{{"\367", 1}, "9w=="},
{{"\357", 1}, "7w=="},
{{"\337", 1}, "3w=="},
{{"\277", 1}, "vw=="},
{{"\177", 1}, "fw=="},
{{"\000\000", 2}, "AAA="},
{{"\000\001", 2}, "AAE="},
{{"\000\002", 2}, "AAI="},
{{"\000\004", 2}, "AAQ="},
{{"\000\010", 2}, "AAg="},
{{"\000\020", 2}, "ABA="},
{{"\000\040", 2}, "ACA="},
{{"\000\100", 2}, "AEA="},
{{"\000\200", 2}, "AIA="},
{{"\001\000", 2}, "AQA="},
{{"\002\000", 2}, "AgA="},
{{"\004\000", 2}, "BAA="},
{{"\010\000", 2}, "CAA="},
{{"\020\000", 2}, "EAA="},
{{"\040\000", 2}, "IAA="},
{{"\100\000", 2}, "QAA="},
{{"\200\000", 2}, "gAA="},
{{"\377\377", 2}, "//8="},
{{"\377\376", 2}, "//4="},
{{"\377\375", 2}, "//0="},
{{"\377\373", 2}, "//s="},
{{"\377\367", 2}, "//c="},
{{"\377\357", 2}, "/+8="},
{{"\377\337", 2}, "/98="},
{{"\377\277", 2}, "/78="},
{{"\377\177", 2}, "/38="},
{{"\376\377", 2}, "/v8="},
{{"\375\377", 2}, "/f8="},
{{"\373\377", 2}, "+/8="},
{{"\367\377", 2}, "9/8="},
{{"\357\377", 2}, "7/8="},
{{"\337\377", 2}, "3/8="},
{{"\277\377", 2}, "v/8="},
{{"\177\377", 2}, "f/8="},
{{"\000\000\000", 3}, "AAAA"},
{{"\000\000\001", 3}, "AAAB"},
{{"\000\000\002", 3}, "AAAC"},
{{"\000\000\004", 3}, "AAAE"},
{{"\000\000\010", 3}, "AAAI"},
{{"\000\000\020", 3}, "AAAQ"},
{{"\000\000\040", 3}, "AAAg"},
{{"\000\000\100", 3}, "AABA"},
{{"\000\000\200", 3}, "AACA"},
{{"\000\001\000", 3}, "AAEA"},
{{"\000\002\000", 3}, "AAIA"},
{{"\000\004\000", 3}, "AAQA"},
{{"\000\010\000", 3}, "AAgA"},
{{"\000\020\000", 3}, "ABAA"},
{{"\000\040\000", 3}, "ACAA"},
{{"\000\100\000", 3}, "AEAA"},
{{"\000\200\000", 3}, "AIAA"},
{{"\001\000\000", 3}, "AQAA"},
{{"\002\000\000", 3}, "AgAA"},
{{"\004\000\000", 3}, "BAAA"},
{{"\010\000\000", 3}, "CAAA"},
{{"\020\000\000", 3}, "EAAA"},
{{"\040\000\000", 3}, "IAAA"},
{{"\100\000\000", 3}, "QAAA"},
{{"\200\000\000", 3}, "gAAA"},
{{"\377\377\377", 3}, "////"},
{{"\377\377\376", 3}, "///+"},
{{"\377\377\375", 3}, "///9"},
{{"\377\377\373", 3}, "///7"},
{{"\377\377\367", 3}, "///3"},
{{"\377\377\357", 3}, "///v"},
{{"\377\377\337", 3}, "///f"},
{{"\377\377\277", 3}, "//+/"},
{{"\377\377\177", 3}, "//9/"},
{{"\377\376\377", 3}, "//7/"},
{{"\377\375\377", 3}, "//3/"},
{{"\377\373\377", 3}, "//v/"},
{{"\377\367\377", 3}, "//f/"},
{{"\377\357\377", 3}, "/+//"},
{{"\377\337\377", 3}, "/9//"},
{{"\377\277\377", 3}, "/7//"},
{{"\377\177\377", 3}, "/3//"},
{{"\376\377\377", 3}, "/v//"},
{{"\375\377\377", 3}, "/f//"},
{{"\373\377\377", 3}, "+///"},
{{"\367\377\377", 3}, "9///"},
{{"\357\377\377", 3}, "7///"},
{{"\337\377\377", 3}, "3///"},
{{"\277\377\377", 3}, "v///"},
{{"\177\377\377", 3}, "f///"},
// Random numbers: values obtained with
//
// #! /bin/bash
// dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
// od -N $1 -t o1 /tmp/bar.random
// uuencode -m test < /tmp/bar.random
//
// where $1 is the number of bytes (2, 3)
{{"\243\361", 2}, "o/E="},
{{"\024\167", 2}, "FHc="},
{{"\313\252", 2}, "y6o="},
{{"\046\041", 2}, "JiE="},
{{"\145\236", 2}, "ZZ4="},
{{"\254\325", 2}, "rNU="},
{{"\061\330", 2}, "Mdg="},
{{"\245\032", 2}, "pRo="},
{{"\006\000", 2}, "BgA="},
{{"\375\131", 2}, "/Vk="},
{{"\303\210", 2}, "w4g="},
{{"\040\037", 2}, "IB8="},
{{"\261\372", 2}, "sfo="},
{{"\335\014", 2}, "3Qw="},
{{"\233\217", 2}, "m48="},
{{"\373\056", 2}, "+y4="},
{{"\247\232", 2}, "p5o="},
{{"\107\053", 2}, "Rys="},
{{"\204\077", 2}, "hD8="},
{{"\276\211", 2}, "vok="},
{{"\313\110", 2}, "y0g="},
{{"\363\376", 2}, "8/4="},
{{"\251\234", 2}, "qZw="},
{{"\103\262", 2}, "Q7I="},
{{"\142\312", 2}, "Yso="},
{{"\067\211", 2}, "N4k="},
{{"\220\001", 2}, "kAE="},
{{"\152\240", 2}, "aqA="},
{{"\367\061", 2}, "9zE="},
{{"\133\255", 2}, "W60="},
{{"\176\035", 2}, "fh0="},
{{"\032\231", 2}, "Gpk="},
{{"\013\007\144", 3}, "Cwdk"},
{{"\030\112\106", 3}, "GEpG"},
{{"\047\325\046", 3}, "J9Um"},
{{"\310\160\022", 3}, "yHAS"},
{{"\131\100\237", 3}, "WUCf"},
{{"\064\342\134", 3}, "NOJc"},
{{"\010\177\004", 3}, "CH8E"},
{{"\345\147\205", 3}, "5WeF"},
{{"\300\343\360", 3}, "wOPw"},
{{"\061\240\201", 3}, "MaCB"},
{{"\225\333\044", 3}, "ldsk"},
{{"\215\137\352", 3}, "jV/q"},
{{"\371\147\160", 3}, "+Wdw"},
{{"\030\320\051", 3}, "GNAp"},
{{"\044\174\241", 3}, "JHyh"},
{{"\260\127\037", 3}, "sFcf"},
{{"\111\045\033", 3}, "SSUb"},
{{"\202\114\107", 3}, "gkxH"},
{{"\057\371\042", 3}, "L/ki"},
{{"\223\247\244", 3}, "k6ek"},
{{"\047\216\144", 3}, "J45k"},
{{"\203\070\327", 3}, "gzjX"},
{{"\247\140\072", 3}, "p2A6"},
{{"\124\115\116", 3}, "VE1O"},
{{"\157\162\050", 3}, "b3Io"},
{{"\357\223\004", 3}, "75ME"},
{{"\052\117\156", 3}, "Kk9u"},
{{"\347\154\000", 3}, "52wA"},
{{"\303\012\142", 3}, "wwpi"},
{{"\060\035\362", 3}, "MB3y"},
{{"\130\226\361", 3}, "WJbx"},
{{"\173\013\071", 3}, "ews5"},
{{"\336\004\027", 3}, "3gQX"},
{{"\357\366\234", 3}, "7/ac"},
{{"\353\304\111", 3}, "68RJ"},
{{"\024\264\131", 3}, "FLRZ"},
{{"\075\114\251", 3}, "PUyp"},
{{"\315\031\225", 3}, "zRmV"},
{{"\154\201\276", 3}, "bIG+"},
{{"\200\066\072", 3}, "gDY6"},
{{"\142\350\267", 3}, "Yui3"},
{{"\033\000\166", 3}, "GwB2"},
{{"\210\055\077", 3}, "iC0/"},
{{"\341\037\124", 3}, "4R9U"},
{{"\161\103\152", 3}, "cUNq"},
{{"\270\142\131", 3}, "uGJZ"},
{{"\337\076\074", 3}, "3z48"},
{{"\375\106\362", 3}, "/Uby"},
{{"\227\301\127", 3}, "l8FX"},
{{"\340\002\234", 3}, "4AKc"},
{{"\121\064\033", 3}, "UTQb"},
{{"\157\134\143", 3}, "b1xj"},
{{"\247\055\327", 3}, "py3X"},
{{"\340\142\005", 3}, "4GIF"},
{{"\060\260\143", 3}, "MLBj"},
{{"\075\203\170", 3}, "PYN4"},
{{"\143\160\016", 3}, "Y3AO"},
{{"\313\013\063", 3}, "ywsz"},
{{"\174\236\135", 3}, "fJ5d"},
{{"\103\047\026", 3}, "QycW"},
{{"\365\005\343", 3}, "9QXj"},
{{"\271\160\223", 3}, "uXCT"},
{{"\362\255\172", 3}, "8q16"},
{{"\113\012\015", 3}, "SwoN"},
// various lengths, generated by this python script:
//
// from string import lowercase as lc
// for i in range(27):
// print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
// lc[:i].encode('base64').strip())
{{"", 0}, {"", 0}},
{"a", "YQ=="},
{"ab", "YWI="},
{"abc", "YWJj"},
{"abcd", "YWJjZA=="},
{"abcde", "YWJjZGU="},
{"abcdef", "YWJjZGVm"},
{"abcdefg", "YWJjZGVmZw=="},
{"abcdefgh", "YWJjZGVmZ2g="},
{"abcdefghi", "YWJjZGVmZ2hp"},
{"abcdefghij", "YWJjZGVmZ2hpag=="},
{"abcdefghijk", "YWJjZGVmZ2hpams="},
{"abcdefghijkl", "YWJjZGVmZ2hpamts"},
{"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
{"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
{"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
{"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
{"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
{"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
{"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
{"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
{"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
{"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
{"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
{"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
{"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
{"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
};
// Round-trips every entry of base64_tests through the standard and web-safe
// Base64 encoders and decoders, checks the web-safe encoder against the
// base64_strings fixtures, and verifies that malformed input is rejected.
TEST(Base64, EscapeAndUnescape) {
  // Check the short strings; this tests the math (and boundaries).
  for (const auto& tc : base64_tests) {
    // Output strings start out holding junk so a pass shows the callee
    // replaces the previous contents instead of appending to them.
    std::string encoded("this junk should be ignored");
    absl::Base64Escape(tc.plaintext, &encoded);
    EXPECT_EQ(encoded, tc.cyphertext);

    std::string decoded("this junk should be ignored");
    EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
    EXPECT_EQ(decoded, tc.plaintext);

    // Derive the expected web-safe spelling from the standard one:
    // '+' becomes '-', '/' becomes '_', and everything from the first '='
    // padding character onwards is dropped.
    std::string websafe(tc.cyphertext);
    // The index is size_t so the comparison with size() does not mix signed
    // and unsigned operands.
    for (size_t c = 0; c < websafe.size(); ++c) {
      if ('+' == websafe[c]) websafe[c] = '-';
      if ('/' == websafe[c]) websafe[c] = '_';
      if ('=' == websafe[c]) {
        websafe.resize(c);
        break;
      }
    }

    encoded = "this junk should be ignored";
    absl::WebSafeBase64Escape(tc.plaintext, &encoded);
    EXPECT_EQ(encoded, websafe);

    // The web-safe decoder must accept the unpadded web-safe spelling.
    decoded = "this junk should be ignored";
    EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
    EXPECT_EQ(decoded, tc.plaintext);
  }

  // Now try the long strings, this tests the streaming.
  // NOTE(review): base64_strings is not defined in the visible part of this
  // file; presumably it comes from escaping_test_common.inc.
  for (const auto& tc : base64_strings) {
    std::string buffer;
    absl::WebSafeBase64Escape(tc.plaintext, &buffer);
    EXPECT_EQ(tc.cyphertext, buffer);
  }

  // Verify the behavior when decoding bad data: both decoders must fail and
  // leave the destination empty.
  {
    absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4),
                                    absl::string_view("abc.\0", 5)};
    for (absl::string_view bad_data : data_set) {
      std::string buf;
      EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf));
      EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf));
      EXPECT_TRUE(buf.empty());
    }
  }
}
// Encodes and decodes a 3 GB input to exercise Base64 on very large data.
// The DISABLED_ prefix keeps gtest from running it by default.
TEST(Base64, DISABLED_HugeData) {
  const size_t kSize = size_t(3) * 1000 * 1000 * 1000;
  static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
  const size_t kGroups = kSize / 3;  // number of 3-byte input groups

  const std::string huge(kSize, 'x');
  std::string escaped;
  absl::Base64Escape(huge, &escaped);

  // Build the expected encoding of "xxx...": every 3-byte group "xxx"
  // encodes to the 4 characters "eHh4".
  std::string expected_encoding;
  expected_encoding.reserve(kGroups * 4);
  for (size_t remaining = kGroups; remaining > 0; --remaining) {
    expected_encoding.append("eHh4");
  }
  EXPECT_EQ(expected_encoding, escaped);

  // And the encoding must decode back to the original input.
  std::string unescaped;
  EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped));
  EXPECT_EQ(huge, unescaped);
}
// HexStringToBytes() must decode mixed-case hex, including when it is handed
// a view into a larger buffer, and BytesToHexString() must emit lowercase hex.
TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) {
  const std::string hex_mixed = "0123456789abcdefABCDEF";
  const std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF";
  const std::string hex_only_lower = "0123456789abcdefabcdef";

  // The whole string, upper- and lowercase digits mixed.
  EXPECT_EQ(bytes_expected, absl::HexStringToBytes(hex_mixed));

  // A view that ends just before a trailing non-hex character.
  const std::string prefix_valid = hex_mixed + "?";
  const absl::string_view prefix_view(prefix_valid.data(),
                                      prefix_valid.size() - 1);
  EXPECT_EQ(bytes_expected, absl::HexStringToBytes(prefix_view));

  // A view into the middle of a buffer with non-hex characters on both sides.
  const std::string infix_valid = "?" + hex_mixed + "???";
  const absl::string_view infix_view(infix_valid.data() + 1,
                                     hex_mixed.size());
  EXPECT_EQ(bytes_expected, absl::HexStringToBytes(infix_view));

  // Encoding always yields lowercase digits.
  EXPECT_EQ(hex_only_lower, absl::BytesToHexString(bytes_expected));
}
} // namespace

View file

@ -0,0 +1,154 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Character Map Class
//
// A fast, bit-vector map for 8-bit unsigned characters.
// This class is useful for non-character purposes as well.
#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace absl {
namespace strings_internal {
// A set of 8-bit character values, stored as a 256-bit vector split across
// four 64-bit words: character c is a member when bit (c % 64) of word
// (c / 64) is set.
class Charmap {
 public:
  // Creates the empty set.
  constexpr Charmap() : m_() {}

  // Adds the first `len` chars starting at `str`. NUL gets no special
  // treatment here: it is added to the set like any other char.
  Charmap(const char* str, int len) : m_() {
    for (; len--; ++str) SetChar(*str);
  }

  // Adds the chars of the NUL-terminated string `str`. The terminating NUL
  // itself is not added.
  explicit Charmap(const char* str) : m_() {
    for (; *str; ++str) SetChar(*str);
  }

  // Returns true if `c` is a member of this set.
  constexpr bool contains(unsigned char c) const {
    return ((m_[c / 64] >> (c % 64)) & 0x1) != 0;
  }

  // Returns true if and only if some character is a member of both sets.
  bool IntersectsWith(const Charmap& c) const {
    size_t word = 0;
    while (word < ABSL_ARRAYSIZE(m_)) {
      if (m_[word] & c.m_[word]) return true;
      ++word;
    }
    return false;
  }

  // Returns true if the set has no members.
  bool IsZero() const { return (m_[0] | m_[1] | m_[2] | m_[3]) == 0; }

  // The set containing only the single char `x`.
  static constexpr Charmap Char(char x) {
    return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // The set containing every char of the NUL-terminated C string `s`.
  // This recurses once per character because of the C++11 single-return
  // constexpr formulation, so it is expensive. Use only in constexpr
  // initializers.
  static constexpr Charmap FromString(const char* s) {
    return (*s != 0) ? (Char(*s) | FromString(s + 1)) : Charmap();
  }

  // The set containing every char in the closed interval [lo, hi].
  static constexpr Charmap Range(char lo, char hi) {
    return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Set intersection.
  friend constexpr Charmap operator&(const Charmap& lhs, const Charmap& rhs) {
    return Charmap(lhs.m_[0] & rhs.m_[0], lhs.m_[1] & rhs.m_[1],
                   lhs.m_[2] & rhs.m_[2], lhs.m_[3] & rhs.m_[3]);
  }

  // Set union.
  friend constexpr Charmap operator|(const Charmap& lhs, const Charmap& rhs) {
    return Charmap(lhs.m_[0] | rhs.m_[0], lhs.m_[1] | rhs.m_[1],
                   lhs.m_[2] | rhs.m_[2], lhs.m_[3] | rhs.m_[3]);
  }

  // Set complement over all 256 character values.
  friend constexpr Charmap operator~(const Charmap& map) {
    return Charmap(~map.m_[0], ~map.m_[1], ~map.m_[2], ~map.m_[3]);
  }

 private:
  // Builds a map directly from its four words; b0 holds chars 0..63.
  constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // Bits of word number `word` that fall in the closed range [lo, hi]:
  // everything below hi + 1 with everything below lo removed.
  static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi,
                                         uint64_t word) {
    return ~OpenRangeFromZeroForWord(lo, word) &
           OpenRangeFromZeroForWord(hi + 1, word);
  }

  // All the chars in the specified word of the range [0, upper).
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    return (upper > 64 * word)
               ? ((upper < 64 * (word + 1))
                      // The range ends inside this word: keep the low
                      // (upper % 64) bits. That count is in 1..63 here.
                      ? (~static_cast<uint64_t>(0) >> (64 - upper % 64))
                      : ~static_cast<uint64_t>(0))
               : 0;
  }

  // The single bit for `x` if it lives in word number `word`, otherwise 0.
  static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) {
    return (x / 64 != word) ? 0 : (static_cast<uint64_t>(1) << (x % 64));
  }

  // Adds `c` to the set.
  void SetChar(unsigned char c) {
    const uint64_t bit = static_cast<uint64_t>(1) << (c % 64);
    m_[c / 64] |= bit;
  }

  uint64_t m_[4];
};
// Mirror the char-classifying predicates in <cctype>
// Each function returns a Charmap built only from ASCII ranges or literals.

// 'A' through 'Z'.
constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }
// 'a' through 'z'.
constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }
// '0' through '9'.
constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }
// Union of the lower-case and upper-case letters.
constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); }
// Union of the digits and the letters.
constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); }
// Decimal digits plus 'A'-'F' and 'a'-'f'.
constexpr Charmap XDigitCharmap() {
return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f');
}
// 0x20 (space) through 0x7e ('~').
constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }
// Tab, newline, vertical tab, form feed, carriage return and space.
constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }
// Everything in [0, 0x7f] that is not in PrintCharmap().
constexpr Charmap CntrlCharmap() {
return Charmap::Range(0, 0x7f) & ~PrintCharmap();
}
// Tab and space.
constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }
// Printable characters that are not in SpaceCharmap().
constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); }
// Graphic characters that are neither letters nor digits.
constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); }
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_

View file

@ -0,0 +1,172 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cstdio>
#include <cstdlib>
#include <cctype>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
// Compile-time constructed maps shared by the tests below: the complement of
// a default-constructed map, and a default-constructed map.
constexpr absl::strings_internal::Charmap everything_map =
~absl::strings_internal::Charmap();
constexpr absl::strings_internal::Charmap nothing_map{};
// Exercises the runtime constructors together with contains(),
// IntersectsWith() and IsZero().
TEST(Charmap, AllTests) {
  using absl::strings_internal::Charmap;

  const Charmap also_nothing_map("", 0);
  ASSERT_TRUE(everything_map.contains('\0'));
  ASSERT_FALSE(nothing_map.contains('\0'));
  ASSERT_FALSE(also_nothing_map.contains('\0'));
  // Every remaining character value, 1 through 255.
  for (int value = 1; value <= 255; ++value) {
    const unsigned char ch = static_cast<unsigned char>(value);
    ASSERT_TRUE(everything_map.contains(ch));
    ASSERT_FALSE(nothing_map.contains(ch));
    ASSERT_FALSE(also_nothing_map.contains(ch));
  }

  // Only the first five chars are used: '&', '@', '#', '@', '^'.
  const Charmap symbols("&@#@^!@?", 5);
  ASSERT_TRUE(symbols.contains('&'));
  ASSERT_TRUE(symbols.contains('@'));
  ASSERT_TRUE(symbols.contains('#'));
  ASSERT_TRUE(symbols.contains('^'));
  ASSERT_FALSE(symbols.contains('!'));
  ASSERT_FALSE(symbols.contains('?'));

  // The duplicate '@' must not be counted twice.
  int cnt = 0;
  for (int value = 1; value <= 255; ++value) {
    if (symbols.contains(static_cast<unsigned char>(value))) ++cnt;
  }
  ASSERT_EQ(cnt, 4);

  const Charmap lets("^abcde", 3);
  // With an explicit length the embedded NUL does not end the input...
  const Charmap lets2("fghij\0klmnop", 10);
  // ...whereas the C-string constructor stops at it.
  const Charmap lets3("fghij\0klmnop");
  ASSERT_TRUE(lets2.contains('k'));
  ASSERT_FALSE(lets3.contains('k'));

  ASSERT_TRUE(symbols.IntersectsWith(lets));
  ASSERT_FALSE(lets2.IntersectsWith(lets));
  ASSERT_TRUE(lets.IntersectsWith(symbols));
  ASSERT_FALSE(lets.IntersectsWith(lets2));

  ASSERT_TRUE(nothing_map.IsZero());
  ASSERT_FALSE(lets.IsZero());
}
namespace {
// Returns the members of `m` as a string, in increasing character-value
// order.
std::string Members(const absl::strings_internal::Charmap& m) {
  std::string members;
  for (int value = 0; value <= 255; ++value) {
    if (!m.contains(static_cast<unsigned char>(value))) continue;
    members.push_back(static_cast<char>(value));
  }
  return members;
}
// Returns the characters lo, lo + 1, ..., hi as a string. The walk does not
// rely on lo < hi: it keeps incrementing (wrapping past 255 back to 0) until
// it has emitted hi.
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
  std::string range;
  unsigned char current = lo;
  do {
    range.push_back(static_cast<char>(current));
  } while (current++ != hi);  // Compare the value just emitted, then advance.
  return range;
}
} // namespace
// Verifies that the constexpr factories and operators can be used in
// constant expressions and produce the expected members.
TEST(Charmap, Constexpr) {
  using absl::strings_internal::Charmap;

  constexpr Charmap kEmpty = nothing_map;
  EXPECT_THAT(Members(kEmpty), "");

  constexpr Charmap kA = Charmap::Char('A');
  EXPECT_THAT(Members(kA), "A");

  constexpr Charmap kAZ = Charmap::Range('A', 'Z');
  EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  constexpr Charmap kIdentifier = Charmap::Range('0', '9') |
                                  Charmap::Range('A', 'Z') |
                                  Charmap::Range('a', 'z') |
                                  Charmap::Char('_');
  EXPECT_THAT(Members(kIdentifier),
              "0123456789"
              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
              "_"
              "abcdefghijklmnopqrstuvwxyz");

  constexpr Charmap kAll = everything_map;
  for (size_t i = 0; i < 256; ++i) {
    EXPECT_TRUE(kAll.contains(i)) << i;
  }

  // Duplicate letters collapse; Members() reports them in value order.
  constexpr Charmap kHello = Charmap::FromString("Hello, world!");
  EXPECT_THAT(Members(kHello), " !,Hdelorw");

  // test negation and intersection
  constexpr Charmap kABC = Charmap::Range('A', 'Z') & ~Charmap::Range('D', 'Z');
  EXPECT_THAT(Members(kABC), "ABC");
}
// Compares Charmap::Range against ClosedRangeString for every pair lo <= hi
// drawn from a list of boundary values.
TEST(Charmap, Range) {
  using absl::strings_internal::Charmap;

  // Exhaustive testing takes too long, so test some of the boundaries that
  // are perhaps going to cause trouble.
  const std::vector<size_t> poi = {0,   1,   2,   3,   4,   7,   8,   9,  15,
                                   16,  17,  30,  31,  32,  33,  63,  64, 65,
                                   127, 128, 129, 223, 224, 225, 254, 255};
  for (size_t i = 0; i < poi.size(); ++i) {
    const size_t lo = poi[i];
    SCOPED_TRACE(lo);
    for (size_t j = i; j < poi.size(); ++j) {
      const size_t hi = poi[j];
      SCOPED_TRACE(hi);
      EXPECT_THAT(Members(Charmap::Range(lo, hi)), ClosedRangeString(lo, hi));
    }
  }
}
// Collapses a <cctype>-style int result (zero / nonzero) to a bool.
bool AsBool(int x) {
  return x != 0;
}
// For every value 0..255, each predefined Charmap must agree with the
// <cctype> predicate of the same name.
TEST(CharmapCtype, Match) {
  namespace si = absl::strings_internal;

  for (int c = 0; c < 256; ++c) {
    SCOPED_TRACE(c);
    SCOPED_TRACE(static_cast<char>(c));
    EXPECT_EQ(AsBool(std::isupper(c)), si::UpperCharmap().contains(c));
    EXPECT_EQ(AsBool(std::islower(c)), si::LowerCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isdigit(c)), si::DigitCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isalpha(c)), si::AlphaCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isalnum(c)), si::AlnumCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isxdigit(c)), si::XDigitCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isprint(c)), si::PrintCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isspace(c)), si::SpaceCharmap().contains(c));
    EXPECT_EQ(AsBool(std::iscntrl(c)), si::CntrlCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isblank(c)), si::BlankCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isgraph(c)), si::GraphCharmap().contains(c));
    EXPECT_EQ(AsBool(std::ispunct(c)), si::PunctCharmap().contains(c));
  }
}
} // namespace

View file

@ -0,0 +1,113 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains common test data needed by both escaping_test.cc and
// escaping_benchmark.cc.
namespace {
// Table of (plaintext, expected encoding) pairs. The string literals are
// test data and must be kept byte-for-byte in sync with each other.
struct {
// The unencoded input.
absl::string_view plaintext;
// The expected encoding of `plaintext` (see the notes below on how it was
// produced).
absl::string_view cyphertext;
} const base64_strings[] = {
// Some google quotes
// Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
// (Note that we're testing the websafe encoding, though, so if
// you add messages, be sure to run "tr -- '+/' '-_'" on the output)
{ "I was always good at math and science, and I never realized "
"that was unusual or somehow undesirable. So one of the things "
"I care a lot about is helping to remove that stigma, "
"to show girls that you can be feminine, you can like the things "
"that girls like, but you can also be really good at technology. "
"You can be really good at building things."
" - Marissa Meyer, Newsweek, 2010-12-22" "\n",
"SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
"bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
"ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
"YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
"b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
"a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
"c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
"ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
"ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },
{ "Typical first year for a new cluster: "
"~0.5 overheating "
"~1 PDU failure "
"~1 rack-move "
"~1 network rewiring "
"~20 rack failures "
"~5 racks go wonky "
"~8 network maintenances "
"~12 router reloads "
"~3 router failures "
"~dozens of minor 30-second blips for dns "
"~1000 individual machine failures "
"~thousands of hard drive failures "
"slow disks, bad memory, misconfigured machines, flaky machines, etc."
" - Jeff Dean, The Joys of Real Hardware" "\n",
"VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
"ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
"b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
"bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
"cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
"bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
"bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
"ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
"YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
"ZWFsIEhhcmR3YXJlCg" },
{ "I'm the head of the webspam team at Google. "
"That means that if you type your name into Google and get porn back, "
"it's my fault. Unless you're a porn star, in which case porn is a "
"completely reasonable response."
" - Matt Cutts, Google Plus" "\n",
"SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
"VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
"b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
"IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
"Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
"IEdvb2dsZSBQbHVzCg" },
{ "It will still be a long time before machines approach human intelligence. "
"But luckily, machines don't actually have to be intelligent; "
"they just have to fake it. Access to a wealth of information, "
"combined with a rudimentary decision-making capacity, "
"can often be almost as useful. Of course, the results are better yet "
"when coupled with intelligence. A reference librarian with access to "
"a good search engine is a formidable tool."
" - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n",
"SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
"YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
"aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
"dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
"IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
"ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
"IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
"IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
"bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
"Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
"biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
"NAo" },
// Degenerate edge case
{ "",
"" },
};
} // namespace

View file

@ -0,0 +1,215 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Fast memory copying and comparison routines.
// absl::strings_internal::fastmemcmp_inlined() replaces memcmp()
// absl::strings_internal::memcpy_inlined() replaces memcpy()
// absl::strings_internal::memeq(a, b, n) replaces memcmp(a, b, n) == 0
//
// absl::strings_internal::*_inlined() routines are inline versions of the
// routines exported by this module. Sometimes using the inlined
// versions is faster. Measure before using the inlined versions.
//
#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_
#define ABSL_STRINGS_INTERNAL_FASTMEM_H_
#ifdef __SSE4_1__
#include <immintrin.h>
#endif
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace absl {
namespace strings_internal {
// Return true if the n bytes at a equal the n bytes at b.
// The regions are allowed to overlap.
//
// The performance is similar to the performance of memcmp(), but faster for
// moderately-sized inputs, or inputs that share a common prefix and differ
// somewhere in their last 8 bytes. Further optimizations can be added later
// if it makes sense to do so. Alternatively, if the compiler & runtime improve
// to eliminate the need for this, we can remove it.
//
// Parameters:
//   a, b - start of the two byte regions; neither needs to be aligned
//          (all wide loads below are unaligned loads).
//   n    - number of bytes to compare.
inline bool memeq(const char* a, const char* b, size_t n) {
size_t n_rounded_down = n & ~static_cast<size_t>(7);
if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7
return memcmp(a, b, n) == 0;
}
// n >= 8
{
// XOR of two equal 8-byte words is zero, so (u | v) is nonzero exactly
// when the first 8 bytes or the last 8 bytes differ. Checking the last
// 8 bytes here is what allows n to be rounded down afterwards.
uint64_t u =
ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^
ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8);
if ((u | v) != 0) { // The first or last 8 bytes differ.
return false;
}
}
// The next line forces n to be a multiple of 8.
n = n_rounded_down;
if (n >= 80) {
// In 2013 or later, this should be fast on long strings.
return memcmp(a, b, n) == 0;
}
// Now force n to be a multiple of 16. Arguably, a "switch" would be smart
// here, but there's a difficult-to-evaluate code size vs. speed issue. The
// current approach often re-compares some bytes (worst case is if n initially
// was 16, 32, 48, or 64), but is fairly short.
// e is 8 when n is an odd multiple of 8, else 0. Skipping e bytes is safe
// because the first 8 bytes were already compared above.
size_t e = n & 8;
a += e;
b += e;
n -= e;
// n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks.
while (n > 0) {
#ifdef __SSE4_1__
// One unaligned 128-bit load per side; the chunk is equal when the XOR is
// all zeros.
__m128i u =
_mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
_mm_loadu_si128(reinterpret_cast<const __m128i*>(b)));
if (!_mm_test_all_zeros(u, u)) {
return false;
}
#else
// Same check using two 64-bit halves.
uint64_t x =
ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^
ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8);
if ((x | y) != 0) {
return false;
}
#endif
a += 16;
b += 16;
n -= 16;
}
return true;
}
// Compares the n bytes at va with the n bytes at vb, as unsigned chars.
// Returns a negative value, zero, or a positive value when the first region
// is respectively less than, equal to, or greater than the second.
//
// For n <= 7 the comparison is unrolled below, one byte per case label, and
// a difference yields exactly -1 or +1. Any larger n is forwarded to memcmp().
inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) {
const unsigned char* pa = static_cast<const unsigned char*>(va);
const unsigned char* pb = static_cast<const unsigned char*>(vb);
// Entering at `case k` leaves exactly k byte comparisons to fall through.
switch (n) {
default:
return memcmp(va, vb, n);
case 7:
if (*pa != *pb) return *pa < *pb ? -1 : +1;
++pa;
++pb;
ABSL_FALLTHROUGH_INTENDED;
case 6:
if (*pa != *pb) return *pa < *pb ? -1 : +1;
++pa;
++pb;
ABSL_FALLTHROUGH_INTENDED;
case 5:
if (*pa != *pb) return *pa < *pb ? -1 : +1;
++pa;
++pb;
ABSL_FALLTHROUGH_INTENDED;
case 4:
if (*pa != *pb) return *pa < *pb ? -1 : +1;
++pa;
++pb;
ABSL_FALLTHROUGH_INTENDED;
case 3:
if (*pa != *pb) return *pa < *pb ? -1 : +1;
++pa;
++pb;
ABSL_FALLTHROUGH_INTENDED;
case 2:
if (*pa != *pb) return *pa < *pb ? -1 : +1;
++pa;
++pb;
ABSL_FALLTHROUGH_INTENDED;
case 1:
if (*pa != *pb) return *pa < *pb ? -1 : +1;
ABSL_FALLTHROUGH_INTENDED;
case 0:
break;
}
// All n bytes matched (or n was 0).
return 0;
}
// Copies `size` bytes from `src` to `dst`.
//
// The standard memcpy operation is slow for variable small sizes, so every
// size from 1 to 16 gets its own call with a literal length; any other size
// (including 0) goes through a regular memcpy call. To avoid code bloat,
// don't use this outside performance-critical spots, nor where sizes <= 16
// are not expected to be very frequent.
inline void memcpy_inlined(char* dst, const char* src, size_t size) {
  // The length argument of each call below is deliberately a literal: the
  // compiler inlines code with a minimal amount of data movement when the
  // third parameter of memcpy is a constant.
  switch (size) {
    case 1:  memcpy(dst, src, 1);  return;
    case 2:  memcpy(dst, src, 2);  return;
    case 3:  memcpy(dst, src, 3);  return;
    case 4:  memcpy(dst, src, 4);  return;
    case 5:  memcpy(dst, src, 5);  return;
    case 6:  memcpy(dst, src, 6);  return;
    case 7:  memcpy(dst, src, 7);  return;
    case 8:  memcpy(dst, src, 8);  return;
    case 9:  memcpy(dst, src, 9);  return;
    case 10: memcpy(dst, src, 10); return;
    case 11: memcpy(dst, src, 11); return;
    case 12: memcpy(dst, src, 12); return;
    case 13: memcpy(dst, src, 13); return;
    case 14: memcpy(dst, src, 14); return;
    case 15: memcpy(dst, src, 15); return;
    case 16: memcpy(dst, src, 16); return;
    default: memcpy(dst, src, size); return;
  }
}
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_

View file

@ -0,0 +1,453 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/fastmem.h"
#include <memory>
#include <random>
#include <string>
#include "base/init_google.h"
#include "base/logging.h"
#include "testing/base/public/benchmark.h"
#include "gtest/gtest.h"
namespace {
using RandomEngine = std::minstd_rand0;
// Checks a candidate comparison result `r2` against the reference result
// `r1` for the equally sized strings `a` and `b`: both must have the same
// sign. When the two results agree on equality, memeq() must agree as well.
void VerifyResults(const int r1, const int r2, const std::string& a,
                   const std::string& b) {
  CHECK_EQ(a.size(), b.size());
  if (r1 > 0) {
    EXPECT_GT(r2, 0) << a << " " << b;
  } else if (r1 < 0) {
    EXPECT_LT(r2, 0) << a << " " << b;
  } else {
    EXPECT_EQ(r2, 0) << a << " " << b;
  }
  const bool results_agree_on_equality = (r1 == 0) == (r2 == 0);
  if (results_agree_on_equality) {
    EXPECT_EQ(r1 == 0,
              absl::strings_internal::memeq(a.data(), b.data(), a.size()))
        << r1 << " " << a << " " << b;
  }
}
// Checks fastmemcmp_inlined() (and, through VerifyResults, memeq()) on one
// pair of equally sized strings, using the C library's memcmp as reference.
void CheckSingle(const std::string& a, const std::string& b) {
  CHECK_EQ(a.size(), b.size());
  const size_t length = a.size();
  const int expected = memcmp(a.data(), b.data(), length);
  const int actual =
      absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), length);
  VerifyResults(expected, actual, a, b);
}
// Replaces the contents of `*s` with `len` characters that cycle through
// 'a'..'z' ("abc...zabc...").
//
// The index has the same unsigned type as `len`, so the loop condition does
// not mix signed and unsigned operands and cannot overflow an int for very
// large lengths.
void GenerateString(size_t len, std::string* s) {
  s->clear();
  s->reserve(len);
  for (size_t i = 0; i < len; ++i) {
    s->push_back(static_cast<char>('a' + (i % 26)));
  }
}
// Runs CheckSingle on `a` and `b` as given, and on variants that share a
// generated prefix or suffix of 0..32 chars, optionally with one extra
// (possibly differing) char between the prefix and the payload.
void CheckCompare(const std::string& a, const std::string& b) {
  static const char kMiddleChars[] = {'a', 'b', 'c'};

  CheckSingle(a, b);
  std::string extra;
  for (int common = 0; common <= 32; common++) {
    GenerateString(common, &extra);  // Clears `extra` before refilling it.
    CheckSingle(extra + a, extra + b);
    CheckSingle(a + extra, b + extra);
    for (const char c1 : kMiddleChars) {
      for (const char c2 : kMiddleChars) {
        CheckSingle(extra + c1 + a, extra + c2 + b);
      }
    }
  }
}
// Compares short strings that are equal, differ in their first char, or
// differ in their last char, then repeats the last-char cases behind a run
// of 0..999 'z' chars.
TEST(FastCompare, Misc) {
  struct StringPair {
    const char* first;
    const char* second;
  };

  static const StringPair kShortCases[] = {
      // Equal strings.
      {"", ""}, {"a", "a"}, {"ab", "ab"}, {"abc", "abc"}, {"abcd", "abcd"},
      {"abcde", "abcde"},
      // Difference in the first char, smaller string first.
      {"a", "x"}, {"ab", "xb"}, {"abc", "xbc"}, {"abcd", "xbcd"},
      {"abcde", "xbcde"},
      // Difference in the first char, larger string first.
      {"x", "a"}, {"xb", "ab"}, {"xbc", "abc"}, {"xbcd", "abcd"},
      {"xbcde", "abcde"},
      // Difference in the last char, smaller string first.
      {"a", "x"}, {"ab", "ax"}, {"abc", "abx"}, {"abcd", "abcx"},
      {"abcde", "abcdx"},
      // Difference in the last char, larger string first.
      {"x", "a"}, {"ax", "ab"}, {"abx", "abc"}, {"abcx", "abcd"},
      {"abcdx", "abcde"},
  };
  for (const StringPair& c : kShortCases) {
    CheckCompare(c.first, c.second);
  }

  static const StringPair kTails[] = {
      {"x", "a"}, {"ax", "ab"}, {"abx", "abc"}, {"abcx", "abcd"},
      {"abcdx", "abcde"},
  };
  for (int len = 0; len < 1000; len++) {
    const std::string p(len, 'z');
    for (const StringPair& tail : kTails) {
      CheckCompare(p + tail.first, p + tail.second);
    }
  }
}
// Compares every possible pair of one-byte strings.
TEST(FastCompare, TrailingByte) {
  for (int i = 0; i < 256; i++) {
    const std::string a(1, static_cast<char>(i));
    for (int j = 0; j < 256; j++) {
      const std::string b(1, static_cast<char>(j));
      CheckSingle(a, b);
    }
  }
}
// Check correctness of memcpy_inlined: copies `a` into the middle of a buffer
// that has one guard byte on each side, then checks that both guards are
// untouched and that the copied bytes equal `a`.
void CheckSingleMemcpyInlined(const std::string& a) {
  const size_t length = a.size();
  std::unique_ptr<char[]> destination(new char[length + 2]);
  char* const payload = destination.get() + 1;
  destination[0] = 'x';
  destination[length + 1] = 'x';
  absl::strings_internal::memcpy_inlined(payload, a.data(), length);
  CHECK_EQ('x', destination[0]);
  CHECK_EQ('x', destination[length + 1]);
  CHECK_EQ(0, memcmp(a.data(), payload, length));
}
// Checks memcpy_inlined for every length from 0 through 17, i.e. the
// zero-length case, each of the sixteen literal-length cases, and one length
// beyond them.
//
// The inputs are the prefixes of one string, so no length can be left out by
// accident (the previous hand-written list had no two-character input).
TEST(MemCpyInlined, Misc) {
  const std::string digits = "0123456789abcdefg";
  for (size_t length = 0; length <= digits.size(); ++length) {
    SCOPED_TRACE(length);
    CheckSingleMemcpyInlined(digits.substr(0, length));
  }
}
// Benchmark helper: repeatedly calls func(dst, src, size) on two separately
// allocated buffers of `size` bytes, reports the bytes processed, and
// finally checks that the destination received the source pattern.
template <typename Function>
inline void CopyLoop(benchmark::State& state, int size, Function func) {
  const std::unique_ptr<char[]> source(new char[size]);
  const std::unique_ptr<char[]> target(new char[size]);
  char* src = source.get();
  char* dst = target.get();
  memset(src, 'x', size);
  memset(dst, 'y', size);
  for (auto _ : state) {
    func(dst, src, size);
  }
  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
  // 'y' must have been overwritten with 'x' by the copy.
  CHECK_EQ(dst[0], 'x');
}
// Benchmarks memcpy through CopyLoop; the copy size is the benchmark
// argument.
void BM_memcpy(benchmark::State& state) {
CopyLoop(state, state.range(0), memcpy);
}
BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
// Benchmarks memcpy_inlined through CopyLoop, registered with the same sizes
// as BM_memcpy so the two can be compared.
void BM_memcpy_inlined(benchmark::State& state) {
CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined);
}
BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20);
// unaligned memcpy
// Copies n bytes between two separate buffers, shifting the destination and
// source start offsets on every iteration.
void BM_unaligned_memcpy(benchmark::State& state) {
const int n = state.range(0);
const int kMaxOffset = 32;
// kMaxOffset spare bytes so that any offset below kMaxOffset stays in bounds.
char* src = new char[n + kMaxOffset];
char* dst = new char[n + kMaxOffset];
memset(src, 'x', n + kMaxOffset);
int r = 0, i = 0;
for (auto _ : state) {
// Destination offset is i % 32; source offset is (i + 5) % 32.
memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n);
r += dst[0];
++i;
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
delete[] src;
delete[] dst;
// Presumably keeps the reads of dst[0] from being optimized away.
benchmark::DoNotOptimize(r);
}
BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
// memmove worst case: heavy overlap, but not always by the same amount.
// Also, the source and destination will often be unaligned.
void BM_memmove_worst_case(benchmark::State& state) {
const int n = state.range(0);
const int32_t kDeterministicSeed = 301;
const int kMaxOffset = 32;
char* src = new char[n + kMaxOffset];
memset(src, 'x', n + kMaxOffset);
// Precomputed offsets, each drawn from the closed range [0, kMaxOffset]
// with a fixed seed so that runs are repeatable.
size_t offsets[64];
RandomEngine rng(kDeterministicSeed);
std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset);
for (size_t& offset : offsets) {
offset = random_to_max_offset(rng);
}
int r = 0, i = 0;
for (auto _ : state) {
// Source and destination are both inside the same buffer. Offsets are
// consumed in pairs: i is the destination, i + 1 the source.
memmove(src + offsets[i], src + offsets[i + 1], n);
r += src[0];
i = (i + 2) % arraysize(offsets);
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
delete[] src;
benchmark::DoNotOptimize(r);
}
BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20);
// memmove cache-friendly: aligned and overlapping with 4k
// between the source and destination addresses.
void BM_memmove_cache_friendly(benchmark::State& state) {
const int n = state.range(0);
char* src = new char[n + 4096];
// Only the first n bytes are initialized here; the first memmove below
// writes the range that the second one reads.
memset(src, 'x', n);
int r = 0;
while (state.KeepRunningBatch(2)) { // count each memmove as an iteration
memmove(src + 4096, src, n);
memmove(src, src + 4096, n);
r += src[0];
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
delete[] src;
benchmark::DoNotOptimize(r);
}
// All registered sizes exceed 4096, so the two regions always overlap.
BENCHMARK(BM_memmove_cache_friendly)
->Arg(5 * 1024)
->Arg(10 * 1024)
->Range(16 << 10, 8 << 20);
// memmove best(?) case: aligned and non-overlapping.
// Uses CopyLoop, which copies between two separately allocated buffers.
void BM_memmove_aligned_non_overlapping(benchmark::State& state) {
CopyLoop(state, state.range(0), memmove);
}
BENCHMARK(BM_memmove_aligned_non_overlapping)
->DenseRange(1, 18)
->Range(32, 8 << 20);
// memset speed
// Fills an n-byte buffer once per iteration, then reads its first byte.
void BM_memset(benchmark::State& state) {
const int n = state.range(0);
char* dst = new char[n];
int r = 0;
for (auto _ : state) {
memset(dst, 'x', n);
r += dst[0];
}
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
delete[] dst;
benchmark::DoNotOptimize(r);
}
BENCHMARK(BM_memset)->Range(8, 4096 << 10);
// Bandwidth (vectorization?) test: the ideal generated code will be limited
// by memory bandwidth. Even so-so generated code will max out memory bandwidth
// on some machines.
//
// Each iteration sums the whole n-byte buffer as 32-bit words, eight words
// (32 bytes) per inner-loop step; n must therefore be a multiple of 32.
void BM_membandwidth(benchmark::State& state) {
  const int n = state.range(0);
  CHECK_EQ(n % 32, 0);  // We will read 32 bytes per iter.
  char* dst = new char[n];
  // Give every byte a defined value before the loop reads it. Memory that
  // comes straight from new char[n] holds indeterminate values.
  memset(dst, 'x', n);
  int r = 0;
  for (auto _ : state) {
    const uint32_t* p = reinterpret_cast<uint32_t*>(dst);
    const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n);
    uint32_t x = 0;
    while (p < limit) {
      x += p[0];
      x += p[1];
      x += p[2];
      x += p[3];
      x += p[4];
      x += p[5];
      x += p[6];
      x += p[7];
      p += 8;
    }
    r += x;
  }
  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
  delete[] dst;
  benchmark::DoNotOptimize(r);
}
BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10);
// Helper for benchmarks. Repeatedly compares two strings that are
// either equal or different only in one character. If test_equal_strings
// is false then position_to_modify determines where the difference will be.
//
// Parameters:
//   state               - benchmark state driving the timed loop.
//   test_equal_strings  - true to compare equal strings.
//   position_to_modify  - index of the differing char; must be a valid index
//                         exactly when test_equal_strings is false (enforced
//                         by the CHECK_NE below).
//   size                - length of both strings; must be positive.
//   func                - comparison under test, called as func(sa, sb).
template <typename Function>
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop(
benchmark::State& state, bool test_equal_strings,
std::string::size_type position_to_modify, int size, Function func) {
const int kIterMult = 4; // Iteration multiplier for better timing resolution
CHECK_GT(size, 0);
const bool position_to_modify_is_valid =
position_to_modify != std::string::npos && position_to_modify < size;
CHECK_NE(position_to_modify_is_valid, test_equal_strings);
if (!position_to_modify_is_valid) {
// Equal-strings mode: any in-range index will do, since the store below
// writes back the same character.
position_to_modify = 0;
}
std::string sa(size, 'a');
std::string sb = sa;
char last = sa[size - 1];
int num = 0;
for (auto _ : state) {
for (int i = 0; i < kIterMult; ++i) {
// Rewritten before every comparison; presumably so the compiler cannot
// hoist the comparison out of the loop.
sb[position_to_modify] = test_equal_strings ? last : last ^ 1;
num += func(sa, sb);
}
}
// NOTE(review): each iteration performs kIterMult comparisons, but the byte
// count below is not scaled by kIterMult - confirm this is intended.
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
benchmark::DoNotOptimize(num);
}
// Helper for benchmarks. Repeatedly compares two memory regions that are
// either equal or different only in their final character.
//
// Parameters:
//   state               - benchmark state driving the timed loop.
//   test_equal_strings  - true to compare equal regions.
//   size                - length of each region in bytes; must be positive.
//   func                - comparison under test, called as func(a, b, size).
template <typename Function>
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state,
bool test_equal_strings,
int size, Function func) {
const int kIterMult = 4; // Iteration multiplier for better timing resolution
CHECK_GT(size, 0);
// One allocation holds both regions back to back: a is the first half and
// b is the second half.
char* data = static_cast<char*>(malloc(size * 2));
memset(data, 'a', size * 2);
char* a = data;
char* b = data + size;
char last = a[size - 1];
int num = 0;
for (auto _ : state) {
for (int i = 0; i < kIterMult; ++i) {
// Rewritten before every comparison; presumably so the compiler cannot
// hoist the comparison out of the loop.
b[size - 1] = test_equal_strings ? last : last ^ 1;
num += func(a, b, size);
}
}
// NOTE(review): each iteration performs kIterMult comparisons, but the byte
// count below is not scaled by kIterMult - confirm this is intended.
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
benchmark::DoNotOptimize(num);
free(data);
}
// Benchmarks memcmp on regions that differ only in their final byte.
void BM_memcmp(benchmark::State& state) {
CompareLoop(state, false, state.range(0), memcmp);
}
BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20);
// Benchmarks fastmemcmp_inlined on regions that differ only in their final
// byte.
void BM_fastmemcmp_inlined(benchmark::State& state) {
CompareLoop(state, false, state.range(0),
absl::strings_internal::fastmemcmp_inlined);
}
BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20);
// Benchmarks memeq on regions that differ only in their final byte.
void BM_memeq(benchmark::State& state) {
CompareLoop(state, false, state.range(0), absl::strings_internal::memeq);
}
BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20);
// Benchmarks memeq on regions that are equal.
void BM_memeq_equal(benchmark::State& state) {
CompareLoop(state, true, state.range(0), absl::strings_internal::memeq);
}
BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20);
// Benchmark subject: std::string operator<.
bool StringLess(const std::string& x, const std::string& y) { return x < y; }
// Benchmark subject: std::string operator==.
bool StringEqual(const std::string& x, const std::string& y) { return x == y; }
// Benchmark subject: string equality spelled as a size check followed by
// std::equal over the character data.
bool StdEqual(const std::string& x, const std::string& y) {
return x.size() == y.size() &&
std::equal(x.data(), x.data() + x.size(), y.data());
}
// Benchmark for x < y, where x and y are strings that differ in only their
// final char. That should be more-or-less the worst case for <.
// The differing position passed below is size - 1.
void BM_string_less(benchmark::State& state) {
StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
StringLess);
}
BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20);
// Benchmark for x < y, where x and y are strings that differ in only their
// first char. That should be more-or-less the best case for <.
// NOTE(review): the literal 0 is presumably the index of the differing
// character expected by StringCompareLoop (defined outside this view).
void BM_string_less_easy(benchmark::State& state) {
StringCompareLoop(state, false, 0, state.range(0), StringLess);
}
// Registered for sizes 1..9 and for a range from 32 bytes up to 1 MiB.
BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20);
// Benchmark for x == y (StringEqual) on unequal strings. The arguments mirror
// BM_string_less, so the strings presumably differ only in their final
// character -- confirm against StringCompareLoop.
void BM_string_equal(benchmark::State& state) {
StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
StringEqual);
}
// Registered for sizes 1..9 and for a range from 32 bytes up to 1 MiB.
BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
// Benchmark for x == y (StringEqual) on equal strings. std::string::npos is
// passed where the other benchmarks pass a character index, presumably
// meaning "no differing character" -- confirm against StringCompareLoop.
void BM_string_equal_equal(benchmark::State& state) {
StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual);
}
// Registered for sizes 1..9 and for a range from 32 bytes up to 1 MiB.
BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
// Benchmark for the std::equal-based StdEqual on unequal strings, using the
// same arguments as BM_string_equal (presumably a difference in the final
// character -- confirm against StringCompareLoop).
void BM_std_equal(benchmark::State& state) {
StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual);
}
// Registered for sizes 1..9 and for a range from 32 bytes up to 1 MiB.
BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
// Benchmark for the std::equal-based StdEqual on equal strings, using the
// same arguments as BM_string_equal_equal.
void BM_std_equal_equal(benchmark::State& state) {
StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual);
}
// Registered for sizes 1..9 and for a range from 32 bytes up to 1 MiB.
BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
// Benchmark for operator== on two std::strings of different lengths: `a` holds
// `size` copies of 'a' and `b` holds `size + 1` copies, so the comparison is
// always false.
void BM_string_equal_unequal_lengths(benchmark::State& state) {
const int size = state.range(0);
std::string a(size, 'a');
std::string b(size + 1, 'a');
int count = 0;
for (auto _ : state) {
// Stores the value that is already there; presumably present so the
// compiler cannot treat `b` as loop-invariant and hoist the comparison.
b[size - 1] = 'a';
count += (a == b);
}
// Keep the accumulated result observable so the loop is not optimized away.
benchmark::DoNotOptimize(count);
}
// Registered for lengths 1 and 1 MiB only.
BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
// Benchmark for operator== on two std::strings of different lengths (`size`
// versus `size + 1` copies of 'a'), so the comparison is always false.
// NOTE(review): the body is currently identical to
// BM_string_equal_unequal_lengths; both measure std::string.
void BM_stdstring_equal_unequal_lengths(benchmark::State& state) {
const int size = state.range(0);
std::string a(size, 'a');
std::string b(size + 1, 'a');
int count = 0;
for (auto _ : state) {
// Stores the value that is already there; presumably present so the
// compiler cannot treat `b` as loop-invariant and hoist the comparison.
b[size - 1] = 'a';
count += (a == b);
}
// Keep the accumulated result observable so the loop is not optimized away.
benchmark::DoNotOptimize(count);
}
// Registered for lengths 1 and 1 MiB only.
BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
} // namespace

View file

@ -0,0 +1,110 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
namespace absl {
namespace strings_internal {
int memcasecmp(const char* s1, const char* s2, size_t len) {
const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1);
const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2);
for (size_t i = 0; i < len; i++) {
const int diff =
int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} -
int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))};
if (diff != 0) return diff;
}
return 0;
}
// Returns a malloc()-allocated copy of the first `slen` bytes of `s`, or
// nullptr if the allocation fails. The copy is not NUL-terminated; the caller
// owns it and releases it with free().
char* memdup(const char* s, size_t slen) {
  char* const copy = static_cast<char*>(malloc(slen));
  if (copy == nullptr) return nullptr;
  memcpy(copy, s, slen);
  return copy;
}
// Returns a pointer to the last occurrence of the byte `c` within the first
// `slen` bytes of `s`, or nullptr if it does not occur (including when
// `slen` is 0).
//
// As with the C library mem*() functions, `c` is converted to unsigned char
// and compared against each byte interpreted as unsigned char. This makes
// values in 128..255 match regardless of whether plain `char` is signed.
char* memrchr(const char* s, int c, size_t slen) {
  const unsigned char target = static_cast<unsigned char>(c);
  // Count down with an index rather than a pointer so that no pointer before
  // the start of the buffer is ever formed (that would be undefined
  // behavior, notably when slen == 0).
  for (size_t i = slen; i > 0; --i) {
    if (static_cast<unsigned char>(s[i - 1]) == target) {
      return const_cast<char*>(s + (i - 1));
    }
  }
  return nullptr;
}
// Returns the length of the longest prefix of the `slen`-byte buffer `s`
// that consists only of characters found in the NUL-terminated string
// `accept`. A NUL byte inside `s` never matches, because the terminator of
// `accept` is not part of the accept set.
//
// Only bytes in [s, s + slen) are read: the length is checked before each
// byte is examined, so a buffer that is not NUL-terminated (or is empty) is
// handled without reading past its end.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t matched = 0;
  for (; matched < slen; ++matched) {
    const char c = s[matched];
    const char* a = accept;
    while (*a != '\0' && *a != c) ++a;
    if (*a == '\0') break;  // c is not in the accept set
  }
  return matched;
}
// Returns the length of the longest prefix of the `slen`-byte buffer `s`
// that contains none of the characters in the NUL-terminated string
// `reject`. Returns `slen` when no rejected character occurs.
size_t memcspn(const char* s, size_t slen, const char* reject) {
  for (size_t pos = 0; pos < slen; ++pos) {
    const char cur = s[pos];
    for (const char* r = reject; *r != '\0'; ++r) {
      if (*r == cur) return pos;
    }
  }
  return slen;
}
// Returns a pointer to the first byte within the first `slen` bytes of `s`
// that equals any character of the NUL-terminated string `accept`, or
// nullptr if there is none.
char* mempbrk(const char* s, size_t slen, const char* accept) {
  const char* const end = s + slen;
  for (const char* cur = s; cur != end; ++cur) {
    for (const char* a = accept; *a != '\0'; ++a) {
      if (*a == *cur) return const_cast<char*>(cur);
    }
  }
  return nullptr;
}
// Finds the first occurrence of the `neelen`-byte needle inside the
// `haylen`-byte haystack and returns a pointer to it, or nullptr if it does
// not occur. An empty needle matches at the start of the haystack, even when
// the haystack is empty too.
//
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
                     size_t neelen) {
  if (neelen == 0) return phaystack;
  if (neelen > haylen) return nullptr;

  // One past the last position at which a complete needle can still start.
  const char* const start_limit = phaystack + (haylen - neelen) + 1;
  const char* cursor = phaystack;
  for (;;) {
    // The static_cast papers over memchr returning void* on POSIX systems
    // but const void* on Windows.
    const char* const candidate = static_cast<const char*>(
        memchr(cursor, pneedle[0], start_limit - cursor));
    if (candidate == nullptr) return nullptr;
    if (memcmp(candidate, pneedle, neelen) == 0) return candidate;
    cursor = candidate + 1;  // first byte matched but the rest did not
  }
}
} // namespace strings_internal
} // namespace absl

View file

@ -0,0 +1,146 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// These routines provide mem versions of standard C string routines,
// such as strpbrk. They function exactly the same as the str versions,
// so if you wonder what they are, replace the word "mem" by
// "str" and check out the man page. I could return void*, as the
// strutil.h mem*() routines tend to do, but I return char* instead
// since this is by far the most common way these functions are called.
//
// The difference between the mem and str versions is that the mem version
// takes a pointer and a length, rather than a '\0'-terminated string.
// The memcase* routines defined here assume the locale is "C"
// (they use absl::ascii_tolower instead of tolower).
//
// These routines are based on the BSD library.
//
// Here's a list of routines from string.h, and their mem analogues.
// Functions in lowercase are defined in string.h; those in UPPERCASE
// are defined here:
//
// strlen --
// strcat strncat MEMCAT
// strcpy strncpy memcpy
// -- memccpy (very cool function, btw)
// -- memmove
// -- memset
// strcmp strncmp memcmp
// strcasecmp strncasecmp MEMCASECMP
// strchr memchr
// strcoll --
// strxfrm --
// strdup strndup MEMDUP
// strrchr MEMRCHR
// strspn MEMSPN
// strcspn MEMCSPN
// strpbrk MEMPBRK
// strstr MEMSTR MEMMEM
// (g)strcasestr MEMCASESTR MEMCASEMEM
// strtok --
// strprefix MEMPREFIX (strprefix is from strutil.h)
// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
// -- MEMIS
// -- MEMCASEIS
// strcount MEMCOUNT (strcount is from strutil.h)
#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#include <cstddef>
#include <cstring>
#include "absl/base/port.h" // disable some warnings on Windows
#include "absl/strings/ascii.h" // for absl::ascii_tolower
namespace absl {
namespace strings_internal {
// Copies `srclen` bytes from `src` to the position `destlen` bytes into
// `dest`, i.e. appends to a buffer that currently holds `destlen` bytes.
// Returns a pointer to the first appended byte (dest + destlen). No NUL
// terminator is written, and the caller must ensure `dest` has room.
inline char* memcat(char* dest, size_t destlen, const char* src,
                    size_t srclen) {
  char* const tail = dest + destlen;
  memcpy(tail, src, srclen);
  return tail;
}
// Compares the first `len` bytes of `s1` and `s2`, ignoring ASCII case.
// Returns 0 if they match, otherwise the difference between the first pair of
// lowercased bytes that differ.
int memcasecmp(const char* s1, const char* s2, size_t len);
// Returns a malloc()-allocated copy of the first `slen` bytes of `s`, or
// nullptr if allocation fails. The caller releases it with free().
char* memdup(const char* s, size_t slen);
// Returns a pointer to the last occurrence of `c` in the first `slen` bytes of
// `s`, or nullptr if there is none.
char* memrchr(const char* s, int c, size_t slen);
// Returns the length of the longest prefix of the `slen`-byte buffer `s` made
// up only of characters from the NUL-terminated string `accept`.
size_t memspn(const char* s, size_t slen, const char* accept);
// Returns the length of the longest prefix of the `slen`-byte buffer `s` that
// contains no character from the NUL-terminated string `reject`.
size_t memcspn(const char* s, size_t slen, const char* reject);
// Returns a pointer to the first byte in the first `slen` bytes of `s` that
// equals any character of the NUL-terminated string `accept`, or nullptr.
char* mempbrk(const char* s, size_t slen, const char* accept);
// This is for internal use only. Don't call this directly.
//
// Searches the `haylen`-byte haystack for the first occurrence of the
// `neelen`-byte needle and returns a pointer to its start, or nullptr if it
// is absent. An empty needle matches at `haystack`, even if haylen is 0.
// When `case_sensitive` is false, both sides are passed through
// absl::ascii_tolower before being compared.
template <bool case_sensitive>
const char* int_memmatch(const char* haystack, size_t haylen,
                         const char* needle, size_t neelen) {
  if (neelen == 0) return haystack;

  const char* const hay_limit = haystack + haylen;
  const char* const needle_begin = needle;
  const char* const needle_limit = needle_begin + neelen;
  while (haystack < hay_limit) {
    char hay_ch = *haystack;
    char needle_ch = *needle;
    if (!case_sensitive) {
      hay_ch = absl::ascii_tolower(static_cast<unsigned char>(hay_ch));
      needle_ch = absl::ascii_tolower(static_cast<unsigned char>(needle_ch));
    }
    if (hay_ch == needle_ch) {
      ++needle;
      if (needle == needle_limit) return haystack + 1 - neelen;
    } else if (needle != needle_begin) {
      // A partial match just failed. Rewind the haystack to the byte where
      // that partial match began; the increment below then moves on to the
      // next candidate start (needed to find "aab" in "aaab").
      haystack -= needle - needle_begin;
      needle = needle_begin;
    }
    ++haystack;
  }
  return nullptr;
}
// Public entry points built on int_memmatch. All of them search the
// `haylen`-byte buffer `phaystack` and return a pointer to the first match,
// or nullptr if there is none.

// Case-sensitive search for the NUL-terminated string `pneedle`.
inline const char* memstr(const char* phaystack, size_t haylen,
                          const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch<true>(phaystack, haylen, pneedle, needle_len);
}

// ASCII case-insensitive search for the NUL-terminated string `pneedle`.
inline const char* memcasestr(const char* phaystack, size_t haylen,
                              const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch<false>(phaystack, haylen, pneedle, needle_len);
}

// Case-sensitive search for the `needlelen`-byte buffer `pneedle`.
inline const char* memmem(const char* phaystack, size_t haylen,
                          const char* pneedle, size_t needlelen) {
  return int_memmatch<true>(phaystack, haylen, pneedle, needlelen);
}

// ASCII case-insensitive search for the `needlelen`-byte buffer `pneedle`.
inline const char* memcasemem(const char* phaystack, size_t haylen,
                              const char* pneedle, size_t needlelen) {
  return int_memmatch<false>(phaystack, haylen, pneedle, needlelen);
}
// Returns a pointer to the first occurrence of the `neelen`-byte needle in the
// `haylen`-byte haystack, or nullptr if it does not occur. An empty needle
// matches at `phaystack`.
//
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
size_t neelen);
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_

View file

@ -0,0 +1,180 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit test for memutil.cc
#include "absl/strings/internal/memutil.h"
#include <algorithm>
#include <cstdlib>
#include "gtest/gtest.h"
#include "absl/strings/ascii.h"
namespace {
// Test helper: returns a pointer to the first byte among the first `slen`
// bytes of `s` whose ASCII-lowercased value equals the ASCII-lowercased
// value of `c`, or nullptr if there is none.
static char* memcasechr(const char* s, int c, size_t slen) {
  const int folded = absl::ascii_tolower(c);
  const char* const end = s + slen;
  for (const char* cur = s; cur != end; ++cur) {
    if (absl::ascii_tolower(*cur) == folded) return const_cast<char*>(cur);
  }
  return nullptr;
}
// Test helper: ASCII case-insensitive search for the `neelen`-byte needle
// in the `haylen`-byte haystack. Candidate positions are located with
// memcasechr on the needle's first byte and confirmed with memcasecmp.
// Returns the first match, or nullptr if there is none; an empty needle
// matches at `phaystack`.
static const char* memcasematch(const char* phaystack, size_t haylen,
                                const char* pneedle, size_t neelen) {
  if (neelen == 0) return phaystack;  // even if haylen is 0
  if (neelen > haylen) return nullptr;

  // One past the last position at which a complete needle can still start.
  const char* const hayend = phaystack + haylen - neelen + 1;
  for (;;) {
    const char* const match =
        memcasechr(phaystack, pneedle[0], hayend - phaystack);
    if (match == nullptr) return nullptr;
    if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) {
      return match;
    }
    phaystack = match + 1;
  }
}
// Exercises every routine declared in memutil.h with small literal inputs.
// Most calls pass `sizeof(literal) - 1` (the literal's length without its NUL)
// or `- 2` (additionally dropping the last character) as the buffer length.
TEST(MemUtilTest, AllTests) {
// check memutil functions
// memcat / memcasecmp: build "hello there" in `a`, then compare it
// case-insensitively against variants of the same text.
char a[1000];
absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
sizeof("hello there") - 1),
0);
// 'e' vs 'f' in the last position: the difference is exactly -1.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 1),
-1);
// One byte shorter, so the differing last character is not compared.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 2),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
// memdup: only checks that the result can be passed to free().
char* p = absl::strings_internal::memdup("hello", 5);
free(p);
// memrchr: last 'e' overall, last 'e' when the final byte is excluded, and
// a character that does not occur.
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 1);
EXPECT_TRUE(p && p[-1] == 'r');
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 2);
EXPECT_TRUE(p && p[-1] == 'h');
p = absl::strings_internal::memrchr("hello there", 'u',
sizeof("hello there") - 1);
EXPECT_TRUE(p == nullptr);
// memspn: length of the leading run drawn from the accept set.
int len = absl::strings_internal::memspn("hello there",
sizeof("hello there") - 1, "hole");
EXPECT_EQ(len, sizeof("hello") - 1);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"u");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 1, "trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
// The trailing '!' lies outside the given length, so the whole buffer matches.
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 2, "trole h!");
EXPECT_EQ(len, sizeof("hello there!") - 2);
// memcspn: length of the leading run containing none of the reject set.
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "leho");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "u");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, " ");
EXPECT_EQ(len, 5);
// mempbrk: first byte that belongs to the accept set.
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"leho");
EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"nu");
EXPECT_TRUE(p == nullptr);
// The '!' lies outside the given length and must not be found.
p = absl::strings_internal::mempbrk("hello there!",
sizeof("hello there!") - 2, "!");
EXPECT_TRUE(p == nullptr);
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
" t ");
EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
// memmem: case-sensitive search. The needle length argument limits how much
// of the needle literal is used (e.g. "0xx" with length 1 is just "0").
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memcasemem: the same cases as above, with haystack and needles that differ
// in letter case.
{
const char kHaystack[] = "aBcDeFgHiJ";
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memmatch: must agree with memmem on the same inputs.
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
nullptr);
}
}
} // namespace

View file

@ -0,0 +1,166 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains common things needed by numbers_test.cc,
// numbers_legacy_test.cc and numbers_benchmark.cc.
namespace {
// Previously documented minimum buffer sizes for Fast*ToBuffer functions.
// NOTE(edk): These should be deleted and uses replaced with kFastToBufferSize
// once existing code has been fixed to use kFastToBufferSize.
// The values are buffer sizes in bytes; presumably each leaves room for the
// longest decimal rendering of its type plus a terminating NUL -- confirm
// against the Fast*ToBuffer documentation.
enum {
kFastInt32ToBufferSize = 12,
kFastInt64ToBufferSize = 22,
kFastUInt32ToBufferSize = 12,
kFastUInt64ToBufferSize = 22
};
// Writes the textual representation of `value` in the given `base` to
// `*destination`, replacing its previous contents.
//
// Digits above 9 are written as uppercase letters ('A' for 10 through 'Z'
// for 35), and negative values get a leading '-'. Returns false, leaving
// `*destination` empty, when `base` is outside [2, 36]; returns true
// otherwise.
template <typename IntType>
bool Itoa(IntType value, int base, std::string* destination) {
  static const char kDigits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  destination->clear();
  if (base < 2 || base > 36) return false;
  if (value == 0) {
    destination->assign(1, '0');
    return true;
  }

  const bool is_negative = value < 0;
  // Digits come out least-significant first, so each one is prepended.
  do {
    const IntType quotient = value / base;
    const auto truncated = quotient * base;
    // |value - truncated| without std::abs, which misbehaves for unsigned
    // IntType.
    const int digit =
        value > truncated ? value - truncated : truncated - value;
    destination->insert(destination->begin(), kDigits[digit]);
    value = quotient;
  } while (value != 0);

  if (is_negative) destination->insert(destination->begin(), '-');
  return true;
}
// One string-to-uint32 conversion case. The tests that consume this table are
// outside this file; the field meanings below follow the field names and the
// existing inline comments.
struct uint32_test_case {
const char* str;  // input text
bool expect_ok;  // whether the conversion is expected to succeed
int base; // base to pass to the conversion function
uint32_t expected;  // presumably the value expected in the output -- confirm
} const strtouint32_test_cases[] = {
{"0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
{"0x34234324", true, 16, 0x34234324},
{"34234324", true, 16, 0x34234324},
{"0", true, 16, 0},
{" \t\n 0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
{" \t\n 72717222", true, 8, 072717222},
{" \t\n 072717222", true, 8, 072717222},
{" \t\n 072717228", false, 8, 07271722},
{"0", true, 0, 0},
// Base-10 version.
{"34234324", true, 0, 34234324},
{"4294967295", true, 0, std::numeric_limits<uint32_t>::max()},
{"34234324 \n\t", true, 10, 34234324},
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0}, // would be valid hex, but prefix is missing
{"34234324a", false, 0, 34234324},
{"34234.3", false, 0, 34234},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"4294967296", false, 0, std::numeric_limits<uint32_t>::max()},
{"0x100000000", false, 0, std::numeric_limits<uint32_t>::max()},
// Final entry has a null `str`; presumably an end-of-table sentinel.
{nullptr, false, 0, 0},
};
// One string-to-uint64 conversion case; same layout as uint32_test_case. The
// tests that consume this table are outside this file.
struct uint64_test_case {
const char* str;  // input text
bool expect_ok;  // whether the conversion is expected to succeed
int base;  // base to pass to the conversion function
uint64_t expected;  // presumably the value expected in the output -- confirm
} const strtouint64_test_cases[] = {
{"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"0", true, 16, 0},
{"000", true, 0, 0},
{"0", true, 0, 0},
{" \t\n 0xffffffffffffffff", true, 16,
std::numeric_limits<uint64_t>::max()},
{"012345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12845670123456701234", false, 8, 0},
// Base-10 version.
{"34234324487834466", true, 0, int64_t{34234324487834466}},
{" \t\n 18446744073709551615", true, 0,
std::numeric_limits<uint64_t>::max()},
{"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
{"0", true, 0, 0},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0},
{"34234324487834466a", false, 0, 0},
{"34234487834466.3", false, 0, 0},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"18446744073709551616", false, 10, 0},
{"18446744073709551616", false, 0, 0},
{"0x10000000000000000", false, 16, std::numeric_limits<uint64_t>::max()},
{"0X10000000000000000", false, 16,
std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
{"0x10000000000000000", false, 0, std::numeric_limits<uint64_t>::max()},
{"0X10000000000000000", false, 0,
std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
{"0x1234", true, 16, 0x1234},
// Base-10 string version.
{"1234", true, 0, 1234},
// Final entry has a null `str`; presumably an end-of-table sentinel.
{nullptr, false, 0, 0},
};
} // namespace

View file

@ -0,0 +1,97 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#include <cassert>
#include <ostream>
#include <streambuf>
#include <string>
#include "absl/base/port.h"
namespace absl {
namespace strings_internal {
// An output stream that appends everything written to it to a std::string
// owned by the caller. It plays the role of std::ostringstream but is
// faster: ~70% faster to create, ~50% faster to write to, and the result
// needs no extraction step because it is already in the target string.
//
//   std::string s;
//   OStringStream strm(&s);
//   strm << 42 << ' ' << 3.14;  // appends to `s`
//
// A named stream object is not required; since C++11 operator<< also works
// on rvalues of std::ostream.
//
//   std::string s;
//   OStringStream(&s) << 42 << ' ' << 3.14;  // appends to `s`
//
// Although cheaper than std::ostringstream, constructing an OStringStream
// is still relatively slow, so prefer one stream over several.
//
// Slow -- creates a stream per write:
//
//   std::string s;
//   OStringStream(&s) << 42;
//   OStringStream(&s) << ' ';
//   OStringStream(&s) << 3.14;
//
// Fast -- creates one stream and reuses it:
//
//   std::string s;
//   OStringStream strm(&s);
//   strm << 42;
//   strm << ' ';
//   strm << 3.14;
//
// Note: flush() has no effect. No reason to call it.
class OStringStream : private std::basic_streambuf<char>, public std::ostream {
 public:
  // `s` is the string to append to. It may be null, in which case str(p)
  // must be called with a non-null pointer before anything is written.
  //
  // The destructor never touches the target string, so the string may be
  // destroyed before the stream.
  explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {}

  // Returns the current target string (possibly null).
  std::string* str() { return s_; }
  const std::string* str() const { return s_; }

  // Redirects subsequent writes to `s`.
  void str(std::string* s) { s_ = s; }

 private:
  // Both base classes declare traits_type/int_type, so those names are
  // always reached through the streambuf base explicitly.
  using StreamBuf = std::basic_streambuf<char>;
  using Traits = StreamBuf::traits_type;

  // Single-character write: appends `c` unless it is the eof marker.
  StreamBuf::int_type overflow(int c = Traits::eof()) override {
    assert(s_);
    const bool at_eof = Traits::eq_int_type(c, Traits::eof());
    if (!at_eof) s_->push_back(static_cast<char>(c));
    return 1;
  }

  // Bulk write: appends the `n` characters starting at `s`.
  std::streamsize xsputn(const char* s, std::streamsize n) override {
    assert(s_);
    s_->append(s, n);
    return n;
  }

  std::string* s_;
};
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_

View file

@ -0,0 +1,103 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include <type_traits>
#include "gtest/gtest.h"
namespace {
// OStringStream must be usable wherever a std::ostream is expected.
TEST(OStringStream, IsOStream) {
  using absl::strings_internal::OStringStream;
  static_assert(std::is_base_of<std::ostream, OStringStream>::value, "");
}
// Construction and destruction must neither require nor modify the target
// string.
TEST(OStringStream, ConstructDestroy) {
  using absl::strings_internal::OStringStream;

  // A null target is accepted and reported back unchanged.
  {
    OStringStream stream(nullptr);
    EXPECT_EQ(nullptr, stream.str());
  }

  // Creating and destroying a stream leaves the target's contents intact.
  {
    std::string target = "abc";
    {
      OStringStream stream(&target);
      EXPECT_EQ(&target, stream.str());
    }
    EXPECT_EQ("abc", target);
  }

  // The target may be destroyed before the stream is.
  {
    std::unique_ptr<std::string> target(new std::string);
    OStringStream stream(target.get());
    target.reset();
  }
}
// str() reports the current target with the right constness, and str(p)
// retargets the stream (including to the same string and to null).
TEST(OStringStream, Str) {
  using absl::strings_internal::OStringStream;

  std::string first;
  OStringStream stream(&first);
  const OStringStream& const_stream(stream);

  static_assert(std::is_same<decltype(stream.str()), std::string*>(), "");
  static_assert(
      std::is_same<decltype(const_stream.str()), const std::string*>(), "");

  EXPECT_EQ(&first, stream.str());
  EXPECT_EQ(&first, const_stream.str());

  // Retargeting to the same string changes nothing.
  stream.str(&first);
  EXPECT_EQ(&first, stream.str());
  EXPECT_EQ(&first, const_stream.str());

  std::string second;
  stream.str(&second);
  EXPECT_EQ(&second, stream.str());
  EXPECT_EQ(&second, const_stream.str());

  stream.str(nullptr);
  EXPECT_EQ(nullptr, stream.str());
  EXPECT_EQ(nullptr, const_stream.str());
}
// Writing through a named stream appends to the target string, and the
// appended text remains after the stream is destroyed.
//
// The suite is named OStringStream to match the other tests in this file and
// the class under test (it was previously misspelled "OStreamStream").
TEST(OStringStream, WriteToLValue) {
  std::string s = "abc";
  {
    absl::strings_internal::OStringStream strm(&s);
    EXPECT_EQ("abc", s);
    strm << "";  // writing an empty string appends nothing
    EXPECT_EQ("abc", s);
    strm << 42;
    EXPECT_EQ("abc42", s);
    strm << 'x' << 'y';
    EXPECT_EQ("abc42xy", s);
  }
  EXPECT_EQ("abc42xy", s);
}
// Writing through temporary streams appends to the target string just like
// writing through a named stream.
//
// The suite is named OStringStream to match the other tests in this file and
// the class under test (it was previously misspelled "OStreamStream").
TEST(OStringStream, WriteToRValue) {
  std::string s = "abc";
  absl::strings_internal::OStringStream(&s) << "";
  EXPECT_EQ("abc", s);
  absl::strings_internal::OStringStream(&s) << 42;
  EXPECT_EQ("abc42", s);
  absl::strings_internal::OStringStream(&s) << 'x' << 'y';
  EXPECT_EQ("abc42xy", s);
}
} // namespace

View file

@ -0,0 +1,69 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#include <string>
#include <utility>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h" // for void_t
namespace absl {
namespace strings_internal {
// Is a subclass of true_type or false_type, depending on whether or not
// T has a resize_uninitialized member.
//
// Primary template: used when the specialization below is not viable, i.e.
// when `t.resize_uninitialized(237)` is not a valid expression for a T.
template <typename T, typename = void>
struct HasResizeUninitialized : std::false_type {};
// Specialization selected when calling resize_uninitialized with an integer
// argument is well-formed. The literal 237 is only used inside decltype and is
// never evaluated.
template <typename T>
struct HasResizeUninitialized<
T, absl::void_t<decltype(std::declval<T>().resize_uninitialized(237))>>
: std::true_type {};
// Tag-dispatched resize helpers. The third argument carries no data; its
// type alone selects the overload.

// Chosen when string_type provides resize_uninitialized(): grows or shrinks
// `*s` to `new_size` through that member.
template <typename string_type>
void ResizeUninit(string_type* s, size_t new_size,
                  std::true_type /*has_resize_uninitialized*/) {
  (*s).resize_uninitialized(new_size);
}

// Fallback when string_type has no resize_uninitialized(): uses the ordinary
// resize().
template <typename string_type>
void ResizeUninit(string_type* s, size_t new_size,
                  std::false_type /*has_resize_uninitialized*/) {
  (*s).resize(new_size);
}
// Returns true if the std::string implementation supports a resize where
// the new characters added to the std::string are left untouched.
//
// (A better name might be "STLStringSupportsUninitializedResize", alluding to
// the previous function.)
template <typename string_type>
inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
return HasResizeUninitialized<string_type>();
}
// Like str->resize(new_size), except any new characters added to "*str" as a
// result of resizing may be left uninitialized, rather than being filled with
// '0' bytes. Typically used when code is then going to overwrite the backing
// store of the std::string with known data. Uses a Google extension to std::string.
template <typename string_type, typename = void>
inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
ResizeUninit(s, new_size, HasResizeUninitialized<string_type>());
}
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_

View file

@ -0,0 +1,68 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/resize_uninitialized.h"
#include "gtest/gtest.h"
namespace {
// Number of resize() calls made on either fake string type below.
int resize_call_count = 0;
// Fake string type that offers only resize(); each call is counted.
struct resizable_string {
void resize(size_t) { resize_call_count += 1; }
};
// Number of resize_uninitialized() calls made on the fake string type below.
int resize_uninitialized_call_count = 0;
// Fake string type that offers both resize() and resize_uninitialized(); each
// call bumps the matching counter.
struct resize_uninitializable_string {
void resize(size_t) { resize_call_count += 1; }
void resize_uninitialized(size_t) { resize_uninitialized_call_count += 1; }
};
// Checks, using the call counters above, that STLStringResizeUninitialized
// calls resize() on a type without resize_uninitialized() and calls
// resize_uninitialized() on a type that has it, and that
// STLStringSupportsNontrashingResize reports the difference without calling
// either member.
TEST(ResizeUninit, WithAndWithout) {
resize_call_count = 0;
resize_uninitialized_call_count = 0;
// Type with resize() only.
{
resizable_string rs;
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_uninitialized_call_count, 0);
EXPECT_FALSE(
absl::strings_internal::STLStringSupportsNontrashingResize(&rs));
// The capability query must not have resized anything.
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_uninitialized_call_count, 0);
absl::strings_internal::STLStringResizeUninitialized(&rs, 237);
EXPECT_EQ(resize_call_count, 1);
EXPECT_EQ(resize_uninitialized_call_count, 0);
}
resize_call_count = 0;
resize_uninitialized_call_count = 0;
// Type with both resize() and resize_uninitialized().
{
resize_uninitializable_string rus;
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_uninitialized_call_count, 0);
EXPECT_TRUE(
absl::strings_internal::STLStringSupportsNontrashingResize(&rus));
// The capability query must not have resized anything.
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_uninitialized_call_count, 0);
absl::strings_internal::STLStringResizeUninitialized(&rus, 237);
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_uninitialized_call_count, 1);
}
}
} // namespace

View file

@ -0,0 +1,314 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Join API that are inlined/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in this file are:
//
// - A handful of default Formatters
// - JoinAlgorithm() overloads
// - JoinRange() overloads
// - JoinTuple()
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_join.h
//
// IWYU pragma: private, include "absl/strings/str_join.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#include <cassert>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include "absl/strings/internal/ostringstream.h"
#include "absl/strings/str_cat.h"
namespace absl {
namespace strings_internal {
//
// Formatter objects
//
// The following are implementation classes for standard Formatter objects. The
// factory functions that users will call to create and use these formatters are
// defined and documented in absl/strings/str_join.h.
//
// The default formatter. Appends the AlphaNum representation of each element
// to the output string via StrAppend().
struct AlphaNumFormatterImpl {
// Generic overload: explicitly converts `t` to an AlphaNum and appends it to
// `*out`. This template is needed in order to support passing in a
// dereferenced vector<bool>::iterator.
template <typename T>
void operator()(std::string* out, const T& t) const {
StrAppend(out, AlphaNum(t));
}
// Overload for arguments that already are an AlphaNum; appends `t` to `*out`
// without an extra conversion.
void operator()(std::string* out, const AlphaNum& t) const {
StrAppend(out, t);
}
};
// A tag type that's used to select the JoinAlgorithm() overload (defined
// below) for ranges that do not require additional formatting (e.g., a range
// of strings). It derives from AlphaNumFormatterImpl and adds no members of
// its own.
struct NoFormatter : public AlphaNumFormatterImpl {};
// Formats types to strings using the << operator. The formatter owns a
// lazily-created OStringStream that is re-targeted at the output string on
// every call.
class StreamFormatterImpl {
public:
// Streams `t` into `*out` using operator<<.
//
// The method isn't const because it mutates state. Making it const will
// render StreamFormatterImpl thread-hostile.
template <typename T>
void operator()(std::string* out, const T& t) {
// The stream is created lazily to avoid paying the relatively high cost
// of its construction when joining an empty range.
if (strm_) {
strm_->clear(); // clear the bad, fail and eof bits in case they were set
// Point the existing stream at the current output string.
strm_->str(out);
} else {
strm_.reset(new strings_internal::OStringStream(out));
}
*strm_ << t;
}
private:
// Null until the first call to operator(); reused by later calls.
std::unique_ptr<strings_internal::OStringStream> strm_;
};
// Formats a std::pair<>. The 'first' member is formatted using f1_ and the
// 'second' member is formatted using f2_. sep_ is the separator that is
// appended between the two.
template <typename F1, typename F2>
class PairFormatterImpl {
public:
// Takes ownership of both sub-formatters and stores its own std::string copy
// of `sep`.
PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2)
: f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}
// Appends "<p.first><sep_><p.second>" to `*out`. This non-const overload
// invokes f1_ and f2_ as non-const objects, so sub-formatters whose
// operator() is not const can be used.
template <typename T>
void operator()(std::string* out, const T& p) {
f1_(out, p.first);
out->append(sep_);
f2_(out, p.second);
}
// Same as above, for use on a const PairFormatterImpl; requires both
// sub-formatters to be callable when const.
template <typename T>
void operator()(std::string* out, const T& p) const {
f1_(out, p.first);
out->append(sep_);
f2_(out, p.second);
}
private:
F1 f1_;
std::string sep_;
F2 f2_;
};
// Wraps another formatter and dereferences the argument to operator() then
// passes the dereferenced argument to the wrapped formatter. This can be
// useful, for example, to join a std::vector<int*>.
template <typename Formatter>
class DereferenceFormatterImpl {
public:
// Value-initializes the wrapped formatter.
DereferenceFormatterImpl() : f_() {}
// Wraps the given formatter, forwarding it into the f_ member.
explicit DereferenceFormatterImpl(Formatter&& f)
: f_(std::forward<Formatter>(f)) {}
// Formats `*t` into `*out` with the wrapped formatter. Non-const overload so
// that wrapped formatters with a non-const operator() can be used.
template <typename T>
void operator()(std::string* out, const T& t) {
f_(out, *t);
}
// Const counterpart of the overload above.
template <typename T>
void operator()(std::string* out, const T& t) const {
f_(out, *t);
}
private:
Formatter f_;
};
// DefaultFormatter<T> is a traits class whose nested ::Type names the Formatter
// to use for elements of type T when the caller of a Join function did NOT
// supply a Formatter (see the JoinRange() overload that takes only iterators
// and a separator).
//
// The primary template picks AlphaNumFormatterImpl; the specializations that
// follow override that choice for particular element types.
template <typename ValueType>
struct DefaultFormatter {
  using Type = AlphaNumFormatterImpl;
};

// C strings are formatted as text rather than being dereferenced like other
// pointers (these explicit specializations take precedence over the
// ValueType* partial specialization below).
template <>
struct DefaultFormatter<const char*> {
  using Type = AlphaNumFormatterImpl;
};
template <>
struct DefaultFormatter<char*> {
  using Type = AlphaNumFormatterImpl;
};

// String-like elements need no formatting at all; NoFormatter selects the
// JoinAlgorithm() overload that copies the bytes directly.
template <>
struct DefaultFormatter<std::string> {
  using Type = NoFormatter;
};
template <>
struct DefaultFormatter<absl::string_view> {
  using Type = NoFormatter;
};

// Pointers are dereferenced and the pointee is formatted with the pointee
// type's own default formatter.
template <typename ValueType>
struct DefaultFormatter<ValueType*> {
  using Type =
      DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>;
};

// std::unique_ptr<T> is treated exactly like T*.
template <typename ValueType>
struct DefaultFormatter<std::unique_ptr<ValueType>>
    : public DefaultFormatter<ValueType*> {};
//
// JoinAlgorithm() functions
//
// The general-purpose joining algorithm. Walks the iterator range once,
// letting the Formatter `f` append each element to the output std::string and
// inserting the separator `s` between consecutive elements (never before the
// first or after the last).
template <typename Iterator, typename Formatter>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          Formatter&& f) {
  std::string joined;
  bool is_first = true;
  for (Iterator cur = start; cur != end; ++cur) {
    // Every element except the first is preceded by the separator.
    if (!is_first) {
      joined.append(s.data(), s.size());
    }
    is_first = false;
    f(&joined, *cur);
  }
  return joined;
}
// Size estimate for single-pass (input) iterators. Such a range cannot be
// traversed a second time, so nothing is measured and 0 is always returned.
template <typename Iterator>
size_t GetResultSize(Iterator /*first*/, Iterator /*last*/,
                     size_t /*separator_size*/, std::input_iterator_tag) {
  return 0;
}
// Size computation for multi-pass (forward or stronger) iterators. Returns the
// sum of size() over all elements plus one `separator_size` for each gap
// between neighbouring elements. The range must not be empty.
template <typename Iterator>
size_t GetResultSize(Iterator it, Iterator end, size_t separator_size,
                     std::forward_iterator_tag) {
  assert(it != end);
  size_t total = it->size();
  for (++it; it != end; ++it) {
    total += separator_size + it->size();
  }
  return total;
}
// JoinAlgorithm() overload for ranges of std::string-like elements (anything
// exposing data() and size(), e.g. std::string or absl::string_view) that need
// no formatting. It is chosen by passing a NoFormatter, an internal tag type,
// so it only applies when the caller did not provide an explicit Formatter.
//
// Compared with the generic overload, the output std::string's capacity is
// reserved up front via GetResultSize(). For multi-pass iterators this means
// the range is traversed twice: once to measure and once to copy elements and
// separators. For single-pass iterators GetResultSize() reports 0, so nothing
// useful is reserved.
template <typename Iterator>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          NoFormatter) {
  std::string joined;
  if (start == end) {
    return joined;
  }

  // Dispatch on the iterator category to pick the right GetResultSize().
  using Category = typename std::iterator_traits<Iterator>::iterator_category;
  joined.reserve(GetResultSize(start, end, s.size(), Category()));

  // The first element is copied on its own; every later element is preceded
  // by the separator.
  Iterator cur = start;
  joined.append(cur->data(), cur->size());
  for (++cur; cur != end; ++cur) {
    joined.append(s.data(), s.size());
    joined.append(cur->data(), cur->size());
  }
  return joined;
}
// JoinTupleLoop implements a loop over the elements of a std::tuple, which
// are heterogeneous. The primary template matches the tuple interior case. It
// continues the iteration after appending a separator (for nonzero indices)
// and formatting an element of the tuple. The specialization for the I=N case
// matches the end-of-tuple, and terminates the iteration.
//
// I is the index of the element handled by this step; N is the tuple size.
template <size_t I, size_t N>
struct JoinTupleLoop {
// Appends element I of `tup` to `*out` (preceded by `sep` unless I == 0),
// then recurses to handle element I + 1.
template <typename Tup, typename Formatter>
void operator()(std::string* out, const Tup& tup, absl::string_view sep,
Formatter&& fmt) {
if (I > 0) out->append(sep.data(), sep.size());
fmt(out, std::get<I>(tup));
JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt);
}
};
// End of the recursion (I == N): there is no element left, so nothing is
// appended.
template <size_t N>
struct JoinTupleLoop<N, N> {
template <typename Tup, typename Formatter>
void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {}
};
template <typename... T, typename Formatter>
std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep,
Formatter&& fmt) {
std::string result;
JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
return result;
}
template <typename Iterator>
std::string JoinRange(Iterator first, Iterator last, absl::string_view separator) {
// No formatter was explicitly given, so a default must be chosen.
typedef typename std::iterator_traits<Iterator>::value_type ValueType;
typedef typename DefaultFormatter<ValueType>::Type Formatter;
return JoinAlgorithm(first, last, separator, Formatter());
}
// Joins every element of `range` with `separator`, formatting each element
// with the caller-supplied Formatter `fmt`.
template <typename Range, typename Formatter>
std::string JoinRange(const Range& range, absl::string_view separator,
                      Formatter&& fmt) {
  // Unqualified begin()/end() calls so that argument-dependent lookup can find
  // overloads for user-defined range types, with std:: as the fallback.
  using std::begin;
  using std::end;
  auto first = begin(range);
  auto last = end(range);
  return JoinAlgorithm(first, last, separator, fmt);
}
// Joins every element of `range` with `separator` using the default formatter
// for the range's element type; forwards to the iterator-pair JoinRange().
template <typename Range>
std::string JoinRange(const Range& range, absl::string_view separator) {
  // Unqualified begin()/end() calls so that argument-dependent lookup can find
  // overloads for user-defined range types, with std:: as the fallback.
  using std::begin;
  using std::end;
  auto first = begin(range);
  auto last = end(range);
  return JoinRange(first, last, separator);
}
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_

View file

@ -0,0 +1,439 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Split API that are inline/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in here are
//
// - ConvertibleToStringView
// - SplitIterator<>
// - Splitter<>
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#ifdef _GLIBCXX_DEBUG
#include <glibcxx_debug_traits.h>
#endif // _GLIBCXX_DEBUG
#include <array>
#include <initializer_list>
#include <iterator>
#include <map>
#include <type_traits>
#include <utility>
#include <vector>
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
namespace absl {
namespace strings_internal {
// IsStrictlyDebugWrapperBase<T> is imported from glibcxx_debug_traits when
// _GLIBCXX_DEBUG is defined. Otherwise a local stand-in is declared whose
// ::value is false for every T. It is consulted by SplitterIsConvertibleTo.
#ifdef _GLIBCXX_DEBUG
using ::glibcxx_debug_traits::IsStrictlyDebugWrapperBase;
#else // _GLIBCXX_DEBUG
template <typename T> struct IsStrictlyDebugWrapperBase : std::false_type {};
#endif // _GLIBCXX_DEBUG
// This class is implicitly constructible from everything that absl::string_view
// is implicitly constructible from. If it's constructed from a temporary
// std::string, the data is moved into a data member so its lifetime matches that of
// the ConvertibleToStringView instance.
class ConvertibleToStringView {
public:
// The constructors below only record a view of the argument's characters in
// value_; copy_ is left empty.
ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
: value_(s) {}
// Matches rvalue strings and moves their data to a member.
ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit)
: copy_(std::move(s)), value_(copy_) {}
// Copy constructor. If other's view refers to other's own copy_, the new view
// is pointed at this object's copy_ instead, so it does not refer to storage
// owned by `other`.
ConvertibleToStringView(const ConvertibleToStringView& other)
: copy_(other.copy_),
value_(other.IsSelfReferential() ? copy_ : other.value_) {}
// Move constructor; see StealMembers().
ConvertibleToStringView(ConvertibleToStringView&& other) {
StealMembers(std::move(other));
}
// Assignment takes its argument by value and steals its members, so it serves
// both copy- and move-assignment.
ConvertibleToStringView& operator=(ConvertibleToStringView other) {
StealMembers(std::move(other));
return *this;
}
// Returns the viewed characters.
absl::string_view value() const { return value_; }
private:
// Returns true if this object's value_ refers to its internal copy_ member.
bool IsSelfReferential() const { return value_.data() == copy_.data(); }
// Takes over the contents of `other`. When `other` views its own copy_, the
// string is moved here and both objects' views are re-pointed at their own
// copy_ members. Otherwise only the view is copied.
void StealMembers(ConvertibleToStringView&& other) {
if (other.IsSelfReferential()) {
copy_ = std::move(other.copy_);
value_ = copy_;
other.value_ = other.copy_;
} else {
value_ = other.value_;
}
}
// Holds the data moved from temporary std::string arguments. Declared first so
// that 'value_' can refer to 'copy_'.
std::string copy_;
absl::string_view value_;
};
// An iterator that enumerates the parts of a std::string from a Splitter. The text
// to be split, the Delimiter, and the Predicate are all taken from the given
// Splitter object. Iterators may only be compared if they refer to the same
// Splitter instance.
//
// This class is NOT part of the public splitting API.
template <typename Splitter>
class SplitIterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = absl::string_view;
using difference_type = ptrdiff_t;
using pointer = const value_type*;
using reference = const value_type&;
// Iteration state:
// kInitState - the state passed in by Splitter::begin().
// kLastState - the delimiter search has reached the end of the text; the
// next increment moves the iterator to kEndState.
// kEndState - past-the-end; also the state passed in by Splitter::end().
enum State { kInitState, kLastState, kEndState };
// Builds an iterator over `splitter`, taking copies of its delimiter and
// predicate. Unless `state` is kEndState (or the text's data() is nullptr,
// see below), the iterator is advanced once so that it refers to the first
// piece accepted by the predicate.
SplitIterator(State state, const Splitter* splitter)
: pos_(0),
state_(state),
splitter_(splitter),
delimiter_(splitter->delimiter()),
predicate_(splitter->predicate()) {
// Hack to maintain backward compatibility. This one block makes it so an
// empty absl::string_view whose .data() happens to be nullptr behaves
// *differently* from an otherwise empty absl::string_view whose .data() is
// not nullptr. This is an undesirable difference in general, but this
// behavior is maintained to avoid breaking existing code that happens to
// depend on this old behavior/bug. Perhaps it will be fixed one day. The
// difference in behavior is as follows:
// Split(absl::string_view(""), '-'); // {""}
// Split(absl::string_view(), '-'); // {}
if (splitter_->text().data() == nullptr) {
state_ = kEndState;
pos_ = splitter_->text().size();
return;
}
if (state_ == kEndState) {
// End iterators sit at the end of the text, matching the position that an
// advancing iterator has when it reaches kEndState.
pos_ = splitter_->text().size();
} else {
++(*this);
}
}
// Returns true once the iterator has moved past the last piece.
bool at_end() const { return state_ == kEndState; }
reference operator*() const { return curr_; }
pointer operator->() const { return &curr_; }
// Advances to the next piece for which the predicate returns true, or to
// kEndState if there is none.
SplitIterator& operator++() {
do {
if (state_ == kLastState) {
state_ = kEndState;
return *this;
}
const absl::string_view text = splitter_->text();
// `d` is the view of the next delimiter as located by the Delimiter object.
const absl::string_view d = delimiter_.Find(text, pos_);
// A result positioned at the end of the text marks this piece as the last.
if (d.data() == text.end()) state_ = kLastState;
// The current piece runs from pos_ up to the start of the delimiter.
curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
// Skip over the piece and the delimiter that follows it.
pos_ += curr_.size() + d.size();
} while (!predicate_(curr_));
return *this;
}
// Post-increment: advances the iterator and returns a copy of its prior value.
SplitIterator operator++(int) {
SplitIterator old(*this);
++(*this);
return old;
}
// Equality compares only the state and the position; the Splitter pointer is
// not compared (see the class comment on comparing iterators).
friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
return a.state_ == b.state_ && a.pos_ == b.pos_;
}
friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
return !(a == b);
}
private:
// Offset into the text at which the search for the next delimiter starts.
size_t pos_;
State state_;
// The piece returned by operator* and operator->.
absl::string_view curr_;
const Splitter* splitter_;
typename Splitter::DelimiterType delimiter_;
typename Splitter::PredicateType predicate_;
};
// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
// The primary template is the "no" answer; the partial specialization is
// selected only when absl::void_t<typename T::mapped_type> is well-formed.
template <typename T, typename = void>
struct HasMappedType : std::false_type {};
template <typename T>
struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
: std::true_type {};
// HasValueType<T>::value is true iff there exists a type T::value_type.
// The primary template is the "no" answer; the partial specialization is
// selected only when absl::void_t<typename T::value_type> is well-formed.
template <typename T, typename = void>
struct HasValueType : std::false_type {};
template <typename T>
struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
};
// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
// The primary template is the "no" answer; the partial specialization is
// selected only when absl::void_t<typename T::const_iterator> is well-formed.
template <typename T, typename = void>
struct HasConstIterator : std::false_type {};
template <typename T>
struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
: std::true_type {};
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
// details below in Splitter<> where this is used.
//
// The answer is obtained by overload resolution on a null T*: the template
// overload is chosen when T* is a pointer to some std::initializer_list<U>,
// and the variadic overload is chosen otherwise. The dispatch functions are
// only declared, never defined; they are used solely inside decltype.
std::false_type IsInitializerListDispatch(...); // default: No
template <typename T>
std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
template <typename T>
struct IsInitializerList
: decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
// is true for type 'C'.
//
// Restricts conversion to container-like types (by testing for the presence of
// a const_iterator member type) and also to disable conversion to an
// std::initializer_list (which also has a const_iterator). Otherwise, code
// compiled in C++11 will get an error due to ambiguous conversion paths (in
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
// or an std::initializer_list<T>).
//
// In full, the condition requires that C is not flagged by
// IsStrictlyDebugWrapperBase, is not an std::initializer_list, and has both a
// value_type and a const_iterator member type.
template <typename C>
struct SplitterIsConvertibleTo
: std::enable_if<
!IsStrictlyDebugWrapperBase<C>::value &&
!IsInitializerList<C>::value &&
HasValueType<C>::value &&
HasConstIterator<C>::value> {};
// This class implements the range that is returned by absl::StrSplit(). This
// class has templated conversion operators that allow it to be implicitly
// converted to a variety of types that the caller may have specified on the
// left-hand side of an assignment.
//
// The main interface for interacting with this class is through its implicit
// conversion operators. However, this class may also be used like a container
// in that it has .begin() and .end() member functions. It may also be used
// within a range-for loop.
//
// Output containers can be collections of any type that is constructible from
// an absl::string_view.
//
// A Predicate functor may be supplied. This predicate will be used to filter
// the split strings: only strings for which the predicate returns true will be
// kept. A Predicate object is any unary functor that takes an absl::string_view
// and returns bool.
template <typename Delimiter, typename Predicate>
class Splitter {
public:
using DelimiterType = Delimiter;
using PredicateType = Predicate;
using const_iterator = strings_internal::SplitIterator<Splitter>;
using value_type = typename std::iterator_traits<const_iterator>::value_type;
// Stores the text to split together with the delimiter and predicate objects
// that SplitIterator will use.
Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
: text_(std::move(input_text)),
delimiter_(std::move(d)),
predicate_(std::move(p)) {}
// Accessors used by SplitIterator.
absl::string_view text() const { return text_.value(); }
const Delimiter& delimiter() const { return delimiter_; }
const Predicate& predicate() const { return predicate_; }
// Range functions that iterate the split substrings as absl::string_view
// objects. These methods enable a Splitter to be used in a range-based for
// loop.
const_iterator begin() const { return {const_iterator::kInitState, this}; }
const_iterator end() const { return {const_iterator::kEndState, this}; }
// An implicit conversion operator that is restricted to only those containers
// that the splitter is convertible to.
template <typename Container,
typename OnlyIf = typename SplitterIsConvertibleTo<Container>::type>
operator Container() const { // NOLINT(runtime/explicit)
// Dispatch to the ConvertToContainer specialization that matches the
// container, its value_type, and whether it has a mapped_type (map-like).
return ConvertToContainer<Container, typename Container::value_type,
HasMappedType<Container>::value>()(*this);
}
// Returns a pair with its .first and .second members set to the first two
// strings returned by the begin() iterator. Either/both of .first and .second
// will be constructed with empty strings if the iterator doesn't have a
// corresponding value.
template <typename First, typename Second>
operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
absl::string_view first, second;
auto it = begin();
if (it != end()) {
first = *it;
if (++it != end()) {
second = *it;
}
}
return {First(first), Second(second)};
}
private:
// ConvertToContainer is a functor converting a Splitter to the requested
// Container of ValueType. It is specialized below to optimize splitting to
// certain combinations of Container and ValueType.
//
// This base template handles the generic case of storing the split results in
// the requested non-map-like container and converting the split substrings to
// the requested type.
template <typename Container, typename ValueType, bool is_map = false>
struct ConvertToContainer {
Container operator()(const Splitter& splitter) const {
Container c;
// std::inserter is used so that any container with insert(pos, value) works.
auto it = std::inserter(c, c.end());
for (const auto sp : splitter) {
*it++ = ValueType(sp);
}
return c;
}
};
// Partial specialization for a std::vector<absl::string_view>.
//
// Optimized for the common case of splitting to a
// std::vector<absl::string_view>. In this case we first split the results to
// a small array of absl::string_view on the stack, to reduce reallocations.
template <typename A>
struct ConvertToContainer<std::vector<absl::string_view, A>,
absl::string_view, false> {
std::vector<absl::string_view, A> operator()(
const Splitter& splitter) const {
// Plain pointer/length pair that converts to absl::string_view on insertion.
// NOTE(review): presumably used instead of absl::string_view so the stack
// array below needs no per-element construction -- confirm.
struct raw_view {
const char* data;
size_t size;
operator absl::string_view() const { // NOLINT(runtime/explicit)
return {data, size};
}
};
std::vector<absl::string_view, A> v;
std::array<raw_view, 16> ar;
// Fill the stack array with up to ar.size() pieces at a time, then append
// each filled batch to the vector with a single insert() call.
for (auto it = splitter.begin(); !it.at_end();) {
size_t index = 0;
do {
ar[index].data = it->data();
ar[index].size = it->size();
++it;
} while (++index != ar.size() && !it.at_end());
v.insert(v.end(), ar.begin(), ar.begin() + index);
}
return v;
}
};
// Partial specialization for a std::vector<std::string>.
//
// Optimized for the common case of splitting to a std::vector<std::string>. In
// this case we first split the results to a std::vector<absl::string_view> so
// the returned std::vector<std::string> can have space reserved to avoid std::string
// moves.
template <typename A>
struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
std::vector<std::string, A> operator()(const Splitter& splitter) const {
// Uses the std::vector<absl::string_view> conversion above, then builds the
// strings from the resulting views via the vector's iterator-range
// constructor.
const std::vector<absl::string_view> v = splitter;
return std::vector<std::string, A>(v.begin(), v.end());
}
};
// Partial specialization for containers of pairs (e.g., maps).
//
// The algorithm is to insert a new pair into the map for each even-numbered
// item, with the even-numbered item as the key with a default-constructed
// value. Each odd-numbered item will then be assigned to the last pair's
// value.
template <typename Container, typename First, typename Second>
struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
Container operator()(const Splitter& splitter) const {
Container m;
typename Container::iterator it;
// `insert` alternates: true for key items, false for value items.
bool insert = true;
for (const auto sp : splitter) {
if (insert) {
it = Inserter<Container>::Insert(&m, First(sp), Second());
} else {
it->second = Second(sp);
}
insert = !insert;
}
return m;
}
// Inserts the key and value into the given map, returning an iterator to
// the inserted item. Specialized for std::map and std::multimap to use
// emplace() and adapt emplace()'s return value.
template <typename Map>
struct Inserter {
using M = Map;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
// Generic case: takes .first of insert()'s result as the iterator.
return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
}
};
template <typename... Ts>
struct Inserter<std::map<Ts...>> {
using M = std::map<Ts...>;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
// std::map::emplace returns a pair<iterator, bool>; keep the iterator.
return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
}
};
template <typename... Ts>
struct Inserter<std::multimap<Ts...>> {
using M = std::multimap<Ts...>;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
// std::multimap::emplace returns the iterator directly.
return m->emplace(std::make_pair(std::forward<Args>(args)...));
}
};
};
// The text being split; owns the characters when constructed from a temporary
// std::string (see ConvertibleToStringView).
ConvertibleToStringView text_;
Delimiter delimiter_;
Predicate predicate_;
};
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_

View file

@ -0,0 +1,51 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// UTF8 utilities, implemented to reduce dependencies.
#include "absl/strings/internal/utf8.h"
namespace absl {
namespace strings_internal {
// Writes the UTF-8 encoding of the code point `utf8_char` into `buffer` and
// returns the number of bytes written (1 to 4). No terminating NUL is written.
// Values above 0xFFFF always take the 4-byte form; the lead byte is truncated
// to a char, so out-of-range inputs still write exactly 4 bytes.
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) {
  // ASCII is stored verbatim in a single byte.
  if (utf8_char <= 0x7F) {
    buffer[0] = static_cast<char>(utf8_char);
    return 1;
  }

  // Choose the sequence length and the marker bits of its lead byte.
  size_t length;
  char32_t lead_marker;
  if (utf8_char <= 0x7FF) {
    length = 2;
    lead_marker = 0xC0;
  } else if (utf8_char <= 0xFFFF) {
    length = 3;
    lead_marker = 0xE0;
  } else {
    length = 4;
    lead_marker = 0xF0;
  }

  // Continuation bytes carry six payload bits each and are filled in from the
  // last byte towards the front.
  for (size_t i = length - 1; i > 0; --i) {
    buffer[i] = static_cast<char>(0x80 | (utf8_char & 0x3F));
    utf8_char >>= 6;
  }
  // Whatever bits remain go into the lead byte.
  buffer[0] = static_cast<char>(lead_marker | utf8_char);
  return length;
}
} // namespace strings_internal
} // namespace absl

View file

@ -0,0 +1,52 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// UTF8 utilities, implemented to reduce dependencies.
//
// If you need Unicode specific processing (for example being aware of
// Unicode character boundaries, or knowledge of Unicode casing rules,
// or various forms of equivalence and normalization), take a look at
// files in i18n/utf8.
#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_
#define ABSL_STRINGS_INTERNAL_UTF8_H_
#include <cstddef>
#include <cstdint>
namespace absl {
namespace strings_internal {
// The largest number of bytes EncodeUTF8Char() will ever write into its
// buffer, regardless of the input value.
enum { kMaxEncodedUTF8Size = 4 };
// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes
// out the UTF-8 encoding into buffer, and returns the number of chars
// it wrote. Despite its name, `utf8_char` is the code point to encode, not
// already-encoded UTF-8.
//
// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings
// are:
// 00 - 7F : 0xxxxxxx
// 80 - 7FF : 110xxxxx 10xxxxxx
// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx
// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
//
// Values greater than 0x10FFFF are not supported and may or may not write
// characters into buffer, however never will more than kMaxEncodedUTF8Size
// bytes be written, regardless of the value of utf8_char.
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char);
} // namespace strings_internal
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_UTF8_H_

View file

@ -0,0 +1,58 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/utf8.h"
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include "gtest/gtest.h"
namespace {
// Checks EncodeUTF8Char() against u8 string literals for several code points,
// then checks that out-of-range inputs never report more than
// kMaxEncodedUTF8Size bytes written.
TEST(EncodeUTF8Char, BasicFunction) {
// Each entry pairs a code point with its expected UTF-8 encoding.
std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"},
{0x00A3, u8"\u00A3"},
{0x00010000, u8"\U00010000"},
{0x0000FFFF, u8"\U0000FFFF"},
{0x0010FFFD, u8"\U0010FFFD"}};
for (auto &test : tests) {
// Encode into two buffers pre-filled with different sentinel bytes (0x00 and
// 0xFF). A trailing byte counts as untouched only if both buffers still hold
// their own sentinel at that position.
char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'};
char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'};
char *buf0_written =
&buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)];
char *buf1_written =
&buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)];
// Scan backwards from the end to find how many leading bytes were actually
// overwritten in at least one of the buffers.
int apparent_length = 7;
while (buf0[apparent_length - 1] == '\x00' &&
buf1[apparent_length - 1] == '\xFF') {
if (--apparent_length == 0) break;
}
// The observed length must match the returned length and the expected
// encoding, and the written bytes must equal the expected encoding.
EXPECT_EQ(apparent_length, buf0_written - buf0);
EXPECT_EQ(apparent_length, buf1_written - buf1);
EXPECT_EQ(apparent_length, test.second.length());
EXPECT_EQ(std::string(buf0, apparent_length), test.second);
EXPECT_EQ(std::string(buf1, apparent_length), test.second);
}
// Inputs beyond 0x10FFFF are unsupported, but the reported length must still
// be bounded by kMaxEncodedUTF8Size.
char buf[32] = "Don't Tread On Me";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000),
absl::strings_internal::kMaxEncodedUTF8Size);
char buf2[32] = "Negative is invalid but sane";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1),
absl::strings_internal::kMaxEncodedUTF8Size);
}
} // namespace

40
absl/strings/match.cc Normal file
View file

@ -0,0 +1,40 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/match.h"
#include "absl/strings/internal/memutil.h"
namespace absl {
namespace {
// Returns true when both views have the same length and compare equal under
// strings_internal::memcasecmp.
bool CaseEqual(absl::string_view piece1, absl::string_view piece2) {
  if (piece1.size() != piece2.size()) {
    return false;
  }
  // memcasecmp uses ascii_tolower().
  return strings_internal::memcasecmp(piece1.data(), piece2.data(),
                                      piece1.size()) == 0;
}
} // namespace
// Returns true if `text` begins with `preffix`, comparing with CaseEqual().
bool StartsWithIgnoreCase(absl::string_view text, absl::string_view preffix) {
  // A text shorter than the prefix can never match.
  if (text.size() < preffix.size()) {
    return false;
  }
  const absl::string_view head = text.substr(0, preffix.size());
  return CaseEqual(head, preffix);
}
// Returns true if `text` ends with `suffix`, comparing with CaseEqual().
bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) {
  // A text shorter than the suffix can never match.
  if (text.size() < suffix.size()) {
    return false;
  }
  const absl::string_view tail = text.substr(text.size() - suffix.size());
  return CaseEqual(tail, suffix);
}
} // namespace absl

81
absl/strings/match.h Normal file
View file

@ -0,0 +1,81 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: match.h
// -----------------------------------------------------------------------------
//
// This file contains simple utilities for performing string matching checks.
// All of these function parameters are specified as `absl::string_view`,
// meaning that these functions can accept `std::string`, `absl::string_view` or
// nul-terminated C-style strings.
//
// Examples:
// std::string s = "foo";
// absl::string_view sv = "f";
// EXPECT_TRUE(absl::StrContains(s, sv));
//
// Note: The order of parameters in these functions is designed to mimic the
// order an equivalent member function would exhibit;
// e.g. `s.Contains(x)` ==> `absl::StrContains(s, x)`.
#ifndef ABSL_STRINGS_MATCH_H_
#define ABSL_STRINGS_MATCH_H_
#include <cstring>
#include "absl/strings/string_view.h"
namespace absl {
// StrContains()
//
// Returns whether the string `s` contains the substring `x`, i.e. whether
// searching `s` for `x` from position 0 finds a match.
inline bool StrContains(absl::string_view s, absl::string_view x) {
  const auto found_at =
      static_cast<absl::string_view::size_type>(s.find(x, 0));
  return found_at != s.npos;
}
// StartsWith()
//
// Returns whether the string `s` begins with `x`. An empty `x` is a prefix of
// every string; otherwise the first x.size() bytes of `s` are compared
// bytewise.
inline bool StartsWith(absl::string_view s, absl::string_view x) {
  if (x.empty()) {
    return true;
  }
  if (s.size() < x.size()) {
    return false;
  }
  return memcmp(s.data(), x.data(), x.size()) == 0;
}
// EndsWith()
//
// Returns whether the string `s` ends with `x`. An empty `x` is a suffix of
// every string; otherwise the last x.size() bytes of `s` are compared
// bytewise.
inline bool EndsWith(absl::string_view s, absl::string_view x) {
  if (x.empty()) {
    return true;
  }
  if (s.size() < x.size()) {
    return false;
  }
  const char* tail = s.data() + (s.size() - x.size());
  return memcmp(tail, x.data(), x.size()) == 0;
}
// StartsWithIgnoreCase()
//
// Returns whether a given string `text` starts with `starts_with`, ignoring
// case in the comparison. Only declared in this header; the definition lives
// elsewhere.
bool StartsWithIgnoreCase(absl::string_view text,
absl::string_view starts_with);
// EndsWithIgnoreCase()
//
// Returns whether a given string `text` ends with `ends_with`, ignoring case
// in the comparison. Only declared in this header; the definition lives
// elsewhere.
bool EndsWithIgnoreCase(absl::string_view text, absl::string_view ends_with);
} // namespace absl
#endif // ABSL_STRINGS_MATCH_H_

View file

@ -0,0 +1,99 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/match.h"
#include "gtest/gtest.h"
namespace {
// Checks absl::StartsWith() against ordinary text, data containing an
// embedded NUL byte, and empty views.
TEST(MatchTest, StartsWith) {
  const std::string nul_str("123" "\0" "456", 7);
  const absl::string_view plain("foobar");
  const absl::string_view binary(nul_str);
  const absl::string_view empty;

  // A view starts with itself, with a proper prefix, and with an empty view.
  EXPECT_TRUE(absl::StartsWith(plain, plain));
  EXPECT_TRUE(absl::StartsWith(plain, "foo"));
  EXPECT_TRUE(absl::StartsWith(plain, empty));

  // The same holds for data with an embedded NUL byte.
  EXPECT_TRUE(absl::StartsWith(binary, nul_str));
  EXPECT_TRUE(absl::StartsWith(binary, binary));
  EXPECT_TRUE(absl::StartsWith(binary, empty));
  EXPECT_TRUE(absl::StartsWith(empty, ""));

  // Different contents, or a prefix longer than the text, must not match.
  EXPECT_FALSE(absl::StartsWith(plain, binary));
  EXPECT_FALSE(absl::StartsWith(binary, plain));
  EXPECT_FALSE(absl::StartsWith(empty, plain));
}
// Checks absl::EndsWith() against ordinary text, data containing an embedded
// NUL byte, and empty views.
TEST(MatchTest, EndsWith) {
  const std::string nul_str("123" "\0" "456", 7);
  const absl::string_view plain("foobar");
  const absl::string_view binary(nul_str);
  const absl::string_view empty;

  // A view ends with itself, with a proper suffix, and with an empty view.
  EXPECT_TRUE(absl::EndsWith(plain, plain));
  EXPECT_TRUE(absl::EndsWith(plain, "bar"));
  EXPECT_TRUE(absl::EndsWith(plain, empty));

  // The same holds for data with an embedded NUL byte.
  EXPECT_TRUE(absl::EndsWith(binary, nul_str));
  EXPECT_TRUE(absl::EndsWith(binary, binary));
  EXPECT_TRUE(absl::EndsWith(binary, empty));
  EXPECT_TRUE(absl::EndsWith(empty, ""));

  // Different contents, or a suffix longer than the text, must not match.
  EXPECT_FALSE(absl::EndsWith(plain, binary));
  EXPECT_FALSE(absl::EndsWith(binary, plain));
  EXPECT_FALSE(absl::EndsWith(empty, plain));
}
// Checks absl::StrContains() for whole-string, prefix, suffix, and missing
// needles, as well as for empty haystacks and needles.
TEST(MatchTest, Contains) {
  absl::string_view haystack("abcdefg");
  absl::string_view prefix("abcd");
  absl::string_view suffix("efg");
  absl::string_view overhang("gh");

  EXPECT_TRUE(absl::StrContains(haystack, haystack));
  EXPECT_TRUE(absl::StrContains(haystack, prefix));
  EXPECT_TRUE(absl::StrContains(haystack, suffix));
  // "gh" only partially overlaps the end of the haystack.
  EXPECT_FALSE(absl::StrContains(haystack, overhang));

  // The empty needle is found everywhere; a non-empty needle is never found
  // in an empty haystack.
  EXPECT_TRUE(absl::StrContains("", ""));
  EXPECT_TRUE(absl::StrContains("abc", ""));
  EXPECT_FALSE(absl::StrContains("", "a"));
}
// Checks that the matchers accept std::string, C strings, and string_views
// interchangeably, and that a view ending in a NUL byte is a different value
// from the same text without it.
TEST(MatchTest, ContainsNull) {
  const std::string std_str = "foo";
  const char* c_str = "foo";
  const absl::string_view view("foo");
  // Four bytes long: "foo" followed by the embedded NUL.
  const absl::string_view view_with_nul("foo\0bar", 4);

  EXPECT_EQ(std_str, "foo");
  EXPECT_EQ(view, "foo");
  EXPECT_NE(view_with_nul, "foo");

  EXPECT_TRUE(absl::EndsWith(std_str, view));
  EXPECT_TRUE(absl::StartsWith(c_str, view));
  EXPECT_TRUE(absl::StrContains(c_str, view));
  EXPECT_FALSE(absl::StrContains(c_str, view_with_nul));
}
// Checks absl::StartsWithIgnoreCase() for exact, mixed-case, empty, and
// over-long prefixes.
TEST(MatchTest, StartsWithIgnoreCase) {
  // Matching prefixes, including one that differs only in case.
  EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "foo"));
  EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "Fo"));
  EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", ""));

  // A prefix longer than the text never matches.
  EXPECT_FALSE(absl::StartsWithIgnoreCase("foo", "fooo"));
  EXPECT_FALSE(absl::StartsWithIgnoreCase("", "fo"));
}
// Checks absl::EndsWithIgnoreCase() for exact, mixed-case, empty, and
// over-long suffixes.
TEST(MatchTest, EndsWithIgnoreCase) {
  // Matching suffixes, including one that differs only in case.
  EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "foo"));
  EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "Oo"));
  EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", ""));

  // A suffix longer than the text never matches.
  EXPECT_FALSE(absl::EndsWithIgnoreCase("foo", "fooo"));
  EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo"));
}
} // namespace

1288
absl/strings/numbers.cc Normal file

File diff suppressed because it is too large Load diff

173
absl/strings/numbers.h Normal file
View file

@ -0,0 +1,173 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: numbers.h
// -----------------------------------------------------------------------------
//
// This package contains functions for converting strings to numbers. For
// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h,
// which automatically detect and convert most number values appropriately.
#ifndef ABSL_STRINGS_NUMBERS_H_
#define ABSL_STRINGS_NUMBERS_H_
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <limits>
#include <string>
#include <type_traits>
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
namespace absl {
// SimpleAtoi()
//
// Converts the given string into an integer value, returning `true` if
// successful. The string must reflect a base-10 integer (optionally followed or
// preceded by ASCII whitespace) whose value falls within the range of the
// integer type. The parsed value is written through `out`.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view s, int_type* out);
// SimpleAtof()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a float, which may be rounded on overflow or underflow.
// The result is written through `value`.
ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* value);
// SimpleAtod()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a double, which may be rounded on overflow or underflow.
// The result is written through `value`.
ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* value);
// SimpleAtob()
//
// Converts the given string into a boolean, returning `true` if
// successful. The following case-insensitive strings are interpreted as boolean
// `true`: "true", "t", "yes", "y", "1". The following case-insensitive strings
// are interpreted as boolean `false`: "false", "f", "no", "n", "0".
ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* value);
} // namespace absl
// End of public API. Implementation details follow.
namespace absl {
namespace numbers_internal {
// safe_strto?() functions for implementing SimpleAtoi()
// Each takes the text to parse, a pointer that receives the parsed value, and
// the numeric base; SimpleAtoi() uses the returned bool as its success flag.
bool safe_strto32_base(absl::string_view text, int32_t* value, int base);
bool safe_strto64_base(absl::string_view text, int64_t* value, int base);
bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base);
bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base);
// These functions are intended for speed. All functions take an output buffer
// as an argument and return a pointer to the last byte they wrote, which is the
// terminating '\0'. At most `kFastToBufferSize` bytes are written.
char* FastInt32ToBuffer(int32_t i, char* buffer);
char* FastUInt32ToBuffer(uint32_t i, char* buffer);
char* FastInt64ToBuffer(int64_t i, char* buffer);
char* FastUInt64ToBuffer(uint64_t i, char* buffer);
// Buffer sizes, in bytes: `kFastToBufferSize` bounds the Fast*ToBuffer()
// output above, and `kSixDigitsToBufferSize` is the size SixDigitsToBuffer()
// requires.
static const int kFastToBufferSize = 32;
static const int kSixDigitsToBufferSize = 16;
// NOTE(review): presumably these format a value so that parsing the text
// yields the same value again, as the names suggest -- confirm against the
// definitions, which are not part of this header.
char* RoundTripDoubleToBuffer(double d, char* buffer);
char* RoundTripFloatToBuffer(float f, char* buffer);
// Helper function for fast formatting of floating-point values.
// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
// significant digits are returned, trailing zeros are removed, and numbers
// outside the range 0.0001-999999 are output using scientific notation
// (1.23456e+06). This routine is heavily optimized.
// Required buffer size is `kSixDigitsToBufferSize`.
size_t SixDigitsToBuffer(double d, char* buffer);
// Writes the decimal form of `i` into `buffer` by forwarding to the
// Fast[U]Int{32,64}ToBuffer() routine that matches the signedness and size of
// `int_type`, and returns whatever that routine returns.
template <typename int_type>
char* FastIntToBuffer(int_type i, char* buffer) {
  static_assert(sizeof(i) <= 64 / 8,
                "FastIntToBuffer works only with 64-bit-or-less integers.");
  // TODO(jorg): This signed-ness check is used because it works correctly
  // with enums, and it also serves to check that int_type is not a pointer.
  // If one day something like std::is_signed<enum E> works, switch to it.
  const bool is_signed = static_cast<int_type>(1) - 2 < 0;
  // Anything wider than 32 bits (33-bit to 64-bit) takes the 64-bit routine;
  // everything else takes the 32-bit one.
  const bool needs_64_bits = sizeof(i) > 32 / 8;
  if (is_signed) {
    return needs_64_bits ? numbers_internal::FastInt64ToBuffer(i, buffer)
                         : numbers_internal::FastInt32ToBuffer(i, buffer);
  }
  return needs_64_bits ? numbers_internal::FastUInt64ToBuffer(i, buffer)
                       : numbers_internal::FastUInt32ToBuffer(i, buffer);
}
} // namespace numbers_internal
// SimpleAtoi()
//
// Converts a string to an integer, using `safe_strto?()` functions for actual
// parsing, returning `true` if successful. The `safe_strto?()` functions apply
// strict checking; the string must be a base-10 integer, optionally followed
// or preceded by ASCII whitespace, with a value in the range of the
// corresponding integer type.
//
// `*out` is assigned from the parser's output whether or not parsing
// succeeded; callers must consult the return value.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view s, int_type* out) {
  static_assert(sizeof(*out) == 4 || sizeof(*out) == 8,
                "SimpleAtoi works only with 32-bit or 64-bit integers.");
  static_assert(!std::is_floating_point<int_type>::value,
                "Use SimpleAtof or SimpleAtod instead.");
  // TODO(jorg): This signed-ness check is used because it works correctly
  // with enums, and it also serves to check that int_type is not a pointer.
  // If one day something like std::is_signed<enum E> works, switch to it.
  const bool is_signed = static_cast<int_type>(1) - 2 < 0;
  const bool is_64_bit = sizeof(*out) == 64 / 8;

  if (is_signed && is_64_bit) {
    int64_t value;
    const bool ok = numbers_internal::safe_strto64_base(s, &value, 10);
    *out = static_cast<int_type>(value);
    return ok;
  }
  if (is_signed) {  // Signed 32-bit.
    int32_t value;
    const bool ok = numbers_internal::safe_strto32_base(s, &value, 10);
    *out = static_cast<int_type>(value);
    return ok;
  }
  if (is_64_bit) {  // Unsigned 64-bit.
    uint64_t value;
    const bool ok = numbers_internal::safe_strtou64_base(s, &value, 10);
    *out = static_cast<int_type>(value);
    return ok;
  }
  // Unsigned 32-bit.
  uint32_t value;
  const bool ok = numbers_internal::safe_strtou32_base(s, &value, 10);
  *out = static_cast<int_type>(value);
  return ok;
}
} // namespace absl
#endif // ABSL_STRINGS_NUMBERS_H_

1186
absl/strings/numbers_test.cc Normal file

File diff suppressed because it is too large Load diff

208
absl/strings/str_cat.cc Normal file
View file

@ -0,0 +1,208 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_cat.h"
#include <algorithm>
#include <cassert>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include "absl/strings/ascii.h"
#include "absl/strings/internal/resize_uninitialized.h"
namespace absl {
// Formats `hex.value` as lowercase hexadecimal into the tail of `digits_`,
// left-pads with `hex.fill` up to `hex.width` characters when the number is
// shorter than that, and points `piece_` at the formatted text.
AlphaNum::AlphaNum(Hex hex) {
  static const char kHexDigits[] = "0123456789abcdef";
  char* const buffer_end = &digits_[numbers_internal::kFastToBufferSize];

  // Emit digits from least to most significant, walking backwards from the end
  // of the buffer. The do/while emits one digit even when the value is zero.
  char* cursor = buffer_end;
  uint64_t remaining = hex.value;
  do {
    *--cursor = kHexDigits[remaining & 0xF];
    remaining >>= 4;
  } while (remaining != 0);

  // Pad on the left only if fewer than `hex.width` digits were produced.
  char* first = cursor;
  if (buffer_end - cursor < hex.width) {
    first = buffer_end - hex.width;
    std::fill_n(first, cursor - first, hex.fill);
  }
  piece_ = absl::string_view(first, buffer_end - first);
}
// ----------------------------------------------------------------------
// StrCat()
// This merges the given strings or integers, with no delimiter. This
// is designed to be the fastest possible way to construct a std::string out
// of a mix of raw C strings, string_views, strings, and integer values.
// ----------------------------------------------------------------------
// Append is merely a version of memcpy that returns the address of the byte
// after the area just overwritten. It copies the `x.size()` bytes of `x` to
// `out`; the caller must have reserved that much room.
static char* Append(char* out, const AlphaNum& x) {
  // memcpy is allowed to overwrite arbitrary memory, so doing this after the
  // call would force an extra fetch of x.size().
  char* after = out + x.size();
  // An empty AlphaNum may carry a null data pointer (for example one built
  // from a default-constructed string_view). Passing a null pointer to
  // memcpy() is undefined behavior even for a zero length, so skip the call.
  if (x.size() != 0) {
    memcpy(out, x.data(), x.size());
  }
  return after;
}
// Returns the concatenation of `a` and `b`. The result is sized once up front
// and the pieces are copied straight into it.
std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
  std::string result;
  absl::strings_internal::STLStringResizeUninitialized(&result,
                                                       a.size() + b.size());
  // Take the address through operator[] instead of dereferencing begin(): when
  // both arguments are empty, begin() == end() and dereferencing it is
  // undefined (checked iterators assert), whereas result[size()] is valid.
  char* const begin = &result[0];
  char* out = begin;
  out = Append(out, a);
  out = Append(out, b);
  assert(out == begin + result.size());
  return result;
}
// Returns the concatenation of `a`, `b` and `c`. The result is sized once up
// front and the pieces are copied straight into it.
std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) {
  std::string result;
  strings_internal::STLStringResizeUninitialized(
      &result, a.size() + b.size() + c.size());
  // Take the address through operator[] instead of dereferencing begin(): when
  // every argument is empty, begin() == end() and dereferencing it is
  // undefined (checked iterators assert), whereas result[size()] is valid.
  char* const begin = &result[0];
  char* out = begin;
  out = Append(out, a);
  out = Append(out, b);
  out = Append(out, c);
  assert(out == begin + result.size());
  return result;
}
// Returns the concatenation of `a`, `b`, `c` and `d`. The result is sized once
// up front and the pieces are copied straight into it.
std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
                   const AlphaNum& d) {
  std::string result;
  strings_internal::STLStringResizeUninitialized(
      &result, a.size() + b.size() + c.size() + d.size());
  // Take the address through operator[] instead of dereferencing begin(): when
  // every argument is empty, begin() == end() and dereferencing it is
  // undefined (checked iterators assert), whereas result[size()] is valid.
  char* const begin = &result[0];
  char* out = begin;
  out = Append(out, a);
  out = Append(out, b);
  out = Append(out, c);
  out = Append(out, d);
  assert(out == begin + result.size());
  return result;
}
namespace strings_internal {
// Do not call directly - these are not part of the public API.
// Returns the concatenation of all `pieces`, in order. The total length is
// computed first so the result is sized exactly once.
std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
  std::string result;
  size_t total_size = 0;
  for (const absl::string_view piece : pieces) total_size += piece.size();
  strings_internal::STLStringResizeUninitialized(&result, total_size);

  // Take the address through operator[] instead of dereferencing begin(): when
  // the total size is zero, begin() == end() and dereferencing it is undefined
  // (checked iterators assert), whereas result[size()] is valid.
  char* const begin = &result[0];
  char* out = begin;
  for (const absl::string_view piece : pieces) {
    const size_t this_size = piece.size();
    // An empty piece may have a null data pointer, and passing null to
    // memcpy() is undefined even for a zero length, so skip empty pieces.
    if (this_size != 0) {
      memcpy(out, piece.data(), this_size);
      out += this_size;
    }
  }
  assert(out == begin + result.size());
  return result;
}
// It's possible to call StrAppend with an absl::string_view that is itself a
// fragment of the string we're appending to. However the results of this are
// undefined. Therefore, check for this in debug mode. Use unsigned math so we
// only have to do one comparison. Note, there's an exception case: appending an
// empty string is always allowed.
//
// The check passes when `src` is empty, or when the offset of `src.data()`
// from `dest.data()`, reinterpreted as an unsigned value, is greater than
// `dest.size()`. A `src` that starts before `dest` yields a negative offset,
// which becomes a very large unsigned value and therefore also passes.
#define ASSERT_NO_OVERLAP(dest, src) \
assert(((src).size() == 0) || \
(uintptr_t((src).data() - (dest).data()) > uintptr_t((dest).size())))
// Appends all `pieces`, in order, to `*dest`. None of the pieces may refer to
// the contents of `*dest` (checked in debug builds). The final length is
// computed first so `*dest` is resized exactly once.
void AppendPieces(std::string* dest,
                  std::initializer_list<absl::string_view> pieces) {
  size_t old_size = dest->size();
  size_t total_size = old_size;
  for (const absl::string_view piece : pieces) {
    ASSERT_NO_OVERLAP(*dest, piece);
    total_size += piece.size();
  }
  strings_internal::STLStringResizeUninitialized(dest, total_size);

  // Take the address through operator[] instead of dereferencing begin(): when
  // `*dest` is still empty, begin() == end() and dereferencing it is undefined
  // (checked iterators assert), whereas (*dest)[size()] is valid.
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
  for (const absl::string_view piece : pieces) {
    const size_t this_size = piece.size();
    // An empty piece may have a null data pointer, and passing null to
    // memcpy() is undefined even for a zero length, so skip empty pieces.
    if (this_size != 0) {
      memcpy(out, piece.data(), this_size);
      out += this_size;
    }
  }
  assert(out == begin + dest->size());
}
} // namespace strings_internal
void StrAppend(std::string* dest, const AlphaNum& a) {
ASSERT_NO_OVERLAP(*dest, a);
dest->append(a.data(), a.size());
}
// Appends the text of `a` and `b` to `*dest`. Neither argument may refer to
// the contents of `*dest` (checked in debug builds).
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
  ASSERT_NO_OVERLAP(*dest, a);
  ASSERT_NO_OVERLAP(*dest, b);
  std::string::size_type old_size = dest->size();
  strings_internal::STLStringResizeUninitialized(
      dest, old_size + a.size() + b.size());
  // Take the address through operator[] instead of dereferencing begin(): when
  // `*dest` and all arguments are empty, begin() == end() and dereferencing it
  // is undefined (checked iterators assert), whereas (*dest)[size()] is valid.
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
  out = Append(out, a);
  out = Append(out, b);
  assert(out == begin + dest->size());
}
// Appends the text of `a`, `b` and `c` to `*dest`. No argument may refer to
// the contents of `*dest` (checked in debug builds).
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
               const AlphaNum& c) {
  ASSERT_NO_OVERLAP(*dest, a);
  ASSERT_NO_OVERLAP(*dest, b);
  ASSERT_NO_OVERLAP(*dest, c);
  std::string::size_type old_size = dest->size();
  strings_internal::STLStringResizeUninitialized(
      dest, old_size + a.size() + b.size() + c.size());
  // Take the address through operator[] instead of dereferencing begin(): when
  // `*dest` and all arguments are empty, begin() == end() and dereferencing it
  // is undefined (checked iterators assert), whereas (*dest)[size()] is valid.
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
  out = Append(out, a);
  out = Append(out, b);
  out = Append(out, c);
  assert(out == begin + dest->size());
}
// Appends the text of `a`, `b`, `c` and `d` to `*dest`. No argument may refer
// to the contents of `*dest` (checked in debug builds).
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
               const AlphaNum& c, const AlphaNum& d) {
  ASSERT_NO_OVERLAP(*dest, a);
  ASSERT_NO_OVERLAP(*dest, b);
  ASSERT_NO_OVERLAP(*dest, c);
  ASSERT_NO_OVERLAP(*dest, d);
  std::string::size_type old_size = dest->size();
  strings_internal::STLStringResizeUninitialized(
      dest, old_size + a.size() + b.size() + c.size() + d.size());
  // Take the address through operator[] instead of dereferencing begin(): when
  // `*dest` and all arguments are empty, begin() == end() and dereferencing it
  // is undefined (checked iterators assert), whereas (*dest)[size()] is valid.
  char* const begin = &(*dest)[0];
  char* out = begin + old_size;
  out = Append(out, a);
  out = Append(out, b);
  out = Append(out, c);
  out = Append(out, d);
  assert(out == begin + dest->size());
}
} // namespace absl

348
absl/strings/str_cat.h Normal file
View file

@ -0,0 +1,348 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_cat.h
// -----------------------------------------------------------------------------
//
// This package contains functions for efficiently concatenating and appending
// strings: `StrCat()` and `StrAppend()`. Most of the work within these routines
// is actually handled through use of a special AlphaNum type, which was
// designed to be used as a parameter type that efficiently manages conversion
// to strings and avoids copies in the above operations.
//
// Any routine accepting either a std::string or a number may accept `AlphaNum`.
// The basic idea is that by accepting a `const AlphaNum &` as an argument
// to your function, your callers will automagically convert bools, integers,
// and floating point values to strings for you.
//
// NOTE: Use of `AlphaNum` outside of the //absl/strings package is unsupported
// except for the specific case of function parameters of type `AlphaNum` or
// `const AlphaNum &`. In particular, instantiating `AlphaNum` directly as a
// stack variable is not supported.
//
// Conversion from 8-bit values is not accepted because, if it were, then an
// attempt to pass ':' instead of ":" might result in a 58 ending up in your
// result.
//
// Bools convert to "0" or "1".
//
// Floating point numbers are formatted with six-digit precision, which is
// the default for "std::cout <<" or printf "%g" (the same as "%.6g").
//
//
// You can convert to hexadecimal output rather than decimal output using the
// `Hex` type contained here. To do so, pass `Hex(my_int)` as a parameter to
// `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using
// a `PadSpec` enum, so the equivalent of `StringPrintf("%04x", my_int)` is
// `absl::StrCat(absl::Hex(my_int, absl::kZeroPad4))`.
//
// -----------------------------------------------------------------------------
#ifndef ABSL_STRINGS_STR_CAT_H_
#define ABSL_STRINGS_STR_CAT_H_
#include <array>
#include <cstdint>
#include <string>
#include <type_traits>
#include "absl/base/port.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
namespace absl {
namespace strings_internal {
// AlphaNumBuffer allows a way to pass a string to StrCat without having to do
// memory allocation. It is simply a pair of a fixed-size character array, and
// a size. Please don't use outside of absl, yet.
//
// `max_size` is the capacity of the array; `size` is the number of leading
// characters of `data` that are in use.
template <size_t max_size>
struct AlphaNumBuffer {
// Character storage; only the first `size` characters are meaningful.
std::array<char, max_size> data;
// Number of characters stored in `data`. Not initialized by this struct.
size_t size;
};
} // namespace strings_internal
// Enum that specifies the minimum number of characters to produce in a `Hex`
// conversion and the fill character used to pad up to that width. A
// `kZeroPad2` value, for example, would produce hexadecimal strings such as
// "0a","0f" and a `kSpacePad5` value would produce hexadecimal strings such as
// "    a","    f".
enum PadSpec {
  kNoPad = 1,
  // Zero-padded widths: the enumerator value equals the field width.
  kZeroPad2 = 2,
  kZeroPad3 = 3,
  kZeroPad4 = 4,
  kZeroPad5 = 5,
  kZeroPad6 = 6,
  kZeroPad7 = 7,
  kZeroPad8 = 8,
  kZeroPad9 = 9,
  kZeroPad10 = 10,
  kZeroPad11 = 11,
  kZeroPad12 = 12,
  kZeroPad13 = 13,
  kZeroPad14 = 14,
  kZeroPad15 = 15,
  kZeroPad16 = 16,
  // Space-padded widths: offset by 64 from the zero-padded counterpart.
  kSpacePad2 = kZeroPad2 + 64,
  kSpacePad3 = kZeroPad3 + 64,
  kSpacePad4 = kZeroPad4 + 64,
  kSpacePad5 = kZeroPad5 + 64,
  kSpacePad6 = kZeroPad6 + 64,
  kSpacePad7 = kZeroPad7 + 64,
  kSpacePad8 = kZeroPad8 + 64,
  kSpacePad9 = kZeroPad9 + 64,
  kSpacePad10 = kZeroPad10 + 64,
  kSpacePad11 = kZeroPad11 + 64,
  kSpacePad12 = kZeroPad12 + 64,
  kSpacePad13 = kZeroPad13 + 64,
  kSpacePad14 = kZeroPad14 + 64,
  kSpacePad15 = kZeroPad15 + 64,
  kSpacePad16 = kZeroPad16 + 64,
};
// -----------------------------------------------------------------------------
// Hex
// -----------------------------------------------------------------------------
//
// `Hex` stores a set of hexadecimal string conversion parameters for use
// within `AlphaNum` string conversions: the value to print, the minimum field
// width, and the character used to pad up to that width.
struct Hex {
  uint64_t value;
  uint8_t width;
  char fill;

  // One constructor per integer size (1, 2, 4 and 8 bytes). Each converts `v`
  // to the unsigned type of the same size before widening to 64 bits, so a
  // negative value prints with the bit pattern of its own width.
  template <typename Int>
  explicit Hex(Int v, PadSpec spec = absl::kNoPad,
               typename std::enable_if<sizeof(Int) == 1>::type* = nullptr)
      : Hex(spec, static_cast<uint8_t>(v)) {}
  template <typename Int>
  explicit Hex(Int v, PadSpec spec = absl::kNoPad,
               typename std::enable_if<sizeof(Int) == 2>::type* = nullptr)
      : Hex(spec, static_cast<uint16_t>(v)) {}
  template <typename Int>
  explicit Hex(Int v, PadSpec spec = absl::kNoPad,
               typename std::enable_if<sizeof(Int) == 4>::type* = nullptr)
      : Hex(spec, static_cast<uint32_t>(v)) {}
  template <typename Int>
  explicit Hex(Int v, PadSpec spec = absl::kNoPad,
               typename std::enable_if<sizeof(Int) == 8>::type* = nullptr)
      : Hex(spec, static_cast<uint64_t>(v)) {}

 private:
  // Common constructor: decodes `spec` into a width and a fill character.
  Hex(PadSpec spec, uint64_t v)
      : value(v), width(WidthFor(spec)), fill(FillFor(spec)) {}

  // Field width encoded by `spec`: 1 for kNoPad, otherwise the N of
  // kZeroPadN / kSpacePadN.
  static uint8_t WidthFor(PadSpec spec) {
    if (spec == absl::kNoPad) return 1;
    if (spec >= absl::kSpacePad2) {
      return static_cast<uint8_t>(spec - absl::kSpacePad2 + 2);
    }
    return static_cast<uint8_t>(spec - absl::kZeroPad2 + 2);
  }

  // Fill character encoded by `spec`: a space for the kSpacePadN values and
  // '0' for everything else.
  static char FillFor(PadSpec spec) {
    if (spec >= absl::kSpacePad2) return ' ';
    return '0';
  }
};
// -----------------------------------------------------------------------------
// AlphaNum
// -----------------------------------------------------------------------------
//
// The `AlphaNum` class acts as the main parameter type for `StrCat()` and
// `StrAppend()`, providing efficient conversion of numeric, boolean, and
// hexadecimal values (through the `Hex` type) into strings.
//
// Every constructor leaves `piece_` viewing the text of its argument. The
// numeric constructors format into the internal `digits_` buffer and point
// `piece_` at it; the string-like constructors make `piece_` view the caller's
// data directly, so that data has to stay alive while the `AlphaNum` is used.
class AlphaNum {
public:
// No bool ctor -- bools convert to an integral type.
// A bool ctor would also convert incoming pointers (bletch).
// Integer constructors: FastIntToBuffer() formats into `digits_`; the
// distance from the start of `digits_` to its return value is used as the
// length of `piece_`.
AlphaNum(int x) // NOLINT(runtime/explicit)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(unsigned int x) // NOLINT(runtime/explicit)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(unsigned long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(long long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(unsigned long long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
// Floating-point constructors: SixDigitsToBuffer() formats into `digits_` and
// its return value is used as the length of `piece_`.
AlphaNum(float f) // NOLINT(runtime/explicit)
: piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
AlphaNum(double f) // NOLINT(runtime/explicit)
: piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
// Hexadecimal conversion; declared here, defined out of line.
AlphaNum(Hex hex); // NOLINT(runtime/explicit)
// Views the first `buf.size` characters of an AlphaNumBuffer without copying.
template <size_t size>
AlphaNum( // NOLINT(runtime/explicit)
const strings_internal::AlphaNumBuffer<size>& buf)
: piece_(&buf.data[0], buf.size) {}
// String-like constructors: `piece_` views the argument's characters in place.
AlphaNum(const char* c_str) : piece_(c_str) {} // NOLINT(runtime/explicit)
AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit)
template <typename Allocator>
AlphaNum( // NOLINT(runtime/explicit)
const std::basic_string<char, std::char_traits<char>, Allocator>& str)
: piece_(str) {}
// Use string literals ":" instead of character literals ':'.
AlphaNum(char c) = delete; // NOLINT(runtime/explicit)
// Not copyable: a copy's `piece_` could still point into the source object's
// `digits_` buffer.
AlphaNum(const AlphaNum&) = delete;
AlphaNum& operator=(const AlphaNum&) = delete;
// Accessors for the converted text: its length, a pointer to its first
// character, and the view itself.
absl::string_view::size_type size() const { return piece_.size(); }
const char* data() const { return piece_.data(); }
absl::string_view Piece() const { return piece_; }
// Normal enums are already handled by the integer formatters.
// This overload matches only scoped enums.
// It delegates to the constructor for the enum's underlying integer type.
template <typename T,
typename = typename std::enable_if<
std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>
AlphaNum(T e) // NOLINT(runtime/explicit)
: AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {}
private:
// View of the converted text; refers either to `digits_` or to caller data.
absl::string_view piece_;
// Scratch space that the numeric and Hex conversions format into.
char digits_[numbers_internal::kFastToBufferSize];
};
// -----------------------------------------------------------------------------
// StrCat()
// -----------------------------------------------------------------------------
//
// Merges given strings or numbers, using no delimiter(s).
//
// `StrCat()` is designed to be the fastest possible way to construct a std::string
// out of a mix of raw C strings, string_views, strings, bool values,
// and numeric values.
//
// Don't use `StrCat()` for user-visible strings. The localization process
// works poorly on strings built up out of fragments.
//
// For clarity and performance, don't use `StrCat()` when appending to a
// std::string. Use `StrAppend()` instead. In particular, avoid using any of these
// (anti-)patterns:
//
// str.append(StrCat(...))
// str += StrCat(...)
// str = StrCat(str, ...)
//
// The last case is the worst, with a potential to change a loop
// from a linear time operation with O(1) dynamic allocations into a
// quadratic time operation with O(n) dynamic allocations.
//
// See `StrAppend()` below for more information.
namespace strings_internal {
// Do not call directly - this is not part of the public API.
// These take the already-converted pieces as string_views and are what the
// variadic StrCat() and StrAppend() overloads forward to.
std::string CatPieces(std::initializer_list<absl::string_view> pieces);
void AppendPieces(std::string* dest,
std::initializer_list<absl::string_view> pieces);
} // namespace strings_internal
// Zero arguments: the result is the empty string.
ABSL_MUST_USE_RESULT inline std::string StrCat() {
  std::string empty;
  return empty;
}

// One argument: the result is a copy of the converted text of `a`.
ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a) {
  const char* const text = a.data();
  const absl::string_view::size_type length = a.size();
  return std::string(text, length);
}

// Two to four arguments: declared here, defined out of line.
ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b);
ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
                                        const AlphaNum& c);
ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
                                        const AlphaNum& c, const AlphaNum& d);
// Support 5 or more arguments
//
// The first five arguments are taken as `AlphaNum` directly; each remaining
// argument is converted to `AlphaNum` inside the call expression and its
// string_view is handed to CatPieces().
//
// NOTE: those string_views can refer to storage inside the temporary
// `AlphaNum` objects, so the conversions must stay in the same full-expression
// as the CatPieces() call. Do not hoist the braced list into a local variable.
template <typename... AV>
ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c, const AlphaNum& d,
const AlphaNum& e,
const AV&... args) {
return strings_internal::CatPieces(
{a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
static_cast<const AlphaNum&>(args).Piece()...});
}
// -----------------------------------------------------------------------------
// StrAppend()
// -----------------------------------------------------------------------------
//
// Appends a std::string or set of strings to an existing std::string, in a similar
// fashion to `StrCat()`.
//
// WARNING: `StrAppend(&str, a, b, c, ...)` requires that none of the
// a, b, c, parameters be a reference into str. For speed, `StrAppend()` does
// not try to check each of its input arguments to be sure that they are not
// a subset of the std::string being appended to. That is, while this will work:
//
// std::string s = "foo";
// s += s;
//
// This output is undefined:
//
// std::string s = "foo";
// StrAppend(&s, s);
//
// This output is undefined as well, since `absl::string_view` does not own its
// data:
//
// std::string s = "foobar";
// absl::string_view p = s;
// StrAppend(&s, p);
// Zero arguments to append: nothing to do.
inline void StrAppend(std::string*) {}
// One to four arguments: declared here, defined out of line. `dest` is the
// string appended to; the remaining arguments are appended in order.
void StrAppend(std::string* dest, const AlphaNum& a);
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b);
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c);
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c, const AlphaNum& d);
// Support 5 or more arguments
//
// The first five arguments are taken as `AlphaNum` directly; each remaining
// argument is converted to `AlphaNum` inside the call expression and its
// string_view is handed to AppendPieces().
//
// NOTE: those string_views can refer to storage inside the temporary
// `AlphaNum` objects, so the conversions must stay in the same full-expression
// as the AppendPieces() call. Do not hoist the braced list into a local
// variable.
template <typename... AV>
inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c, const AlphaNum& d, const AlphaNum& e,
const AV&... args) {
strings_internal::AppendPieces(
dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
static_cast<const AlphaNum&>(args).Piece()...});
}
// Helper function for the future StrCat default floating-point format, %.6g
// This is fast.
//
// Formats `d` with SixDigitsToBuffer() into a fixed-size AlphaNumBuffer and
// returns that buffer by value, so it can be passed straight to StrCat() or
// StrAppend().
inline strings_internal::AlphaNumBuffer<
    numbers_internal::kSixDigitsToBufferSize>
SixDigits(double d) {
  typedef strings_internal::AlphaNumBuffer<
      numbers_internal::kSixDigitsToBufferSize>
      Buffer;
  Buffer formatted;
  // SixDigitsToBuffer() returns the number of characters it produced.
  char* const storage = formatted.data.data();
  formatted.size = numbers_internal::SixDigitsToBuffer(d, storage);
  return formatted;
}
} // namespace absl
#endif // ABSL_STRINGS_STR_CAT_H_

View file

@ -0,0 +1,462 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit tests for all str_cat.h functions
#include "absl/strings/str_cat.h"
#include <cstdint>
#include <string>
#include "gtest/gtest.h"
#include "absl/strings/substitute.h"
namespace {
// Test absl::StrCat of ints and longs of various sizes and signedness. Each
// check concatenates two values and compares against the joined decimal text.
TEST(StrCat, Ints) {
  const short short_val = -1;  // NOLINT(runtime/int)
  const uint16_t ushort_val = 2;
  const int int_val = -3;
  const unsigned int uint_val = 4;
  const long long_val = -5;                  // NOLINT(runtime/int)
  const unsigned long ulong_val = 6;         // NOLINT(runtime/int)
  const long long llong_val = -7;            // NOLINT(runtime/int)
  const unsigned long long ullong_val = 8;   // NOLINT(runtime/int)
  const ptrdiff_t ptrdiff_val = -9;
  const size_t size_val = 10;
  const intptr_t intptr_val = -12;
  const uintptr_t uintptr_val = 13;

  std::string joined;
  joined = absl::StrCat(short_val, ushort_val);
  EXPECT_EQ(joined, "-12");
  joined = absl::StrCat(int_val, uint_val);
  EXPECT_EQ(joined, "-34");
  joined = absl::StrCat(long_val, ulong_val);
  EXPECT_EQ(joined, "-56");
  joined = absl::StrCat(llong_val, ullong_val);
  EXPECT_EQ(joined, "-78");
  joined = absl::StrCat(ptrdiff_val, size_val);
  EXPECT_EQ(joined, "-910");
  joined = absl::StrCat(ptrdiff_val, intptr_val);
  EXPECT_EQ(joined, "-9-12");
  joined = absl::StrCat(uintptr_val, 0);
  EXPECT_EQ(joined, "130");
}
// Test absl::StrCat of unscoped and scoped enums, including scoped enums with
// explicit underlying types whose values do not fit in "int".
TEST(StrCat, Enums) {
  // Unscoped enum: formatted as its integer value.
  enum SmallNumbers { One = 1, Ten = 10 } small = Ten;
  EXPECT_EQ("10", absl::StrCat(small));
  EXPECT_EQ("-5", absl::StrCat(SmallNumbers(-5)));

  // Scoped enum with the default underlying type.
  enum class Option { Boxers = 1, Briefs = -1 };
  EXPECT_EQ("-1", absl::StrCat(Option::Briefs));

  // Scoped enum with an unsigned 64-bit underlying type.
  enum class Airplane : uint64_t {
    Airbus = 1,
    Boeing = 1000,
    Canary = 10000000000  // too big for "int"
  };
  EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));

  // Scoped enum with a signed 32-bit underlying type.
  enum class TwoGig : int32_t {
    TwoToTheZero = 1,
    TwoToTheSixteenth = 1 << 16,
    TwoToTheThirtyFirst = INT32_MIN
  };
  EXPECT_EQ("65536", absl::StrCat(TwoGig::TwoToTheSixteenth));
  EXPECT_EQ("-2147483648", absl::StrCat(TwoGig::TwoToTheThirtyFirst));
  EXPECT_EQ("-1", absl::StrCat(static_cast<TwoGig>(-1)));

  // Scoped enum with an unsigned 32-bit underlying type.
  enum class FourGig : uint32_t {
    TwoToTheZero = 1,
    TwoToTheSixteenth = 1 << 16,
    TwoToTheThirtyFirst = 1U << 31  // too big for "int"
  };
  EXPECT_EQ("65536", absl::StrCat(FourGig::TwoToTheSixteenth));
  EXPECT_EQ("2147483648", absl::StrCat(FourGig::TwoToTheThirtyFirst));
  EXPECT_EQ("4294967295", absl::StrCat(static_cast<FourGig>(-1)));

  EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));
}
// Exercises absl::StrCat() with the common argument kinds: std::string,
// absl::string_view, C strings, bools, integers of several widths, and
// floating-point values wrapped in absl::SixDigits().
TEST(StrCat, Basics) {
std::string result;
// Fixtures: the same three words in each supported string representation,
// plus some 32-bit and 64-bit integers.
std::string strs[] = {
"Hello",
"Cruel",
"World"
};
std::string stdstrs[] = {
"std::Hello",
"std::Cruel",
"std::World"
};
absl::string_view pieces[] = {"Hello", "Cruel", "World"};
const char* c_strs[] = {
"Hello",
"Cruel",
"World"
};
int32_t i32s[] = {'H', 'C', 'W'};
uint64_t ui64s[] = {12345678910LL, 10987654321LL};
// No arguments yields the empty string.
EXPECT_EQ(absl::StrCat(), "");
// Bools are rendered as 0 / 1.
result = absl::StrCat(false, true, 2, 3);
EXPECT_EQ(result, "0123");
result = absl::StrCat(-1);
EXPECT_EQ(result, "-1");
result = absl::StrCat(absl::SixDigits(0.5));
EXPECT_EQ(result, "0.5");
// Mixed string representations.
result = absl::StrCat(strs[1], pieces[2]);
EXPECT_EQ(result, "CruelWorld");
result = absl::StrCat(stdstrs[1], " ", stdstrs[2]);
EXPECT_EQ(result, "std::Cruel std::World");
result = absl::StrCat(strs[0], ", ", pieces[2]);
EXPECT_EQ(result, "Hello, World");
result = absl::StrCat(strs[0], ", ", strs[1], " ", strs[2], "!");
EXPECT_EQ(result, "Hello, Cruel World!");
result = absl::StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]);
EXPECT_EQ(result, "Hello, Cruel World");
result = absl::StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
EXPECT_EQ(result, "Hello, Cruel World");
// int32_t values initialized from character literals print as numbers.
result = absl::StrCat("ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
EXPECT_EQ(result, "ASCII 72, 67 87!");
result = absl::StrCat(ui64s[0], ", ", ui64s[1], "!");
EXPECT_EQ(result, "12345678910, 10987654321!");
std::string one = "1"; // Actually, it's the size of this std::string that we want; a
// 64-bit build distinguishes between size_t and uint64_t,
// even though they're both unsigned 64-bit values.
// The pointer difference below reads `result` (still holding the previous
// value) before the assignment replaces it, and contributes the value 2.
result = absl::StrCat("And a ", one.size(), " and a ",
&result[2] - &result[0], " and a ", one, " 2 3 4", "!");
EXPECT_EQ(result, "And a 1 and a 2 and a 1 2 3 4!");
// result = absl::StrCat("Single chars won't compile", '!');
// result = absl::StrCat("Neither will nullptrs", nullptr);
result =
absl::StrCat("To output a char by ASCII/numeric value, use +: ", '!' + 0);
EXPECT_EQ(result, "To output a char by ASCII/numeric value, use +: 33");
// Floating-point values go through absl::SixDigits(); the expectations below
// pin the resulting text for a float, a slightly larger float and a double.
float f = 100000.5;
result = absl::StrCat("A hundred K and a half is ", absl::SixDigits(f));
EXPECT_EQ(result, "A hundred K and a half is 100000");
f = 100001.5;
result =
absl::StrCat("A hundred K and one and a half is ", absl::SixDigits(f));
EXPECT_EQ(result, "A hundred K and one and a half is 100002");
double d = 100000.5;
d *= d;
result =
absl::StrCat("A hundred K and a half squared is ", absl::SixDigits(d));
EXPECT_EQ(result, "A hundred K and a half squared is 1.00001e+10");
// Nine integer arguments of increasing digit count.
result = absl::StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888,
999999999);
EXPECT_EQ(result, "12333444455555666666777777788888888999999999");
}
// A minimal allocator that uses malloc().
//
// The allocator carries no state, so any two instances (of any value type)
// compare equal and memory obtained from one may be released through another.
template <typename T>
struct Mallocator {
  typedef T value_type;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef T* pointer;
  typedef const T* const_pointer;
  typedef T& reference;
  typedef const T& const_reference;

  // Largest element count whose byte size still fits in size_type.
  size_type max_size() const {
    return size_t(std::numeric_limits<size_type>::max()) / sizeof(value_type);
  }

  // Lets containers obtain the equivalent allocator for another element type.
  template <typename U>
  struct rebind {
    typedef Mallocator<U> other;
  };

  Mallocator() = default;

  // Converting constructor: the Allocator requirements expect an allocator to
  // be constructible from any rebound instance of itself, and some standard
  // library implementations rely on it. Intentionally implicit.
  template <typename U>
  Mallocator(const Mallocator<U>&) {}  // NOLINT(runtime/explicit)

  // Returns uninitialized storage for `n` objects of type T from malloc().
  T* allocate(size_t n) { return static_cast<T*>(std::malloc(n * sizeof(T))); }

  // Releases storage previously returned by allocate(); the count is unused.
  void deallocate(T* p, size_t) { std::free(p); }
};

// All Mallocator instances are interchangeable, so they always compare equal.
template <typename T, typename U>
bool operator==(const Mallocator<T>&, const Mallocator<U>&) {
  return true;
}

template <typename T, typename U>
bool operator!=(const Mallocator<T>&, const Mallocator<U>&) {
  return false;
}
// absl::StrCat() must accept strings that use a non-default allocator and
// still produce an ordinary std::string.
TEST(StrCat, CustomAllocator) {
  typedef std::basic_string<char, std::char_traits<char>, Mallocator<char>>
      MallocString;

  const MallocString first("PARACHUTE OFF A BLIMP INTO MOSCONE!!");
  const MallocString second("Read this book about coffee tables");

  const std::string joined = absl::StrCat(first, second);
  EXPECT_EQ(joined,
            "PARACHUTE OFF A BLIMP INTO MOSCONE!!"
            "Read this book about coffee tables");
}
// Calls absl::StrCat() with every argument count from 10 through 26, then
// with a much longer argument list, checking the concatenation each time.
TEST(StrCat, MaxArgs) {
std::string result;
// Test 10 up to 26 arguments. (The final case below shows the argument count
// is no longer capped.)
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a");
EXPECT_EQ(result, "123456789a");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b");
EXPECT_EQ(result, "123456789ab");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c");
EXPECT_EQ(result, "123456789abc");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d");
EXPECT_EQ(result, "123456789abcd");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e");
EXPECT_EQ(result, "123456789abcde");
result =
absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f");
EXPECT_EQ(result, "123456789abcdef");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g");
EXPECT_EQ(result, "123456789abcdefg");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h");
EXPECT_EQ(result, "123456789abcdefgh");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i");
EXPECT_EQ(result, "123456789abcdefghi");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j");
EXPECT_EQ(result, "123456789abcdefghij");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k");
EXPECT_EQ(result, "123456789abcdefghijk");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l");
EXPECT_EQ(result, "123456789abcdefghijkl");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m");
EXPECT_EQ(result, "123456789abcdefghijklm");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n");
EXPECT_EQ(result, "123456789abcdefghijklmn");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n", "o");
EXPECT_EQ(result, "123456789abcdefghijklmno");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n", "o", "p");
EXPECT_EQ(result, "123456789abcdefghijklmnop");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q");
EXPECT_EQ(result, "123456789abcdefghijklmnopq");
// No limit thanks to C++11's variadic templates
result = absl::StrCat(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
"x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L",
"M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z");
EXPECT_EQ(result,
"12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
}
// Exercises absl::StrAppend() with the same argument kinds as the StrCat
// basics test. `result` keeps growing across the whole test; before each call
// its size is saved in `old_size` so that only the newly appended suffix is
// compared.
TEST(StrAppend, Basics) {
std::string result = "existing text";
// Fixtures: the same three words in each supported string representation,
// plus some 32-bit and 64-bit integers.
std::string strs[] = {
"Hello",
"Cruel",
"World"
};
absl::string_view pieces[] = {"Hello", "Cruel", "World"};
const char* c_strs[] = {
"Hello",
"Cruel",
"World"
};
int32_t i32s[] = {'H', 'C', 'W'};
uint64_t ui64s[] = {12345678910LL, 10987654321LL};
// Appending nothing leaves the destination unchanged.
std::string::size_type old_size = result.size();
absl::StrAppend(&result);
EXPECT_EQ(result.size(), old_size);
old_size = result.size();
absl::StrAppend(&result, strs[0]);
EXPECT_EQ(result.substr(old_size), "Hello");
old_size = result.size();
absl::StrAppend(&result, strs[1], pieces[2]);
EXPECT_EQ(result.substr(old_size), "CruelWorld");
old_size = result.size();
absl::StrAppend(&result, strs[0], ", ", pieces[2]);
EXPECT_EQ(result.substr(old_size), "Hello, World");
old_size = result.size();
absl::StrAppend(&result, strs[0], ", ", strs[1], " ", strs[2], "!");
EXPECT_EQ(result.substr(old_size), "Hello, Cruel World!");
old_size = result.size();
absl::StrAppend(&result, pieces[0], ", ", pieces[1], " ", pieces[2]);
EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
old_size = result.size();
absl::StrAppend(&result, c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
old_size = result.size();
// int32_t values initialized from character literals print as numbers.
absl::StrAppend(&result, "ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
EXPECT_EQ(result.substr(old_size), "ASCII 72, 67 87!");
old_size = result.size();
absl::StrAppend(&result, ui64s[0], ", ", ui64s[1], "!");
EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!");
std::string one = "1"; // Actually, it's the size of this std::string that we want; a
// 64-bit build distinguishes between size_t and uint64_t,
// even though they're both unsigned 64-bit values.
old_size = result.size();
// The pointer difference is a plain integer (2) computed from `result`'s
// buffer; it is passed by value, not as a view into the destination.
absl::StrAppend(&result, "And a ", one.size(), " and a ",
&result[2] - &result[0], " and a ", one, " 2 3 4", "!");
EXPECT_EQ(result.substr(old_size), "And a 1 and a 2 and a 1 2 3 4!");
// result = absl::StrCat("Single chars won't compile", '!');
// result = absl::StrCat("Neither will nullptrs", nullptr);
old_size = result.size();
absl::StrAppend(&result,
"To output a char by ASCII/numeric value, use +: ", '!' + 0);
EXPECT_EQ(result.substr(old_size),
"To output a char by ASCII/numeric value, use +: 33");
// Test 9 arguments, the old maximum
old_size = result.size();
absl::StrAppend(&result, 1, 22, 333, 4444, 55555, 666666, 7777777, 88888888,
9);
EXPECT_EQ(result.substr(old_size), "1223334444555556666667777777888888889");
// No limit thanks to C++11's variadic templates
old_size = result.size();
absl::StrAppend(
&result, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, //
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", //
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", //
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", //
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", //
"No limit thanks to C++11's variadic templates");
EXPECT_EQ(result.substr(old_size),
"12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
"No limit thanks to C++11's variadic templates");
}
#ifdef GTEST_HAS_DEATH_TEST
// Appending a piece that aliases the destination string is expected to die
// with an assertion failure in debug builds.
TEST(StrAppend, Death) {
  std::string s = "self";
  // The message reads "assertion" on Linux, "Assertion" on Mac and
  // "Assertion ... failed" on ChromiumOS, so the leading letter is left out
  // of the pattern.
  const char* const kAssertionPattern = "ssertion.*failed";
  EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s.c_str() + 1), kAssertionPattern);
  EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s), kAssertionPattern);
}
#endif  // GTEST_HAS_DEATH_TEST
// Appending an empty string to itself must succeed and leave it empty.
TEST(StrAppend, EmptyString) {
  std::string empty;
  absl::StrAppend(&empty, empty);
  EXPECT_EQ(empty, "");
}
// Compares absl::Hex() output for `v` against snprintf() using the supplied
// printf formats: once without padding, then for every PadSpec value from
// kZeroPad2 through kZeroPad16 and from kSpacePad2 through kSpacePad16.
// The padded formats take the field width as a `*` argument ahead of `v`.
template <typename IntType>
void CheckHex(IntType v, const char* nopad_format, const char* zeropad_format,
              const char* spacepad_format) {
  char expected[256];

  const std::string unpadded = absl::StrCat(absl::Hex(v, absl::kNoPad));
  snprintf(expected, sizeof(expected), nopad_format, v);
  EXPECT_EQ(expected, unpadded) << " decimal value " << v;

  for (int spec = absl::kZeroPad2; spec <= absl::kZeroPad16; ++spec) {
    // Field width that corresponds to this pad spec (kZeroPad2 maps to 2).
    const int width = spec - absl::kZeroPad2 + 2;
    const std::string zero_padded =
        absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec)));
    snprintf(expected, sizeof(expected), zeropad_format, width, v);
    EXPECT_EQ(expected, zero_padded) << " decimal value " << v;
  }

  for (int spec = absl::kSpacePad2; spec <= absl::kSpacePad16; ++spec) {
    // Field width that corresponds to this pad spec (kSpacePad2 maps to 2).
    const int width = spec - absl::kSpacePad2 + 2;
    const std::string space_padded =
        absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec)));
    snprintf(expected, sizeof(expected), spacepad_format, width, v);
    EXPECT_EQ(expected, space_padded) << " decimal value " << v;
  }
}
// Runs CheckHex() on a 64-bit value using the `ll` printf length modifier.
// The value is converted to unsigned long long first so that the argument
// type matches the format specifiers.
void CheckHex64(uint64_t v) {
  const auto as_ull = static_cast<unsigned long long>(v);  // NOLINT(runtime/int)
  CheckHex(as_ull, "%llx", "%0*llx", "%*llx");
}

// Runs CheckHex() on a 32-bit value (signed or unsigned) using the plain
// `x` printf conversion.
template <typename Int32Type>
void CheckHex32(Int32Type v) {
  const char* const kNoPad = "%x";
  const char* const kZeroPad = "%0*x";
  const char* const kSpacePad = "%*x";
  CheckHex(v, kNoPad, kZeroPad, kSpacePad);
}
// Cross-checks absl::Hex() against snprintf() for a sweep of small values and
// a few hand-picked ones, then pins the output for negative 8- and 16-bit
// integers.
void TestFastPrints() {
  // Sweep 0..9999 as a 64-bit value, as unsigned and signed 32-bit values,
  // and as the negated signed 32-bit value.
  constexpr int kSweepEnd = 10000;
  for (int value = 0; value < kSweepEnd; ++value) {
    CheckHex64(value);
    CheckHex32(static_cast<uint32_t>(value));
    CheckHex32(value);
    CheckHex32(-value);
  }

  // Values with a distinct digit in every hex position.
  CheckHex64(uint64_t{0x123456789abcdef0});
  CheckHex32(0x12345678U);

  // Narrow negative values print at their own width, not sign-extended.
  const int8_t minus_one_8bit = -1;
  EXPECT_EQ("ff", absl::StrCat(absl::Hex(minus_one_8bit)));

  const int16_t minus_one_16bit = -1;
  EXPECT_EQ("ffff", absl::StrCat(absl::Hex(minus_one_16bit)));
}
// Test-framework entry point that runs the TestFastPrints() hex-formatting
// checks.
TEST(Numbers, TestFunctionsMovedOverFromNumbersMain) {
TestFastPrints();
}
} // namespace

288
absl/strings/str_join.h Normal file
View file

@ -0,0 +1,288 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_join.h
// -----------------------------------------------------------------------------
//
// This header file contains functions for joining a range of elements and
// returning the result as a std::string. StrJoin operations are specified by passing
// a range, a separator std::string to use between the elements joined, and an
// optional Formatter responsible for converting each argument in the range to a
// std::string. If omitted, a default `AlphaNumFormatter()` is called on the elements
// to be joined, using the same formatting that `absl::StrCat()` uses. This
// package defines a number of default formatters, and you can define your own
// implementations.
//
// Ranges are specified by passing a container with `std::begin()` and
// `std::end()` iterators, container-specific `begin()` and `end()` iterators, a
// brace-initialized `std::initializer_list`, or a `std::tuple` of heterogeneous
// objects. The separator std::string is specified as an `absl::string_view`.
//
// Because the default formatter uses the `absl::AlphaNum` class,
// `absl::StrJoin()`, like `absl::StrCat()`, will work out-of-the-box on
// collections of strings, ints, floats, doubles, etc.
//
// Example:
//
// std::vector<std::string> v = {"foo", "bar", "baz"};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("foo-bar-baz", s);
//
// See comments on the `absl::StrJoin()` function for more examples.
#ifndef ABSL_STRINGS_STR_JOIN_H_
#define ABSL_STRINGS_STR_JOIN_H_
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
#include "absl/base/macros.h"
#include "absl/strings/internal/str_join_internal.h"
#include "absl/strings/string_view.h"
namespace absl {
// -----------------------------------------------------------------------------
// Concept: Formatter
// -----------------------------------------------------------------------------
//
// A Formatter is a function object that is responsible for formatting its
// argument as a std::string and appending it to a given output std::string. Formatters
// may be implemented as function objects, lambdas, or normal functions. You may
// provide your own Formatter to enable `absl::StrJoin()` to work with arbitrary
// types.
//
// The following is an example of a custom Formatter that simply uses
// `std::to_string()` to format an integer as a std::string.
//
// struct MyFormatter {
// void operator()(std::string* out, int i) const {
// out->append(std::to_string(i));
// }
// };
//
// You would use the above formatter by passing an instance of it as the final
// argument to `absl::StrJoin()`:
//
// std::vector<int> v = {1, 2, 3, 4};
// std::string s = absl::StrJoin(v, "-", MyFormatter());
// EXPECT_EQ("1-2-3-4", s);
//
// The following standard formatters are provided within this file:
//
// - `AlphaNumFormatter()` (the default)
// - `StreamFormatter()`
// - `PairFormatter()`
// - `DereferenceFormatter()`
// AlphaNumFormatter()
//
// Default formatter used if none is specified. Uses `absl::AlphaNum` to convert
// numeric arguments to strings.
//
// Returns a newly constructed `strings_internal::AlphaNumFormatterImpl` by
// value.
inline strings_internal::AlphaNumFormatterImpl AlphaNumFormatter() {
return strings_internal::AlphaNumFormatterImpl();
}
// StreamFormatter()
//
// Formats its argument using the << operator.
//
// Returns a newly constructed `strings_internal::StreamFormatterImpl` by
// value.
inline strings_internal::StreamFormatterImpl StreamFormatter() {
return strings_internal::StreamFormatterImpl();
}
// Function Template: PairFormatter(Formatter, absl::string_view, Formatter)
//
// Formats a `std::pair` by putting a given separator between the pair's
// `.first` and `.second` members. This formatter allows you to specify
// custom Formatters for both the first and second member of each pair.
//
// `f1` and `f2` are taken by value and moved into the returned
// `strings_internal::PairFormatterImpl`; `sep` is passed through as given.
template <typename FirstFormatter, typename SecondFormatter>
inline strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>
PairFormatter(FirstFormatter f1, absl::string_view sep, SecondFormatter f2) {
return strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>(
std::move(f1), sep, std::move(f2));
}
// Function overload of PairFormatter() for using a default
// `AlphaNumFormatter()` for each Formatter in the pair.
//
// Equivalent to `PairFormatter(AlphaNumFormatter(), sep, AlphaNumFormatter())`.
inline strings_internal::PairFormatterImpl<
strings_internal::AlphaNumFormatterImpl,
strings_internal::AlphaNumFormatterImpl>
PairFormatter(absl::string_view sep) {
return PairFormatter(AlphaNumFormatter(), sep, AlphaNumFormatter());
}
// Function Template: DereferenceFormatter(Formatter)
//
// Formats its argument by dereferencing it and then applying the given
// formatter. This formatter is useful for formatting a container of
// pointer-to-T. This pattern often shows up when joining repeated fields in
// protocol buffers.
//
// `f` is a forwarding reference and is forwarded into the returned
// `strings_internal::DereferenceFormatterImpl<Formatter>`.
// NOTE(review): when called with an lvalue, `Formatter` deduces to a
// reference type; confirm how DereferenceFormatterImpl stores it before
// keeping the result beyond the lifetime of `f`.
template <typename Formatter>
strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter(
Formatter&& f) {
return strings_internal::DereferenceFormatterImpl<Formatter>(
std::forward<Formatter>(f));
}
// Function overload of `DereferenceFormatter()` for using a default
// `AlphaNumFormatter()`.
//
// Returns a `DereferenceFormatterImpl` constructed from a fresh
// `AlphaNumFormatter()`.
inline strings_internal::DereferenceFormatterImpl<
strings_internal::AlphaNumFormatterImpl>
DereferenceFormatter() {
return strings_internal::DereferenceFormatterImpl<
strings_internal::AlphaNumFormatterImpl>(AlphaNumFormatter());
}
// -----------------------------------------------------------------------------
// StrJoin()
// -----------------------------------------------------------------------------
//
// Joins a range of elements and returns the result as a std::string.
// `absl::StrJoin()` takes a range, a separator std::string to use between the
// elements joined, and an optional Formatter responsible for converting each
// argument in the range to a std::string.
//
// If omitted, the default `AlphaNumFormatter()` is called on the elements to be
// joined.
//
// Example 1:
// // Joins a collection of strings. This pattern also works with a collection
// // of `absl::string_view` or even `const char*`.
// std::vector<std::string> v = {"foo", "bar", "baz"};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("foo-bar-baz", s);
//
// Example 2:
// // Joins the values in the given `std::initializer_list<>` specified using
// // brace initialization. This pattern also works with an initializer_list
// // of ints or `absl::string_view` -- any `AlphaNum`-compatible type.
// std::string s = absl::StrJoin({"foo", "bar", "baz"}, "-");
// EXPECT_EQ("foo-bar-baz", s);
//
// Example 3:
// // Joins a collection of ints. This pattern also works with floats,
// // doubles, int64s -- any `StrCat()`-compatible type.
// std::vector<int> v = {1, 2, 3, -4};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("1-2-3--4", s);
//
// Example 4:
// // Joins a collection of pointer-to-int. By default, pointers are
// // dereferenced and the pointee is formatted using the default format for
// // that type; such dereferencing occurs for all levels of indirection, so
// // this pattern works just as well for `std::vector<int**>` as for
// // `std::vector<int*>`.
// int x = 1, y = 2, z = 3;
// std::vector<int*> v = {&x, &y, &z};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("1-2-3", s);
//
// Example 5:
// // Dereferencing of `std::unique_ptr<>` is also supported:
// std::vector<std::unique_ptr<int>> v;
// v.emplace_back(new int(1));
// v.emplace_back(new int(2));
// v.emplace_back(new int(3));
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("1-2-3", s);
//
// Example 6:
// // Joins a `std::map`, with each key-value pair separated by an equals
// // sign. This pattern would also work with, say, a
// // `std::vector<std::pair<>>`.
// std::map<std::string, int> m = {
// std::make_pair("a", 1),
// std::make_pair("b", 2),
// std::make_pair("c", 3)};
// std::string s = absl::StrJoin(m, ",", absl::PairFormatter("="));
// EXPECT_EQ("a=1,b=2,c=3", s);
//
// Example 7:
// // These examples show how `absl::StrJoin()` handles a few common edge
// // cases:
// std::vector<std::string> v_empty;
// EXPECT_EQ("", absl::StrJoin(v_empty, "-"));
//
// std::vector<std::string> v_one_item = {"foo"};
// EXPECT_EQ("foo", absl::StrJoin(v_one_item, "-"));
//
// std::vector<std::string> v_empty_string = {""};
// EXPECT_EQ("", absl::StrJoin(v_empty_string, "-"));
//
// std::vector<std::string> v_one_item_empty_string = {"a", ""};
// EXPECT_EQ("a-", absl::StrJoin(v_one_item_empty_string, "-"));
//
// std::vector<std::string> v_two_empty_string = {"", ""};
// EXPECT_EQ("-", absl::StrJoin(v_two_empty_string, "-"));
//
// Example 8:
// // Joins a `std::tuple<T...>` of heterogeneous types, converting each to
// // a std::string using the `absl::AlphaNum` class.
// std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-");
// EXPECT_EQ("123-abc-0.456", s);
// Overload: iterator pair with an explicit Formatter. Delegates to
// `strings_internal::JoinAlgorithm()`; `fmt` is passed on as an lvalue.
template <typename Iterator, typename Formatter>
std::string StrJoin(Iterator start, Iterator end, absl::string_view sep,
Formatter&& fmt) {
return strings_internal::JoinAlgorithm(start, end, sep, fmt);
}
// Overload: range (anything `strings_internal::JoinRange()` accepts) with an
// explicit Formatter.
template <typename Range, typename Formatter>
std::string StrJoin(const Range& range, absl::string_view separator,
Formatter&& fmt) {
return strings_internal::JoinRange(range, separator, fmt);
}
// Overload: `std::initializer_list` with an explicit Formatter, so that a
// braced list can be passed directly as the first argument.
template <typename T, typename Formatter>
std::string StrJoin(std::initializer_list<T> il, absl::string_view separator,
Formatter&& fmt) {
return strings_internal::JoinRange(il, separator, fmt);
}
// Overload: `std::tuple` with an explicit Formatter. Delegates to the tuple
// form of `strings_internal::JoinAlgorithm()`.
template <typename... T, typename Formatter>
std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator,
Formatter&& fmt) {
return strings_internal::JoinAlgorithm(value, separator, fmt);
}
// Overload: iterator pair, no Formatter given. The choice of formatter is
// left to `strings_internal::JoinRange()`.
template <typename Iterator>
std::string StrJoin(Iterator start, Iterator end, absl::string_view separator) {
return strings_internal::JoinRange(start, end, separator);
}
// Overload: range, no Formatter given.
template <typename Range>
std::string StrJoin(const Range& range, absl::string_view separator) {
return strings_internal::JoinRange(range, separator);
}
// Overload: `std::initializer_list`, no Formatter given.
template <typename T>
std::string StrJoin(std::initializer_list<T> il, absl::string_view separator) {
return strings_internal::JoinRange(il, separator);
}
// Overload: `std::tuple`, no Formatter given. Every element is formatted with
// `AlphaNumFormatter()`.
template <typename... T>
std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator) {
return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter());
}
} // namespace absl
#endif // ABSL_STRINGS_STR_JOIN_H_

View file

@ -0,0 +1,474 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit tests for all str_join.h functions
#include "absl/strings/str_join.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <initializer_list>
#include <map>
#include <ostream>
#include <random>
#include <set>
#include <tuple>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
namespace {
// Walks through the documented absl::StrJoin() usage patterns, one scoped
// example per input kind, followed by a handful of edge cases. Every example
// relies on the default formatting except the std::map one, which uses
// absl::PairFormatter.
TEST(StrJoin, APIExamples) {
{
// Collection of strings
std::vector<std::string> v = {"foo", "bar", "baz"};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of absl::string_view
std::vector<absl::string_view> v = {"foo", "bar", "baz"};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of const char*
std::vector<const char*> v = {"foo", "bar", "baz"};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of non-const char*
// (The pointers refer to the buffers of the local strings a, b and c.)
std::string a = "foo", b = "bar", c = "baz";
std::vector<char*> v = {&a[0], &b[0], &c[0]};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of ints
std::vector<int> v = {1, 2, 3, -4};
EXPECT_EQ("1-2-3--4", absl::StrJoin(v, "-"));
}
{
// Literals passed as a std::initializer_list
std::string s = absl::StrJoin({"a", "b", "c"}, "-");
EXPECT_EQ("a-b-c", s);
}
{
// Join a std::tuple<T...>.
std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-");
EXPECT_EQ("123-abc-0.456", s);
}
{
// Collection of unique_ptrs
// (The pointees, not the pointers, appear in the output.)
std::vector<std::unique_ptr<int>> v;
v.emplace_back(new int(1));
v.emplace_back(new int(2));
v.emplace_back(new int(3));
EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
}
{
// Array of ints
// (Joined through the iterator-pair overload using raw pointers.)
const int a[] = {1, 2, 3, -4};
EXPECT_EQ("1-2-3--4", absl::StrJoin(a, a + ABSL_ARRAYSIZE(a), "-"));
}
{
// Collection of pointers
int x = 1, y = 2, z = 3;
std::vector<int*> v = {&x, &y, &z};
EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
}
{
// Collection of pointers to pointers
int x = 1, y = 2, z = 3;
int *px = &x, *py = &y, *pz = &z;
std::vector<int**> v = {&px, &py, &pz};
EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
}
{
// Collection of pointers to std::string
std::string a("a"), b("b");
std::vector<std::string*> v = {&a, &b};
EXPECT_EQ("a-b", absl::StrJoin(v, "-"));
}
{
// A std::map, which is a collection of std::pair<>s.
std::map<std::string, int> m = { {"a", 1}, {"b", 2}, {"c", 3} };
EXPECT_EQ("a=1,b=2,c=3", absl::StrJoin(m, ",", absl::PairFormatter("=")));
}
{
// Shows absl::StrSplit and absl::StrJoin working together. This example is
// equivalent to s/=/-/g.
const std::string s = "a=b=c=d";
EXPECT_EQ("a-b-c-d", absl::StrJoin(absl::StrSplit(s, "="), "-"));
}
//
// A few examples of edge cases
//
{
// Empty range yields an empty std::string.
std::vector<std::string> v;
EXPECT_EQ("", absl::StrJoin(v, "-"));
}
{
// A range of 1 element gives a std::string with that element but no separator.
std::vector<std::string> v = {"foo"};
EXPECT_EQ("foo", absl::StrJoin(v, "-"));
}
{
// A range with a single empty std::string element
std::vector<std::string> v = {""};
EXPECT_EQ("", absl::StrJoin(v, "-"));
}
{
// A range with 2 elements, one of which is an empty std::string
std::vector<std::string> v = {"a", ""};
EXPECT_EQ("a-", absl::StrJoin(v, "-"));
}
{
// A range with 2 empty elements.
std::vector<std::string> v = {"", ""};
EXPECT_EQ("-", absl::StrJoin(v, "-"));
}
{
// A std::vector of bool.
// (Elements are rendered as 1 / 0.)
std::vector<bool> v = {true, false, true};
EXPECT_EQ("1-0-1", absl::StrJoin(v, "-"));
}
}
// Checks absl::StrJoin() with user-supplied formatters: a lambda, a functor
// whose copy constructor is deleted, and a functor whose call operator is
// overloaded on constness.
TEST(StrJoin, CustomFormatter) {
std::vector<std::string> v{"One", "Two", "Three"};
{
// A lambda used as the formatter; with an empty separator the wrapped
// elements are simply concatenated.
std::string joined = absl::StrJoin(v, "", [](std::string* out, const std::string& in) {
absl::StrAppend(out, "(", in, ")");
});
EXPECT_EQ("(One)(Two)(Three)", joined);
}
{
// A formatter with a deleted copy constructor, passed as a temporary. This
// only compiles if StrJoin() never copies the formatter.
class ImmovableFormatter {
public:
void operator()(std::string* out, const std::string& in) {
absl::StrAppend(out, "(", in, ")");
}
ImmovableFormatter() {}
ImmovableFormatter(const ImmovableFormatter&) = delete;
};
EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", ImmovableFormatter()));
}
{
// The non-const operator() wraps in parentheses and the const one in
// brackets, so the output reveals which overload StrJoin() invoked.
class OverloadedFormatter {
public:
void operator()(std::string* out, const std::string& in) {
absl::StrAppend(out, "(", in, ")");
}
void operator()(std::string* out, const std::string& in) const {
absl::StrAppend(out, "[", in, "]");
}
};
// A temporary formatter selects the non-const overload...
EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", OverloadedFormatter()));
// ...while a const formatter object selects the const overload.
const OverloadedFormatter fmt = {};
EXPECT_EQ("[One][Two][Three]", absl::StrJoin(v, "", fmt));
}
}
//
// Tests the Formatters
//
// Feeds one value of each common type to absl::AlphaNumFormatter() and checks
// the accumulated output.
TEST(AlphaNumFormatter, FormatterAPI) {
  // Not an exhaustive test; the StrCat unit tests cover the full set of types
  // that AlphaNum can convert.
  auto format = absl::AlphaNumFormatter();
  std::string out;

  format(&out, "Testing: ");
  format(&out, 1);                         // int
  format(&out, int16_t{2});                // int16_t
  format(&out, int64_t{3});                // int64_t
  format(&out, 4.0f);                      // float
  format(&out, 5.0);                       // double
  format(&out, 6u);                        // unsigned
  format(&out, size_t{7});                 // size_t
  format(&out, absl::string_view(" OK"));  // string_view

  EXPECT_EQ("Testing: 1234567 OK", out);
}
// std::vector<bool> hands out proxy objects rather than real bool references.
// Make sure absl::AlphaNumFormatter() copes with what cbegin(), begin() and
// operator[] return, for people who use std::vector<bool> even though they
// are not memory-constrained.
TEST(AlphaNumFormatter, VectorOfBool) {
  std::vector<bool> bits = {true, false, true};
  auto format = absl::AlphaNumFormatter();
  std::string out;

  format(&out, *bits.cbegin());  // first element via const iterator
  format(&out, *bits.begin());   // first element via mutable iterator
  format(&out, bits[1]);         // second element via subscript

  EXPECT_EQ("110", out);
}
// An absl::AlphaNum can itself be handed to absl::AlphaNumFormatter().
TEST(AlphaNumFormatter, AlphaNum) {
  std::string out;
  auto format = absl::AlphaNumFormatter();
  format(&out, absl::AlphaNum("hello"));
  EXPECT_EQ("hello", out);
}
// A simple aggregate that can only be formatted through operator<<; used to
// exercise stream-based formatting.
struct StreamableType {
  std::string contents;
};

// Writes `t` to `os` as "Streamable:" followed by its contents, and returns
// the stream so that insertions can be chained.
inline std::ostream& operator<<(std::ostream& os, const StreamableType& t) {
  return os << "Streamable:" << t.contents;
}
// Feeds one value of each common type, plus a user-defined streamable type,
// to absl::StreamFormatter() and checks the accumulated output.
TEST(StreamFormatter, FormatterAPI) {
  auto format = absl::StreamFormatter();
  std::string out;

  format(&out, "Testing: ");
  format(&out, 1);                          // int
  format(&out, int16_t{2});                 // int16_t
  format(&out, int64_t{3});                 // int64_t
  format(&out, 4.0f);                       // float
  format(&out, 5.0);                        // double
  format(&out, 6u);                         // unsigned
  format(&out, size_t{7});                  // size_t
  format(&out, absl::string_view(" OK "));  // string_view

  // A type whose only formatting support is its operator<<.
  StreamableType streamable{"object"};
  format(&out, streamable);

  EXPECT_EQ("Testing: 1234567 OK Streamable:object", out);
}
// Test-only formatter: appends its argument to the output string surrounded
// by parentheses. Several of the tests below use it as a recognizable
// non-default formatter.
struct TestingParenFormatter {
  template <typename T>
  void operator()(std::string* out, const T& value) {
    absl::StrAppend(out, "(", value, ")");
  }
};
// Calls the formatters returned by both absl::PairFormatter() overloads
// directly (without StrJoin) on pairs of strings and pairs of ints.
TEST(PairFormatter, FormatterAPI) {
{
// Tests default PairFormatter(sep) that uses AlphaNumFormatter for the
// 'first' and 'second' members.
// The formatter object is const here, so it must be callable when const.
const auto f = absl::PairFormatter("=");
std::string s;
f(&s, std::make_pair("a", "b"));
f(&s, std::make_pair(1, 2));
EXPECT_EQ("a=b1=2", s);
}
{
// Tests using a custom formatter for the 'first' and 'second' members.
auto f = absl::PairFormatter(TestingParenFormatter(), "=",
TestingParenFormatter());
std::string s;
f(&s, std::make_pair("a", "b"));
f(&s, std::make_pair(1, 2));
EXPECT_EQ("(a)=(b)(1)=(2)", s);
}
}
// Calls DereferenceFormatterImpl instances directly (without StrJoin) on raw
// pointers and on std::unique_ptr, wrapping the default and custom formatters.
TEST(DereferenceFormatter, FormatterAPI) {
{
// Tests wrapping the default AlphaNumFormatter.
// The formatter object is const here, so it must be callable when const.
const absl::strings_internal::DereferenceFormatterImpl<
absl::strings_internal::AlphaNumFormatterImpl>
f;
int x = 1, y = 2, z = 3;
std::string s;
f(&s, &x);
f(&s, &y);
f(&s, &z);
EXPECT_EQ("123", s);
}
{
// Tests wrapping std::string's default formatter.
absl::strings_internal::DereferenceFormatterImpl<
absl::strings_internal::DefaultFormatter<std::string>::Type>
f;
std::string x = "x";
std::string y = "y";
std::string z = "z";
std::string s;
f(&s, &x);
f(&s, &y);
f(&s, &z);
EXPECT_EQ(s, "xyz");
}
{
// Tests wrapping a custom formatter.
auto f = absl::DereferenceFormatter(TestingParenFormatter());
int x = 1, y = 2, z = 3;
std::string s;
f(&s, &x);
f(&s, &y);
f(&s, &z);
EXPECT_EQ("(1)(2)(3)", s);
}
{
// Tests dereferencing std::unique_ptr rather than raw pointers.
absl::strings_internal::DereferenceFormatterImpl<
absl::strings_internal::AlphaNumFormatterImpl>
f;
auto x = std::unique_ptr<int>(new int(1));
auto y = std::unique_ptr<int>(new int(2));
auto z = std::unique_ptr<int>(new int(3));
std::string s;
f(&s, x);
f(&s, y);
f(&s, z);
EXPECT_EQ("123", s);
}
}
//
// Smoke-tests each of the four public StrJoin overloads (iterator pair or
// range, with or without an explicit formatter). The join semantics themselves
// are covered by the APIExamples test.
//
TEST(StrJoin, PublicAPIOverloads) {
  std::vector<std::string> parts = {"a", "b", "c"};

  // Iterator pair, explicit formatter.
  EXPECT_EQ("a-b-c", absl::StrJoin(parts.begin(), parts.end(), "-",
                                   absl::AlphaNumFormatter()));
  // Whole range, explicit formatter.
  EXPECT_EQ("a-b-c", absl::StrJoin(parts, "-", absl::AlphaNumFormatter()));
  // Iterator pair, default formatter.
  EXPECT_EQ("a-b-c", absl::StrJoin(parts.begin(), parts.end(), "-"));
  // Whole range, default formatter.
  EXPECT_EQ("a-b-c", absl::StrJoin(parts, "-"));
}
// A built-in array of string_views is accepted as a range.
TEST(StrJoin, Array) {
  const absl::string_view pieces[] = {"a", "b", "c"};
  EXPECT_EQ("a-b-c", absl::StrJoin(pieces, "-"));
}
// Covers std::initializer_list inputs: braced lists passed inline, deduced via
// `auto`, and spelled out with several element types, with and without a
// custom formatter.
TEST(StrJoin, InitializerList) {
  {
    // Braced list of string literals passed inline.
    EXPECT_EQ("a-b-c", absl::StrJoin({"a", "b", "c"}, "-"));
  }
  {
    // Element type deduced by `auto`.
    auto items = {"a", "b", "c"};
    EXPECT_EQ("a-b-c", absl::StrJoin(items, "-"));
  }
  {
    std::initializer_list<const char*> items = {"a", "b", "c"};
    EXPECT_EQ("a-b-c", absl::StrJoin(items, "-"));
  }
  {
    std::initializer_list<std::string> items = {"a", "b", "c"};
    EXPECT_EQ("a-b-c", absl::StrJoin(items, "-"));
  }
  {
    std::initializer_list<absl::string_view> items = {"a", "b", "c"};
    EXPECT_EQ("a-b-c", absl::StrJoin(items, "-"));
  }
  {
    // initializer_list of strings with a non-default formatter.
    auto items = {"a", "b", "c"};
    TestingParenFormatter paren;
    EXPECT_EQ("(a)-(b)-(c)", absl::StrJoin(items, "-", paren));
  }
  {
    // Braced list of ints passed inline.
    EXPECT_EQ("1-2-3", absl::StrJoin({1, 2, 3}, "-"));
  }
  {
    // initializer_list of ints with a non-default formatter.
    auto items = {1, 2, 3};
    TestingParenFormatter paren;
    EXPECT_EQ("(1)-(2)-(3)", absl::StrJoin(items, "-", paren));
  }
}
// Joins heterogeneous std::tuple values, first with the default formatting and
// then with a formatter that provides one overload per element type.
TEST(StrJoin, Tuple) {
// Empty and single-element tuples: no separator is emitted.
EXPECT_EQ("", absl::StrJoin(std::make_tuple(), "-"));
EXPECT_EQ("hello", absl::StrJoin(std::make_tuple("hello"), "-"));
int x(10);
std::string y("hello");
double z(3.14);
EXPECT_EQ("10-hello-3.14", absl::StrJoin(std::make_tuple(x, y, z), "-"));
// Same result, but std::cref makes the tuple hold a reference to y instead of
// a copy of the string.
EXPECT_EQ("10-hello-3.14",
absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-"));
// Formatter with a distinct overload (and distinct printf format) per element
// type, so the expected output shows which overload handled each element.
struct TestFormatter {
char buffer[128];
// int: "0x"-prefixed hex, zero-padded to 8 digits (10 -> "0x0000000a").
void operator()(std::string* out, int v) {
snprintf(buffer, sizeof(buffer), "%#.8x", v);
out->append(buffer);
}
// double: no fractional digits, but '#' keeps the decimal point
// (3.14 -> "3.").
void operator()(std::string* out, double v) {
snprintf(buffer, sizeof(buffer), "%#.0f", v);
out->append(buffer);
}
// std::string: truncated to at most 4 characters ("hello" -> "hell").
void operator()(std::string* out, const std::string& v) {
snprintf(buffer, sizeof(buffer), "%.4s", v.c_str());
out->append(buffer);
}
};
// Elements held by value.
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(x, y, z), "-", TestFormatter()));
// String held by reference via std::cref.
EXPECT_EQ(
"0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-", TestFormatter()));
// Raw pointers, dereferenced before formatting.
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(&x, &y, &z), "-",
absl::DereferenceFormatter(TestFormatter())));
// Smart pointers, dereferenced before formatting.
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(absl::make_unique<int>(x),
absl::make_unique<std::string>(y),
absl::make_unique<double>(z)),
"-", absl::DereferenceFormatter(TestFormatter())));
// A mix of smart and raw pointers in the same tuple.
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(absl::make_unique<int>(x), &y, &z),
"-", absl::DereferenceFormatter(TestFormatter())));
}
} // namespace

View file

@ -0,0 +1,79 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_replace.h"
#include "absl/strings/str_cat.h"
namespace absl {
namespace strings_internal {
// The replacement-list type taken by the non-template StrReplaceAll overloads
// defined at the bottom of this file: a braced list of {old, new} pairs.
using FixedMapping =
std::initializer_list<std::pair<absl::string_view, absl::string_view>>;
// Applies the ViableSubstitutions in subs_ptr to the absl::string_view s, and
// appends the result to *result_ptr. Returns the number of substitutions that
// occurred.
//
// *subs_ptr is consumed: it is expected to be ordered so that back() is the
// substitution that occurs first (see OccursBefore), and it is empty when this
// function returns.
int ApplySubstitutions(
absl::string_view s,
std::vector<strings_internal::ViableSubstitution>* subs_ptr,
std::string* result_ptr) {
auto& subs = *subs_ptr;
int substitutions = 0;
// Start of the part of s that has not yet been copied to the result.
size_t pos = 0;
while (!subs.empty()) {
// The highest-priority pending match.
auto& sub = subs.back();
// A match that begins before pos overlaps text that was already replaced;
// it is not applied, only re-searched below.
if (sub.offset >= pos) {
if (pos <= s.size()) {
// Copy the untouched text before the match, then the replacement.
StrAppend(result_ptr, s.substr(pos, sub.offset - pos), sub.replacement);
}
pos = sub.offset + sub.old.size();
substitutions += 1;
}
// Find this pattern's next occurrence at or after pos.
sub.offset = s.find(sub.old, pos);
if (sub.offset == s.npos) {
// No further occurrences: this pattern is finished.
subs.pop_back();
} else {
// Insertion sort to ensure the last ViableSubstitution continues to be
// before all the others.
size_t index = subs.size();
while (--index && subs[index - 1].OccursBefore(subs[index])) {
std::swap(subs[index], subs[index - 1]);
}
}
}
// Copy whatever follows the final substitution.
result_ptr->append(s.data() + pos, s.size() - pos);
return substitutions;
}
} // namespace strings_internal
// The initializer_list overloads forward to the generic StrReplaceAll
// templates. The template argument has to be spelled out explicitly: C++
// cannot deduce a template parameter from an initializer_list argument, and
// without it the call would resolve back to this very function.
//
// They are defined here rather than in the header so that they are not
// inlined.
std::string StrReplaceAll(absl::string_view s,
                          strings_internal::FixedMapping replacements) {
  using Mapping = strings_internal::FixedMapping;
  return StrReplaceAll<Mapping>(s, replacements);
}
// In-place initializer_list overload; forwards to the generic in-place
// template with the mapping type named explicitly so the call does not resolve
// back to this function.
int StrReplaceAll(strings_internal::FixedMapping replacements,
                  std::string* target) {
  using Mapping = strings_internal::FixedMapping;
  return StrReplaceAll<Mapping>(replacements, target);
}
} // namespace absl

213
absl/strings/str_replace.h Normal file
View file

@ -0,0 +1,213 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_replace.h
// -----------------------------------------------------------------------------
//
// This file defines `absl::StrReplaceAll()`, a general-purpose std::string
// replacement function designed for large, arbitrary text substitutions,
// especially on strings which you are receiving from some other system for
// further processing (e.g. processing regular expressions, escaping HTML
// entities, etc.). `StrReplaceAll` is designed to be efficient even when only
// one substitution is being performed, or when substitution is rare.
//
// If the std::string being modified is known at compile-time, and the substitutions
// vary, `absl::Substitute()` may be a better choice.
//
// Example:
//
// std::string html_escaped = absl::StrReplaceAll(user_input, {
// {"&", "&amp;"},
// {"<", "&lt;"},
// {">", "&gt;"},
// {"\"", "&quot;"},
// {"'", "&#39;"}});
#ifndef ABSL_STRINGS_STR_REPLACE_H_
#define ABSL_STRINGS_STR_REPLACE_H_
#include <string>
#include <utility>
#include <vector>
#include "absl/base/attributes.h"
#include "absl/strings/string_view.h"
namespace absl {
// StrReplaceAll()
//
// Replaces character sequences within a given std::string with replacements provided
// within an initializer list of key/value pairs. Candidate replacements are
// considered in order as they occur within the std::string, with earlier matches
// taking precedence, and longer matches taking precedence for candidates
// starting at the same position in the std::string. Once a substitution is made, the
// replaced text is not considered for any further substitutions.
//
// Example:
//
// std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!",
// {{"$count", absl::StrCat(5)},
// {"$who", "Bob"},
// {"#Noun", "Apples"}});
// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
ABSL_MUST_USE_RESULT std::string StrReplaceAll(
absl::string_view s,
std::initializer_list<std::pair<absl::string_view, absl::string_view>>
replacements);
// Overload of `StrReplaceAll()` to accept a container of key/value replacement
// pairs (typically either an associative map or a `std::vector` of `std::pair`
// elements). A vector of pairs is generally more efficient.
//
// Examples:
//
// std::map<absl::string_view, absl::string_view> replacements;
// replacements["$who"] = "Bob";
// replacements["$count"] = "5";
// replacements["#Noun"] = "Apples";
// std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!",
// replacements);
// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
//
// // A std::vector of std::pair elements can be more efficient.
// std::vector<std::pair<const absl::string_view, std::string>> replacements;
// replacements.push_back({"&", "&amp;"});
// replacements.push_back({"<", "&lt;"});
// replacements.push_back({">", "&gt;"});
// std::string s = absl::StrReplaceAll("if (ptr < &foo)",
// replacements);
// EXPECT_EQ("if (ptr &lt; &amp;foo)", s);
template <typename StrToStrMapping>
std::string StrReplaceAll(absl::string_view s, const StrToStrMapping& replacements);
// Overload of `StrReplaceAll()` to replace character sequences within a given
// output std::string *in place* with replacements provided within an initializer
// list of key/value pairs, returning the number of substitutions that occurred.
//
// Example:
//
// std::string s = std::string("$who bought $count #Noun. Thanks $who!");
// int count;
// count = absl::StrReplaceAll({{"$count", absl::StrCat(5)},
// {"$who", "Bob"},
// {"#Noun", "Apples"}}, &s);
// EXPECT_EQ(count, 4);
// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
int StrReplaceAll(
std::initializer_list<std::pair<absl::string_view, absl::string_view>>
replacements,
std::string* target);
// Overload of `StrReplaceAll()` to replace patterns within a given output
// std::string *in place* with replacements provided within a container of key/value
// pairs.
//
// Example:
//
// std::string s = std::string("if (ptr < &foo)");
// int count = absl::StrReplaceAll({{"&", "&amp;"},
// {"<", "&lt;"},
// {">", "&gt;"}}, &s);
// EXPECT_EQ(count, 2);
// EXPECT_EQ("if (ptr &lt; &amp;foo)", s);
template <typename StrToStrMapping>
int StrReplaceAll(const StrToStrMapping& replacements, std::string* target);
// Implementation details only, past this point.
namespace strings_internal {
// One pending replacement: the pattern, its replacement text, and the offset
// of the pattern's next occurrence in the input.
struct ViableSubstitution {
  absl::string_view old;
  absl::string_view replacement;
  size_t offset;

  ViableSubstitution(absl::string_view old_str,
                     absl::string_view replacement_str, size_t offset_val)
      : old(old_str), replacement(replacement_str), offset(offset_val) {}

  // Returns true if this substitution takes priority over `y`: it starts at a
  // lower offset, or at the same offset with a longer pattern.
  bool OccursBefore(const ViableSubstitution& y) const {
    return offset == y.offset ? old.size() > y.old.size() : offset < y.offset;
  }
};
// Builds the list of ViableSubstitutions for `s`: one entry per replacement
// whose (non-empty) pattern occurs in `s`, positioned at its first occurrence.
// The result is ordered so that the entry that occurs first is at the back.
//
// A priority_queue would also work here, but most callers pass so few
// substitutions that the overhead of such a queue isn't worth it.
template <typename StrToStrMapping>
std::vector<ViableSubstitution> FindSubstitutions(
    absl::string_view s, const StrToStrMapping& replacements) {
  std::vector<ViableSubstitution> subs;
  subs.reserve(replacements.size());

  for (const auto& rep : replacements) {
    // Unqualified get<> so that element types providing their own get<N>()
    // work as well as std::pair and std::tuple.
    using std::get;
    absl::string_view old(get<0>(rep));

    const size_t pos = s.find(old);
    // The not-found test is frequently true and the empty-pattern test almost
    // never is, hence the order. Attempts to replace "" are ignored.
    if (pos == s.npos || old.empty()) continue;

    subs.emplace_back(old, get<1>(rep), pos);

    // One insertion-sort pass: move the new entry toward the front until the
    // entry before it no longer occurs earlier than it does.
    for (size_t i = subs.size() - 1;
         i > 0 && subs[i - 1].OccursBefore(subs[i]); --i) {
      std::swap(subs[i], subs[i - 1]);
    }
  }
  return subs;
}
int ApplySubstitutions(absl::string_view s,
std::vector<ViableSubstitution>* subs_ptr,
std::string* result_ptr);
} // namespace strings_internal
// Generic overload: returns a copy of `s` with every replacement applied.
template <typename StrToStrMapping>
std::string StrReplaceAll(absl::string_view s,
                          const StrToStrMapping& replacements) {
  auto pending = strings_internal::FindSubstitutions(s, replacements);
  std::string replaced;
  // Reserve the input's length up front as the initial capacity.
  replaced.reserve(s.size());
  strings_internal::ApplySubstitutions(s, &pending, &replaced);
  return replaced;
}
// Generic in-place overload: rewrites *target and returns the number of
// substitutions made. When no pattern occurs in *target it is left untouched
// and nothing is allocated.
template <typename StrToStrMapping>
int StrReplaceAll(const StrToStrMapping& replacements, std::string* target) {
  auto pending = strings_internal::FindSubstitutions(*target, replacements);
  if (!pending.empty()) {
    std::string rewritten;
    rewritten.reserve(target->size());
    const int count =
        strings_internal::ApplySubstitutions(*target, &pending, &rewritten);
    // The pending substitutions view the old contents of *target, so the new
    // text is built separately and swapped in only at the end.
    target->swap(rewritten);
    return count;
  }
  return 0;
}
} // namespace absl
#endif // ABSL_STRINGS_STR_REPLACE_H_

View file

@ -0,0 +1,340 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_replace.h"
#include <list>
#include <tuple>
#include "gtest/gtest.h"
#include "absl/strings/str_split.h"
#include "absl/strings/str_cat.h"
// Covers the returning overload of StrReplaceAll with a single {old, new}
// pair: empty inputs, empty patterns, misses, and matches at each position.
TEST(StrReplaceAll, OneReplacement) {
std::string s;
// Empty input string: the result is always empty.
s = absl::StrReplaceAll(s, {{"", ""}});
EXPECT_EQ(s, "");
s = absl::StrReplaceAll(s, {{"x", ""}});
EXPECT_EQ(s, "");
s = absl::StrReplaceAll(s, {{"", "y"}});
EXPECT_EQ(s, "");
s = absl::StrReplaceAll(s, {{"x", "y"}});
EXPECT_EQ(s, "");
// Empty pattern or empty replacement: an empty pattern is ignored, and "x"
// does not occur in "abc", so the input comes back unchanged.
s = absl::StrReplaceAll("abc", {{"", ""}});
EXPECT_EQ(s, "abc");
s = absl::StrReplaceAll("abc", {{"", "y"}});
EXPECT_EQ(s, "abc");
s = absl::StrReplaceAll("abc", {{"x", ""}});
EXPECT_EQ(s, "abc");
// Substring not found.
s = absl::StrReplaceAll("abc", {{"xyz", "123"}});
EXPECT_EQ(s, "abc");
// Replace the entire string.
s = absl::StrReplaceAll("abc", {{"abc", "xyz"}});
EXPECT_EQ(s, "xyz");
// Replace once at the start.
s = absl::StrReplaceAll("abc", {{"a", "x"}});
EXPECT_EQ(s, "xbc");
// Replace once in the middle.
s = absl::StrReplaceAll("abc", {{"b", "x"}});
EXPECT_EQ(s, "axc");
// Replace once at the end.
s = absl::StrReplaceAll("abc", {{"c", "x"}});
EXPECT_EQ(s, "abx");
// Replace multiple times with varying lengths of original/replacement.
s = absl::StrReplaceAll("ababa", {{"a", "xxx"}});
EXPECT_EQ(s, "xxxbxxxbxxx");
s = absl::StrReplaceAll("ababa", {{"b", "xxx"}});
EXPECT_EQ(s, "axxxaxxxa");
s = absl::StrReplaceAll("aaabaaabaaa", {{"aaa", "x"}});
EXPECT_EQ(s, "xbxbx");
s = absl::StrReplaceAll("abbbabbba", {{"bbb", "x"}});
EXPECT_EQ(s, "axaxa");
// Overlapping matches are replaced greedily.
s = absl::StrReplaceAll("aaa", {{"aa", "x"}});
EXPECT_EQ(s, "xa");
// The replacements are not recursive.
s = absl::StrReplaceAll("aaa", {{"aa", "a"}});
EXPECT_EQ(s, "aa");
}
// Covers the returning overload of StrReplaceAll with several {old, new}
// pairs at once, including patterns that share a prefix or overlap.
TEST(StrReplaceAll, ManyReplacements) {
std::string s;
// Empty input string.
s = absl::StrReplaceAll("", {{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}});
EXPECT_EQ(s, "");
// Empty patterns and a pattern that does not occur.
s = absl::StrReplaceAll("abc", {{"", ""}, {"", "y"}, {"x", ""}});
EXPECT_EQ(s, "abc");
// Replace the entire string, one char at a time.
s = absl::StrReplaceAll("abc", {{"a", "x"}, {"b", "y"}, {"c", "z"}});
EXPECT_EQ(s, "xyz");
// Replacement text is never rescanned: "z" -> "x" is not then turned into "y".
s = absl::StrReplaceAll("zxy", {{"z", "x"}, {"x", "y"}, {"y", "z"}});
EXPECT_EQ(s, "xyz");
// Replace once at the start (longer matches take precedence)
s = absl::StrReplaceAll("abc", {{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}});
EXPECT_EQ(s, "xyz");
// Replace once in the middle. Matching is case-sensitive, so the patterns
// beginning with "a" do not match the leading "A".
s = absl::StrReplaceAll(
"Abc!", {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}});
EXPECT_EQ(s, "Ayz!");
// Replace once at the end.
s = absl::StrReplaceAll(
"Abc!",
{{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}});
EXPECT_EQ(s, "Ayz?");
// Replace multiple times with varying lengths of original/replacement.
s = absl::StrReplaceAll("ababa", {{"a", "xxx"}, {"b", "XXXX"}});
EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx");
// Overlapping matches are replaced greedily, regardless of the order in which
// the pairs are listed.
s = absl::StrReplaceAll("aaa", {{"aa", "x"}, {"a", "X"}});
EXPECT_EQ(s, "xX");
s = absl::StrReplaceAll("aaa", {{"a", "X"}, {"aa", "x"}});
EXPECT_EQ(s, "xX");
// Two well-known sentences
s = absl::StrReplaceAll("the quick brown fox jumped over the lazy dogs",
{
{"brown", "box"},
{"dogs", "jugs"},
{"fox", "with"},
{"jumped", "five"},
{"over", "dozen"},
{"quick", "my"},
{"the", "pack"},
{"the lazy", "liquor"},
});
EXPECT_EQ(s, "pack my box with five dozen liquor jugs");
}
// The returning overload also accepts an associative container; here a map
// whose keys and values are C strings.
TEST(StrReplaceAll, ManyReplacementsInMap) {
  std::map<const char *, const char *> mapping;
  mapping["$who"] = "Bob";
  mapping["$count"] = "5";
  mapping["#Noun"] = "Apples";

  std::string replaced =
      absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!", mapping);
  EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", replaced);
}
// In-place overload with a braced replacement list: the string is rewritten
// and the number of substitutions is returned ("$who" matches twice).
TEST(StrReplaceAll, ReplacementsInPlace) {
  std::string text("$who bought $count #Noun. Thanks $who!");
  const int replaced = absl::StrReplaceAll({{"$count", absl::StrCat(5)},
                                            {"$who", "Bob"},
                                            {"#Noun", "Apples"}},
                                           &text);
  EXPECT_EQ(replaced, 4);
  EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", text);
}
// In-place overload with the replacements supplied in a std::map of
// string_views.
TEST(StrReplaceAll, ReplacementsInPlaceInMap) {
  std::string text("$who bought $count #Noun. Thanks $who!");

  std::map<absl::string_view, absl::string_view> mapping;
  mapping["$who"] = "Bob";
  mapping["$count"] = "5";
  mapping["#Noun"] = "Apples";

  const int replaced = absl::StrReplaceAll(mapping, &text);
  EXPECT_EQ(replaced, 4);
  EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", text);
}
struct Cont {
Cont() {}
explicit Cont(absl::string_view src) : data(src) {}
absl::string_view data;
};
// Tuple-style accessor for Cont: returns the index-th ':'-separated field of
// c.data (0 is the pattern, 1 is the replacement).
template <int index>
absl::string_view get(const Cont& c) {
  auto fields = absl::StrSplit(c.data, ':');
  auto field = fields.begin();
  // Step forward `index` fields from the first one.
  for (int remaining = index; remaining > 0; --remaining) {
    ++field;
  }
  return *field;
}
// Exercises the container-taking overloads with a variety of container and
// element types.
TEST(StrReplaceAll, VariableNumber) {
std::string s;
{
// vector of std::string pairs, grown one entry at a time; the in-place
// overload's return value is checked after each addition.
std::vector<std::pair<std::string, std::string>> replacements;
// No replacements at all.
s = "abc";
EXPECT_EQ(0, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("abc", s);
s = "abc";
replacements.push_back({"a", "A"});
EXPECT_EQ(1, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("Abc", s);
s = "abc";
replacements.push_back({"b", "B"});
EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("ABc", s);
// A pattern that never matches does not change the count.
s = "abc";
replacements.push_back({"d", "D"});
EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("ABc", s);
EXPECT_EQ("ABcABc", absl::StrReplaceAll("abcabc", replacements));
}
{
// map of C strings with overlapping patterns: the longer match wins.
std::map<const char*, const char*> replacements;
replacements["aa"] = "x";
replacements["a"] = "X";
s = "aaa";
EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("xX", s);
EXPECT_EQ("xxX", absl::StrReplaceAll("aaaaa", replacements));
}
{
// std::list of string_view pairs. The local `s` shadows the outer one.
std::list<std::pair<absl::string_view, absl::string_view>> replacements = {
{"a", "x"}, {"b", "y"}, {"c", "z"}};
std::string s = absl::StrReplaceAll("abc", replacements);
EXPECT_EQ(s, "xyz");
}
{
// Elements are 3-tuples; the expected result depends only on the first two
// members.
using X = std::tuple<absl::string_view, std::string, int>;
std::vector<X> replacements(3);
replacements[0] = X{"a", "x", 1};
replacements[1] = X{"b", "y", 0};
replacements[2] = X{"c", "z", -1};
std::string s = absl::StrReplaceAll("abc", replacements);
EXPECT_EQ(s, "xyz");
}
{
// Elements are a user-defined type (Cont) read through its own get<N>()
// overload, defined earlier in this file.
std::vector<Cont> replacements(3);
replacements[0] = Cont{"a:x"};
replacements[1] = Cont{"b:y"};
replacements[2] = Cont{"c:z"};
std::string s = absl::StrReplaceAll("abc", replacements);
EXPECT_EQ(s, "xyz");
}
}
// Same cases as the ManyReplacements test, but using the in-place variant of
// absl::StrReplaceAll, which returns the number of replacements performed.
TEST(StrReplaceAll, Inplace) {
std::string s;
int reps;
// Empty input string.
s = "";
reps = absl::StrReplaceAll({{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}, &s);
EXPECT_EQ(reps, 0);
EXPECT_EQ(s, "");
// Empty patterns and a pattern that does not occur.
s = "abc";
reps = absl::StrReplaceAll({{"", ""}, {"", "y"}, {"x", ""}}, &s);
EXPECT_EQ(reps, 0);
EXPECT_EQ(s, "abc");
// Replace the entire string, one char at a time.
s = "abc";
reps = absl::StrReplaceAll({{"a", "x"}, {"b", "y"}, {"c", "z"}}, &s);
EXPECT_EQ(reps, 3);
EXPECT_EQ(s, "xyz");
s = "zxy";
reps = absl::StrReplaceAll({{"z", "x"}, {"x", "y"}, {"y", "z"}}, &s);
EXPECT_EQ(reps, 3);
EXPECT_EQ(s, "xyz");
// Replace once at the start (longer matches take precedence)
s = "abc";
reps = absl::StrReplaceAll({{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}}, &s);
EXPECT_EQ(reps, 1);
EXPECT_EQ(s, "xyz");
// Replace once in the middle.
s = "Abc!";
reps = absl::StrReplaceAll(
{{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}}, &s);
EXPECT_EQ(reps, 1);
EXPECT_EQ(s, "Ayz!");
// Replace once at the end.
s = "Abc!";
reps = absl::StrReplaceAll(
{{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}}, &s);
EXPECT_EQ(reps, 1);
EXPECT_EQ(s, "Ayz?");
// Replace multiple times with varying lengths of original/replacement.
s = "ababa";
reps = absl::StrReplaceAll({{"a", "xxx"}, {"b", "XXXX"}}, &s);
EXPECT_EQ(reps, 5);
EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx");
// Overlapping matches are replaced greedily, regardless of the order in which
// the pairs are listed.
s = "aaa";
reps = absl::StrReplaceAll({{"aa", "x"}, {"a", "X"}}, &s);
EXPECT_EQ(reps, 2);
EXPECT_EQ(s, "xX");
s = "aaa";
reps = absl::StrReplaceAll({{"a", "X"}, {"aa", "x"}}, &s);
EXPECT_EQ(reps, 2);
EXPECT_EQ(s, "xX");
// Two well-known sentences
s = "the quick brown fox jumped over the lazy dogs";
reps = absl::StrReplaceAll(
{
{"brown", "box"},
{"dogs", "jugs"},
{"fox", "with"},
{"jumped", "five"},
{"over", "dozen"},
{"quick", "my"},
{"the", "pack"},
{"the lazy", "liquor"},
},
&s);
EXPECT_EQ(reps, 8);
EXPECT_EQ(s, "pack my box with five dozen liquor jugs");
}

133
absl/strings/str_split.cc Normal file
View file

@ -0,0 +1,133 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_split.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/ascii.h"
namespace absl {
namespace {
// Search routine shared by the ByString and ByAnyChar delimiters. The
// FindPolicy argument supplies the two things that differ between them: how to
// search (`Find`) and how long a found delimiter is (`Length`).
//
// Returns a view of the delimiter found at or after `pos`, or a zero-length
// view at text.end() when there is none.
template <typename FindPolicy>
absl::string_view GenericFind(absl::string_view text,
                              absl::string_view delimiter, size_t pos,
                              FindPolicy find_policy) {
  if (delimiter.empty() && text.length() > 0) {
    // An empty delimiter splits non-empty text between every character:
    // report a zero-length match one position past `pos`.
    return absl::string_view(text.begin() + pos + 1, 0);
  }

  const size_t found_pos = find_policy.Find(text, delimiter, pos);
  if (found_pos == absl::string_view::npos) {
    // Not found.
    return absl::string_view(text.end(), 0);
  }
  return absl::string_view(text.data() + found_pos,
                           find_policy.Length(delimiter));
}
// Policy for matching the whole delimiter string: searches with
// absl::string_view::find(), so a match is as long as the delimiter itself.
struct LiteralPolicy {
  size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
    const size_t match = text.find(delimiter, pos);
    return match;
  }
  size_t Length(absl::string_view delimiter) { return delimiter.size(); }
};
// Policy for matching any single character of the delimiter set: searches with
// absl::string_view::find_first_of(), so a match is always one character long.
struct AnyOfPolicy {
  size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
    const size_t match = text.find_first_of(delimiter, pos);
    return match;
  }
  size_t Length(absl::string_view) { return 1; }
};
} // namespace
//
// ByString
//

// Stores its own copy of the delimiter.
ByString::ByString(absl::string_view sp) : delimiter_(sp) {}

// Returns a view of the first occurrence of the delimiter in `text` at or
// after `pos`, or a zero-length view at text.end() if there is none.
absl::string_view ByString::Find(absl::string_view text, size_t pos) const {
  if (delimiter_.length() != 1) {
    return GenericFind(text, delimiter_, pos, LiteralPolicy());
  }
  // Single-character delimiter: searching for a char is much faster than
  // searching for an absl::string_view.
  const size_t found_pos = text.find(delimiter_[0], pos);
  return found_pos == absl::string_view::npos
             ? absl::string_view(text.end(), 0)
             : text.substr(found_pos, 1);
}
//
// ByChar
//

// Returns a one-character view of the first occurrence of the delimiter
// character in `text` at or after `pos`, or a zero-length view at text.end()
// if there is none.
absl::string_view ByChar::Find(absl::string_view text, size_t pos) const {
  const size_t found_pos = text.find(c_, pos);
  return found_pos == absl::string_view::npos
             ? absl::string_view(text.end(), 0)
             : text.substr(found_pos, 1);
}
//
// ByAnyChar
//

// Stores its own copy of the delimiter character set.
ByAnyChar::ByAnyChar(absl::string_view sp) : delimiters_(sp) {}

// Returns a one-character view of the first character in `text`, at or after
// `pos`, that belongs to the delimiter set; see GenericFind for the not-found
// and empty-set cases.
absl::string_view ByAnyChar::Find(absl::string_view text, size_t pos) const {
  AnyOfPolicy policy;
  return GenericFind(text, delimiters_, pos, policy);
}
//
// ByLength
//

// `length` is the chunk size and must be positive.
ByLength::ByLength(ptrdiff_t length) : length_(length) {
  ABSL_RAW_CHECK(length > 0, "");
}

// Returns a zero-length view marking where the chunk that starts at `pos`
// ends, or a zero-length view at text.end() when the remaining text fits in a
// single chunk.
absl::string_view ByLength::Find(absl::string_view text,
                                 size_t pos) const {
  const size_t start = std::min(pos, text.size());  // truncate `pos`
  const absl::string_view rest = text.substr(start);
  const size_t chunk = static_cast<size_t>(length_);
  if (rest.length() > chunk) {
    return absl::string_view(rest.begin() + length_, 0);
  }
  // The remaining text is no longer than one chunk, so we say we "can't find
  // the delimiter" and this will be the last chunk.
  return absl::string_view(text.end(), 0);
}
} // namespace absl

511
absl/strings/str_split.h Normal file
View file

@ -0,0 +1,511 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_split.h
// -----------------------------------------------------------------------------
//
// This file contains functions for splitting strings. It defines the main
// `StrSplit()` function, several delimiters for determining the boundaries on
// which to split the std::string, and predicates for filtering delimited results.
// `StrSplit()` adapts the returned collection to the type specified by the
// caller.
//
// Example:
//
// // Splits the given std::string on commas. Returns the results in a
// // vector of strings.
// std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
// // Can also use ","
// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See StrSplit() below for more information.
#ifndef ABSL_STRINGS_STR_SPLIT_H_
#define ABSL_STRINGS_STR_SPLIT_H_
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/str_split_internal.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
namespace absl {
//------------------------------------------------------------------------------
// Delimiters
//------------------------------------------------------------------------------
//
// `StrSplit()` uses delimiters to define the boundaries between elements in the
// provided input. Several `Delimiter` types are defined below. If a std::string
// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
// were passed a `ByString` delimiter.
//
// A `Delimiter` is an object with a `Find()` function that knows how to find
// the first occurrence of itself in a given `absl::string_view`.
//
// The following `Delimiter` types are available for use within `StrSplit()`:
//
// - `ByString` (default for std::string arguments)
// - `ByChar` (default for a char argument)
// - `ByAnyChar`
// - `ByLength`
// - `MaxSplits`
//
//
// A Delimiter's Find() member function will be passed the input text that is to
// be split and the position to begin searching for the next delimiter in the
// input text. The returned absl::string_view should refer to the next
// occurrence (after pos) of the represented delimiter; this returned
// absl::string_view represents the next location where the input std::string should
// be broken. The returned absl::string_view may be zero-length if the Delimiter
// does not represent a part of the std::string (e.g., a fixed-length delimiter). If
// no delimiter is found in the given text, a zero-length absl::string_view
// referring to text.end() should be returned (e.g.,
// absl::string_view(text.end(), 0)). It is important that the returned
// absl::string_view always be within the bounds of input text given as an
// argument--it must not refer to a std::string that is physically located outside of
// the given std::string.
//
// The following example is a simple Delimiter object that is created with a
// single char and will look for that char in the text passed to the Find()
// function:
//
// struct SimpleDelimiter {
// const char c_;
// explicit SimpleDelimiter(char c) : c_(c) {}
// absl::string_view Find(absl::string_view text, size_t pos) {
// auto found = text.find(c_, pos);
// if (found == absl::string_view::npos)
// return absl::string_view(text.end(), 0);
//
// return text.substr(found, 1);
// }
// };
// ByString
//
// A substring delimiter. If `StrSplit()` is passed a string in place of a
// `Delimiter` object, the string will be implicitly converted into a
// `ByString` delimiter.
//
// Example:
//
// // Because a string literal is converted to an `absl::ByString`,
// // the following two splits are equivalent.
//
// std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
//
// using absl::ByString;
// std::vector<std::string> v2 = absl::StrSplit("a, b, c",
// ByString(", "));
// // v1[0] == "a", v1[1] == "b", v1[2] == "c" (and likewise for v2)
class ByString {
public:
explicit ByString(absl::string_view sp);
// Returns a view of the next occurrence of the delimiter in `text` at or
// after `pos`, or a zero-length view at `text.end()` if there is none.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// Owned copy of the delimiter passed to the constructor.
const std::string delimiter_;
};
// ByChar
//
// A single character delimiter. `ByChar` is functionally equivalent to a
// 1-char string within a `ByString` delimiter, but slightly more
// efficient.
//
// Example:
//
// // Because a char literal is converted to an `absl::ByChar`,
// // the following two splits are equivalent.
// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
// using absl::ByChar;
// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
// // v1[0] == "a", v1[1] == "b", v1[2] == "c" (and likewise for v2)
//
// `ByChar` is also the default delimiter if a single character is given
// as the delimiter to `StrSplit()`. For example, the following calls are
// equivalent:
//
// std::vector<std::string> v = absl::StrSplit("a-b", '-');
//
// using absl::ByChar;
// std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
//
class ByChar {
public:
explicit ByChar(char c) : c_(c) {}
// Returns a one-character view of the next occurrence of the delimiter
// character in `text` at or after `pos`, or a zero-length view at
// `text.end()` if there is none.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// The delimiter character.
char c_;
};
// ByAnyChar
//
// A delimiter that will match any of the given byte-sized characters within
// its provided string.
//
// Note: this delimiter works with single-byte string data, but does not work
// with variable-width encodings, such as UTF-8.
//
// Example:
//
// using absl::ByAnyChar;
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// If `ByAnyChar` is given the empty string, it behaves exactly like
// `ByString` and matches each individual character in the input string.
//
class ByAnyChar {
public:
// Creates a delimiter from the set of characters in `sp`. The constructor is
// defined out of line; presumably it copies `sp` into `delimiters_` --
// confirm in str_split.cc.
explicit ByAnyChar(absl::string_view sp);
// Delimiter interface: looks for the next match in `text`, given the offset
// `pos`. The definition is out of line and not visible here.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// Owned std::string member (not a view); declared const.
const std::string delimiters_;
};
// ByLength
//
// A delimiter for splitting into equal-length strings. The length argument to
// the constructor must be greater than 0.
//
// Note: this delimiter works with single-byte string data, but does not work
// with variable-width encodings, such as UTF-8.
//
// Example:
//
// using absl::ByLength;
// std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
// // v[0] == "123", v[1] == "456", v[2] == "789"
//
// Note that the string does not have to be a multiple of the fixed split
// length. In such a case, the last substring will be shorter.
//
// using absl::ByLength;
// std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
//
// // v[0] == "12", v[1] == "34", v[2] == "5"
class ByLength {
public:
// `length` is the size of each piece; per the contract above it must be > 0.
// The constructor is defined out of line.
explicit ByLength(ptrdiff_t length);
// Delimiter interface: looks for the next split point in `text`, given the
// offset `pos`. The definition is out of line and not visible here.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// The fixed piece length supplied at construction (presumably -- the
// constructor body is not visible here).
const ptrdiff_t length_;
};
namespace strings_internal {
// A traits-like metafunction for selecting the default Delimiter object type
// for a particular Delimiter type. The base case simply exposes type Delimiter
// itself as the delimiter's `type`. However, there are specializations for
// string-like types (char*, const char*, absl::string_view, std::string) that
// map them to the ByString delimiter object, and one for `char` that maps it
// to ByChar.
// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
// string-like objects (e.g., ",") or a single character (e.g., ',') as
// delimiter arguments; they will be treated as if the corresponding ByString
// or ByChar delimiter was given.
template <typename Delimiter>
struct SelectDelimiter {
using type = Delimiter;
};
// A single character selects ByChar.
template <>
struct SelectDelimiter<char> {
using type = ByChar;
};
// Mutable C strings select ByString.
template <>
struct SelectDelimiter<char*> {
using type = ByString;
};
// C strings (including decayed string literals) select ByString.
template <>
struct SelectDelimiter<const char*> {
using type = ByString;
};
// String views select ByString.
template <>
struct SelectDelimiter<absl::string_view> {
using type = ByString;
};
// std::string selects ByString.
template <>
struct SelectDelimiter<std::string> {
using type = ByString;
};
// Decorates another delimiter with a cap on the number of matches it reports.
// Each call to Find() is counted; the call made when the running count equals
// `limit` reports "no match" (an empty view at text.end()) instead of
// consulting the wrapped delimiter.
template <typename Delimiter>
class MaxSplitsImpl {
 public:
  MaxSplitsImpl(Delimiter delimiter, int limit)
      : delimiter_(delimiter), limit_(limit), count_(0) {}

  absl::string_view Find(absl::string_view text, size_t pos) {
    // The counter advances on every call, including the one that hits the
    // limit.
    const bool limit_reached = (count_ == limit_);
    ++count_;
    if (!limit_reached) {
      return delimiter_.Find(text, pos);
    }
    return absl::string_view(text.end(), 0);  // No more matches.
  }

 private:
  Delimiter delimiter_;  // The wrapped delimiter.
  const int limit_;      // Call count at which "no match" is reported.
  int count_;            // Number of Find() calls made so far.
};
} // namespace strings_internal
// MaxSplits()
//
// A delimiter that limits the number of matches which can occur to the passed
// `limit`. The last element in the returned collection will contain all
// remaining unsplit pieces, which may contain instances of the delimiter.
// The collection will contain at most `limit` + 1 elements.
//
// Example:
//
// using absl::MaxSplits;
// std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
//
// // v[0] == "a", v[1] == "b,c"
template <typename Delimiter>
inline strings_internal::MaxSplitsImpl<
    typename strings_internal::SelectDelimiter<Delimiter>::type>
MaxSplits(Delimiter delimiter, int limit) {
  // Map chars and string-like arguments onto their delimiter class first,
  // then wrap that delimiter with the match limit.
  using Wrapped = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Limited = strings_internal::MaxSplitsImpl<Wrapped>;
  return Limited(Wrapped(delimiter), limit);
}
//------------------------------------------------------------------------------
// Predicates
//------------------------------------------------------------------------------
//
// Predicates filter the results of a `StrSplit()` by determining whether or not
// a resultant element is included in the result set. A predicate may be passed
// as an optional third argument to the `StrSplit()` function.
//
// Predicates are unary functions (or functors) that take a single
// `absl::string_view` argument and return a bool indicating whether the
// argument should be included (`true`) or excluded (`false`).
//
// Predicates are useful when filtering out empty substrings. By default, empty
// substrings may be returned by `StrSplit()`, which is similar to the way split
// functions work in other programming languages.
// AllowEmpty()
//
// Always returns `true`, indicating that all strings--including empty
// strings--should be included in the split output. This predicate is not
// strictly needed because this is the default behavior of `StrSplit()`;
// however, it might be useful at some call sites to make the intent explicit.
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
//
// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
struct AllowEmpty {
// Accepts every piece; the argument is ignored.
bool operator()(absl::string_view) const { return true; }
};
// SkipEmpty()
//
// Returns `false` if the given `absl::string_view` is empty, indicating that
// `StrSplit()` should omit the empty string.
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
//
// // v[0] == "a", v[1] == "b"
//
// Note: `SkipEmpty()` does not consider a string containing only whitespace
// to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
// predicate.
struct SkipEmpty {
// Keeps `sp` only when it has at least one character.
bool operator()(absl::string_view sp) const { return !sp.empty(); }
};
// SkipWhitespace()
//
// Returns `false` if the given `absl::string_view` is empty *or* contains only
// whitespace, indicating that `StrSplit()` should omit the string.
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
// ',', SkipWhitespace());
// // v[0] == " a ", v[1] == "b"
//
// // SkipEmpty() would return whitespace elements
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
// // v[0] == " a ", v[1] == " ", v[2] == "b"
struct SkipWhitespace {
  // Keeps `sp` only if something remains after StripAsciiWhitespace(); the
  // stripped view is used for the test only.
  bool operator()(absl::string_view sp) const {
    return !absl::StripAsciiWhitespace(sp).empty();
  }
};
//------------------------------------------------------------------------------
// StrSplit()
//------------------------------------------------------------------------------
// StrSplit()
//
// Splits a given `std::string` based on the provided `Delimiter` object,
// returning the elements within the type specified by the caller. Optionally,
// you may also pass a `Predicate` to `StrSplit()` indicating whether to include
// or exclude the resulting element within the final result set. (See the
// overviews for Delimiters and Predicates above.)
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
//
// You can also provide an explicit `Delimiter` object:
//
// Example:
//
// using absl::ByAnyChar;
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See above for more information on delimiters.
//
// By default, empty strings are included in the result set. You can optionally
// include a third `Predicate` argument to apply a test for whether the
// resultant element should be included in the result set:
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
// ',', SkipWhitespace());
// // v[0] == " a ", v[1] == "b"
//
// See above for more information on predicates.
//
//------------------------------------------------------------------------------
// StrSplit() Return Types
//------------------------------------------------------------------------------
//
// The `StrSplit()` function adapts the returned collection to the collection
// specified by the caller (e.g. `std::vector` above). The returned collections
// may contain `string`, `absl::string_view` (in which case the original std::string
// being split must ensure that it outlives the collection), or any object that
// can be explicitly created from an `absl::string_view`. This behavior works
// for:
//
// 1) All standard STL containers including `std::vector`, `std::list`,
// `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
// 2) `std::pair` (which is not actually a container). See below.
//
// Example:
//
// // The results are returned as `absl::string_view` objects. Note that we
// // have to ensure that the input std::string outlives any results.
// std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
//
// // Stores results in a std::set<std::string>, which also performs
// // de-duplication and orders the elements in ascending order.
// std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
// // a contains "a", "b", and "c", in that order
//
// // `StrSplit()` can be used within a range-based for loop, in which case
// // each element will be of type `absl::string_view`.
// std::vector<std::string> v;
// for (const auto sv : absl::StrSplit("a,b,c", ',')) {
// if (sv != "b") v.emplace_back(sv);
// }
// // v[0] == "a", v[1] == "c"
//
// // Stores results in a map. The map implementation assumes that the input
// // is provided as a series of key/value pairs. For example, the 0th element
// // resulting from the split will be stored as a key to the 1st element. If
// // an odd number of elements are resolved, the last element is paired with
// // a default-constructed value (e.g., empty std::string).
// std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
// // m["a"] == "b", m["c"] == "" // last component value equals ""
//
// Splitting to `std::pair` is an interesting case because it can hold only two
// elements and is not a collection type. When splitting to a `std::pair` the
// first two split strings become the `std::pair` `.first` and `.second`
// members, respectively. The remaining split substrings are discarded. If there
// are fewer than two split substrings, the empty string is used for the
// corresponding `std::pair` member.
//
// Example:
//
// // Stores first two split strings as the members in a std::pair.
// std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
// // p.first == "a", p.second == "b" // "c" is omitted.
//
// The `StrSplit()` function can be used multiple times to perform more
// complicated splitting logic, such as intelligently parsing key-value pairs.
//
// Example:
//
// // The input std::string "a=b=c,d=e,f=,g" becomes
// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
// std::map<std::string, std::string> m;
// for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
// m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
// }
// EXPECT_EQ("b=c", m.find("a")->second);
// EXPECT_EQ("e", m.find("d")->second);
// EXPECT_EQ("", m.find("f")->second);
// EXPECT_EQ("", m.find("g")->second);
//
// WARNING: Due to a legacy bug that is maintained for backward compatibility,
// splitting the following empty string_views produces different results:
//
// absl::StrSplit(absl::string_view(""), '-'); // {""}
// absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""}
//
// Try not to depend on this distinction because the bug may one day be fixed.
// Two-argument form: splits `text` on `d` and keeps every piece (uses the
// AllowEmpty predicate). `d` is first mapped to its delimiter class through
// SelectDelimiter.
template <typename Delimiter>
strings_internal::Splitter<
    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
  using Selected = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Result = strings_internal::Splitter<Selected, AllowEmpty>;
  return Result(std::move(text), Selected(d), AllowEmpty());
}
// Three-argument form: splits `text` on `d` and hands the caller-supplied
// predicate `p` to the Splitter. `d` is first mapped to its delimiter class
// through SelectDelimiter.
template <typename Delimiter, typename Predicate>
strings_internal::Splitter<
    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
         Predicate p) {
  using Selected = typename strings_internal::SelectDelimiter<Delimiter>::type;
  using Result = strings_internal::Splitter<Selected, Predicate>;
  return Result(std::move(text), Selected(d), std::move(p));
}
} // namespace absl
#endif // ABSL_STRINGS_STR_SPLIT_H_

View file

@ -0,0 +1,896 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_split.h"

#include <climits>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/dynamic_annotations.h" // for RunningOnValgrind
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/strings/numbers.h"
namespace {
using ::testing::ElementsAre;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
// This tests the overall split API, which is made up of the absl::StrSplit()
// function and the Delimiter objects in the absl:: namespace.
// This TEST macro is outside of any namespace to require full specification of
// namespaces just like callers will need to use.
// Walks through the usage examples of the public split API: each scoped block
// is one independent example.
TEST(Split, APIExamples) {
  {
    // Passes a string delimiter, which defaults to ByString.
    // NOLINT: the one-character string literal is deliberate here.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ",");  // NOLINT
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByString;
    v = absl::StrSplit("a,b,c", ByString(","));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
                ElementsAre("a", "b", "c"));
  }

  {
    // Same as above, but using a single character as the delimiter.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByChar;
    v = absl::StrSplit("a,b,c", ByChar(','));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Same as above, with the result element type spelled std::string.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByChar;
    v = absl::StrSplit("a,b,c", ByChar(','));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the string literal "=>" as the delimiter.
    const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // The substrings are returned as string_views, eliminating copying.
    std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Leading and trailing empty substrings.
    std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
    EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
  }

  {
    // Splits on a delimiter that is not found.
    std::vector<std::string> v = absl::StrSplit("abc", ',');
    EXPECT_THAT(v, ElementsAre("abc"));
  }

  {
    // Splits the input string into individual characters by using an empty
    // string as the delimiter.
    std::vector<std::string> v = absl::StrSplit("abc", "");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Splits string data with embedded NUL characters, using NUL as the
    // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
    // say that's the empty string when constructing the absl::string_view
    // delimiter. Instead, a non-empty string containing NUL can be used as the
    // delimiter.
    std::string embedded_nulls("a\0b\0c", 5);
    std::string null_delim("\0", 1);
    std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Stores first two split strings as the members in a std::pair.
    std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ("a", p.first);
    EXPECT_EQ("b", p.second);
    // "c" is omitted because std::pair can hold only two elements.
  }

  {
    // Results stored in std::set<std::string>
    std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses a non-const char* delimiter.
    char a[] = ",";
    char* d = a + 0;
    std::vector<std::string> v = absl::StrSplit("a,b,c", d);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Results split using either of , or ;
    using absl::ByAnyChar;
    std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the SkipWhitespace predicate.
    using absl::SkipWhitespace;
    std::vector<std::string> v = absl::StrSplit("a, ,,b,", ',', SkipWhitespace());
    EXPECT_THAT(v, ElementsAre("a", "b"));
  }

  {
    // Uses the ByLength delimiter.
    using absl::ByLength;
    std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
    EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
  }

  {
    // Results stored in a std::map.
    std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_EQ(2, m.size());
    EXPECT_EQ("3", m["a"]);
    EXPECT_EQ("2", m["b"]);
  }

  {
    // Results stored in a std::multimap.
    std::multimap<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_EQ(3, m.size());
    // lower_bound() is guaranteed to yield the first of the equal keys;
    // find() may return any of them.
    auto it = m.lower_bound("a");
    EXPECT_EQ("1", it->second);
    ++it;
    EXPECT_EQ("3", it->second);
    it = m.find("b");
    EXPECT_EQ("2", it->second);
  }

  {
    // Demonstrates use in a range-based for loop in C++11.
    std::string s = "x,x,x,x,x,x,x";
    for (absl::string_view sp : absl::StrSplit(s, ',')) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates use with a Predicate in a range-based for loop.
    using absl::SkipWhitespace;
    std::string s = " ,x,,x,,x,x,x,,";
    for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates a "smart" split to std::map using two separate calls to
    // absl::StrSplit. One call to split the records, and another call to split
    // the keys and values. This also uses the MaxSplits delimiter so that the
    // string "a=b=c" will split to "a" -> "b=c".
    std::map<std::string, std::string> m;
    for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
      m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
    }
    EXPECT_EQ("b=c", m.find("a")->second);
    EXPECT_EQ("e", m.find("d")->second);
    EXPECT_EQ("", m.find("f")->second);
    EXPECT_EQ("", m.find("g")->second);
  }
}
//
// Tests for SplitIterator
//
// Exercises dereference, operator->, pre-increment and post-increment on the
// iterators of a two-element split.
TEST(SplitIterator, Basics) {
  auto splitter = absl::StrSplit("a,b", ',');
  auto it = splitter.begin();
  auto end = splitter.end();

  // Fatal assertions: the iterator is dereferenced right after each check, so
  // the test must stop rather than read through an end iterator.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement
  ASSERT_NE(it, end);
  EXPECT_EQ("b", std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                                 // tests postincrement
  EXPECT_EQ(it, end);
}
// Simple Predicate to skip a particular string: returns false for pieces
// equal to the string given at construction, true for everything else.
class Skip {
 public:
  explicit Skip(const std::string& s) : s_(s) {}
  // const, like the library predicates (AllowEmpty, SkipEmpty, ...), since
  // the comparison does not modify the predicate.
  bool operator()(absl::string_view sp) const { return sp != s_; }

 private:
  std::string s_;  // The piece to filter out.
};
// Same walk as SplitIterator.Basics, but with a predicate that filters out
// the middle element.
TEST(SplitIterator, Predicate) {
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto it = splitter.begin();
  auto end = splitter.end();

  // Fatal assertions: the iterator is dereferenced right after each check, so
  // the test must stop rather than read through an end iterator.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement -- "b" should be skipped here.
  ASSERT_NE(it, end);
  EXPECT_EQ("c", std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                                 // tests postincrement
  EXPECT_EQ(it, end);
}
// Table-driven check of leading, trailing and lone delimiters, iterating the
// splitter by hand.
TEST(SplitIterator, EdgeCases) {
  // Expected input and output, assuming a delimiter of ','
  struct {
    std::string in;
    std::vector<std::string> expect;
  } specs[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const auto& spec : specs) {
    SCOPED_TRACE(spec.in);
    auto splitter = absl::StrSplit(spec.in, ',');
    auto it = splitter.begin();
    auto end = splitter.end();
    for (const auto& expected : spec.expect) {
      // Fatal: if the splitter produced too few pieces, stop here instead of
      // dereferencing the end iterator on the next line.
      ASSERT_NE(it, end);
      EXPECT_EQ(expected, *it++);
    }
    EXPECT_EQ(it, end);
  }
}
// A const-qualified splitter can still be matched element by element.
TEST(Splitter, Const) {
  const auto const_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(const_splitter, ElementsAre("a", "b", "c"));
}
TEST(Split, EmptyAndNull) {
  // Attention: a null absl::string_view and an empty absl::string_view compare
  // equal, yet they split differently: the empty view yields one empty piece,
  // the null view yields nothing. This is likely surprising and undesirable,
  // but it is preserved for backward compatibility by a small "hack" in
  // str_split_internal.h. If that behavior is ever changed/fixed, this test
  // will need to be updated.
  const absl::string_view empty_view("");
  const absl::string_view null_view;
  EXPECT_THAT(absl::StrSplit(empty_view, '-'), ElementsAre(""));
  EXPECT_THAT(absl::StrSplit(null_view, '-'), ElementsAre());
}
// Uses a non-end SplitIterator as the stop condition of a loop, which relies
// on equality between two non-end SplitIterators working correctly.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();

  // Advance a copy twice so that it refers to "c" in the input text.
  auto stop = cursor;
  ++stop;
  ++stop;
  EXPECT_EQ("c", *stop);

  // Collect everything in front of `stop`.
  std::vector<absl::string_view> collected;
  while (cursor != stop) {
    collected.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(collected, ElementsAre("a", "b"));
}
//
// Tests for Splitter
//

// A named splitter works as the range expression of a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  std::vector<absl::string_view> pieces;
  for (const absl::string_view piece : splitter) {
    pieces.push_back(piece);
  }
  EXPECT_THAT(pieces, ElementsAre("a", "b", "c"));
}
// Helper templates for testing the splitter's conversion operators.

// Converts `splitter` to `ContainerType` and expects exactly the elements
// "a", "b", "c", "d" in any order.
template <typename ContainerType, typename Splitter>
void TestConversionOperator(const Splitter& splitter) {
  ContainerType converted = splitter;
  EXPECT_THAT(converted, UnorderedElementsAre("a", "b", "c", "d"));
}
// Converts `splitter` to `MapType` and expects the entries a->b and c->d in
// any order.
template <typename MapType, typename Splitter>
void TestMapConversionOperator(const Splitter& splitter) {
  MapType converted = splitter;
  EXPECT_THAT(converted, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
}
// Converts `splitter` to std::pair<FirstType, SecondType> and expects
// ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;
  PairType converted = splitter;
  EXPECT_EQ(converted, (PairType("a", "b")));
}
// Converts one four-element splitter into sequence, set, map and pair types.
TEST(Splitter, ConversionOperator) {
  auto abcd = absl::StrSplit("a,b,c,d", ',');

  // Sequence and set-like containers.
  TestConversionOperator<std::vector<absl::string_view>>(abcd);
  TestConversionOperator<std::vector<std::string>>(abcd);
  TestConversionOperator<std::list<absl::string_view>>(abcd);
  TestConversionOperator<std::list<std::string>>(abcd);
  TestConversionOperator<std::deque<absl::string_view>>(abcd);
  TestConversionOperator<std::deque<std::string>>(abcd);
  TestConversionOperator<std::set<absl::string_view>>(abcd);
  TestConversionOperator<std::set<std::string>>(abcd);
  TestConversionOperator<std::multiset<absl::string_view>>(abcd);
  TestConversionOperator<std::multiset<std::string>>(abcd);
  TestConversionOperator<std::unordered_set<std::string>>(abcd);

  // Tests conversion to map-like objects.
  TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>(
      abcd);
  TestMapConversionOperator<std::map<absl::string_view, std::string>>(abcd);
  TestMapConversionOperator<std::map<std::string, absl::string_view>>(abcd);
  TestMapConversionOperator<std::map<std::string, std::string>>(abcd);
  TestMapConversionOperator<
      std::multimap<absl::string_view, absl::string_view>>(abcd);
  TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(abcd);
  TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(abcd);
  TestMapConversionOperator<std::multimap<std::string, std::string>>(abcd);
  TestMapConversionOperator<std::unordered_map<std::string, std::string>>(abcd);

  // Tests conversion to std::pair
  TestPairConversionOperator<absl::string_view, absl::string_view>(abcd);
  TestPairConversionOperator<absl::string_view, std::string>(abcd);
  TestPairConversionOperator<std::string, absl::string_view>(abcd);
  TestPairConversionOperator<std::string, std::string>(abcd);
}
// A few additional tests for conversion to std::pair. This conversion differs
// from the others because a std::pair always has exactly two elements, .first
// and .second. It has to work whether the split yields fewer than, exactly,
// or more than two strings.
TEST(Splitter, ToPair) {
  using StringPair = std::pair<std::string, std::string>;

  {
    // Empty string
    StringPair result = absl::StrSplit("", ',');
    EXPECT_EQ("", result.first);
    EXPECT_EQ("", result.second);
  }

  {
    // Only first
    StringPair result = absl::StrSplit("a", ',');
    EXPECT_EQ("a", result.first);
    EXPECT_EQ("", result.second);
  }

  {
    // Only second
    StringPair result = absl::StrSplit(",b", ',');
    EXPECT_EQ("", result.first);
    EXPECT_EQ("b", result.second);
  }

  {
    // First and second.
    StringPair result = absl::StrSplit("a,b", ',');
    EXPECT_EQ("a", result.first);
    EXPECT_EQ("b", result.second);
  }

  {
    // First and second and then more stuff that will be ignored.
    StringPair result = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ("a", result.first);
    EXPECT_EQ("b", result.second);
    // "c" is omitted.
  }
}
// Splits the same input with no predicate and with each library predicate.
TEST(Splitter, Predicates) {
  static const char kTestChars[] = ",a, ,b,";
  using absl::AllowEmpty;
  using absl::SkipEmpty;
  using absl::SkipWhitespace;

  {
    // No predicate. Does not skip empties.
    auto plain = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = plain;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // Allows empty strings. Same behavior as no predicate at all.
    auto with_allow_empty = absl::StrSplit(kTestChars, ',', AllowEmpty());
    std::vector<std::string> allow_empty_pieces = with_allow_empty;
    EXPECT_THAT(allow_empty_pieces, ElementsAre("", "a", " ", "b", ""));

    // Ensures AllowEmpty equals the behavior with no predicate.
    auto without_predicate = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> no_predicate_pieces = without_predicate;
    EXPECT_EQ(allow_empty_pieces, no_predicate_pieces);
  }

  {
    // Skips empty strings.
    auto with_skip_empty = absl::StrSplit(kTestChars, ',', SkipEmpty());
    std::vector<std::string> pieces = with_skip_empty;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // Skips empty and all-whitespace strings.
    auto with_skip_ws = absl::StrSplit(kTestChars, ',', SkipWhitespace());
    std::vector<std::string> pieces = with_skip_ws;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
//
// Tests for StrSplit()
//

// Discarded result, initialization of vectors, and assignment to existing
// containers.
TEST(Split, Basics) {
  {
    // The return value is ignored, so this does nothing useful, but it
    // should still work.
    absl::StrSplit("a,b,c", ',');
  }

  {
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Ensures that assignment works. This requires a little extra work with
    // C++11 because of overloads with initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
// Returns a temporary absl::string_view over the literal "Hello World".
absl::string_view ReturnStringView() {
  return absl::string_view("Hello World");
}
// Returns the literal "Hello World" as a const char*.
const char* ReturnConstCharP() {
  const char* const greeting = "Hello World";
  return greeting;
}
// Returns the literal "Hello World" as a non-const char*. The pointee is
// still a string literal and must not be written to.
char* ReturnCharP() {
  const char* const greeting = "Hello World";
  return const_cast<char*>(greeting);
}
// StrSplit() accepts temporaries of string_view, const char* and char*.
TEST(Split, AcceptsCertainTemporaries) {
  std::vector<std::string> words;

  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
// Splits a temporary std::string and iterates the splitter after the
// temporary expression has ended.
TEST(Split, Temporary) {
  // The input is longer than the small-string-optimization length, so if the
  // splitter merely referenced the temporary's contents it would read freed
  // heap memory rather than dead on-stack memory.
  const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
      << "Input should be larger than fits on the stack.";

  // This happens more often in C++11 as part of a range-based for loop.
  auto splitter = absl::StrSplit(std::string(input), ',');
  std::string expected = "a";
  for (absl::string_view piece : splitter) {
    EXPECT_EQ(expected, piece);
    ++expected[0];  // Step to the next letter of the alphabet.
  }
  EXPECT_EQ("v", expected);

  // This happens more often in C++11 as part of a range-based for loop.
  auto std_splitter = absl::StrSplit(std::string(input), ',');
  expected = "a";
  for (absl::string_view piece : std_splitter) {
    EXPECT_EQ(expected, piece);
    ++expected[0];
  }
  EXPECT_EQ("v", expected);
}
// Copy-constructs `value` on the heap and returns the owning pointer.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
// A splitter over an lvalue string stays usable when copied off the heap and
// the heap instance is destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string input = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(input, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the instance the copy was made from.
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
// A splitter over a temporary string stays usable when copied off the heap
// and the heap instance is destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the instance the copy was made from.
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
// Compile-time coverage of the four copy/move operations on a splitter, plus
// a check that the last assigned-to splitter still yields the input.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto original = absl::StrSplit("foo", '-');

  // Ensures that the following expressions compile.
  auto copied = original;            // Copy construct
  auto moved = std::move(original);  // Move construct
  copied = moved;                    // Copy assign
  moved = std::move(copied);         // Move assign

  EXPECT_THAT(moved, ElementsAre("foo"));
}
// The delimiter may be given as a char, a std::string or an
// absl::string_view.
TEST(Split, StringDelimiter) {
  {
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
// Tests splitting utf8 strings and utf8 delimiters.
TEST(Split, UTF8) {
  {
    // A utf8 input string with an ascii delimiter.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,κόσμε", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "κόσμε"));
  }

  {
    // A utf8 input string and a utf8 delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,κόσμε,b", ",κόσμε,");
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // A utf8 input string and ByAnyChar with ascii chars.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("Foo hällo th丞re", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", "hällo", "th丞re"));
  }
}
// Splitting on an empty string delimiter: the expectations below show that
// non-empty input is split into its individual characters, while empty input
// yields a single empty piece.
TEST(Split, EmptyStringDelimiter) {
{
std::vector<std::string> v = absl::StrSplit("", "");
EXPECT_THAT(v, ElementsAre(""));
}
{
std::vector<std::string> v = absl::StrSplit("a", "");
EXPECT_THAT(v, ElementsAre("a"));
}
{
std::vector<std::string> v = absl::StrSplit("ab", "");
EXPECT_THAT(v, ElementsAre("a", "b"));
}
{
// Whitespace is an ordinary character here and becomes its own piece.
std::vector<std::string> v = absl::StrSplit("a b", "");
EXPECT_THAT(v, ElementsAre("a", " ", "b"));
}
}
// Exercises a two-character delimiter ("//"). A lone '/' must not split, and
// runs of slashes are consumed two at a time from the left.
TEST(Split, SubstrDelimiter) {
std::vector<absl::string_view> results;
absl::string_view delim("//");
results = absl::StrSplit("", delim);
EXPECT_THAT(results, ElementsAre(""));
results = absl::StrSplit("//", delim);
EXPECT_THAT(results, ElementsAre("", ""));
results = absl::StrSplit("ab", delim);
EXPECT_THAT(results, ElementsAre("ab"));
results = absl::StrSplit("ab//", delim);
EXPECT_THAT(results, ElementsAre("ab", ""));
// A single '/' is only part of the delimiter and therefore not a match.
results = absl::StrSplit("ab/", delim);
EXPECT_THAT(results, ElementsAre("ab/"));
results = absl::StrSplit("a/b", delim);
EXPECT_THAT(results, ElementsAre("a/b"));
results = absl::StrSplit("a//b", delim);
EXPECT_THAT(results, ElementsAre("a", "b"));
// Three slashes: the first two form the delimiter, the third stays in the
// second piece.
results = absl::StrSplit("a///b", delim);
EXPECT_THAT(results, ElementsAre("a", "/b"));
// Four slashes: two adjacent delimiters with an empty piece between them.
results = absl::StrSplit("a////b", delim);
EXPECT_THAT(results, ElementsAre("a", "", "b"));
}
// Checks where empty pieces appear in the output: for empty input, and for
// leading, trailing and adjacent delimiters. The last case shows that
// absl::SkipEmpty() removes them.
TEST(Split, EmptyResults) {
std::vector<absl::string_view> results;
results = absl::StrSplit("", '#');
EXPECT_THAT(results, ElementsAre(""));
results = absl::StrSplit("#", '#');
EXPECT_THAT(results, ElementsAre("", ""));
results = absl::StrSplit("#cd", '#');
EXPECT_THAT(results, ElementsAre("", "cd"));
results = absl::StrSplit("ab#cd#", '#');
EXPECT_THAT(results, ElementsAre("ab", "cd", ""));
results = absl::StrSplit("ab##cd", '#');
EXPECT_THAT(results, ElementsAre("ab", "", "cd"));
results = absl::StrSplit("ab##", '#');
EXPECT_THAT(results, ElementsAre("ab", "", ""));
results = absl::StrSplit("ab#ab#", '#');
EXPECT_THAT(results, ElementsAre("ab", "ab", ""));
// Input consisting only of delimiters: N delimiters give N + 1 empty pieces.
results = absl::StrSplit("aaaa", 'a');
EXPECT_THAT(results, ElementsAre("", "", "", "", ""));
// With SkipEmpty, the single empty piece of an empty input is dropped.
results = absl::StrSplit("", '#', absl::SkipEmpty());
EXPECT_THAT(results, ElementsAre());
}
// Returns true if `d.Find(text, starting_pos)` reports a match that begins at
// offset `expected_pos` within `text`.
//
// A result whose data() points one past the last character of `text` is
// treated as "not found". The end position is computed from data() + size()
// rather than `text.end()`: when absl::string_view is an alias for
// std::string_view, the iterator type is implementation-defined and is not
// guaranteed to be comparable with a `const char*`.
template <typename Delimiter>
static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
                                 size_t starting_pos, int expected_pos) {
  absl::string_view found = d.Find(text, starting_pos);
  return found.data() != text.data() + text.size() &&
         expected_pos == found.data() - text.data();
}
// Helper function for testing Delimiter objects. Returns true if the given
// Delimiter is found in the given string at the given position. Two cases
// are checked, and both must succeed:
//   1. The actual text given, starting at position 0.
//   2. The text given with leading padding that should be ignored, i.e. the
//      search starts just past the padding and the match is expected at the
//      correspondingly shifted position.
template <typename Delimiter>
static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
  const std::string leading_text = ",x,y,z,";
  if (!IsFoundAtStartingPos(text, d, 0, expected_pos)) {
    return false;
  }
  const std::string padded_text = leading_text + std::string(text);
  return IsFoundAtStartingPos(padded_text, d, leading_text.length(),
                              expected_pos + leading_text.length());
}
//
// Tests for Literal
//
// Tests using any delimiter that represents a single comma. `d` is passed to
// IsFoundAt() for a series of inputs: first inputs that contain a comma (the
// expected position is that of the first comma), then inputs without one.
template <typename Delimiter>
void TestComma(Delimiter d) {
EXPECT_TRUE(IsFoundAt(",", d, 0));
EXPECT_TRUE(IsFoundAt("a,", d, 1));
EXPECT_TRUE(IsFoundAt(",b", d, 0));
EXPECT_TRUE(IsFoundAt("a,b", d, 1));
EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));
// No comma present: -1 serves as a placeholder position, since IsFoundAt()
// is expected to return false for these inputs.
EXPECT_FALSE(IsFoundAt("", d, -1));
EXPECT_FALSE(IsFoundAt(" ", d, -1));
EXPECT_FALSE(IsFoundAt("a", d, -1));
EXPECT_FALSE(IsFoundAt("a b c", d, -1));
EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
EXPECT_FALSE(IsFoundAt(";", d, -1));
}
// Tests the ByString delimiter with a single comma and with the empty string.
TEST(Delimiter, Literal) {
using absl::ByString;
TestComma(ByString(","));
// Works as named variable.
ByString comma_string(",");
TestComma(comma_string);
// The first occurrence of the empty string ("") in a string is at position 0.
// The check on abc.find("") below demonstrates this for
// absl::string_view::find(). If the ByString delimiter returned position 0
// for this, there would be an infinite loop in the SplitIterator code. To
// avoid this, the empty string is a special case in that it always returns
// the item at position 1.
absl::string_view abc("abc");
EXPECT_EQ(0, abc.find("")); // "" is found at position 0
ByString empty("");
// Inputs shorter than two characters have no position 1, so nothing is found.
EXPECT_FALSE(IsFoundAt("", empty, 0));
EXPECT_FALSE(IsFoundAt("a", empty, 0));
EXPECT_TRUE(IsFoundAt("ab", empty, 1));
EXPECT_TRUE(IsFoundAt("abc", empty, 1));
}
// Runs the shared single-comma checks (TestComma) against the ByChar
// delimiter, both as a temporary and as a named variable.
TEST(Split, ByChar) {
using absl::ByChar;
TestComma(ByChar(','));
// Works as named variable.
ByChar comma_char(',');
TestComma(comma_char);
}
//
// Tests for ByAnyChar
//
// Covers ByAnyChar with one delimiter character, with two delimiter
// characters, and with an empty set of delimiter characters.
TEST(Delimiter, ByAnyChar) {
using absl::ByAnyChar;
ByAnyChar one_delim(",");
// Found
EXPECT_TRUE(IsFoundAt(",", one_delim, 0));
EXPECT_TRUE(IsFoundAt("a,", one_delim, 1));
EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1));
EXPECT_TRUE(IsFoundAt(",b", one_delim, 0));
// Not found
EXPECT_FALSE(IsFoundAt("", one_delim, -1));
EXPECT_FALSE(IsFoundAt(" ", one_delim, -1));
EXPECT_FALSE(IsFoundAt("a", one_delim, -1));
EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1));
EXPECT_FALSE(IsFoundAt(";", one_delim, -1));
ByAnyChar two_delims(",;");
// Found: the expected position is that of whichever delimiter character
// occurs first in the input.
EXPECT_TRUE(IsFoundAt(",", two_delims, 0));
EXPECT_TRUE(IsFoundAt(";", two_delims, 0));
EXPECT_TRUE(IsFoundAt(",;", two_delims, 0));
EXPECT_TRUE(IsFoundAt(";,", two_delims, 0));
EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0));
EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0));
EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1));
EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1));
EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1));
EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1));
// Not found
EXPECT_FALSE(IsFoundAt("", two_delims, -1));
EXPECT_FALSE(IsFoundAt(" ", two_delims, -1));
EXPECT_FALSE(IsFoundAt("a", two_delims, -1));
EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1));
EXPECT_FALSE(IsFoundAt("=", two_delims, -1));
// ByAnyChar behaves just like ByString when given a delimiter of the empty
// string. That is, it always returns a zero-length absl::string_view
// referring to the item at position 1, not position 0.
ByAnyChar empty("");
EXPECT_FALSE(IsFoundAt("", empty, 0));
EXPECT_FALSE(IsFoundAt("a", empty, 0));
EXPECT_TRUE(IsFoundAt("ab", empty, 1));
EXPECT_TRUE(IsFoundAt("abc", empty, 1));
}
//
// Tests for ByLength
//
// With ByLength(4), a match is expected at offset 4 for any input longer than
// four characters, regardless of its content, and no match otherwise.
TEST(Delimiter, ByLength) {
using absl::ByLength;
ByLength four_char_delim(4);
// Found
EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4));
EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4));
EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4));
// Not found: inputs of at most four characters, including exactly four.
EXPECT_FALSE(IsFoundAt("", four_char_delim, 0));
EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0));
EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0));
EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0));
EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0));
}
// Splits a string of 2^31 'x' characters followed by a single '-' to check
// that inputs larger than 2 GiB are handled.
// Allocates too much memory for TSan and MSan.
#if !defined(THREAD_SANITIZER) && !defined(MEMORY_SANITIZER)
TEST(Split, WorksWithLargeStrings) {
// Only run where size_t is wider than 32 bits, and not under Valgrind.
if (sizeof(size_t) > 4 && !RunningOnValgrind()) {
std::string s(1ULL << 31, 'x');
s.push_back('-'); // 2G + 1 byte
std::vector<absl::string_view> v = absl::StrSplit(s, '-');
EXPECT_EQ(2, v.size());
// The first element will contain 2G of 'x's.
// testing::StartsWith is too slow with a 2G string, so only a few
// characters are spot-checked.
EXPECT_EQ('x', v[0][0]);
EXPECT_EQ('x', v[0][1]);
EXPECT_EQ('x', v[0][3]);
// The trailing '-' produces an empty final piece.
EXPECT_EQ("", v[1]);
}
}
#endif // !defined(THREAD_SANITIZER) && !defined(MEMORY_SANITIZER)
// Checks the internal type-trait helpers. Each trait is evaluated once for
// `int` (expected false) and once for a type that has the property in
// question (expected true).
TEST(SplitInternalTest, TypeTraits) {
EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value);
// The extra parentheses keep the comma in the template argument list from
// being parsed as a macro argument separator.
EXPECT_TRUE(
(absl::strings_internal::HasMappedType<std::map<int, int>>::value));
EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value);
EXPECT_TRUE(
(absl::strings_internal::HasValueType<std::map<int, int>>::value));
EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value);
EXPECT_TRUE(
(absl::strings_internal::HasConstIterator<std::map<int, int>>::value));
EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value);
EXPECT_TRUE((absl::strings_internal::IsInitializerList<
std::initializer_list<int>>::value));
}
} // namespace

248
absl/strings/string_view.cc Normal file
View file

@ -0,0 +1,248 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#ifndef ABSL_HAVE_STD_STRING_VIEW
#include <algorithm>
#include <climits>
#include <cstring>
#include <ostream>
#include <string>
#include "absl/strings/internal/memutil.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/match.h"
namespace absl {
namespace {
// Writes `pad` copies of the stream's current fill character to `o`. The
// characters are emitted from a small stack buffer in chunks of at most
// sizeof(fill_buf) bytes.
void WritePadding(std::ostream& o, size_t pad) {
  char fill_buf[32];
  memset(fill_buf, o.fill(), sizeof(fill_buf));
  for (size_t remaining = pad; remaining != 0;) {
    const size_t chunk =
        remaining < sizeof(fill_buf) ? remaining : sizeof(fill_buf);
    o.write(fill_buf, chunk);
    remaining -= chunk;
  }
}
// A set of characters with constant-time membership tests, used by the
// find_.*_of methods below. The table has one boolean slot per possible
// `unsigned char` value; a slot is true when that character was in the
// `wanted` string passed to the constructor.
class LookupTable {
 public:
  // Marks every character of `wanted` as present.
  explicit LookupTable(string_view wanted) {
    for (auto it = wanted.begin(); it != wanted.end(); ++it) {
      table_[Index(*it)] = true;
    }
  }

  // Returns true if `c` was one of the wanted characters.
  bool operator[](char c) const { return table_[Index(c)]; }

 private:
  // Maps a (possibly negative) char onto a valid array index.
  static unsigned char Index(char c) { return static_cast<unsigned char>(c); }

  bool table_[UCHAR_MAX + 1] = {};
};
} // namespace
// Stream insertion for `string_view`. Writes the characters of `piece` to
// `o`, padded with the stream's fill character up to `o.width()` when the
// view is shorter than the field width. Padding goes after the text when the
// stream is left-adjusted and before it otherwise. The field width is reset
// to 0 afterwards. Nothing is written if the stream's sentry reports failure.
std::ostream& operator<<(std::ostream& o, string_view piece) {
  std::ostream::sentry sentry(o);
  if (!sentry) return o;

  size_t pad = 0;
  const size_t field_width = static_cast<size_t>(o.width());
  if (field_width > piece.size()) pad = field_width - piece.size();
  const bool left_adjusted = (o.flags() & o.adjustfield) == o.left;

  if (pad != 0 && !left_adjusted) WritePadding(o, pad);
  o.write(piece.data(), piece.size());
  if (pad != 0 && left_adjusted) WritePadding(o, pad);
  o.width(0);
  return o;
}
// Copies at most `n` characters of the view, starting at offset `pos`, into
// `buf` and returns the number of characters actually copied, which is
// min(n, size() - pos). No nul terminator is written.
//
// An offset past the end of the view is rejected through
// base_internal::ThrowStdOutOfRange(), in the same way as substr(). The check
// is not a plain assert because, with assertions compiled out, `length_ - pos`
// would wrap around and the copy would read beyond the viewed data.
string_view::size_type string_view::copy(char* buf, size_type n,
                                         size_type pos) const {
  if (ABSL_PREDICT_FALSE(pos > length_)) {
    base_internal::ThrowStdOutOfRange("absl::string_view::copy");
  }
  const size_type rlen = std::min(length_ - pos, n);
  if (rlen > 0) {
    const char* start = ptr_ + pos;
    std::copy(start, start + rlen, buf);
  }
  return rlen;
}
// Returns the offset of the first occurrence of `s` that starts at or after
// `pos`, or `npos` if there is none.
string_view::size_type string_view::find(string_view s, size_type pos) const
    noexcept {
  if (empty() || pos > length_) {
    // The only search that succeeds here is for an empty needle at offset 0
    // of an empty view.
    const bool empty_in_empty = empty() && pos == 0 && s.empty();
    if (empty_in_empty) return 0;
    return npos;
  }
  const char* const match = strings_internal::memmatch(
      ptr_ + pos, length_ - pos, s.ptr_, s.length_);
  if (match == nullptr) return npos;
  return match - ptr_;
}
// Returns the offset of the first occurrence of `c` at or after `pos`, or
// `npos` if `c` does not occur there (including when `pos` is out of range).
string_view::size_type string_view::find(char c, size_type pos) const noexcept {
  if (empty() || pos >= length_) return npos;
  const void* const hit = memchr(ptr_ + pos, c, length_ - pos);
  if (hit == nullptr) return npos;
  return static_cast<const char*>(hit) - ptr_;
}
// Returns the offset of the last occurrence of `s` that starts at or before
// `pos`, or `npos` if there is none. An empty `s` matches at
// min(size(), pos).
string_view::size_type string_view::rfind(string_view s, size_type pos) const
    noexcept {
  if (length_ < s.length_) return npos;
  if (s.empty()) return std::min(length_, pos);
  // The latest offset at which a match may begin, and the end of the range
  // that therefore has to be searched.
  const size_type last_start = std::min(length_ - s.length_, pos);
  const char* const search_end = ptr_ + last_start + s.length_;
  const char* const match =
      std::find_end(ptr_, search_end, s.ptr_, s.ptr_ + s.length_);
  return match == search_end ? npos : match - ptr_;
}
// Returns the offset of the last occurrence of `c` within [0..pos]
// (inclusive), or `npos` if there is none. Passing pos == npos searches the
// whole view.
string_view::size_type string_view::rfind(char c, size_type pos) const
    noexcept {
  // Note: memrchr() is not available on Windows, hence the manual scan.
  if (empty()) return npos;
  size_type i = std::min(pos, length_ - 1);
  while (ptr_[i] != c) {
    if (i == 0) return npos;
    --i;
  }
  return i;
}
// Returns the offset of the first character at or after `pos` that is one of
// the characters in `s`, or `npos` if there is no such character.
string_view::size_type string_view::find_first_of(string_view s,
                                                  size_type pos) const
    noexcept {
  if (empty() || s.empty()) return npos;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_first_of(s.ptr_[0], pos);
  const LookupTable wanted(s);
  size_type i = pos;
  while (i < length_ && !wanted[ptr_[i]]) {
    ++i;
  }
  return i < length_ ? i : npos;
}
// Returns the offset of the first character at or after `pos` that is NOT one
// of the characters in `s`, or `npos` if every remaining character is in `s`.
string_view::size_type string_view::find_first_not_of(string_view s,
                                                      size_type pos) const
    noexcept {
  if (empty()) return npos;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos);
  const LookupTable excluded(s);
  size_type i = pos;
  while (i < length_ && excluded[ptr_[i]]) {
    ++i;
  }
  return i < length_ ? i : npos;
}
// Returns the offset of the first character at or after `pos` that differs
// from `c`, or `npos` if there is none.
string_view::size_type string_view::find_first_not_of(char c,
                                                      size_type pos) const
    noexcept {
  if (empty()) return npos;
  size_type i = pos;
  while (i < length_ && ptr_[i] == c) {
    ++i;
  }
  return i < length_ ? i : npos;
}
// Returns the offset of the last character at or before `pos` that is one of
// the characters in `s`, or `npos` if there is no such character.
string_view::size_type string_view::find_last_of(string_view s,
                                                 size_type pos) const noexcept {
  if (empty() || s.empty()) return npos;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_last_of(s.ptr_[0], pos);
  const LookupTable wanted(s);
  // Scan backwards from the clamped start position down to offset 0.
  size_type i = std::min(pos, length_ - 1);
  while (!wanted[ptr_[i]]) {
    if (i == 0) return npos;
    --i;
  }
  return i;
}
// Returns the offset of the last character at or before `pos` that is NOT one
// of the characters in `s`, or `npos` if there is no such character. With an
// empty `s`, the clamped start position itself is returned.
string_view::size_type string_view::find_last_not_of(string_view s,
                                                     size_type pos) const
    noexcept {
  if (empty()) return npos;
  size_type i = std::min(pos, length_ - 1);
  if (s.empty()) return i;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos);
  const LookupTable excluded(s);
  // Scan backwards from the clamped start position down to offset 0.
  while (excluded[ptr_[i]]) {
    if (i == 0) return npos;
    --i;
  }
  return i;
}
// Returns the offset of the last character at or before `pos` that differs
// from `c`, or `npos` if there is none.
string_view::size_type string_view::find_last_not_of(char c,
                                                     size_type pos) const
    noexcept {
  if (empty()) return npos;
  // Scan backwards from the clamped start position down to offset 0.
  size_type i = std::min(pos, length_ - 1);
  while (ptr_[i] == c) {
    if (i == 0) return npos;
    --i;
  }
  return i;
}
// Out-of-class definitions for the `static constexpr` data members `npos` and
// `kMaxSize` that are declared (with their initializers) inside the
// `string_view` class.
//
// MSVC has non-standard behavior that implicitly creates definitions for static
// const members. These implicit definitions conflict with explicit out-of-class
// member definitions that are required by the C++ standard, resulting in
// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks
// MSVC to choose only one definition for the symbol it decorates. See details
// at http://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx
#ifdef _MSC_VER
#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany)
#else
// On other compilers the macro expands to nothing.
#define ABSL_STRING_VIEW_SELECTANY
#endif
ABSL_STRING_VIEW_SELECTANY
constexpr string_view::size_type string_view::npos;
ABSL_STRING_VIEW_SELECTANY
constexpr string_view::size_type string_view::kMaxSize;
} // namespace absl
#endif // ABSL_HAVE_STD_STRING_VIEW

572
absl/strings/string_view.h Normal file
View file

@ -0,0 +1,572 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: string_view.h
// -----------------------------------------------------------------------------
//
// This file contains the definition of the `absl::string_view` class. A
// `string_view` points to a contiguous span of characters, often part or all of
// another `std::string`, double-quoted string literal, character array, or even
// another `string_view`.
//
// This `absl::string_view` abstraction is designed to be a drop-in
// replacement for the C++17 `std::string_view` abstraction.
#ifndef ABSL_STRINGS_STRING_VIEW_H_
#define ABSL_STRINGS_STRING_VIEW_H_
#include <algorithm>
#include "absl/base/config.h"
#ifdef ABSL_HAVE_STD_STRING_VIEW
#include <string_view>
// When the standard library provides std::string_view, absl::string_view is
// simply an alias for it.
namespace absl {
using std::string_view;
}  // namespace absl
#else // ABSL_HAVE_STD_STRING_VIEW
#include <cassert>
#include <cstddef>
#include <cstring>
#include <iosfwd>
#include <iterator>
#include <limits>
#include <string>
#include "absl/base/internal/throw_delegate.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace absl {
// absl::string_view
//
// A `string_view` provides a lightweight view into the string data provided by
// a `std::string`, double-quoted string literal, character array, or even
// another `string_view`. A `string_view` does *not* own the string to which it
// points, and that data cannot be modified through the view.
//
// You can use `string_view` as a function or method parameter anywhere a
// parameter can receive a double-quoted string literal, `const char*`,
// `std::string`, or another `absl::string_view` argument with no need to copy
// the string data. Systematic use of `string_view` within function arguments
// reduces data copies and `strlen()` calls.
//
// Because of its small size, prefer passing `string_view` by value:
//
// void MyFunction(absl::string_view arg);
//
// If circumstances require, you may also pass one by const reference:
//
// void MyFunction(const absl::string_view& arg); // not preferred
//
// Passing by value generates slightly smaller code for many architectures.
//
// In either case, the source data of the `string_view` must outlive the
// `string_view` itself.
//
// A `string_view` is also suitable for local variables if you know that the
// lifetime of the underlying object is longer than the lifetime of your
// `string_view` variable. However, beware of binding a `string_view` to a
// temporary value:
//
// // BAD use of string_view: lifetime problem
// absl::string_view sv = obj.ReturnAString();
//
// // GOOD use of string_view: str outlives sv
// std::string str = obj.ReturnAString();
// absl::string_view sv = str;
//
// Due to lifetime issues, a `string_view` is sometimes a poor choice for a
// return value and usually a poor choice for a data member. If you do use a
// `string_view` this way, it is your responsibility to ensure that the object
// pointed to by the `string_view` outlives the `string_view`.
//
// A `string_view` may represent a whole string or just part of a string. For
// example, when splitting a string, `std::vector<absl::string_view>` is a
// natural data type for the output.
//
//
// When constructed from a source which is nul-terminated, the `string_view`
// itself will not include the nul-terminator unless a specific size (including
// the nul) is passed to the constructor. As a result, common idioms that work
// on nul-terminated strings do not work on `string_view` objects. If you write
// code that scans a `string_view`, you must check its length rather than test
// for nul, for example. Note, however, that nuls may still be embedded within
// a `string_view` explicitly.
//
// You may create a null `string_view` in two ways:
//
//   absl::string_view sv;
//   absl::string_view sv(nullptr, 0);
//
// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
// `sv.empty() == true`. Also, if you create a `string_view` with a non-null
// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to
// signal an undefined value that is different from other `string_view` values
// in a similar fashion to how `const char* p1 = nullptr;` is different from
// `const char* p2 = "";`. However, in practice, it is not recommended to rely
// on this behavior.
//
// Be careful not to confuse a null `string_view` with an empty one. A null
// `string_view` is an empty `string_view`, but some empty `string_view`s are
// not null. Prefer checking for emptiness over checking for null.
//
// There are many ways to create an empty string_view:
//
// const char* nullcp = nullptr;
// // string_view.size() will return 0 in all cases.
// absl::string_view();
// absl::string_view(nullcp, 0);
// absl::string_view("");
// absl::string_view("", 0);
// absl::string_view("abcdef", 0);
// absl::string_view("abcdef" + 6, 0);
//
// All empty `string_view` objects, whether null or not, are equal:
//
//   absl::string_view() == absl::string_view("", 0)
//   absl::string_view(nullptr, 0) == absl::string_view("abcdef" + 6, 0)
class string_view {
public:
using traits_type = std::char_traits<char>;
using value_type = char;
using pointer = char*;
using const_pointer = const char*;
using reference = char&;
using const_reference = const char&;
using const_iterator = const char*;
using iterator = const_iterator;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
using reverse_iterator = const_reverse_iterator;
using size_type = size_t;
using difference_type = std::ptrdiff_t;
static constexpr size_type npos = static_cast<size_type>(-1);
// Null `string_view` constructor
constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}
// Implicit constructors
template <typename Allocator>
string_view( // NOLINT(runtime/explicit)
const std::basic_string<char, std::char_traits<char>, Allocator>&
str) noexcept
: ptr_(str.data()), length_(str.size()) {}
// Implicit constructor of a `string_view` from nul-terminated `str`. When
// accepting possibly null strings, use `absl::NullSafeStringView(str)`
// instead (see below).
constexpr string_view(const char* str) // NOLINT(runtime/explicit)
: ptr_(str), length_(StrLenInternal(str)) {}
// Implicit consructor of a `string_view` from a `const char*` and length
constexpr string_view(const char* data, size_type len)
: ptr_(data), length_(CheckLengthInternal(len)) {}
// NOTE(b/36227513): harmlessly omitted to work around gdb bug.
// constexpr string_view(const string_view&) noexcept = default;
// string_view& operator=(const string_view&) noexcept = default;
// Iterators
// string_view::begin()
//
// Returns an iterator pointing to the first character at the beginning of the
// `string_view`, or `end()` if the `string_view` is empty.
constexpr const_iterator begin() const noexcept { return ptr_; }
// string_view::end()
//
// Returns an iterator pointing just beyond the last character at the end of
// the `string_view`. This iterator acts as a placeholder; attempting to
// access it results in undefined behavior.
constexpr const_iterator end() const noexcept { return ptr_ + length_; }
// string_view::cbegin()
//
// Returns a const iterator pointing to the first character at the beginning
// of the `string_view`, or `end()` if the `string_view` is empty.
constexpr const_iterator cbegin() const noexcept { return begin(); }
// string_view::cend()
//
// Returns a const iterator pointing just beyond the last character at the end
// of the `string_view`. This pointer acts as a placeholder; attempting to
// access its element results in undefined behavior.
constexpr const_iterator cend() const noexcept { return end(); }
// string_view::rbegin()
//
// Returns a reverse iterator pointing to the last character at the end of the
// `string_view`, or `rend()` if the `string_view` is empty.
const_reverse_iterator rbegin() const noexcept {
return const_reverse_iterator(end());
}
// string_view::rend()
//
// Returns a reverse iterator pointing just before the first character at the
// beginning of the `string_view`. This pointer acts as a placeholder;
// attempting to access its element results in undefined behavior.
const_reverse_iterator rend() const noexcept {
return const_reverse_iterator(begin());
}
// string_view::crbegin()
//
// Returns a const reverse iterator pointing to the last character at the end
// of the `string_view`, or `crend()` if the `string_view` is empty.
const_reverse_iterator crbegin() const noexcept { return rbegin(); }
// string_view::crend()
//
// Returns a const reverse iterator pointing just before the first character
// at the beginning of the `string_view`. This pointer acts as a placeholder;
// attempting to access its element results in undefined behavior.
const_reverse_iterator crend() const noexcept { return rend(); }
// Capacity Utilities
// string_view::size()
//
// Returns the number of characters in the `string_view`.
constexpr size_type size() const noexcept {
return length_;
}
// string_view::length()
//
// Returns the number of characters in the `string_view`. Alias for `size()`.
constexpr size_type length() const noexcept { return size(); }
// string_view::max_size()
//
// Returns the maximum number of characters the `string_view` can hold.
constexpr size_type max_size() const noexcept { return kMaxSize; }
// string_view::empty()
//
// Checks if the `string_view` is empty (refers to no characters).
constexpr bool empty() const noexcept { return length_ == 0; }
// std::string:view::operator[]
//
// Returns the ith element of an `string_view` using the array operator.
// Note that this operator does not perform any bounds checking.
constexpr const_reference operator[](size_type i) const { return ptr_[i]; }
// string_view::front()
//
// Returns the first element of a `string_view`.
constexpr const_reference front() const { return ptr_[0]; }
// string_view::back()
//
// Returns the last element of a `string_view`.
constexpr const_reference back() const { return ptr_[size() - 1]; }
// string_view::data()
//
// Returns a pointer to the underlying character array (which is of course
// stored elsewhere). Note that `string_view::data()` may contain embedded nul
// characters, but the returned buffer may or may not be nul-terminated;
// therefore, do not pass `data()` to a routine that expects a nul-terminated
// std::string.
constexpr const_pointer data() const noexcept { return ptr_; }
// Modifiers
// string_view::remove_prefix()
//
// Removes the first `n` characters from the `string_view`, returning a
// pointer to the new first character. Note that the underlying std::string is not
// changed, only the view.
void remove_prefix(size_type n) {
assert(n <= length_);
ptr_ += n;
length_ -= n;
}
// string_view::remove_suffix()
//
// Removes the last `n` characters from the `string_view`. Note that the
// underlying std::string is not changed, only the view.
void remove_suffix(size_type n) {
assert(n <= length_);
length_ -= n;
}
// string_view::swap()
//
// Swaps this `string_view` with another `string_view`.
void swap(string_view& s) noexcept {
auto t = *this;
*this = s;
s = t;
}
// Explicit conversion operators
// Supports conversion to both `std::basic_string` where available.
template <typename A>
explicit operator std::basic_string<char, traits_type, A>() const {
if (!data()) return {};
return std::basic_string<char, traits_type, A>(data(), size());
}
// string_view::copy()
//
// Copies the contents of the `string_view` at offset `pos` and length `n`
// into `buf`.
size_type copy(char* buf, size_type n, size_type pos = 0) const;
// string_view::substr()
//
// Returns a "substring" of the `string_view` (at offset `post` and length
// `n`) as another std::string views. This function throws `std::out_of_bounds` if
// `pos > size'.
string_view substr(size_type pos, size_type n = npos) const {
if (ABSL_PREDICT_FALSE(pos > length_))
base_internal::ThrowStdOutOfRange("absl::string_view::substr");
n = std::min(n, length_ - pos);
return string_view(ptr_ + pos, n);
}
// string_view::compare()
//
// Performs a lexicographical comparison between the `string_view` and
// another `absl::string_view), returning -1 if `this` is less than, 0 if
// `this` is equal to, and 1 if `this` is greater than the passed std::string
// view. Note that in the case of data equality, a further comparison is made
// on the respective sizes of the two `string_view`s to determine which is
// smaller, equal, or greater.
int compare(string_view x) const noexcept {
auto min_length = std::min(length_, x.length_);
if (min_length > 0) {
int r = memcmp(ptr_, x.ptr_, min_length);
if (r < 0) return -1;
if (r > 0) return 1;
}
if (length_ < x.length_) return -1;
if (length_ > x.length_) return 1;
return 0;
}
// Overload of `string_view::compare()` for comparing a substring of the
// 'string_view` and another `absl::string_view`.
int compare(size_type pos1, size_type count1, string_view v) const {
return substr(pos1, count1).compare(v);
}
// Overload of `string_view::compare()` for comparing a substring of the
// `string_view` and a substring of another `absl::string_view`.
int compare(size_type pos1, size_type count1, string_view v, size_type pos2,
size_type count2) const {
return substr(pos1, count1).compare(v.substr(pos2, count2));
}
// Overload of `string_view::compare()` for comparing a `string_view` and a
// a different C-style std::string `s`.
int compare(const char* s) const { return compare(string_view(s)); }
// Overload of `string_view::compare()` for comparing a substring of the
// `string_view` and a different std::string C-style std::string `s`.
int compare(size_type pos1, size_type count1, const char* s) const {
return substr(pos1, count1).compare(string_view(s));
}
// Overload of `string_view::compare()` for comparing a substring of the
// `string_view` and a substring of a different C-style std::string `s`.
int compare(size_type pos1, size_type count1, const char* s,
size_type count2) const {
return substr(pos1, count1).compare(string_view(s, count2));
}
// Find Utilities
// string_view::find()
//
// Finds the first occurrence of the substring `s` within the `string_view`,
// returning the position of the first character's match, or `npos` if no
// match was found.
size_type find(string_view s, size_type pos = 0) const noexcept;
// Overload of `string_view::find()` for finding the given character `c`
// within the `string_view`.
size_type find(char c, size_type pos = 0) const noexcept;
// string_view::rfind()
//
// Finds the last occurrence of a substring `s` within the `string_view`,
// returning the position of the first character's match, or `npos` if no
// match was found.
size_type rfind(string_view s, size_type pos = npos) const
noexcept;
// Overload of `string_view::rfind()` for finding the given character `c`
// within the `string_view`.
size_type rfind(char c, size_type pos = npos) const noexcept;
// string_view::find_first_of()
//
// Finds the first occurrence of any of the characters in `s` within the
// `string_view`, returning the start position of the match, or `npos` if no
// match was found.
size_type find_first_of(string_view s, size_type pos = 0) const
noexcept;
// Overload of `string_view::find_first_of()` for finding a character `c`
// within the `string_view`.
size_type find_first_of(char c, size_type pos = 0) const
noexcept {
return find(c, pos);
}
// string_view::find_last_of()
//
// Finds the last occurrence of any of the characters in `s` within the
// `string_view`, returning the start position of the match, or `npos` if no
// match was found.
size_type find_last_of(string_view s, size_type pos = npos) const
noexcept;
// Overload of `string_view::find_last_of()` for finding a character `c`
// within the `string_view`.
size_type find_last_of(char c, size_type pos = npos) const
noexcept {
return rfind(c, pos);
}
// string_view::find_first_not_of()
//
// Finds the first occurrence of any of the characters not in `s` within the
// `string_view`, returning the start position of the first non-match, or
// `npos` if no non-match was found.
size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept;
// Overload of `string_view::find_first_not_of()` for finding a character
// that is not `c` within the `string_view`.
size_type find_first_not_of(char c, size_type pos = 0) const noexcept;
// string_view::find_last_not_of()
//
// Finds the last character within the `string_view` that is not one of the
// characters in `s`, returning the position of that character, or `npos` if
// no such character was found.
size_type find_last_not_of(string_view s,
size_type pos = npos) const noexcept;
// Overload of `string_view::find_last_not_of()` for finding a character
// that is not `c` within the `string_view`.
size_type find_last_not_of(char c, size_type pos = npos) const
noexcept;
private:
// Upper bound on a length accepted by `CheckLengthInternal()`: half of the
// largest value representable in `size_type`, plus one.
static constexpr size_type kMaxSize =
std::numeric_limits<size_type>::max() / 2 + 1;
// Returns the length of the NUL-terminated string `str`, or 0 when `str` is
// null.
//
// The length is computed with `__builtin_strlen` when the preprocessor check
// below says the compiler provides it, and with plain `strlen` otherwise.
// NOTE(review): the builtin is presumably preferred so that this constexpr
// function can be evaluated at compile time -- confirm.
static constexpr size_type StrLenInternal(const char* str) {
return str ?
// check whether __builtin_strlen is provided by the compiler.
// GCC doesn't have __has_builtin()
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66970),
// but has __builtin_strlen according to
// https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html.
#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \
(defined(__GNUC__) && !defined(__clang__))
__builtin_strlen(str)
#else
strlen(str)
#endif
: 0;
}
// Returns `len` unchanged after asserting, via ABSL_ASSERT, that it does not
// exceed `kMaxSize`. The comma expression keeps the whole body a single
// return statement.
static constexpr size_type CheckLengthInternal(size_type len) {
return ABSL_ASSERT(len <= kMaxSize), len;
}
const char* ptr_;
size_type length_;
};
// Equality comparison. Two views are equal when they have the same length and
// the same bytes.
//
// Defined inline so that in the fairly common case where one of the arguments
// is a literal, the compiler can elide most of the comparisons below.
inline bool operator==(string_view x, string_view y) noexcept {
  const auto len = x.size();
  if (len != y.size()) return false;
  // Same storage or nothing to compare: equal without touching the bytes.
  if (x.data() == y.data() || len <= 0) return true;
  return memcmp(x.data(), y.data(), len) == 0;
}
// Inequality comparison, defined as the negation of `x == y`.
inline bool operator!=(string_view x, string_view y) noexcept {
return !(x == y);
}
// Lexicographic less-than. The bytes of the common-length prefix are compared
// with memcmp(); when that prefix is equal (or empty) the shorter view orders
// first.
inline bool operator<(string_view x, string_view y) noexcept {
  const auto common = std::min(x.size(), y.size());
  if (common != 0) {
    const int order = memcmp(x.data(), y.data(), common);
    if (order != 0) return order < 0;
  }
  return x.size() < y.size();
}
// Greater-than, expressed as `y < x` so only `operator<` does the real work.
inline bool operator>(string_view x, string_view y) noexcept { return y < x; }
// Less-than-or-equal, expressed as "not (y < x)".
inline bool operator<=(string_view x, string_view y) noexcept {
return !(y < x);
}
// Greater-than-or-equal, expressed as "not (x < y)".
inline bool operator>=(string_view x, string_view y) noexcept {
return !(x < y);
}
// IO Insertion Operator
std::ostream& operator<<(std::ostream& o, string_view piece);
} // namespace absl
#endif // ABSL_HAVE_STD_STRING_VIEW
namespace absl {
// ClippedSubstr()
//
// Like `s.substr(pos, n)`, except that a `pos` beyond the end of `s` is first
// clipped to `s.size()`, so the call yields an empty view instead of failing.
// Provided because std::string_view::substr throws if `pos > size()`,
// to support b/37991613.
inline string_view ClippedSubstr(string_view s, size_t pos,
                                 size_t n = string_view::npos) {
  const size_t limit = static_cast<size_t>(s.size());
  const size_t start = (limit < pos) ? limit : pos;
  return s.substr(start, n);
}
// NullSafeStringView()
//
// Creates an `absl::string_view` from the pointer `p`, which is allowed to be
// null: a null `p` yields a default-constructed (empty) view, any other `p`
// yields `string_view(p)`. Use this wherever a `string_view` must be built
// from a possibly-null pointer.
inline string_view NullSafeStringView(const char* p) {
  if (p == nullptr) {
    return string_view();
  }
  return string_view(p);
}
} // namespace absl
#endif // ABSL_STRINGS_STRING_VIEW_H_

File diff suppressed because it is too large Load diff

269
absl/strings/strip.cc Normal file
View file

@ -0,0 +1,269 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file contains functions that remove a defined part from a string,
// i.e., strip the string.
#include "absl/strings/strip.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include <string>
#include "absl/strings/ascii.h"
#include "absl/strings/string_view.h"
// ----------------------------------------------------------------------
// ReplaceCharacters
// Replaces any occurrence of the character 'remove' (or the characters
// in 'remove') with the character 'replace_with'.
// ----------------------------------------------------------------------
// Buffer overload: rewrites, in place, every byte of [str, str + len) that
// occurs in `remove` to `replace_with`.
void ReplaceCharacters(char* str, size_t len, absl::string_view remove,
                       char replace_with) {
  char* const limit = str + len;
  for (char* cur = str; cur != limit; ++cur) {
    const bool listed = remove.find(*cur) != absl::string_view::npos;
    if (listed) *cur = replace_with;
  }
}
// std::string overload: rewrites, in place, every character of `*s` that
// occurs in `remove` to `replace_with`.
void ReplaceCharacters(std::string* s, absl::string_view remove, char replace_with) {
  for (std::string::iterator it = s->begin(); it != s->end(); ++it) {
    if (remove.find(*it) == absl::string_view::npos) continue;  // keep as is
    *it = replace_with;
  }
}
// Removes one trailing line terminator from `*s`: either "\n" or "\r\n".
// Returns true if something was removed, false if `*s` does not end in '\n'
// (in which case `*s` is left untouched).
bool StripTrailingNewline(std::string* s) {
  const size_t len = s->size();
  if (len == 0 || (*s)[len - 1] != '\n') return false;
  // A '\r' immediately before the '\n' belongs to the terminator as well.
  const bool has_cr = len > 1 && (*s)[len - 2] == '\r';
  s->resize(len - (has_cr ? 2 : 1));
  return true;
}
// ----------------------------------------------------------------------
// Misc. stripping routines
// ----------------------------------------------------------------------
// StripBrackets() is defined further down in this file. It is declared here
// because StripCurlyBraces() calls it before that definition and a function
// must be declared before it is used.
void StripBrackets(char left, char right, std::string* s);

// Removes every "{...}" span, braces included, from `*s` by delegating to
// StripBrackets() with '{' and '}' as the delimiters.
void StripCurlyBraces(std::string* s) {
  StripBrackets('{', '}', s);
}
// Removes from `*s` every span that starts at a `left` character and ends at
// the next `right` character, delimiters included. Scanning proceeds left to
// right; a `left` with no `right` after it ends the scan and the rest of the
// string is kept unchanged.
void StripBrackets(char left, char right, std::string* s) {
  std::string::size_type open = s->find(left);
  while (open != std::string::npos) {
    const std::string::size_type close = s->find(right, open);
    if (close == std::string::npos) return;  // unterminated span: stop
    s->erase(open, close - open + 1);
    // Whatever followed the erased span now starts at `open`.
    open = s->find(left, open);
  }
}
// Removes every "<...>" span from `*s` in place, compacting the remaining
// characters towards the front. If a '<' has no matching '>' after it,
// everything from that '<' to the end of the string is dropped.
void StripMarkupTags(std::string* s) {
  const std::string::size_type len = s->size();
  // Nothing before the first '<' needs to move.
  std::string::size_type write = s->find('<');
  if (write == std::string::npos) write = len;
  std::string::size_type read = write;
  while (read < len) {
    const char ch = (*s)[read];
    if (ch != '<') {
      (*s)[write++] = ch;
      ++read;
      continue;
    }
    // Skip over the tag. Writes only touch positions below `read`, so the
    // search sees the original characters.
    const std::string::size_type close = s->find('>', read);
    if (close == std::string::npos) break;
    read = close + 1;
  }
  s->resize(write);
}
// Non-mutating counterpart of StripMarkupTags(): copies `s`, runs
// StripMarkupTags() on the copy and returns the copy. `s` itself is not
// modified.
std::string OutputWithMarkupTagsStripped(const std::string& s) {
std::string result(s);
StripMarkupTags(&result);
return result;
}
ptrdiff_t TrimStringLeft(std::string* s, absl::string_view remove) {
size_t i = 0;
while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
++i;
}
if (i > 0) s->erase(0, i);
return i;
}
ptrdiff_t TrimStringRight(std::string* s, absl::string_view remove) {
size_t i = s->size(), trimmed = 0;
while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) {
--i;
}
if (i < s->size()) {
trimmed = s->size() - i;
s->erase(i);
}
return trimmed;
}
// absl::string_view has no erase(), so the string_view overloads of the trim
// functions are implemented with remove_prefix()/remove_suffix() instead.

// Drops from the front of `*s` the longest run of characters that all occur
// in `remove`, and returns the number of characters dropped. Only the view is
// adjusted; the underlying characters are not modified.
//
// Membership is tested with `remove.find()` rather than calling memchr() on
// `remove.data()`: a default-constructed string_view has a null data pointer,
// and passing a null pointer to memchr() is undefined even when the length is
// zero. An empty `remove` therefore trims nothing.
ptrdiff_t TrimStringLeft(absl::string_view* s, absl::string_view remove) {
  size_t i = 0;
  while (i < s->size() && remove.find((*s)[i]) != absl::string_view::npos) {
    ++i;
  }
  if (i > 0) s->remove_prefix(i);
  return static_cast<ptrdiff_t>(i);
}
// Drops from the end of `*s` the longest run of characters that all occur in
// `remove`, and returns the number of characters dropped. Only the view is
// adjusted; the underlying characters are not modified.
//
// Membership is tested with `remove.find()` rather than calling memchr() on
// `remove.data()`: a default-constructed string_view has a null data pointer,
// and passing a null pointer to memchr() is undefined even when the length is
// zero. An empty `remove` therefore trims nothing.
ptrdiff_t TrimStringRight(absl::string_view* s, absl::string_view remove) {
  size_t i = s->size();
  while (i > 0 && remove.find((*s)[i - 1]) != absl::string_view::npos) {
    --i;
  }
  const size_t trimmed = s->size() - i;
  if (trimmed > 0) s->remove_suffix(trimmed);
  return static_cast<ptrdiff_t>(trimmed);
}
// ----------------------------------------------------------------------
// Various removal routines
// ----------------------------------------------------------------------
// Removes every occurrence of `c` from the NUL-terminated string `str`, in
// place, re-terminates the result and returns its new length.
ptrdiff_t strrm(char* str, char c) {
  char* out = str;
  for (const char* in = str; *in != '\0'; ++in) {
    if (*in == c) continue;  // dropped
    *out++ = *in;
  }
  *out = '\0';
  return out - str;
}
// Removes every occurrence of `c` from the first `strlen` bytes of `str`, in
// place, and returns the number of bytes kept. Unlike strrm(), the buffer is
// length-delimited: embedded NULs are ordinary bytes and no terminator is
// written. A non-positive `strlen` processes nothing and returns 0.
ptrdiff_t memrm(char* str, ptrdiff_t strlen, char c) {
  char* out = str;
  for (ptrdiff_t i = 0; i < strlen; ++i) {
    const char ch = str[i];
    if (ch != c) *out++ = ch;
  }
  return out - str;
}
// Removes from the NUL-terminated string `str`, in place, every character that
// occurs in the NUL-terminated set `chars`. Re-terminates the result and
// returns its new length.
ptrdiff_t strrmm(char* str, const char* chars) {
  char* out = str;
  for (const char* in = str; *in != '\0'; ++in) {
    // Walk the set until the current character or the end of the set is found.
    const char* probe = chars;
    while (*probe != '\0' && *probe != *in) ++probe;
    if (*probe == '\0') *out++ = *in;  // not in the set: keep
  }
  *out = '\0';
  return out - str;
}
// Removes from `*str`, in place, every character that occurs in `chars`, and
// returns the resulting length. When no character of `chars` occurs in `*str`
// the string is not touched at all.
ptrdiff_t strrmm(std::string* str, const std::string& chars) {
  const size_t total = str->length();
  const size_t first_hit = str->find_first_of(chars);
  if (first_hit == std::string::npos) return total;
  // Everything before the first hit is already in place; compact the rest.
  size_t write = first_hit;
  for (size_t read = first_hit + 1; read < total; ++read) {
    const char ch = (*str)[read];
    if (chars.find(ch) != std::string::npos) continue;
    (*str)[write++] = ch;
  }
  str->resize(write);
  return write;
}
// ----------------------------------------------------------------------
// StripDupCharacters
// Replaces any repeated occurrence of the character 'dup_char'
// with single occurrence. e.g.,
// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
// Return the number of characters removed
// ----------------------------------------------------------------------
ptrdiff_t StripDupCharacters(std::string* s, char dup_char, ptrdiff_t start_pos) {
  // A negative start position means "from the beginning".
  const ptrdiff_t first = start_pos < 0 ? 0 : start_pos;
  const ptrdiff_t limit = s->size();
  ptrdiff_t write = first;    // next position to fill
  ptrdiff_t dropped = 0;      // characters squeezed out so far
  bool in_run = false;        // previous kept character was `dup_char`
  for (ptrdiff_t read = first; read < limit; ++read) {
    const char ch = (*s)[read];
    if (in_run && ch == dup_char) {
      ++dropped;  // repeat within a run: skip it
      continue;
    }
    if (write != read) (*s)[write] = ch;  // copy only once a gap has opened
    ++write;
    in_run = (ch == dup_char);
  }
  s->resize(s->size() - dropped);
  return dropped;
}
// ----------------------------------------------------------------------
// TrimRunsInString
// Removes leading and trailing runs, and collapses middle
// runs of a set of characters into a single character (the
// first one specified in 'remove'). Useful for collapsing
// runs of repeated delimiters, whitespace, etc. E.g.,
// TrimRunsInString(&s, " :,()") removes leading and trailing
// delimiter chars and collapses and converts internal runs
// of delimiters to single ' ' characters, so, for example,
// " a:(b):c " -> "a b c"
// "first,last::(area)phone, ::zip" -> "first last area phone zip"
// ----------------------------------------------------------------------
void TrimRunsInString(std::string* s, absl::string_view remove) {
  std::string::iterator out = s->begin();
  const std::string::iterator end = s->end();
  // True while a run of `remove` characters has been skipped but not yet
  // accounted for in the output.
  bool pending_run = false;
  for (std::string::iterator in = s->begin(); in != end; ++in) {
    if (remove.find(*in) != absl::string_view::npos) {
      pending_run = true;
      continue;
    }
    if (pending_run) {
      // A run at the very start is dropped; an internal run collapses to the
      // first character of `remove`. A trailing run never reaches this point.
      if (out != s->begin()) *out++ = remove[0];
      pending_run = false;
    }
    *out++ = *in;
  }
  s->erase(out, end);
}
// ----------------------------------------------------------------------
// RemoveNullsInString
// Removes any internal \0 characters from the std::string.
// ----------------------------------------------------------------------
void RemoveNullsInString(std::string* s) {
  // Shift the non-NUL characters to the front, then cut off the leftover tail.
  const std::string::iterator kept_end =
      std::remove(s->begin(), s->end(), '\0');
  s->erase(kept_end, s->end());
}

89
absl/strings/strip.h Normal file
View file

@ -0,0 +1,89 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: strip.h
// -----------------------------------------------------------------------------
//
// This file contains various functions for stripping substrings from a string.
#ifndef ABSL_STRINGS_STRIP_H_
#define ABSL_STRINGS_STRIP_H_
#include <cstddef>
#include <string>
#include "absl/base/macros.h"
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
namespace absl {
// ConsumePrefix()
//
// If `*str` starts with `expected`, removes that prefix from `*str` and
// returns `true`. Otherwise leaves `*str` unchanged and returns `false`.
//
// Example:
//
//   absl::string_view input("abc");
//   EXPECT_TRUE(absl::ConsumePrefix(&input, "a"));
//   EXPECT_EQ(input, "bc");
inline bool ConsumePrefix(absl::string_view* str, absl::string_view expected) {
  const bool matched = absl::StartsWith(*str, expected);
  if (matched) {
    str->remove_prefix(expected.size());
  }
  return matched;
}
// ConsumeSuffix()
//
// If `*str` ends with `expected`, removes that suffix from `*str` and returns
// `true`. Otherwise leaves `*str` unchanged and returns `false`.
//
// Example:
//
//   absl::string_view input("abcdef");
//   EXPECT_TRUE(absl::ConsumeSuffix(&input, "def"));
//   EXPECT_EQ(input, "abc");
inline bool ConsumeSuffix(absl::string_view* str, absl::string_view expected) {
  const bool matched = absl::EndsWith(*str, expected);
  if (matched) {
    str->remove_suffix(expected.size());
  }
  return matched;
}
// StripPrefix()
//
// Returns a view of `str` with the given `prefix` removed. The characters
// `str` refers to are left intact. If `str` does not start with `prefix`,
// `str` is returned unchanged.
inline absl::string_view StripPrefix(absl::string_view str,
                                     absl::string_view prefix) {
  if (!absl::StartsWith(str, prefix)) {
    return str;
  }
  str.remove_prefix(prefix.size());
  return str;
}
// StripSuffix()
//
// Returns a view of `str` with the given `suffix` removed. The characters
// `str` refers to are left intact. If `str` does not end with `suffix`, `str`
// is returned unchanged.
inline absl::string_view StripSuffix(absl::string_view str,
                                     absl::string_view suffix) {
  if (!absl::EndsWith(str, suffix)) {
    return str;
  }
  str.remove_suffix(suffix.size());
  return str;
}
} // namespace absl
#endif // ABSL_STRINGS_STRIP_H_

119
absl/strings/strip_test.cc Normal file
View file

@ -0,0 +1,119 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file contains tests for the prefix/suffix stripping functions declared
// in absl/strings/strip.h.
#include "absl/strings/strip.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/string_view.h"
namespace {
using testing::ElementsAre;
using testing::IsEmpty;
// ConsumePrefix() with single-character prefixes. The same view is consumed
// step by step, so the order of the checks matters: a matching prefix is
// removed and reports true, a non-matching one leaves the view untouched and
// reports false, including once the view has become empty.
TEST(Strip, ConsumePrefixOneChar) {
absl::string_view input("abc");
EXPECT_TRUE(absl::ConsumePrefix(&input, "a"));
EXPECT_EQ(input, "bc");
EXPECT_FALSE(absl::ConsumePrefix(&input, "x"));
EXPECT_EQ(input, "bc");
EXPECT_TRUE(absl::ConsumePrefix(&input, "b"));
EXPECT_EQ(input, "c");
EXPECT_TRUE(absl::ConsumePrefix(&input, "c"));
EXPECT_EQ(input, "");
EXPECT_FALSE(absl::ConsumePrefix(&input, "a"));
EXPECT_EQ(input, "");
}
// ConsumePrefix() with multi-character prefixes: a prefix longer than the
// input, prefixes that differ in one character, the empty prefix (always
// matches, removes nothing), the whole input, and all but the last character.
TEST(Strip, ConsumePrefix) {
absl::string_view input("abcdef");
EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdefg"));
EXPECT_EQ(input, "abcdef");
EXPECT_FALSE(absl::ConsumePrefix(&input, "abce"));
EXPECT_EQ(input, "abcdef");
EXPECT_TRUE(absl::ConsumePrefix(&input, ""));
EXPECT_EQ(input, "abcdef");
EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdeg"));
EXPECT_EQ(input, "abcdef");
EXPECT_TRUE(absl::ConsumePrefix(&input, "abcdef"));
EXPECT_EQ(input, "");
// Start over with a fresh view for the partial-prefix case.
input = "abcdef";
EXPECT_TRUE(absl::ConsumePrefix(&input, "abcde"));
EXPECT_EQ(input, "f");
}
// ConsumeSuffix(): a suffix longer than the input, the empty suffix (always
// matches, removes nothing), a real suffix, a same-length string that differs
// in its last character, and consuming the view down to empty.
TEST(Strip, ConsumeSuffix) {
absl::string_view input("abcdef");
EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdefg"));
EXPECT_EQ(input, "abcdef");
EXPECT_TRUE(absl::ConsumeSuffix(&input, ""));
EXPECT_EQ(input, "abcdef");
EXPECT_TRUE(absl::ConsumeSuffix(&input, "def"));
EXPECT_EQ(input, "abc");
// Start over with a fresh view for the remaining cases.
input = "abcdef";
EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdeg"));
EXPECT_EQ(input, "abcdef");
EXPECT_TRUE(absl::ConsumeSuffix(&input, "f"));
EXPECT_EQ(input, "abcde");
EXPECT_TRUE(absl::ConsumeSuffix(&input, "abcde"));
EXPECT_EQ(input, "");
}
// StripPrefix(): matching prefix, empty and default-constructed prefixes
// (nothing removed), the whole string, a suffix passed as prefix (no match),
// a prefix longer than the input, and empty input.
TEST(Strip, StripPrefix) {
const absl::string_view null_str;
EXPECT_EQ(absl::StripPrefix("foobar", "foo"), "bar");
EXPECT_EQ(absl::StripPrefix("foobar", ""), "foobar");
EXPECT_EQ(absl::StripPrefix("foobar", null_str), "foobar");
EXPECT_EQ(absl::StripPrefix("foobar", "foobar"), "");
EXPECT_EQ(absl::StripPrefix("foobar", "bar"), "foobar");
EXPECT_EQ(absl::StripPrefix("foobar", "foobarr"), "foobar");
EXPECT_EQ(absl::StripPrefix("", ""), "");
}
// StripSuffix(): matching suffix, empty and default-constructed suffixes
// (nothing removed), the whole string, a prefix passed as suffix (no match),
// a suffix longer than the input, and empty input.
TEST(Strip, StripSuffix) {
const absl::string_view null_str;
EXPECT_EQ(absl::StripSuffix("foobar", "bar"), "foo");
EXPECT_EQ(absl::StripSuffix("foobar", ""), "foobar");
EXPECT_EQ(absl::StripSuffix("foobar", null_str), "foobar");
EXPECT_EQ(absl::StripSuffix("foobar", "foobar"), "");
EXPECT_EQ(absl::StripSuffix("foobar", "foo"), "foobar");
EXPECT_EQ(absl::StripSuffix("foobar", "ffoobar"), "foobar");
EXPECT_EQ(absl::StripSuffix("", ""), "");
}
} // namespace

117
absl/strings/substitute.cc Normal file
View file

@ -0,0 +1,117 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/substitute.h"
#include <algorithm>
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/string_view.h"
namespace absl {
namespace substitute_internal {
// Appends to `*output` the text of `format` with its placeholders expanded:
// "$N" (N a single digit) becomes `args_array[N]` and "$$" becomes a literal
// '$'. `num_args` is the number of elements in `args_array`.
//
// A format is rejected when a '$' is its last character, when a '$' is
// followed by something other than a digit or '$', or when "$N" names an
// index >= `num_args`. On rejection the function returns without modifying
// `*output`; when NDEBUG is not defined it first reports the problem through
// ABSL_RAW_LOG(FATAL, ...).
//
// The work is done in two passes over `format`: the first validates it and
// computes the number of characters to append, the second resizes `*output`
// once and copies the pieces into place.
void SubstituteAndAppendArray(std::string* output, absl::string_view format,
const absl::string_view* args_array,
size_t num_args) {
// Determine total size needed.
size_t size = 0;
for (size_t i = 0; i < format.size(); i++) {
if (format[i] == '$') {
if (i + 1 >= format.size()) {
// '$' is the last character of the format.
#ifndef NDEBUG
ABSL_RAW_LOG(FATAL,
"Invalid strings::Substitute() format std::string: \"%s\".",
absl::CEscape(format).c_str());
#endif
return;
} else if (absl::ascii_isdigit(format[i + 1])) {
int index = format[i + 1] - '0';
if (static_cast<size_t>(index) >= num_args) {
// The placeholder refers to an argument that was not supplied.
#ifndef NDEBUG
ABSL_RAW_LOG(
FATAL,
"Invalid strings::Substitute() format std::string: asked for \"$"
"%d\", but only %d args were given. Full format std::string was: "
"\"%s\".",
index, static_cast<int>(num_args), absl::CEscape(format).c_str());
#endif
return;
}
size += args_array[index].size();
++i; // Skip next char.
} else if (format[i + 1] == '$') {
// "$$" contributes a single literal '$'.
++size;
++i; // Skip next char.
} else {
// '$' followed by a character that is neither a digit nor '$'.
#ifndef NDEBUG
ABSL_RAW_LOG(FATAL,
"Invalid strings::Substitute() format std::string: \"%s\".",
absl::CEscape(format).c_str());
#endif
return;
}
} else {
++size;
}
}
// Nothing to append; leave `*output` untouched.
if (size == 0) return;
// Build the string.
size_t original_size = output->size();
strings_internal::STLStringResizeUninitialized(output, original_size + size);
char* target = &(*output)[original_size];
// The first pass has already rejected every malformed '$' sequence, so this
// pass reads format[i + 1] and indexes args_array without further checks.
for (size_t i = 0; i < format.size(); i++) {
if (format[i] == '$') {
if (absl::ascii_isdigit(format[i + 1])) {
const absl::string_view src = args_array[format[i + 1] - '0'];
target = std::copy(src.begin(), src.end(), target);
++i; // Skip next char.
} else if (format[i + 1] == '$') {
*target++ = '$';
++i; // Skip next char.
}
} else {
*target++ = format[i];
}
}
// The number of characters written must match the size computed above.
assert(target == output->data() + output->size());
}
// Formats a pointer value: "NULL" for a null pointer, otherwise "0x" followed
// by the address in lower-case hexadecimal without leading zeros. The text is
// written right-aligned into `scratch_`, and `piece_` is set to refer to it.
Arg::Arg(const void* value) {
  // Two hex digits per byte of the pointer, plus the "0x" prefix.
  static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2,
                "fix sizeof(scratch_)");
  if (value == nullptr) {
    piece_ = "NULL";
    return;
  }
  static const char kHexDigits[] = "0123456789abcdef";
  char* const end = scratch_ + sizeof(scratch_);
  char* cursor = end;
  uintptr_t bits = reinterpret_cast<uintptr_t>(value);
  // Emit the digits from least to most significant, filling backwards.
  do {
    --cursor;
    *cursor = kHexDigits[bits & 0xf];
    bits >>= 4;
  } while (bits != 0);
  --cursor;
  *cursor = 'x';
  --cursor;
  *cursor = '0';
  piece_ = absl::string_view(cursor, end - cursor);
}
} // namespace substitute_internal
} // namespace absl

674
absl/strings/substitute.h Normal file
View file

@ -0,0 +1,674 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: substitute.h
// -----------------------------------------------------------------------------
//
// This package contains functions for efficiently performing std::string
// substitutions using a format std::string with positional notation:
// `Substitute()` and `SubstituteAndAppend()`.
//
// Unlike printf-style format specifiers, `Substitute()` functions do not need
// to specify the type of the substitution arguments. Supported arguments
// following the format std::string, such as strings, string_views, ints,
// floats, and bools, are automatically converted to strings during the
// substitution process. (See below for a full list of supported types.)
//
// `Substitute()` does not allow you to specify *how* to format a value, beyond
// the default conversion to std::string. For example, you cannot format an integer
// in hex.
//
// The format std::string uses positional identifiers indicated by a dollar sign ($)
// and single digit positional ids to indicate which substitution arguments to
// use at that location within the format std::string.
//
// Example 1:
// std::string s = Substitute("$1 purchased $0 $2. Thanks $1!",
// 5, "Bob", "Apples");
// EXPECT_EQ("Bob purchased 5 Apples. Thanks Bob!", s);
//
// Example 2:
// std::string s = "Hi. ";
// SubstituteAndAppend(&s, "My name is $0 and I am $1 years old.", "Bob", 5);
// EXPECT_EQ("Hi. My name is Bob and I am 5 years old.", s);
//
// Differences from `StringPrintf()`:
// * The format std::string does not identify the types of arguments. Instead, the
// arguments are implicitly converted to strings. See below for a list of
// accepted types.
// * Substitutions in the format std::string are identified by a '$' followed by a
// single digit. You can use arguments out-of-order and use the same
// argument multiple times.
// * A '$$' sequence in the format std::string means output a literal '$'
// character.
// * `Substitute()` is significantly faster than `StringPrintf()`. For very
// large strings, it may be orders of magnitude faster.
//
// Supported types:
//   * absl::string_view, std::string, const char* (null is equivalent to "")
//   * int32_t, int64_t, uint32_t, uint64_t
//   * float, double
//   * bool (Printed as "true" or "false")
//   * pointer types other than char* (Printed as "0x<lower case hex string>",
//     except that null is printed as "NULL")
//
// If an invalid format string is provided, Substitute returns an empty string
// and SubstituteAndAppend does not change the provided output string.
// A format string is invalid if it:
//   * ends in an unescaped $ character,
//     e.g. "Hello $", or
//   * calls for a position argument which is not provided,
//     e.g. Substitute("Hello $2", "world"), or
//   * specifies a non-digit, non-$ character after an unescaped $ character,
//     e.g. "Hello $f".
// In debug mode, i.e. #ifndef NDEBUG, such errors terminate the program.
#ifndef ABSL_STRINGS_SUBSTITUTE_H_
#define ABSL_STRINGS_SUBSTITUTE_H_
#include <cstring>
#include <string>
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
namespace absl {
namespace substitute_internal {
// Arg
//
// Argument type accepted by `absl::Substitute()` and
// `absl::SubstituteAndAppend()`. Each implicit constructor converts one
// supported value type into the `absl::string_view` returned by `piece()`.
// (`Arg` is very similar to the `AlphaNum` class in `StrCat()`.)
//
// Numeric, `char` and pointer values are formatted into the object's own
// `scratch_` buffer and `piece()` refers to that buffer. String-like values
// are referenced, not copied. Copying and assignment are disabled: a copied
// `piece_` would still refer to the source object's `scratch_`.
//
// This class has implicit constructors.
class Arg {
 public:
  // Overloads for string-y things
  //
  // Explicitly overload `const char*` so the compiler doesn't cast to `bool`.
  // A null pointer is treated as the empty string. It is routed through
  // `absl::NullSafeStringView()` instead of being handed to the `string_view`
  // constructor directly, because constructing a `std::string_view` from a
  // null pointer is undefined behavior.
  Arg(const char* value)  // NOLINT(runtime/explicit)
      : piece_(absl::NullSafeStringView(value)) {}
  Arg(const std::string& value)  // NOLINT(runtime/explicit)
      : piece_(value) {}
  Arg(absl::string_view value)  // NOLINT(runtime/explicit)
      : piece_(value) {}

  // Overloads for primitives
  //
  // No overloads are available for signed and unsigned char because if people
  // are explicitly declaring their chars as signed or unsigned then they are
  // probably using them as 8-bit integers and would probably prefer an integer
  // representation. However, we can't really know, so we make the caller decide
  // what to do.
  Arg(char value)  // NOLINT(runtime/explicit)
      : piece_(scratch_, 1) { scratch_[0] = value; }
  Arg(short value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(unsigned short value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(int value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(unsigned int value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(long value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(unsigned long value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(long long value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(unsigned long long value)  // NOLINT(runtime/explicit)
      : piece_(scratch_,
               numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
  Arg(float value)  // NOLINT(runtime/explicit)
      : piece_(numbers_internal::RoundTripFloatToBuffer(value, scratch_)) {}
  Arg(double value)  // NOLINT(runtime/explicit)
      : piece_(numbers_internal::RoundTripDoubleToBuffer(value, scratch_)) {}
  Arg(bool value)  // NOLINT(runtime/explicit)
      : piece_(value ? "true" : "false") {}

  // `void*` values, with the exception of `char*`, are printed as
  // `StringPrintf()` with format "%p": e.g. ("0x<hex value>").
  // However, in the case of `nullptr`, "NULL" is printed.
  Arg(const void* value);  // NOLINT(runtime/explicit)

  Arg(const Arg&) = delete;
  Arg& operator=(const Arg&) = delete;

  // Returns the textual form of the wrapped value.
  absl::string_view piece() const { return piece_; }

 private:
  absl::string_view piece_;
  // Backing storage for values that have to be formatted (numbers, `char`,
  // pointers).
  char scratch_[numbers_internal::kFastToBufferSize];
};
// Internal helper function. Don't call this from outside this implementation.
// This interface may change without notice.
void SubstituteAndAppendArray(std::string* output, absl::string_view format,
const absl::string_view* args_array,
size_t num_args);
#if defined(ABSL_BAD_CALL_IF)
// Returns a mask with bit N set when `*format` is the decimal digit N, and 0
// when `*format` is not a digit (including the terminating NUL).
constexpr int CalculateOneBit(const char* format) {
  return (*format >= '0' && *format <= '9') ? (1 << (*format - '0')) : 0;
}
// Returns `format` advanced by one character, unless `format` already points
// at the terminating NUL, in which case it is returned unchanged.
constexpr const char* SkipNumber(const char* format) {
  return *format ? (format + 1) : format;
}
// Returns a mask with bit N set for every "$N" placeholder in the
// NUL-terminated `format`. The character after a '$' is always consumed
// together with it, so "$$" contributes no bit and does not start a new
// placeholder.
constexpr int PlaceholderBitmask(const char* format) {
  return *format == '\0'
             ? 0
             : (*format == '$'
                    ? (CalculateOneBit(format + 1) |
                       PlaceholderBitmask(SkipNumber(format + 1)))
                    : PlaceholderBitmask(format + 1));
}
#endif // ABSL_BAD_CALL_IF
} // namespace substitute_internal
//
// PUBLIC API
//
// SubstituteAndAppend()
//
// Substitutes variables into a given format string and appends to a given
// output string. See file comments above for usage.
//
// The declarations of `SubstituteAndAppend()` below consist of overloads
// for passing 0 to 10 arguments, respectively.
//
// NOTE: A zero-argument `SubstituteAndAppend()` may be used within variadic
// templates to allow a variable number of arguments.
//
// Example:
//   template <typename... Args>
//   void VarMsg(std::string* boilerplate, const std::string& format,
//               const Args&... args) {
//     absl::SubstituteAndAppend(boilerplate, format, args...);
//   }
//
// This zero-argument overload passes an empty argument list (null array,
// count 0) to the internal helper.
inline void SubstituteAndAppend(std::string* output, absl::string_view format) {
substitute_internal::SubstituteAndAppendArray(output, format, nullptr, 0);
}
// Overload of `SubstituteAndAppend()` taking one substitution argument,
// referenced in `format` as `$0`.
inline void SubstituteAndAppend(std::string* output, absl::string_view format,
const substitute_internal::Arg& a0) {
const absl::string_view args[] = {a0.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
// Overload of `SubstituteAndAppend()` taking two substitution arguments,
// referenced in `format` as `$0` and `$1`.
inline void SubstituteAndAppend(std::string* output, absl::string_view format,
const substitute_internal::Arg& a0,
const substitute_internal::Arg& a1) {
const absl::string_view args[] = {a0.piece(), a1.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
// Overload of `SubstituteAndAppend()` taking three substitution arguments,
// referenced in `format` as `$0` through `$2`.
inline void SubstituteAndAppend(std::string* output, absl::string_view format,
const substitute_internal::Arg& a0,
const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2) {
const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
// Overload of `SubstituteAndAppend()` taking four substitution arguments,
// referenced in `format` as `$0` through `$3`.
inline void SubstituteAndAppend(std::string* output, absl::string_view format,
const substitute_internal::Arg& a0,
const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2,
const substitute_internal::Arg& a3) {
const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
a3.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
// Overload of `SubstituteAndAppend()` taking five substitution arguments,
// referenced in `format` as `$0` through `$4`.
inline void SubstituteAndAppend(std::string* output, absl::string_view format,
const substitute_internal::Arg& a0,
const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2,
const substitute_internal::Arg& a3,
const substitute_internal::Arg& a4) {
const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
a3.piece(), a4.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
// Overload of `SubstituteAndAppend()` taking six substitution arguments,
// referenced in `format` as `$0` through `$5`.
inline void SubstituteAndAppend(std::string* output, absl::string_view format,
const substitute_internal::Arg& a0,
const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2,
const substitute_internal::Arg& a3,
const substitute_internal::Arg& a4,
const substitute_internal::Arg& a5) {
const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
a3.piece(), a4.piece(), a5.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
// Overload of `SubstituteAndAppend()` taking seven substitution arguments,
// referenced in `format` as `$0` through `$6`.
inline void SubstituteAndAppend(std::string* output, absl::string_view format,
const substitute_internal::Arg& a0,
const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2,
const substitute_internal::Arg& a3,
const substitute_internal::Arg& a4,
const substitute_internal::Arg& a5,
const substitute_internal::Arg& a6) {
const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
a3.piece(), a4.piece(), a5.piece(),
a6.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
inline void SubstituteAndAppend(
std::string* output, absl::string_view format,
const substitute_internal::Arg& a0, const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2, const substitute_internal::Arg& a3,
const substitute_internal::Arg& a4, const substitute_internal::Arg& a5,
const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) {
const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
a3.piece(), a4.piece(), a5.piece(),
a6.piece(), a7.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
inline void SubstituteAndAppend(
std::string* output, absl::string_view format,
const substitute_internal::Arg& a0, const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2, const substitute_internal::Arg& a3,
const substitute_internal::Arg& a4, const substitute_internal::Arg& a5,
const substitute_internal::Arg& a6, const substitute_internal::Arg& a7,
const substitute_internal::Arg& a8) {
const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
a3.piece(), a4.piece(), a5.piece(),
a6.piece(), a7.piece(), a8.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
inline void SubstituteAndAppend(
std::string* output, absl::string_view format,
const substitute_internal::Arg& a0, const substitute_internal::Arg& a1,
const substitute_internal::Arg& a2, const substitute_internal::Arg& a3,
const substitute_internal::Arg& a4, const substitute_internal::Arg& a5,
const substitute_internal::Arg& a6, const substitute_internal::Arg& a7,
const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) {
const absl::string_view args[] = {
a0.piece(), a1.piece(), a2.piece(), a3.piece(), a4.piece(),
a5.piece(), a6.piece(), a7.piece(), a8.piece(), a9.piece()};
substitute_internal::SubstituteAndAppendArray(output, format, args,
ABSL_ARRAYSIZE(args));
}
#if defined(ABSL_BAD_CALL_IF)
// This body of functions catches cases where the number of placeholders
// doesn't match the number of data arguments.
//
// Each overload below is declaration-only and takes the format as a
// `const char*`. It is annotated with ABSL_BAD_CALL_IF so that a call is
// flagged when substitute_internal::PlaceholderBitmask(format) differs from
// the mask expected for that overload's argument count (2^N - 1 for N
// arguments: 0, 1, 3, 7, ..., 1023).
void SubstituteAndAppend(std::string* output, const char* format)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0,
                     "There were no substitution arguments "
                     "but this format string has a $[0-9] in it");
void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1,
                     "There was 1 substitution argument given, but "
                     "this format string is either missing its $0, or "
                     "contains one of $1-$9");
void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3,
                     "There were 2 substitution arguments given, but "
                     "this format string is either missing its $0/$1, or "
                     "contains one of $2-$9");
void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7,
                     "There were 3 substitution arguments given, but "
                     "this format string is either missing its $0/$1/$2, or "
                     "contains one of $3-$9");
void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2,
                         const substitute_internal::Arg& a3)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15,
                     "There were 4 substitution arguments given, but "
                     "this format string is either missing its $0-$3, or "
                     "contains one of $4-$9");
void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2,
                         const substitute_internal::Arg& a3,
                         const substitute_internal::Arg& a4)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31,
                     "There were 5 substitution arguments given, but "
                     "this format string is either missing its $0-$4, or "
                     "contains one of $5-$9");
void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2,
                         const substitute_internal::Arg& a3,
                         const substitute_internal::Arg& a4,
                         const substitute_internal::Arg& a5)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63,
                     "There were 6 substitution arguments given, but "
                     "this format string is either missing its $0-$5, or "
                     "contains one of $6-$9");
void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127,
                     "There were 7 substitution arguments given, but "
                     "this format string is either missing its $0-$6, or "
                     "contains one of $7-$9");
void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255,
                     "There were 8 substitution arguments given, but "
                     "this format string is either missing its $0-$7, or "
                     "contains one of $8-$9");
void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7, const substitute_internal::Arg& a8)
    ABSL_BAD_CALL_IF(
        substitute_internal::PlaceholderBitmask(format) != 511,
        "There were 9 substitution arguments given, but "
        "this format string is either missing its $0-$8, or contains a $9");
void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7, const substitute_internal::Arg& a8,
    const substitute_internal::Arg& a9)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023,
                     "There were 10 substitution arguments given, but this "
                     "format string doesn't contain all of $0 through $9");
#endif  // ABSL_BAD_CALL_IF
// Substitute()
//
// Substitutes variables into a given format string. See file comments above
// for usage.
//
// The declarations of `Substitute()` below consist of overloads for passing 0
// to 10 arguments, respectively.
//
// NOTE: A zero-argument `Substitute()` may be used within variadic templates to
// allow a variable number of arguments.
//
// Example:
//   template <typename... Args>
//   void VarMsg(const std::string& format, const Args&... args) {
//     std::string s = absl::Substitute(format, args...);
//   }
// Zero-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format`, and returns it by value.
ABSL_MUST_USE_RESULT inline std::string Substitute(absl::string_view format) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format);
  return substituted;
}
// One-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0, and returns it by value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format, const substitute_internal::Arg& a0) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0);
  return substituted;
}
// Two-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a1, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1);
  return substituted;
}
// Three-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a2, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2);
  return substituted;
}
// Four-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a3, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2, a3);
  return substituted;
}
// Five-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a4, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3,
    const substitute_internal::Arg& a4) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2, a3, a4);
  return substituted;
}
// Six-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a5, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3,
    const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2, a3, a4, a5);
  return substituted;
}
// Seven-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a6, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3,
    const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5,
    const substitute_internal::Arg& a6) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2, a3, a4, a5, a6);
  return substituted;
}
// Eight-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a7, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3,
    const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5,
    const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2, a3, a4, a5, a6, a7);
  return substituted;
}
// Nine-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a8, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3,
    const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5,
    const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7,
    const substitute_internal::Arg& a8) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2, a3, a4, a5, a6, a7,
                      a8);
  return substituted;
}
// Ten-argument overload: starts from an empty string, lets
// SubstituteAndAppend() fill it from `format` and a0..a9, and returns it by
// value.
ABSL_MUST_USE_RESULT inline std::string Substitute(
    absl::string_view format,
    const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1,
    const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3,
    const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5,
    const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7,
    const substitute_internal::Arg& a8,
    const substitute_internal::Arg& a9) {
  std::string substituted;
  SubstituteAndAppend(&substituted, format, a0, a1, a2, a3, a4, a5, a6, a7,
                      a8, a9);
  return substituted;
}
#if defined(ABSL_BAD_CALL_IF)
// This body of functions catches cases where the number of placeholders
// doesn't match the number of data arguments.
//
// Each overload below is declaration-only and takes the format as a
// `const char*`. It is annotated with ABSL_BAD_CALL_IF so that a call is
// flagged when substitute_internal::PlaceholderBitmask(format) differs from
// the mask expected for that overload's argument count (2^N - 1 for N
// arguments: 0, 1, 3, 7, ..., 1023).
std::string Substitute(const char* format)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0,
                     "There were no substitution arguments "
                     "but this format string has a $[0-9] in it");
std::string Substitute(const char* format, const substitute_internal::Arg& a0)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1,
                     "There was 1 substitution argument given, but "
                     "this format string is either missing its $0, or "
                     "contains one of $1-$9");
std::string Substitute(const char* format, const substitute_internal::Arg& a0,
                       const substitute_internal::Arg& a1)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3,
                     "There were 2 substitution arguments given, but "
                     "this format string is either missing its $0/$1, or "
                     "contains one of $2-$9");
std::string Substitute(const char* format, const substitute_internal::Arg& a0,
                       const substitute_internal::Arg& a1,
                       const substitute_internal::Arg& a2)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7,
                     "There were 3 substitution arguments given, but "
                     "this format string is either missing its $0/$1/$2, or "
                     "contains one of $3-$9");
std::string Substitute(const char* format, const substitute_internal::Arg& a0,
                       const substitute_internal::Arg& a1,
                       const substitute_internal::Arg& a2,
                       const substitute_internal::Arg& a3)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15,
                     "There were 4 substitution arguments given, but "
                     "this format string is either missing its $0-$3, or "
                     "contains one of $4-$9");
std::string Substitute(const char* format, const substitute_internal::Arg& a0,
                       const substitute_internal::Arg& a1,
                       const substitute_internal::Arg& a2,
                       const substitute_internal::Arg& a3,
                       const substitute_internal::Arg& a4)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31,
                     "There were 5 substitution arguments given, but "
                     "this format string is either missing its $0-$4, or "
                     "contains one of $5-$9");
std::string Substitute(const char* format, const substitute_internal::Arg& a0,
                       const substitute_internal::Arg& a1,
                       const substitute_internal::Arg& a2,
                       const substitute_internal::Arg& a3,
                       const substitute_internal::Arg& a4,
                       const substitute_internal::Arg& a5)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63,
                     "There were 6 substitution arguments given, but "
                     "this format string is either missing its $0-$5, or "
                     "contains one of $6-$9");
std::string Substitute(const char* format, const substitute_internal::Arg& a0,
                       const substitute_internal::Arg& a1,
                       const substitute_internal::Arg& a2,
                       const substitute_internal::Arg& a3,
                       const substitute_internal::Arg& a4,
                       const substitute_internal::Arg& a5,
                       const substitute_internal::Arg& a6)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127,
                     "There were 7 substitution arguments given, but "
                     "this format string is either missing its $0-$6, or "
                     "contains one of $7-$9");
std::string Substitute(const char* format, const substitute_internal::Arg& a0,
                       const substitute_internal::Arg& a1,
                       const substitute_internal::Arg& a2,
                       const substitute_internal::Arg& a3,
                       const substitute_internal::Arg& a4,
                       const substitute_internal::Arg& a5,
                       const substitute_internal::Arg& a6,
                       const substitute_internal::Arg& a7)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255,
                     "There were 8 substitution arguments given, but "
                     "this format string is either missing its $0-$7, or "
                     "contains one of $8-$9");
std::string Substitute(
    const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7, const substitute_internal::Arg& a8)
    ABSL_BAD_CALL_IF(
        substitute_internal::PlaceholderBitmask(format) != 511,
        "There were 9 substitution arguments given, but "
        "this format string is either missing its $0-$8, or contains a $9");
std::string Substitute(
    const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7, const substitute_internal::Arg& a8,
    const substitute_internal::Arg& a9)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023,
                     "There were 10 substitution arguments given, but this "
                     "format string doesn't contain all of $0 through $9");
#endif  // ABSL_BAD_CALL_IF
} // namespace absl
#endif // ABSL_STRINGS_SUBSTITUTE_H_

View file

@ -0,0 +1,168 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/substitute.h"
#include <cstdint>
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
namespace {
// Exercises absl::Substitute() across argument types, placeholder orderings,
// `$$` escaping, and every overload from one to ten arguments.
TEST(SubstituteTest, Substitute) {
  // Basic.
  EXPECT_EQ("Hello, world!", absl::Substitute("$0, $1!", "Hello", "world"));

  // Non-char* types.
  EXPECT_EQ("123 0.2 0.1 foo true false x",
            absl::Substitute("$0 $1 $2 $3 $4 $5 $6", 123, 0.2, 0.1f,
                             std::string("foo"), true, false, 'x'));

  // All int types.
  EXPECT_EQ(
      "-32767 65535 "
      "-1234567890 3234567890 "
      "-1234567890 3234567890 "
      "-1234567890123456789 9234567890123456789",
      absl::Substitute(
          "$0 $1 $2 $3 $4 $5 $6 $7",
          static_cast<short>(-32767),          // NOLINT(runtime/int)
          static_cast<unsigned short>(65535),  // NOLINT(runtime/int)
          -1234567890, 3234567890U, -1234567890L, 3234567890UL,
          -int64_t{1234567890123456789}, uint64_t{9234567890123456789u}));

  // Pointer.
  const int* int_p = reinterpret_cast<const int*>(0x12345);
  std::string str = absl::Substitute("$0", int_p);
  EXPECT_EQ(absl::StrCat("0x", absl::Hex(reinterpret_cast<intptr_t>(int_p))),
            str);

  // null is special. StrCat prints 0x0. Substitute prints NULL.
  const uint64_t* null_p = nullptr;
  str = absl::Substitute("$0", null_p);
  EXPECT_EQ("NULL", str);

  // char* is also special.
  const char* char_p = "print me";
  str = absl::Substitute("$0", char_p);
  EXPECT_EQ("print me", str);

  // Mutable char array. Initializing from the literal zero-fills the rest of
  // the buffer, which matches what strncpy() padding produced, without
  // depending on <cstring> (which this file does not include).
  char char_buf[16] = "print me too";
  str = absl::Substitute("$0", char_buf);
  EXPECT_EQ("print me too", str);

  // null char* is "doubly" special. Represented as the empty string.
  char_p = nullptr;
  str = absl::Substitute("$0", char_p);
  EXPECT_EQ("", str);

  // Out-of-order.
  EXPECT_EQ("b, a, c, b", absl::Substitute("$1, $0, $2, $1", "a", "b", "c"));

  // Literal $
  EXPECT_EQ("$", absl::Substitute("$$"));
  EXPECT_EQ("$1", absl::Substitute("$$1"));

  // Test all overloads.
  EXPECT_EQ("a", absl::Substitute("$0", "a"));
  EXPECT_EQ("a b", absl::Substitute("$0 $1", "a", "b"));
  EXPECT_EQ("a b c", absl::Substitute("$0 $1 $2", "a", "b", "c"));
  EXPECT_EQ("a b c d", absl::Substitute("$0 $1 $2 $3", "a", "b", "c", "d"));
  EXPECT_EQ("a b c d e",
            absl::Substitute("$0 $1 $2 $3 $4", "a", "b", "c", "d", "e"));
  EXPECT_EQ("a b c d e f", absl::Substitute("$0 $1 $2 $3 $4 $5", "a", "b", "c",
                                            "d", "e", "f"));
  EXPECT_EQ("a b c d e f g", absl::Substitute("$0 $1 $2 $3 $4 $5 $6", "a", "b",
                                              "c", "d", "e", "f", "g"));
  EXPECT_EQ("a b c d e f g h",
            absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c", "d", "e",
                             "f", "g", "h"));
  EXPECT_EQ("a b c d e f g h i",
            absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b", "c", "d",
                             "e", "f", "g", "h", "i"));
  EXPECT_EQ("a b c d e f g h i j",
            absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b", "c",
                             "d", "e", "f", "g", "h", "i", "j"));

  // Placeholders are a single digit: "$10" is "$1" followed by a literal '0',
  // hence the expected "b0".
  EXPECT_EQ("a b c d e f g h i j b0",
            absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9 $10", "a", "b", "c",
                             "d", "e", "f", "g", "h", "i", "j"));

  const char* null_cstring = nullptr;
  EXPECT_EQ("Text: ''", absl::Substitute("Text: '$0'", null_cstring));
}
// Verifies that absl::SubstituteAndAppend() appends to existing content and
// that each overload from one to ten arguments produces the expected text.
// Format strings stay as literals at every call site so the calls resolve
// exactly as in production code.
TEST(SubstituteTest, SubstituteAndAppend) {
  // Appending keeps what was already in the destination.
  std::string buffer = "Hello";
  absl::SubstituteAndAppend(&buffer, ", $0!", "world");
  EXPECT_EQ("Hello, world!", buffer);

  // Test all overloads, starting from an empty destination each time.

  // 1 argument.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0", "a");
  EXPECT_EQ("a", buffer);

  // 2 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1", "a", "b");
  EXPECT_EQ("a b", buffer);

  // 3 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2", "a", "b", "c");
  EXPECT_EQ("a b c", buffer);

  // 4 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2 $3", "a", "b", "c", "d");
  EXPECT_EQ("a b c d", buffer);

  // 5 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2 $3 $4", "a", "b", "c", "d",
                            "e");
  EXPECT_EQ("a b c d e", buffer);

  // 6 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2 $3 $4 $5", "a", "b", "c", "d",
                            "e", "f");
  EXPECT_EQ("a b c d e f", buffer);

  // 7 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2 $3 $4 $5 $6", "a", "b", "c",
                            "d", "e", "f", "g");
  EXPECT_EQ("a b c d e f g", buffer);

  // 8 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c",
                            "d", "e", "f", "g", "h");
  EXPECT_EQ("a b c d e f g h", buffer);

  // 9 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b",
                            "c", "d", "e", "f", "g", "h", "i");
  EXPECT_EQ("a b c d e f g h i", buffer);

  // 10 arguments.
  buffer.clear();
  absl::SubstituteAndAppend(&buffer, "$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b",
                            "c", "d", "e", "f", "g", "h", "i", "j");
  EXPECT_EQ("a b c d e f g h i j", buffer);
}
#ifdef GTEST_HAS_DEATH_TEST
// Checks that malformed format strings make absl::Substitute() die with the
// expected diagnostic. EXPECT_DEBUG_DEATH only expects the death (and matches
// the regex against stderr) in debug mode; in optimized builds it merely
// executes the statement. The static_cast<void> discards the result, which is
// declared ABSL_MUST_USE_RESULT.
TEST(SubstituteDeathTest, SubstituteDeath) {
// Placeholder index out of range: "$2" with only two arguments. The format is
// wrapped in absl::string_view, presumably so the call does not match the
// `const char*` compile-time checking overloads -- TODO confirm.
EXPECT_DEBUG_DEATH(
static_cast<void>(absl::Substitute(absl::string_view("-$2"), "a", "b")),
"Invalid strings::Substitute\\(\\) format std::string: asked for \"\\$2\", "
"but only 2 args were given.");
// '$' followed by a character that is neither a digit nor another '$'.
EXPECT_DEBUG_DEATH(
static_cast<void>(absl::Substitute("-$z-")),
"Invalid strings::Substitute\\(\\) format std::string: \"-\\$z-\"");
// Trailing '$' with nothing after it.
EXPECT_DEBUG_DEATH(
static_cast<void>(absl::Substitute("-$")),
"Invalid strings::Substitute\\(\\) format std::string: \"-\\$\"");
}
#endif // GTEST_HAS_DEATH_TEST
} // namespace

3
absl/strings/testdata/getline-1.txt vendored Normal file
View file

@ -0,0 +1,3 @@
alpha
beta gamma

1
absl/strings/testdata/getline-2.txt vendored Normal file
View file

@ -0,0 +1 @@
one.two.three