Add 'third_party/abseil_cpp/' from commit '768eb2ca28'

git-subtree-dir: third_party/abseil_cpp
git-subtree-mainline: ffb2ae54be
git-subtree-split: 768eb2ca28
This commit is contained in:
Vincent Ambo 2020-05-20 02:32:24 +01:00
commit fc8dc48020
1276 changed files with 208196 additions and 0 deletions

View file

@ -0,0 +1,772 @@
#
# Copyright 2017 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load(
"//absl:copts/configure_copts.bzl",
"ABSL_DEFAULT_COPTS",
"ABSL_TEST_COPTS",
)
package(
default_visibility = ["//visibility:public"],
features = ["parse_headers"],
)
licenses(["notice"])
# Public string utilities library for this package. The headers in `hdrs` are
# the public API; the `internal/*.h` headers are listed in `srcs` alongside the
# .cc files, so they are part of this target's sources but not of its `hdrs`.
cc_library(
name = "strings",
srcs = [
"ascii.cc",
"charconv.cc",
"escaping.cc",
"internal/charconv_bigint.cc",
"internal/charconv_bigint.h",
"internal/charconv_parse.cc",
"internal/charconv_parse.h",
"internal/memutil.cc",
"internal/memutil.h",
"internal/stl_type_traits.h",
"internal/str_join_internal.h",
"internal/str_split_internal.h",
"match.cc",
"numbers.cc",
"str_cat.cc",
"str_replace.cc",
"str_split.cc",
"string_view.cc",
"substitute.cc",
],
# Note: "str_join.h" and "strip.h" have no same-named .cc file in `srcs`.
hdrs = [
"ascii.h",
"charconv.h",
"escaping.h",
"match.h",
"numbers.h",
"str_cat.h",
"str_join.h",
"str_replace.h",
"str_split.h",
"string_view.h",
"strip.h",
"substitute.h",
],
# Library targets in this file use ABSL_DEFAULT_COPTS; tests use
# ABSL_TEST_COPTS (both loaded from //absl:copts/configure_copts.bzl).
copts = ABSL_DEFAULT_COPTS,
deps = [
# ":internal" is the sibling cc_library defined in this BUILD file.
":internal",
"//absl/base",
"//absl/base:bits",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/base:raw_logging_internal",
"//absl/base:throw_delegate",
"//absl/memory",
"//absl/meta:type_traits",
"//absl/numeric:int128",
],
)
# Implementation-detail helpers (char_map, escaping, ostringstream,
# resize_uninitialized, utf8) used by other targets in this file such as
# ":strings" and ":cord". No `visibility` is set here, so the package-level
# default_visibility ("//visibility:public") applies to this target.
cc_library(
name = "internal",
srcs = [
"internal/escaping.cc",
"internal/ostringstream.cc",
"internal/utf8.cc",
],
# "internal/char_map.h" and "internal/resize_uninitialized.h" have no
# same-named .cc file in `srcs`.
hdrs = [
"internal/char_map.h",
"internal/escaping.h",
"internal/ostringstream.h",
"internal/resize_uninitialized.h",
"internal/utf8.h",
],
copts = ABSL_DEFAULT_COPTS,
deps = [
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/base:raw_logging_internal",
"//absl/meta:type_traits",
],
)
cc_test(
name = "match_test",
size = "small",
srcs = ["match_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "escaping_test",
size = "small",
srcs = [
"escaping_test.cc",
"internal/escaping_test_common.h",
],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":cord",
":strings",
"//absl/base:core_headers",
"//absl/container:fixed_array",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "escaping_benchmark",
srcs = [
"escaping_benchmark.cc",
"internal/escaping_test_common.h",
],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:raw_logging_internal",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "ascii_test",
size = "small",
srcs = ["ascii_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "ascii_benchmark",
srcs = ["ascii_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "memutil_benchmark",
srcs = [
"internal/memutil.h",
"internal/memutil_benchmark.cc",
],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "memutil_test",
size = "small",
srcs = [
"internal/memutil.h",
"internal/memutil_test.cc",
],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "utf8_test",
size = "small",
srcs = [
"internal/utf8_test.cc",
],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":internal",
"//absl/base:core_headers",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "string_view_benchmark",
srcs = ["string_view_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"//absl/base:raw_logging_internal",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "string_view_test",
size = "small",
srcs = ["string_view_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:dynamic_annotations",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "cord_internal",
hdrs = ["internal/cord_internal.h"],
copts = ABSL_DEFAULT_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/meta:type_traits",
],
)
cc_library(
name = "cord",
srcs = [
"cord.cc",
],
hdrs = [
"cord.h",
],
copts = ABSL_DEFAULT_COPTS,
deps = [
":cord_internal",
":internal",
":str_format",
":strings",
"//absl/base",
"//absl/base:base_internal",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/base:raw_logging_internal",
"//absl/container:fixed_array",
"//absl/container:inlined_vector",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/types:optional",
],
)
cc_library(
name = "cord_test_helpers",
testonly = 1,
hdrs = [
"cord_test_helpers.h",
],
copts = ABSL_DEFAULT_COPTS,
deps = [
":cord",
],
)
cc_test(
name = "cord_test",
size = "medium",
srcs = ["cord_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":cord",
":cord_test_helpers",
":str_format",
":strings",
"//absl/base",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/base:raw_logging_internal",
"//absl/container:fixed_array",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "substitute_test",
size = "small",
srcs = ["substitute_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_replace_benchmark",
srcs = ["str_replace_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:raw_logging_internal",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "str_replace_test",
size = "small",
srcs = ["str_replace_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_split_test",
srcs = ["str_split_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"//absl/base:dynamic_annotations",
"//absl/container:flat_hash_map",
"//absl/container:node_hash_map",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_split_benchmark",
srcs = ["str_split_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:raw_logging_internal",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "ostringstream_test",
size = "small",
srcs = ["internal/ostringstream_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":internal",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "ostringstream_benchmark",
srcs = ["internal/ostringstream_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":internal",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "resize_uninitialized_test",
size = "small",
srcs = [
"internal/resize_uninitialized.h",
"internal/resize_uninitialized_test.cc",
],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
"//absl/base:core_headers",
"//absl/meta:type_traits",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_join_test",
size = "small",
srcs = ["str_join_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"//absl/memory",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_join_benchmark",
srcs = ["str_join_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "str_cat_test",
size = "small",
srcs = ["str_cat_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:core_headers",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_cat_benchmark",
srcs = ["str_cat_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "numbers_test",
size = "medium",
srcs = [
"internal/numbers_test_common.h",
"numbers_test.cc",
],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":internal",
":pow10_helper",
":strings",
"//absl/base:config",
"//absl/base:raw_logging_internal",
"//absl/random",
"//absl/random:distributions",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "numbers_benchmark",
srcs = ["numbers_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:raw_logging_internal",
"//absl/random",
"//absl/random:distributions",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "strip_test",
size = "small",
srcs = ["strip_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "char_map_test",
srcs = ["internal/char_map_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":internal",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "char_map_benchmark",
srcs = ["internal/char_map_benchmark.cc"],
copts = ABSL_TEST_COPTS,
tags = ["benchmark"],
deps = [
":internal",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_test(
name = "charconv_test",
srcs = ["charconv_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":pow10_helper",
":str_format",
":strings",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "charconv_parse_test",
srcs = [
"internal/charconv_parse.h",
"internal/charconv_parse_test.cc",
],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:config",
"//absl/base:raw_logging_internal",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "charconv_bigint_test",
srcs = [
"internal/charconv_bigint.h",
"internal/charconv_bigint_test.cc",
"internal/charconv_parse.h",
],
copts = ABSL_TEST_COPTS,
deps = [
":strings",
"//absl/base:config",
"@com_google_googletest//:gtest_main",
],
)
# Benchmark for the charconv routines in ":strings". Tagged "benchmark" like
# the other *_benchmark targets in this file.
cc_test(
    name = "charconv_benchmark",
    srcs = [
        "charconv_benchmark.cc",
    ],
    # Every other cc_test in this file is built with ABSL_TEST_COPTS; this
    # target previously omitted it and so was compiled without the shared
    # test warning configuration.
    copts = ABSL_TEST_COPTS,
    tags = [
        "benchmark",
    ],
    deps = [
        ":strings",
        "@com_github_google_benchmark//:benchmark_main",
    ],
)
cc_library(
name = "str_format",
hdrs = [
"str_format.h",
],
copts = ABSL_DEFAULT_COPTS,
deps = [
":str_format_internal",
],
)
cc_library(
name = "str_format_internal",
srcs = [
"internal/str_format/arg.cc",
"internal/str_format/bind.cc",
"internal/str_format/extension.cc",
"internal/str_format/float_conversion.cc",
"internal/str_format/output.cc",
"internal/str_format/parser.cc",
],
hdrs = [
"internal/str_format/arg.h",
"internal/str_format/bind.h",
"internal/str_format/checker.h",
"internal/str_format/extension.h",
"internal/str_format/float_conversion.h",
"internal/str_format/output.h",
"internal/str_format/parser.h",
],
copts = ABSL_DEFAULT_COPTS,
visibility = ["//visibility:private"],
deps = [
":strings",
"//absl/base:bits",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/numeric:int128",
"//absl/types:optional",
"//absl/types:span",
],
)
cc_test(
name = "str_format_test",
srcs = ["str_format_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":cord",
":str_format",
":strings",
"//absl/base:core_headers",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_format_extension_test",
srcs = [
"internal/str_format/extension_test.cc",
],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":str_format",
":str_format_internal",
":strings",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_format_arg_test",
srcs = ["internal/str_format/arg_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":str_format",
":str_format_internal",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_format_bind_test",
srcs = ["internal/str_format/bind_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":str_format_internal",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_format_checker_test",
srcs = ["internal/str_format/checker_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":str_format",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_format_convert_test",
size = "medium",
srcs = ["internal/str_format/convert_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":str_format_internal",
"//absl/base:raw_logging_internal",
"//absl/types:optional",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_format_output_test",
srcs = ["internal/str_format/output_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":cord",
":str_format_internal",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "str_format_parser_test",
srcs = ["internal/str_format/parser_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":str_format_internal",
"//absl/base:core_headers",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "pow10_helper",
testonly = True,
srcs = ["internal/pow10_helper.cc"],
hdrs = ["internal/pow10_helper.h"],
visibility = ["//visibility:private"],
deps = ["//absl/base:config"],
)
cc_test(
name = "pow10_helper_test",
srcs = ["internal/pow10_helper_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":pow10_helper",
":str_format",
"@com_google_googletest//:gtest_main",
],
)

View file

@ -0,0 +1,593 @@
#
# Copyright 2017 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
absl_cc_library(
NAME
strings
HDRS
"ascii.h"
"charconv.h"
"escaping.h"
"match.h"
"numbers.h"
"str_cat.h"
"str_join.h"
"str_replace.h"
"str_split.h"
"string_view.h"
"strip.h"
"substitute.h"
SRCS
"ascii.cc"
"charconv.cc"
"escaping.cc"
"internal/charconv_bigint.cc"
"internal/charconv_bigint.h"
"internal/charconv_parse.cc"
"internal/charconv_parse.h"
"internal/memutil.cc"
"internal/memutil.h"
"internal/stl_type_traits.h"
"internal/str_join_internal.h"
"internal/str_split_internal.h"
"match.cc"
"numbers.cc"
"str_cat.cc"
"str_replace.cc"
"str_split.cc"
"string_view.cc"
"substitute.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::strings_internal
absl::base
absl::bits
absl::config
absl::core_headers
absl::endian
absl::int128
absl::memory
absl::raw_logging_internal
absl::throw_delegate
absl::type_traits
PUBLIC
)
# Internal helpers shared by absl::strings and related targets. Declared
# without the PUBLIC keyword, unlike absl::strings above.
#
# "internal/escaping.cc" is a translation unit, not a header, so it belongs in
# SRCS (it was previously listed under HDRS). This matches the Bazel target,
# which lists it in `srcs`.
absl_cc_library(
  NAME
    strings_internal
  HDRS
    "internal/char_map.h"
    "internal/escaping.h"
    "internal/ostringstream.h"
    "internal/resize_uninitialized.h"
    "internal/utf8.h"
  SRCS
    "internal/escaping.cc"
    "internal/ostringstream.cc"
    "internal/utf8.cc"
  COPTS
    ${ABSL_DEFAULT_COPTS}
  DEPS
    absl::config
    absl::core_headers
    absl::endian
    absl::raw_logging_internal
    absl::type_traits
)
absl_cc_test(
NAME
match_test
SRCS
"match_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::base
gmock_main
)
absl_cc_test(
NAME
escaping_test
SRCS
"escaping_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::core_headers
absl::fixed_array
gmock_main
)
absl_cc_test(
NAME
ascii_test
SRCS
"ascii_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::core_headers
gmock_main
)
absl_cc_test(
NAME
memutil_test
SRCS
"internal/memutil.h"
"internal/memutil_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::core_headers
gmock_main
)
absl_cc_test(
NAME
utf8_test
SRCS
"internal/utf8_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings_internal
absl::base
absl::core_headers
gmock_main
)
absl_cc_test(
NAME
string_view_test
SRCS
"string_view_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::config
absl::core_headers
absl::dynamic_annotations
gmock_main
)
absl_cc_test(
NAME
substitute_test
SRCS
"substitute_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::core_headers
gmock_main
)
absl_cc_test(
NAME
str_replace_test
SRCS
"str_replace_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
gmock_main
)
absl_cc_test(
NAME
str_split_test
SRCS
"str_split_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::base
absl::core_headers
absl::dynamic_annotations
absl::flat_hash_map
absl::node_hash_map
gmock_main
)
absl_cc_test(
NAME
ostringstream_test
SRCS
"internal/ostringstream_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings_internal
gmock_main
)
absl_cc_test(
NAME
resize_uninitialized_test
SRCS
"internal/resize_uninitialized.h"
"internal/resize_uninitialized_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::base
absl::core_headers
absl::type_traits
gmock_main
)
absl_cc_test(
NAME
str_join_test
SRCS
"str_join_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::base
absl::core_headers
absl::memory
gmock_main
)
absl_cc_test(
NAME
str_cat_test
SRCS
"str_cat_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::core_headers
gmock_main
)
absl_cc_test(
NAME
numbers_test
SRCS
"internal/numbers_test_common.h"
"numbers_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::core_headers
absl::pow10_helper
absl::config
absl::raw_logging_internal
absl::random_random
absl::random_distributions
absl::strings_internal
gmock_main
)
absl_cc_test(
NAME
strip_test
SRCS
"strip_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::base
gmock_main
)
absl_cc_test(
NAME
char_map_test
SRCS
"internal/char_map_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings_internal
gmock_main
)
absl_cc_test(
NAME
charconv_test
SRCS
"charconv_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::str_format
absl::pow10_helper
gmock_main
)
absl_cc_test(
NAME
charconv_parse_test
SRCS
"internal/charconv_parse.h"
"internal/charconv_parse_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::config
absl::raw_logging_internal
gmock_main
)
absl_cc_test(
NAME
charconv_bigint_test
SRCS
"internal/charconv_bigint.h"
"internal/charconv_bigint_test.cc"
"internal/charconv_parse.h"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::strings
absl::config
gmock_main
)
absl_cc_library(
NAME
str_format
HDRS
"str_format.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::str_format_internal
PUBLIC
)
absl_cc_library(
NAME
str_format_internal
HDRS
"internal/str_format/arg.h"
"internal/str_format/bind.h"
"internal/str_format/checker.h"
"internal/str_format/extension.h"
"internal/str_format/float_conversion.h"
"internal/str_format/output.h"
"internal/str_format/parser.h"
SRCS
"internal/str_format/arg.cc"
"internal/str_format/bind.cc"
"internal/str_format/extension.cc"
"internal/str_format/float_conversion.cc"
"internal/str_format/output.cc"
"internal/str_format/parser.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::bits
absl::strings
absl::config
absl::core_headers
absl::type_traits
absl::int128
absl::span
)
absl_cc_test(
NAME
str_format_test
SRCS
"str_format_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format
absl::cord
absl::strings
absl::core_headers
gmock_main
)
absl_cc_test(
NAME
str_format_extension_test
SRCS
"internal/str_format/extension_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format
absl::str_format_internal
absl::strings
gmock_main
)
absl_cc_test(
NAME
str_format_arg_test
SRCS
"internal/str_format/arg_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format
absl::str_format_internal
gmock_main
)
absl_cc_test(
NAME
str_format_bind_test
SRCS
"internal/str_format/bind_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format_internal
gmock_main
)
absl_cc_test(
NAME
str_format_checker_test
SRCS
"internal/str_format/checker_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format
gmock_main
)
absl_cc_test(
NAME
str_format_convert_test
SRCS
"internal/str_format/convert_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format_internal
absl::raw_logging_internal
absl::int128
gmock_main
)
absl_cc_test(
NAME
str_format_output_test
SRCS
"internal/str_format/output_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format_internal
absl::cord
gmock_main
)
absl_cc_test(
NAME
str_format_parser_test
SRCS
"internal/str_format/parser_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::str_format_internal
absl::core_headers
gmock_main
)
absl_cc_library(
NAME
pow10_helper
HDRS
"internal/pow10_helper.h"
SRCS
"internal/pow10_helper.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::config
TESTONLY
)
absl_cc_test(
NAME
pow10_helper_test
SRCS
"internal/pow10_helper_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::pow10_helper
absl::str_format
gmock_main
)
absl_cc_library(
NAME
cord
HDRS
"cord.h"
SRCS
"cord.cc"
"internal/cord_internal.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::base
absl::base_internal
absl::core_headers
absl::endian
absl::fixed_array
absl::function_ref
absl::inlined_vector
absl::optional
absl::raw_logging_internal
absl::strings
absl::strings_internal
absl::type_traits
PUBLIC
)
absl_cc_library(
NAME
cord_test_helpers
HDRS
"cord_test_helpers.h"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::cord
TESTONLY
)
absl_cc_test(
NAME
cord_test
SRCS
"cord_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::cord
absl::str_format
absl::strings
absl::base
absl::config
absl::core_headers
absl::endian
absl::raw_logging_internal
absl::fixed_array
gmock_main
)

View file

@ -0,0 +1,200 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
// NOTE: The kPropertyBits table below was generated by Python 2 code of the
// following form (it uses the `print` statement). Bit 0x02 is currently unused
// and available.
// TODO(mbar) Move Python code for generation of table to BUILD and link here.
//
// def Hex2(n):
// return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
// def IsPunct(ch):
// return (ord(ch) >= 32 and ord(ch) < 127 and
// not ch.isspace() and not ch.isalnum())
// def IsBlank(ch):
// return ch in ' \t'
// def IsCntrl(ch):
// return ord(ch) < 32 or ord(ch) == 127
// def IsXDigit(ch):
// return ch.isdigit() or ch.lower() in 'abcdef'
// for i in range(128):
// ch = chr(i)
// mask = ((ch.isalpha() and 0x01 or 0) |
// (ch.isalnum() and 0x04 or 0) |
// (ch.isspace() and 0x08 or 0) |
// (IsPunct(ch) and 0x10 or 0) |
// (IsBlank(ch) and 0x20 or 0) |
// (IsCntrl(ch) and 0x40 or 0) |
// (IsXDigit(ch) and 0x80 or 0))
// print Hex2(mask) + ',',
// if i % 16 == 7:
// print ' //', Hex2(i & 0x78)
// elif i % 16 == 15:
// print
// clang-format off
// Array of bitfields holding character information. Each bit value corresponds
// to a particular character feature. For readability, and because the value
// of these bits is tightly coupled to this implementation, the individual bits
// are not named. Note that bitfields for all characters above ASCII 127 are
// zero-initialized.
//
// Bit assignments, as produced by the Python generator script in the comment
// earlier in this file:
//   0x01 alpha    0x04 alnum    0x08 space    0x10 punct
//   0x20 blank    0x40 cntrl    0x80 xdigit   (0x02 unused)
//
// Only the first 128 entries (16 rows of 8) have explicit initializers; each
// row comment gives the character code of the first entry of that 16-entry
// group.
ABSL_DLL const unsigned char kPropertyBits[256] = {
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00
0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30
0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50
0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70
0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,
};
// Array of characters for the ascii_tolower() function. For values 'A'
// through 'Z', return the lower-case character; otherwise, return the
// identity of the passed character.
ABSL_DLL const char kToLower[256] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
// Array of characters for the ascii_toupper() function. For values 'a'
// through 'z', return the upper-case character; otherwise, return the
// identity of the passed character.
ABSL_DLL const char kToUpper[256] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
// clang-format on
} // namespace ascii_internal
// Rewrites `*s` in place, passing every character through
// absl::ascii_tolower(). The string's length is not changed.
void AsciiStrToLower(std::string* s) {
  for (auto it = s->begin(), last = s->end(); it != last; ++it) {
    *it = absl::ascii_tolower(*it);
  }
}
// Rewrites `*s` in place, passing every character through
// absl::ascii_toupper(). The string's length is not changed.
void AsciiStrToUpper(std::string* s) {
  for (auto it = s->begin(), last = s->end(); it != last; ++it) {
    *it = absl::ascii_toupper(*it);
  }
}
// Normalizes ASCII whitespace in `*str` in place: leading and trailing
// whitespace is dropped, and every interior run of consecutive whitespace
// characters is reduced to the last character of that run.
void RemoveExtraAsciiWhitespace(std::string* str) {
  const auto trimmed = StripAsciiWhitespace(*str);
  if (trimmed.empty()) {
    str->clear();
    return;
  }

  // `trimmed` views the bytes of `*str`. The write cursor never gets ahead of
  // the read cursor, so compacting in place is safe.
  auto* const base = &(*str)[0];
  auto* write_pos = base;
  bool prev_is_space = false;
  const auto read_end = trimmed.end();
  for (auto read_pos = trimmed.begin(); read_pos < read_end; ++read_pos) {
    const bool cur_is_space = absl::ascii_isspace(*read_pos);
    // Second (or later) whitespace in a row: step back so that it overwrites
    // the previously written whitespace character.
    if (prev_is_space && cur_is_space) {
      --write_pos;
    }
    prev_is_space = cur_is_space;
    *write_pos = *read_pos;
    ++write_pos;
  }

  // Drop everything after the compacted prefix.
  str->erase(write_pos - base);
}
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,242 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: ascii.h
// -----------------------------------------------------------------------------
//
// This package contains functions operating on characters and strings
// restricted to standard ASCII. These include character classification
// functions analogous to those found in the ANSI C Standard Library <ctype.h>
// header file.
//
// C++ implementations provide <ctype.h> functionality based on their
// C environment locale. In general, reliance on such a locale is not ideal, as
// the locale standard is problematic (and may not return invariant information
// for the same character set, for example). These `ascii_*()` functions are
// hard-wired for standard ASCII, much faster, and guaranteed to behave
// consistently. They will never be overloaded, nor will their function
// signature change.
//
// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
// `ascii_isxdigit()`
// Analogous to the <ctype.h> functions with similar names, these
// functions take an unsigned char and return a bool, based on whether the
// character matches the condition specified.
//
// If the input character has a numerical value greater than 127, these
// functions return `false`.
//
// `ascii_tolower()`, `ascii_toupper()`
// Analogous to the <ctype.h> functions with similar names, these functions
// take an unsigned char and return a char.
//
// If the input character is not an ASCII {lower,upper}-case letter (including
// numerical values greater than 127) then the functions return the same value
// as the input character.
#ifndef ABSL_STRINGS_ASCII_H_
#define ABSL_STRINGS_ASCII_H_
#include <algorithm>
#include <string>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
// Declaration for an array of bitfields holding character information.
// Indexed by character value. The inline predicates in this header test these
// bits: 0x01 alpha, 0x04 alnum, 0x08 space, 0x10 punct, 0x20 blank,
// 0x40 cntrl, 0x80 xdigit.
ABSL_DLL extern const unsigned char kPropertyBits[256];
// Declaration for the array of characters to upper-case characters.
// Indexed by character value; read by ascii_toupper().
ABSL_DLL extern const char kToUpper[256];
// Declaration for the array of characters to lower-case characters.
// Indexed by character value; read by ascii_tolower().
ABSL_DLL extern const char kToLower[256];
} // namespace ascii_internal
// ascii_isalpha()
//
// Returns true when `c` is an alphabetic character, i.e. when the 0x01 bit is
// set in the property-table entry for `c`.
inline bool ascii_isalpha(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x01) != 0;
}
// ascii_isalnum()
//
// Returns true when `c` is an alphanumeric character, i.e. when the 0x04 bit
// is set in the property-table entry for `c`.
inline bool ascii_isalnum(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x04) != 0;
}
// ascii_isspace()
//
// Returns true when `c` is a whitespace character (space, tab, vertical tab,
// formfeed, linefeed, or carriage return), i.e. when the 0x08 bit is set in
// the property-table entry for `c`.
inline bool ascii_isspace(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x08) != 0;
}
// ascii_ispunct()
//
// Returns true when `c` is a punctuation character, i.e. when the 0x10 bit is
// set in the property-table entry for `c`.
inline bool ascii_ispunct(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x10) != 0;
}
// ascii_isblank()
//
// Returns true when `c` is a blank character (tab or space), i.e. when the
// 0x20 bit is set in the property-table entry for `c`.
inline bool ascii_isblank(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x20) != 0;
}
// ascii_iscntrl()
//
// Returns true when `c` is a control character, i.e. when the 0x40 bit is set
// in the property-table entry for `c`.
inline bool ascii_iscntrl(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x40) != 0;
}
// ascii_isxdigit()
//
// Returns true when `c` can be represented as a hexadecimal digit character
// (i.e. {0-9}, {A-F}, or {a-f}), i.e. when the 0x80 bit is set in the
// property-table entry for `c`.
inline bool ascii_isxdigit(unsigned char c) {
  const unsigned char properties = ascii_internal::kPropertyBits[c];
  return (properties & 0x80) != 0;
}
// ascii_isdigit()
//
// Returns true when `c` is one of the decimal digit characters '0' through
// '9'.
inline bool ascii_isdigit(unsigned char c) {
  return '0' <= c && c <= '9';
}
// ascii_isprint()
//
// Returns true when `c` is printable: any value from 32 (the space character)
// up to and including 126.
inline bool ascii_isprint(unsigned char c) {
  return 32 <= c && c < 127;
}
// ascii_isgraph()
//
// Returns true when `c` has a graphical representation: any value strictly
// between 32 (space) and 127.
inline bool ascii_isgraph(unsigned char c) {
  return 32 < c && c < 127;
}
// ascii_isupper()
//
// Returns true when `c` is one of the uppercase letters 'A' through 'Z'.
inline bool ascii_isupper(unsigned char c) {
  return 'A' <= c && c <= 'Z';
}
// ascii_islower()
//
// Returns true when `c` is one of the lowercase letters 'a' through 'z'.
inline bool ascii_islower(unsigned char c) {
  return 'a' <= c && c <= 'z';
}
// ascii_isascii()
//
// Returns true when `c` is an ASCII character, i.e. its value is at most 127.
inline bool ascii_isascii(unsigned char c) {
  return c <= 127;
}
// ascii_tolower()
//
// Looks `c` up in the lower-case conversion table. Uppercase ASCII letters
// map to their lowercase form; character values > 127 are simply returned.
inline char ascii_tolower(unsigned char c) {
  const char lowered = ascii_internal::kToLower[c];
  return lowered;
}
// In-place variant: converts the characters in `s` to lowercase, changing the
// contents of `s`.
void AsciiStrToLower(std::string* s);
// Copying variant: builds a std::string from `s`, lowercases that copy with
// the in-place overload, and returns it. The viewed data is left untouched.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
  std::string lowered(s);
  absl::AsciiStrToLower(&lowered);
  return lowered;
}
// ascii_toupper()
//
// Looks `c` up in the upper-case conversion table. Lowercase ASCII letters
// map to their uppercase form; character values > 127 are simply returned.
inline char ascii_toupper(unsigned char c) {
  const char uppered = ascii_internal::kToUpper[c];
  return uppered;
}
// In-place variant: converts the characters in `s` to uppercase, changing the
// contents of `s`.
void AsciiStrToUpper(std::string* s);
// Copying variant: builds a std::string from `s`, uppercases that copy with
// the in-place overload, and returns it. The viewed data is left untouched.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
  std::string uppered(s);
  absl::AsciiStrToUpper(&uppered);
  return uppered;
}
// Returns the suffix of `str` that starts at its first non-whitespace
// character (an empty view when `str` is all whitespace). The result refers
// to the same underlying data as `str`.
ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
    absl::string_view str) {
  absl::string_view::size_type skipped = 0;
  while (skipped < str.size() && absl::ascii_isspace(str[skipped])) {
    ++skipped;
  }
  return str.substr(skipped);
}
// Erases, in place, every whitespace character that precedes the first
// non-whitespace character of `*str`.
inline void StripLeadingAsciiWhitespace(std::string* str) {
  std::string::size_type skipped = 0;
  while (skipped < str->size() && absl::ascii_isspace((*str)[skipped])) {
    ++skipped;
  }
  str->erase(0, skipped);
}
// Returns the prefix of `str` that ends at its last non-whitespace character
// (an empty view when `str` is all whitespace). The result refers to the same
// underlying data as `str`.
ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
    absl::string_view str) {
  absl::string_view::size_type kept = str.size();
  while (kept > 0 && absl::ascii_isspace(str[kept - 1])) {
    --kept;
  }
  return str.substr(0, kept);
}
// Erases, in place, every whitespace character that follows the last
// non-whitespace character of `*str`.
inline void StripTrailingAsciiWhitespace(std::string* str) {
  std::string::size_type kept = str->size();
  while (kept > 0 && absl::ascii_isspace((*str)[kept - 1])) {
    --kept;
  }
  str->erase(kept);
}
// Returns the part of `str` that remains after removing whitespace from both
// ends: the leading run is stripped first, then the trailing run.
ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
    absl::string_view str) {
  const absl::string_view without_leading = StripLeadingAsciiWhitespace(str);
  return StripTrailingAsciiWhitespace(without_leading);
}
// Removes, in place, whitespace from both ends of `*str`.
inline void StripAsciiWhitespace(std::string* str) {
  // The tail is trimmed before the head, using the in-place overloads.
  StripTrailingAsciiWhitespace(str);
  StripLeadingAsciiWhitespace(str);
}
// Removes leading, trailing, and consecutive internal whitespace.
// The string is modified in place. Each internal run of whitespace is reduced
// to a single character: the last whitespace character of that run.
void RemoveExtraAsciiWhitespace(std::string*);
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_ASCII_H_

View file

@ -0,0 +1,120 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
#include <cctype>
#include <string>
#include <array>
#include <random>
#include "benchmark/benchmark.h"
namespace {
// Returns all 256 byte values, each exactly once, in a random order. The
// generator is seeded from std::random_device, so the order differs between
// calls.
std::array<unsigned char, 256> MakeShuffledBytes() {
  std::array<unsigned char, 256> shuffled;
  // Start from the identity sequence 0, 1, ..., 255.
  for (size_t value = 0; value != shuffled.size(); ++value) {
    shuffled[value] = static_cast<unsigned char>(value);
  }
  std::random_device entropy;
  std::seed_seq seed_words({entropy(), entropy(), entropy(), entropy(),
                            entropy(), entropy(), entropy(), entropy()});
  std::mt19937 engine(seed_words);
  std::shuffle(shuffled.begin(), shuffled.end(), engine);
  return shuffled;
}
// Shared benchmark driver: on every benchmark iteration, applies `f` to each
// of the 256 byte values (in shuffled order) and counts how many results
// convert to true.
template <typename Function>
void AsciiBenchmark(benchmark::State& state, Function f) {
std::array<unsigned char, 256> bytes = MakeShuffledBytes();
size_t sum = 0;
for (auto _ : state) {
for (unsigned char b : bytes) sum += f(b) ? 1 : 0;
}
// Make a copy of `sum` before calling `DoNotOptimize` to make sure that `sum`
// can be put in a CPU register and not degrade performance in the loop above.
size_t sum2 = sum;
benchmark::DoNotOptimize(sum2);
// Report throughput as one full pass over `bytes` per iteration.
state.SetBytesProcessed(state.iterations() * bytes.size());
}
// BM_Ascii is an overload set of function templates, each parameterized on a
// function pointer of a different signature, so that one BENCHMARK_TEMPLATE
// name can be used for all of the functions registered in this file. Every
// overload forwards to AsciiBenchmark().
//
// Signature of the <cctype> style functions: int(int).
using StdAsciiFunction = int (*)(int);
template <StdAsciiFunction f>
void BM_Ascii(benchmark::State& state) {
AsciiBenchmark(state, f);
}
// Signature of the absl::ascii_is*() predicates: bool(unsigned char).
using AbslAsciiIsFunction = bool (*)(unsigned char);
template <AbslAsciiIsFunction f>
void BM_Ascii(benchmark::State& state) {
AsciiBenchmark(state, f);
}
// Signature of the absl::ascii_to*() conversions: char(unsigned char).
using AbslAsciiToFunction = char (*)(unsigned char);
template <AbslAsciiToFunction f>
void BM_Ascii(benchmark::State& state) {
AsciiBenchmark(state, f);
}
// Identity conversion with the same signature as the absl::ascii_to*()
// functions; it is registered as a benchmark alongside them.
inline char Noop(unsigned char b) {
  const char unchanged = static_cast<char>(b);
  return unchanged;
}
// Benchmark registrations. After the Noop entry, each <cctype> function is
// registered immediately before its absl::ascii_* counterpart. Note that
// `isascii` is referenced unqualified, unlike the other <cctype> functions.
BENCHMARK_TEMPLATE(BM_Ascii, Noop);
BENCHMARK_TEMPLATE(BM_Ascii, std::isalpha);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isalpha);
BENCHMARK_TEMPLATE(BM_Ascii, std::isdigit);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isdigit);
BENCHMARK_TEMPLATE(BM_Ascii, std::isalnum);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isalnum);
BENCHMARK_TEMPLATE(BM_Ascii, std::isspace);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isspace);
BENCHMARK_TEMPLATE(BM_Ascii, std::ispunct);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_ispunct);
BENCHMARK_TEMPLATE(BM_Ascii, std::isblank);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isblank);
BENCHMARK_TEMPLATE(BM_Ascii, std::iscntrl);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_iscntrl);
BENCHMARK_TEMPLATE(BM_Ascii, std::isxdigit);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isxdigit);
BENCHMARK_TEMPLATE(BM_Ascii, std::isprint);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isprint);
BENCHMARK_TEMPLATE(BM_Ascii, std::isgraph);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isgraph);
BENCHMARK_TEMPLATE(BM_Ascii, std::isupper);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isupper);
BENCHMARK_TEMPLATE(BM_Ascii, std::islower);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_islower);
BENCHMARK_TEMPLATE(BM_Ascii, isascii);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isascii);
BENCHMARK_TEMPLATE(BM_Ascii, std::tolower);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_tolower);
BENCHMARK_TEMPLATE(BM_Ascii, std::toupper);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_toupper);
// Benchmarks the copying absl::AsciiStrToLower() overload on a string made of
// 'X' characters whose length is the benchmark's range argument.
static void BM_StrToLower(benchmark::State& state) {
  const int length = state.range(0);
  std::string input(length, 'X');
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::AsciiStrToLower(input));
  }
}
BENCHMARK(BM_StrToLower)->Range(1, 1 << 20);
// Benchmarks the copying absl::AsciiStrToUpper() overload on a string made of
// 'x' characters whose length is the benchmark's range argument.
static void BM_StrToUpper(benchmark::State& state) {
  const int length = state.range(0);
  std::string input(length, 'x');
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::AsciiStrToUpper(input));
  }
}
BENCHMARK(BM_StrToUpper)->Range(1, 1 << 20);
} // namespace

View file

@ -0,0 +1,361 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
#include <cctype>
#include <clocale>
#include <cstring>
#include <string>
#include "gtest/gtest.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace {
// Exhaustively checks every absl::ascii_is*() predicate against an
// independently written expectation for each of the 256 byte values, then
// checks that passing a (possibly negative) signed char gives the same answer
// as passing the corresponding value in [0, 255].
TEST(AsciiIsFoo, All) {
// isalpha: exactly the letters a-z and A-Z.
for (int i = 0; i < 256; i++) {
if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'))
EXPECT_TRUE(absl::ascii_isalpha(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isalpha(i)) << ": failed on " << i;
}
// isdigit: exactly 0-9.
for (int i = 0; i < 256; i++) {
if ((i >= '0' && i <= '9'))
EXPECT_TRUE(absl::ascii_isdigit(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isdigit(i)) << ": failed on " << i;
}
// isalnum: union of isalpha and isdigit.
for (int i = 0; i < 256; i++) {
if (absl::ascii_isalpha(i) || absl::ascii_isdigit(i))
EXPECT_TRUE(absl::ascii_isalnum(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isalnum(i)) << ": failed on " << i;
}
// isspace: the six whitespace characters. '\0' is excluded explicitly because
// strchr() also matches the string terminator.
for (int i = 0; i < 256; i++) {
if (i != '\0' && strchr(" \r\n\t\v\f", i))
EXPECT_TRUE(absl::ascii_isspace(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isspace(i)) << ": failed on " << i;
}
// isprint: values 32 through 126.
for (int i = 0; i < 256; i++) {
if (i >= 32 && i < 127)
EXPECT_TRUE(absl::ascii_isprint(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isprint(i)) << ": failed on " << i;
}
// ispunct: printable, but neither whitespace nor alphanumeric.
for (int i = 0; i < 256; i++) {
if (absl::ascii_isprint(i) && !absl::ascii_isspace(i) &&
!absl::ascii_isalnum(i))
EXPECT_TRUE(absl::ascii_ispunct(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_ispunct(i)) << ": failed on " << i;
}
// isblank: space and tab only.
for (int i = 0; i < 256; i++) {
if (i == ' ' || i == '\t')
EXPECT_TRUE(absl::ascii_isblank(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isblank(i)) << ": failed on " << i;
}
// iscntrl: values below 32, plus 127.
for (int i = 0; i < 256; i++) {
if (i < 32 || i == 127)
EXPECT_TRUE(absl::ascii_iscntrl(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_iscntrl(i)) << ": failed on " << i;
}
// isxdigit: decimal digits plus A-F and a-f.
for (int i = 0; i < 256; i++) {
if (absl::ascii_isdigit(i) || (i >= 'A' && i <= 'F') ||
(i >= 'a' && i <= 'f'))
EXPECT_TRUE(absl::ascii_isxdigit(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isxdigit(i)) << ": failed on " << i;
}
// isgraph: values 33 through 126 (printable, excluding space).
for (int i = 0; i < 256; i++) {
if (i > 32 && i < 127)
EXPECT_TRUE(absl::ascii_isgraph(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isgraph(i)) << ": failed on " << i;
}
// isupper: exactly A-Z.
for (int i = 0; i < 256; i++) {
if (i >= 'A' && i <= 'Z')
EXPECT_TRUE(absl::ascii_isupper(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_isupper(i)) << ": failed on " << i;
}
// islower: exactly a-z.
for (int i = 0; i < 256; i++) {
if (i >= 'a' && i <= 'z')
EXPECT_TRUE(absl::ascii_islower(i)) << ": failed on " << i;
else
EXPECT_TRUE(!absl::ascii_islower(i)) << ": failed on " << i;
}
// isascii: true for 0-127, false for 128-255.
for (int i = 0; i < 128; i++) {
EXPECT_TRUE(absl::ascii_isascii(i)) << ": failed on " << i;
}
for (int i = 128; i < 256; i++) {
EXPECT_TRUE(!absl::ascii_isascii(i)) << ": failed on " << i;
}
// The official is* functions don't accept negative signed chars, but
// our absl::ascii_is* functions do.
for (int i = 0; i < 256; i++) {
signed char sc = static_cast<signed char>(static_cast<unsigned char>(i));
EXPECT_EQ(absl::ascii_isalpha(i), absl::ascii_isalpha(sc)) << i;
EXPECT_EQ(absl::ascii_isdigit(i), absl::ascii_isdigit(sc)) << i;
EXPECT_EQ(absl::ascii_isalnum(i), absl::ascii_isalnum(sc)) << i;
EXPECT_EQ(absl::ascii_isspace(i), absl::ascii_isspace(sc)) << i;
EXPECT_EQ(absl::ascii_ispunct(i), absl::ascii_ispunct(sc)) << i;
EXPECT_EQ(absl::ascii_isblank(i), absl::ascii_isblank(sc)) << i;
EXPECT_EQ(absl::ascii_iscntrl(i), absl::ascii_iscntrl(sc)) << i;
EXPECT_EQ(absl::ascii_isxdigit(i), absl::ascii_isxdigit(sc)) << i;
EXPECT_EQ(absl::ascii_isprint(i), absl::ascii_isprint(sc)) << i;
EXPECT_EQ(absl::ascii_isgraph(i), absl::ascii_isgraph(sc)) << i;
EXPECT_EQ(absl::ascii_isupper(i), absl::ascii_isupper(sc)) << i;
EXPECT_EQ(absl::ascii_islower(i), absl::ascii_islower(sc)) << i;
EXPECT_EQ(absl::ascii_isascii(i), absl::ascii_isascii(sc)) << i;
}
}
// Checks that absl::ascii_isfoo returns the same value as isfoo in the C
// locale.
TEST(AsciiIsFoo, SameAsIsFoo) {
#ifndef __ANDROID__
  // Temporarily change LC_CTYPE to "C". It should already be C, but just for
  // safety. setlocale() returns the name of the locale in effect *after* the
  // call, so the previous name must be queried (null locale argument) and
  // copied before switching; the returned buffer may be overwritten by later
  // setlocale() calls.
  const char* current_locale = setlocale(LC_CTYPE, nullptr);
  ASSERT_TRUE(current_locale != nullptr);
  const std::string old_locale(current_locale);
  ASSERT_TRUE(setlocale(LC_CTYPE, "C") != nullptr);
#endif
  for (int i = 0; i < 256; i++) {
    EXPECT_EQ(isalpha(i) != 0, absl::ascii_isalpha(i)) << i;
    EXPECT_EQ(isdigit(i) != 0, absl::ascii_isdigit(i)) << i;
    EXPECT_EQ(isalnum(i) != 0, absl::ascii_isalnum(i)) << i;
    EXPECT_EQ(isspace(i) != 0, absl::ascii_isspace(i)) << i;
    EXPECT_EQ(ispunct(i) != 0, absl::ascii_ispunct(i)) << i;
    EXPECT_EQ(isblank(i) != 0, absl::ascii_isblank(i)) << i;
    EXPECT_EQ(iscntrl(i) != 0, absl::ascii_iscntrl(i)) << i;
    EXPECT_EQ(isxdigit(i) != 0, absl::ascii_isxdigit(i)) << i;
    EXPECT_EQ(isprint(i) != 0, absl::ascii_isprint(i)) << i;
    EXPECT_EQ(isgraph(i) != 0, absl::ascii_isgraph(i)) << i;
    EXPECT_EQ(isupper(i) != 0, absl::ascii_isupper(i)) << i;
    EXPECT_EQ(islower(i) != 0, absl::ascii_islower(i)) << i;
    EXPECT_EQ(isascii(i) != 0, absl::ascii_isascii(i)) << i;
  }
#ifndef __ANDROID__
  // Restore the locale that was active before this test ran.
  ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str()) != nullptr);
#endif
}
// Checks absl::ascii_tolower()/ascii_toupper() for all 256 byte values:
// against a hand-written expectation, against tolower()/toupper() in the C
// locale, and for equal results when given a signed char.
TEST(AsciiToFoo, All) {
#ifndef __ANDROID__
  // Temporarily change LC_CTYPE to "C". It should already be C, but just for
  // safety. setlocale() returns the name of the locale in effect *after* the
  // call, so the previous name must be queried (null locale argument) and
  // copied before switching; the returned buffer may be overwritten by later
  // setlocale() calls.
  const char* current_locale = setlocale(LC_CTYPE, nullptr);
  ASSERT_TRUE(current_locale != nullptr);
  const std::string old_locale(current_locale);
  ASSERT_TRUE(setlocale(LC_CTYPE, "C") != nullptr);
#endif
  for (int i = 0; i < 256; i++) {
    if (absl::ascii_islower(i))
      EXPECT_EQ(absl::ascii_toupper(i), 'A' + (i - 'a')) << i;
    else
      EXPECT_EQ(absl::ascii_toupper(i), static_cast<char>(i)) << i;
    if (absl::ascii_isupper(i))
      EXPECT_EQ(absl::ascii_tolower(i), 'a' + (i - 'A')) << i;
    else
      EXPECT_EQ(absl::ascii_tolower(i), static_cast<char>(i)) << i;
    // These CHECKs only hold in a C locale.
    EXPECT_EQ(static_cast<char>(tolower(i)), absl::ascii_tolower(i)) << i;
    EXPECT_EQ(static_cast<char>(toupper(i)), absl::ascii_toupper(i)) << i;
    // The official to* functions don't accept negative signed chars, but
    // our absl::ascii_to* functions do.
    signed char sc = static_cast<signed char>(static_cast<unsigned char>(i));
    EXPECT_EQ(absl::ascii_tolower(i), absl::ascii_tolower(sc)) << i;
    EXPECT_EQ(absl::ascii_toupper(i), absl::ascii_toupper(sc)) << i;
  }
#ifndef __ANDROID__
  // Restore the locale that was active before this test ran.
  ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str()) != nullptr);
#endif
}
// Checks the copying absl::AsciiStrToLower() overload with a char array, a
// std::string and an absl::string_view argument, and checks that
// absl::ascii_tolower can be passed to std::transform.
TEST(AsciiStrTo, Lower) {
const char buf[] = "ABCDEF";
const std::string str("GHIJKL");
const std::string str2("MNOPQR");
const absl::string_view sp(str2);
EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
// Per-character conversion of a mutable C string.
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
mutable_buf, absl::ascii_tolower);
EXPECT_STREQ("mutable", mutable_buf);
}
// Checks the copying absl::AsciiStrToUpper() overload with a char array, a
// std::string and an absl::string_view argument, and checks that
// absl::ascii_toupper can be passed to std::transform.
TEST(AsciiStrTo, Upper) {
const char buf[] = "abcdef";
const std::string str("ghijkl");
const std::string str2("mnopqr");
const absl::string_view sp(str2);
EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp));
// Per-character conversion of a mutable C string.
char mutable_buf[] = "Mutable";
std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
mutable_buf, absl::ascii_toupper);
EXPECT_STREQ("MUTABLE", mutable_buf);
}
// string_view overload: covers an empty view, input without whitespace,
// leading whitespace only, whitespace that must be kept (internal and
// trailing), and all-whitespace input.
TEST(StripLeadingAsciiWhitespace, FromStringView) {
EXPECT_EQ(absl::string_view{},
absl::StripLeadingAsciiWhitespace(absl::string_view{}));
EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"foo"}));
EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo"}));
EXPECT_EQ("foo foo\n ",
absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo foo\n "}));
EXPECT_EQ(absl::string_view{}, absl::StripLeadingAsciiWhitespace(
{"\t \n\f\r\v\n\t \n\f\r\v\n"}));
}
// std::string* overload: same cases as the string_view test, applied in place
// to a reused string.
TEST(StripLeadingAsciiWhitespace, InPlace) {
std::string str;
absl::StripLeadingAsciiWhitespace(&str);
EXPECT_EQ("", str);
str = "foo";
absl::StripLeadingAsciiWhitespace(&str);
EXPECT_EQ("foo", str);
str = "\t \n\f\r\n\vfoo";
absl::StripLeadingAsciiWhitespace(&str);
EXPECT_EQ("foo", str);
str = "\t \n\f\r\n\vfoo foo\n ";
absl::StripLeadingAsciiWhitespace(&str);
EXPECT_EQ("foo foo\n ", str);
str = "\t \n\f\r\v\n\t \n\f\r\v\n";
absl::StripLeadingAsciiWhitespace(&str);
EXPECT_EQ(absl::string_view{}, str);
}
// string_view overload: covers an empty view, input without whitespace,
// trailing whitespace only, whitespace that must be kept (leading and
// internal), and all-whitespace input.
TEST(StripTrailingAsciiWhitespace, FromStringView) {
EXPECT_EQ(absl::string_view{},
absl::StripTrailingAsciiWhitespace(absl::string_view{}));
EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo"}));
EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo\t \n\f\r\n\v"}));
EXPECT_EQ(" \nfoo foo",
absl::StripTrailingAsciiWhitespace({" \nfoo foo\t \n\f\r\n\v"}));
EXPECT_EQ(absl::string_view{}, absl::StripTrailingAsciiWhitespace(
{"\t \n\f\r\v\n\t \n\f\r\v\n"}));
}
// std::string* overload: same cases as the string_view test, applied in place
// to a reused string.
TEST(StripTrailingAsciiWhitespace, InPlace) {
std::string str;
absl::StripTrailingAsciiWhitespace(&str);
EXPECT_EQ("", str);
str = "foo";
absl::StripTrailingAsciiWhitespace(&str);
EXPECT_EQ("foo", str);
str = "foo\t \n\f\r\n\v";
absl::StripTrailingAsciiWhitespace(&str);
EXPECT_EQ("foo", str);
str = " \nfoo foo\t \n\f\r\n\v";
absl::StripTrailingAsciiWhitespace(&str);
EXPECT_EQ(" \nfoo foo", str);
str = "\t \n\f\r\v\n\t \n\f\r\v\n";
absl::StripTrailingAsciiWhitespace(&str);
EXPECT_EQ(absl::string_view{}, str);
}
// string_view overload: whitespace is removed from both ends while internal
// whitespace is preserved; also covers empty and all-whitespace input.
TEST(StripAsciiWhitespace, FromStringView) {
EXPECT_EQ(absl::string_view{},
absl::StripAsciiWhitespace(absl::string_view{}));
EXPECT_EQ("foo", absl::StripAsciiWhitespace({"foo"}));
EXPECT_EQ("foo",
absl::StripAsciiWhitespace({"\t \n\f\r\n\vfoo\t \n\f\r\n\v"}));
EXPECT_EQ("foo foo", absl::StripAsciiWhitespace(
{"\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"}));
EXPECT_EQ(absl::string_view{},
absl::StripAsciiWhitespace({"\t \n\f\r\v\n\t \n\f\r\v\n"}));
}
// std::string* overload: same cases as the string_view test, applied in place
// to a reused string.
TEST(StripAsciiWhitespace, InPlace) {
std::string str;
absl::StripAsciiWhitespace(&str);
EXPECT_EQ("", str);
str = "foo";
absl::StripAsciiWhitespace(&str);
EXPECT_EQ("foo", str);
str = "\t \n\f\r\n\vfoo\t \n\f\r\n\v";
absl::StripAsciiWhitespace(&str);
EXPECT_EQ("foo", str);
str = "\t \n\f\r\n\vfoo foo\t \n\f\r\n\v";
absl::StripAsciiWhitespace(&str);
EXPECT_EQ("foo foo", str);
str = "\t \n\f\r\v\n\t \n\f\r\v\n";
absl::StripAsciiWhitespace(&str);
EXPECT_EQ(absl::string_view{}, str);
}
// Table-driven check of absl::RemoveExtraAsciiWhitespace(): inputs[i] must be
// rewritten to outputs[i]. The two arrays are parallel and must stay the same
// length; only the size of `inputs` is used for the loop bound.
TEST(RemoveExtraAsciiWhitespace, InPlace) {
const char* inputs[] = {"No extra space",
" Leading whitespace",
"Trailing whitespace ",
" Leading and trailing ",
" Whitespace \t in\v middle ",
"'Eeeeep! \n Newlines!\n",
"nospaces",
"",
"\n\t a\t\n\nb \t\n"};
// Expected results, in the same order as `inputs`. The last case shows that
// the final character of a whitespace run is the one that is kept.
const char* outputs[] = {
"No extra space",
"Leading whitespace",
"Trailing whitespace",
"Leading and trailing",
"Whitespace in middle",
"'Eeeeep! Newlines!",
"nospaces",
"",
"a\nb",
};
const int NUM_TESTS = ABSL_ARRAYSIZE(inputs);
for (int i = 0; i < NUM_TESTS; i++) {
std::string s(inputs[i]);
absl::RemoveExtraAsciiWhitespace(&s);
EXPECT_EQ(outputs[i], s);
}
}
} // namespace

View file

@ -0,0 +1,984 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/charconv.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include "absl/base/casts.h"
#include "absl/base/internal/bits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/charconv_bigint.h"
#include "absl/strings/internal/charconv_parse.h"
// The macro ABSL_BIT_PACK_FLOATS is defined on x86-64, where IEEE floating
// point numbers have the same endianness in memory as a bitfield struct
// containing the corresponding parts.
//
// When set, we replace calls to ldexp() with manual bit packing, which is
// faster and is unaffected by floating point environment.
//
// The macro is decided here only: a definition supplied from outside (for
// example on the compiler command line) is rejected with a compile error.
#ifdef ABSL_BIT_PACK_FLOATS
#error ABSL_BIT_PACK_FLOATS cannot be directly set
#elif defined(__x86_64__) || defined(_M_X64)
#define ABSL_BIT_PACK_FLOATS 1
#endif
// A note about subnormals:
//
// The code below talks about "normals" and "subnormals". A normal IEEE float
// has a fixed-width mantissa and power of two exponent. For example, a normal
// `double` has a 53-bit mantissa. Because the high bit is always 1, it is not
// stored in the representation. The implicit bit buys an extra bit of
// resolution in the datatype.
//
// The downside of this scheme is that there is a large gap between DBL_MIN and
// zero. (Large, at least, relative to the difference between DBL_MIN and the
// next representable number). This gap is softened by the "subnormal" numbers,
// which have the same power-of-two exponent as DBL_MIN, but no implicit 53rd
// bit. An all-bits-zero exponent in the encoding represents subnormals. (Zero
// is represented as a subnormal with an all-bits-zero mantissa.)
//
// The code below, in calculations, represents the mantissa as a uint64_t. The
// end result normally has the 53rd bit set. It represents subnormals by using
// narrower mantissas.
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Per-type constants and helpers for building IEEE floating point values.
// Only a declaration is given for the primary template; the members are
// provided by the explicit specializations for `double` and `float`.
template <typename FloatType>
struct FloatTraits;
// Specialization of floating point traits for the `double` type.
template <>
struct FloatTraits<double> {
// The number of mantissa bits in the given float type. This includes the
// implied high bit.
static constexpr int kTargetMantissaBits = 53;
// The largest supported IEEE exponent, in our integral mantissa
// representation.
//
// If `m` is the largest possible int kTargetMantissaBits bits wide, then
// m * 2**kMaxExponent is exactly equal to DBL_MAX.
static constexpr int kMaxExponent = 971;
// The smallest supported IEEE normal exponent, in our integral mantissa
// representation.
//
// If `m` is the smallest possible int kTargetMantissaBits bits wide, then
// m * 2**kMinNormalExponent is exactly equal to DBL_MIN.
static constexpr int kMinNormalExponent = -1074;
// Returns a NaN by forwarding `tagp` to nan().
static double MakeNan(const char* tagp) {
// Support nan no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
return nan(tagp);
}
// Builds a nonzero floating point number out of the provided parts.
//
// This is intended to do the same operation as ldexp(mantissa, exponent),
// but using purely integer math, to avoid -ffast-math and floating
// point environment issues. Using type punning is also faster. We fall back
// to ldexp on a per-platform basis for portability.
//
// `exponent` must be between kMinNormalExponent and kMaxExponent.
//
// `mantissa` must either be exactly kTargetMantissaBits wide, in which case
// a normal value is made, or it must be narrower than that, in which case
// `exponent` must be exactly kMinNormalExponent, and a subnormal value is
// made.
//
// `sign` selects a negative result when true.
static double Make(uint64_t mantissa, int exponent, bool sign) {
#ifndef ABSL_BIT_PACK_FLOATS
// Support ldexp no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
return sign ? -ldexp(mantissa, exponent) : ldexp(mantissa, exponent);
#else
// Mask covering the 52 explicitly stored mantissa bits.
constexpr uint64_t kMantissaMask =
(uint64_t(1) << (kTargetMantissaBits - 1)) - 1;
// The sign occupies the top bit of the 64-bit pattern.
uint64_t dbl = static_cast<uint64_t>(sign) << 63;
if (mantissa > kMantissaMask) {
// Normal value.
// Adjust by 1023 for the exponent representation bias, and an additional
// 52 due to the implied binary point in the IEEE mantissa representation.
dbl += uint64_t{exponent + 1023u + kTargetMantissaBits - 1} << 52;
// Drop the implied high bit; it is not stored.
mantissa &= kMantissaMask;
} else {
// subnormal value
// The exponent field is left as all zero bits.
assert(exponent == kMinNormalExponent);
}
dbl += mantissa;
return absl::bit_cast<double>(dbl);
#endif // ABSL_BIT_PACK_FLOATS
}
};
// Specialization of floating point traits for the `float` type. See the
// FloatTraits<double> specialization above for meaning of each of the following
// members and methods.
template <>
struct FloatTraits<float> {
// Mantissa width including the implied high bit.
static constexpr int kTargetMantissaBits = 24;
// Largest exponent to pair with a kTargetMantissaBits-wide integral mantissa.
static constexpr int kMaxExponent = 104;
// Smallest such exponent for normal values; also the exponent required by
// Make() for subnormals.
static constexpr int kMinNormalExponent = -149;
// Returns a NaN by forwarding `tagp` to nanf().
static float MakeNan(const char* tagp) {
// Support nanf no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
return nanf(tagp);
}
// Builds a nonzero `float` from an integral mantissa, a power-of-two
// exponent and a sign flag (true selects a negative result).
static float Make(uint32_t mantissa, int exponent, bool sign) {
#ifndef ABSL_BIT_PACK_FLOATS
// Support ldexpf no matter which namespace it's in. Some platforms
// incorrectly don't put it in namespace std.
using namespace std; // NOLINT
return sign ? -ldexpf(mantissa, exponent) : ldexpf(mantissa, exponent);
#else
// Mask covering the 23 explicitly stored mantissa bits.
constexpr uint32_t kMantissaMask =
(uint32_t(1) << (kTargetMantissaBits - 1)) - 1;
// The sign occupies the top bit of the 32-bit pattern.
uint32_t flt = static_cast<uint32_t>(sign) << 31;
if (mantissa > kMantissaMask) {
// Normal value.
// Adjust by 127 for the exponent representation bias, and an additional
// 23 due to the implied binary point in the IEEE mantissa representation.
flt += uint32_t{exponent + 127u + kTargetMantissaBits - 1} << 23;
// Drop the implied high bit; it is not stored.
mantissa &= kMantissaMask;
} else {
// subnormal value
// The exponent field is left as all zero bits.
assert(exponent == kMinNormalExponent);
}
flt += mantissa;
return absl::bit_cast<float>(flt);
#endif // ABSL_BIT_PACK_FLOATS
}
};
// Decimal-to-binary conversions require coercing powers of 10 into a mantissa
// and a power of 2. The two helper functions Power10Mantissa(n) and
// Power10Exponent(n) perform this task. Together, these represent a hand-
// rolled floating point value which is equal to or just less than 10**n.
//
// The return values satisfy two range guarantees:
//
// Power10Mantissa(n) * 2**Power10Exponent(n) <= 10**n
// < (Power10Mantissa(n) + 1) * 2**Power10Exponent(n)
//
// 2**63 <= Power10Mantissa(n) < 2**64.
//
// Lookups into the power-of-10 table must first check the Power10Overflow() and
// Power10Underflow() functions, to avoid out-of-bounds table access.
//
// Indexes into these tables are biased by -kPower10TableMin, and the table has
// values in the range [kPower10TableMin, kPower10TableMax].
// Mantissa half of each table entry; read through Power10Mantissa().
extern const uint64_t kPower10MantissaTable[];
// Power-of-two exponent half of each table entry; read through
// Power10Exponent().
extern const int16_t kPower10ExponentTable[];
// The smallest allowed value for use with the Power10Mantissa() and
// Power10Exponent() functions below. (If a smaller exponent is needed in
// calculations, the end result is guaranteed to underflow.)
constexpr int kPower10TableMin = -342;
// The largest allowed value for use with the Power10Mantissa() and
// Power10Exponent() functions below. (If a larger exponent is needed in
// calculations, the end result is guaranteed to overflow.)
constexpr int kPower10TableMax = 308;
// Returns the mantissa stored in the power-of-10 table for 10**n. The index
// is biased by -kPower10TableMin and is not bounds-checked here.
uint64_t Power10Mantissa(int n) {
  const int table_index = n - kPower10TableMin;
  return kPower10MantissaTable[table_index];
}
// Returns the power-of-two exponent stored in the power-of-10 table for
// 10**n. The index is biased by -kPower10TableMin and is not bounds-checked
// here.
int Power10Exponent(int n) {
  const int table_index = n - kPower10TableMin;
  return kPower10ExponentTable[table_index];
}
// Returns true if n lies above kPower10TableMax, i.e. n is large enough that
// 10**n always results in an IEEE overflow.
bool Power10Overflow(int n) {
  return kPower10TableMax < n;
}
// Returns true if n lies below kPower10TableMin, i.e. n is small enough that
// 10**n times a ParsedFloat mantissa always results in an IEEE underflow.
bool Power10Underflow(int n) {
  return kPower10TableMin > n;
}
// Returns true if Power10Mantissa(n) * 2**Power10Exponent(n) is exactly equal
// to 10**n numerically, i.e. there is no truncation error in
// Power10Mantissa(n). This is reported for n in [0, 27].
bool Power10Exact(int n) {
  return 0 <= n && n <= 27;
}
// Sentinel exponent values for representing numbers too large or too close to
// zero to represent in a double.
// Stored in CalculatedFloat::exponent when the conversion overflowed.
constexpr int kOverflow = 99999;
// Stored in CalculatedFloat::exponent when the conversion underflowed.
constexpr int kUnderflow = -99999;
// Result of converting a positive (nonzero) floating point number.
//
// The value represented is mantissa * 2**exponent, with `mantissa` treated as
// an integer. `mantissa` is chosen to be the correct width for the IEEE float
// representation being calculated: the same bit width for all normal values,
// and narrower bit widths for subnormals.
//
// When the conversion underflowed or overflowed, `exponent` holds kUnderflow
// or kOverflow instead.
struct CalculatedFloat {
  // Integral mantissa; zero until a result is stored.
  uint64_t mantissa{0};
  // Power-of-two exponent, or one of the sentinel values.
  int exponent{0};
};
// Returns the bit width of the given uint128, i.e. 128 minus the number of
// leading zero bits.
int BitWidth(uint128 value) {
  const uint64_t high_word = Uint128High64(value);
  if (high_word != 0) {
    // The top set bit lives in the high word.
    return 128 - base_internal::CountLeadingZeros64(high_word);
  }
  // Only the low word can contribute.
  return 64 - base_internal::CountLeadingZeros64(Uint128Low64(value));
}
// Computes the right shift that turns a mantissa into a properly adjusted
// mantissa for an IEEE floating point number of type FloatType.
//
// `mantissa_width` is the bit width of the mantissa to be shifted, and
// `binary_exponent` is the exponent of the number before the shift.
//
// Subnormal values are accounted for: when `binary_exponent` would otherwise
// end up too low, a larger-than-normal shift is returned.
template <typename FloatType>
int NormalizedShiftSize(int mantissa_width, int binary_exponent) {
  // Shift that leaves exactly kTargetMantissaBits bits of mantissa.
  const int shift_for_width =
      mantissa_width - FloatTraits<FloatType>::kTargetMantissaBits;
  // Shift that raises the exponent up to kMinNormalExponent.
  const int shift_for_exponent =
      FloatTraits<FloatType>::kMinNormalExponent - binary_exponent;
  return shift_for_width < shift_for_exponent ? shift_for_exponent
                                              : shift_for_width;
}
// Right shifts `*value` in place so that it has the requested bit width.
// (Afterwards it has 128 - bit_width leading zeroes.)
//
// Precondition: the initial `*value` must be wider than `bit_width`; the
// shift amount is computed as the difference of the two widths.
//
// Returns the number of bits shifted out.
int TruncateToBitWidth(int bit_width, uint128* value) {
  const int excess_bits = BitWidth(*value) - bit_width;
  *value >>= excess_bits;
  return excess_bits;
}
// Checks if the given ParsedFloat represents one of the edge cases that do
// not depend on the number base: zero, infinity, or NaN.  If so, stores the
// corresponding FloatType (negated when `negative` is set) in *value and
// returns true.  Otherwise leaves *value alone and returns false.
template <typename FloatType>
bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative,
                    FloatType* value) {
  // Inside this template `FloatType` names the template parameter, so the
  // parser's enum is always spelled with its namespace.
  using ParsedKind = strings_internal::FloatType;

  if (input.type == ParsedKind::kInfinity) {
    const auto infinity = std::numeric_limits<FloatType>::infinity();
    *value = negative ? -infinity : infinity;
    return true;
  }

  if (input.type == ParsedKind::kNan) {
    // A bug in both clang and gcc would cause the compiler to optimize away
    // the buffer built below.  Declaring the buffer volatile avoids the
    // issue, and has no measurable performance impact in microbenchmarks.
    //
    // https://bugs.llvm.org/show_bug.cgi?id=37778
    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86113
    constexpr ptrdiff_t kNanBufferSize = 128;
    volatile char n_char_sequence[kNanBufferSize];

    // Copy the n-char-sequence (if the parser recorded one) into the buffer,
    // clipped so that the terminating NUL always fits.
    ptrdiff_t nan_size = 0;
    if (input.subrange_begin != nullptr) {
      nan_size = input.subrange_end - input.subrange_begin;
      nan_size = std::min(nan_size, kNanBufferSize - 1);
      std::copy_n(input.subrange_begin, nan_size, n_char_sequence);
    }
    n_char_sequence[nan_size] = '\0';

    char* nan_argument = const_cast<char*>(n_char_sequence);
    const auto nan_value = FloatTraits<FloatType>::MakeNan(nan_argument);
    *value = negative ? -nan_value : nan_value;
    return true;
  }

  if (input.mantissa == 0) {
    *value = negative ? -0.0 : 0.0;
    return true;
  }

  return false;
}
// Turns the CalculatedFloat produced by a from_chars conversion into the
// caller-visible outputs.
//
// If `calculated` represents an overflow or underflow, result->ec is set to
// std::errc::result_out_of_range and *value receives the largest finite
// FloatType (overflow) or zero (underflow), negated when `negative` is set.
// Otherwise result->ec is left untouched and the calculated floating point
// number is stored in *value.
template <typename FloatType>
void EncodeResult(const CalculatedFloat& calculated, bool negative,
                  absl::from_chars_result* result, FloatType* value) {
  const bool overflowed = calculated.exponent == kOverflow;
  const bool underflowed =
      calculated.mantissa == 0 || calculated.exponent == kUnderflow;

  if (!overflowed && !underflowed) {
    // Representable value: assemble it from its parts.
    *value = FloatTraits<FloatType>::Make(calculated.mantissa,
                                          calculated.exponent, negative);
    return;
  }

  result->ec = std::errc::result_out_of_range;
  if (overflowed) {
    // Overflow is tested first, so it wins if both conditions hold.
    const auto largest = std::numeric_limits<FloatType>::max();
    *value = negative ? -largest : largest;
  } else {
    *value = negative ? -0.0 : 0.0;
  }
}
// Returns the given uint128 shifted to the right by `shift` bits, and rounds
// the remaining bits using round_to_nearest logic. The value is returned as a
// uint64_t, since this is the type used by this library for storing calculated
// floating point mantissas.
//
// It is expected that the width of the input value shifted by `shift` will
// be the correct bit-width for the target mantissa, which is strictly narrower
// than a uint64_t.
//
// If `input_exact` is false, then a nonzero error epsilon is assumed. For
// rounding purposes, the true value being rounded is strictly greater than the
// input value. The error may represent a single lost carry bit.
//
// When input_exact, shifted bits of the form 1000000... represent a tie, which
// is broken by rounding to even -- the rounding direction is chosen so the low
// bit of the returned value is 0.
//
// When !input_exact, shifted bits of the form 10000000... represent a value
// strictly greater than one half (due to the error epsilon), and so ties are
// always broken by rounding up.
//
// When !input_exact, shifted bits of the form 01111111... are uncertain;
// the true value may or may not be greater than 10000000..., due to the
// possible lost carry bit. The correct rounding direction is unknown. In this
// case, the result is rounded down, and `output_exact` is set to false.
//
// Zero and negative values of `shift` are accepted, in which case the word is
// shifted left, as necessary.
uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact,
                            bool* output_exact) {
  if (shift <= 0) {
    // Nothing is discarded by a left (or zero) shift, so exactness carries
    // over unchanged from the input.
    *output_exact = input_exact;
    return static_cast<uint64_t>(value << -shift);
  }
  if (shift >= 128) {
    // Exponent is so small that we are shifting away all significant bits.
    // Answer will not be representable, even as a subnormal, so return a zero
    // mantissa (which represents underflow).
    *output_exact = true;
    return 0;
  }

  // Assume the rounding direction is known; only the "just below halfway,
  // inexact input" case further down revokes this.
  *output_exact = true;

  const uint128 halfway_point = uint128(1) << (shift - 1);
  const uint128 discarded_bits = value & ((uint128(1) << shift) - 1);
  const uint128 kept_bits = value >> shift;

  bool round_up = false;
  if (discarded_bits > halfway_point) {
    // Discarded bits greater than 10000... always round up.
    round_up = true;
  } else if (discarded_bits == halfway_point) {
    // In exact mode, discarded bits of 10000... mean we're exactly halfway
    // between two numbers, and we must round to even: round up only if the
    // low bit of the kept value is set.
    //
    // In inexact mode, the nonzero error means the actual value is greater
    // than the halfway point and we must always round up.
    round_up = (kept_bits & 1) == 1 || !input_exact;
  } else if (!input_exact && discarded_bits == halfway_point - 1) {
    // Discarded bits of 01111... with an inexact input: a lost carry could
    // push the true value past halfway, so the rounding direction is unclear.
    // Round down and tell the caller.
    *output_exact = false;
  }

  return static_cast<uint64_t>(round_up ? kept_bits + 1 : kept_bits);
}
// Checks if a floating point guess needs to be rounded up, using high precision
// math.
//
// `guess_mantissa` and `guess_exponent` represent a candidate guess for the
// number represented by `parsed_decimal`.
//
// The exact number represented by `parsed_decimal` must lie between the two
// numbers:
// A = `guess_mantissa * 2**guess_exponent`
// B = `(guess_mantissa + 1) * 2**guess_exponent`
//
// This function returns false if `A` is the better guess, and true if `B` is
// the better guess, with rounding ties broken by rounding to even.
bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent,
                 const strings_internal::ParsedFloat& parsed_decimal) {
  using BigInt = absl::strings_internal::BigUnsigned<84>;

  // 768 is the number of digits needed in the worst case.  We could determine
  // a better limit dynamically based on the value of parsed_decimal.exponent.
  // This would optimize pathological input cases only.  (Sane inputs won't
  // have hundreds of digits of mantissa.)
  BigInt exact_mantissa;
  const int exact_exponent =
      exact_mantissa.ReadFloatMantissa(parsed_decimal, 768);

  // Move the guess to the midpoint between A and B:
  //   (2 * guess_mantissa + 1) * 2**(guess_exponent - 1)
  guess_mantissa = guess_mantissa * 2 + 1;
  guess_exponent -= 1;

  // We compare
  //   lhs = exact = exact_mantissa * 10**exact_exponent
  //               = exact_mantissa * 5**exact_exponent * 2**exact_exponent
  //   rhs = guess = guess_mantissa * 2**guess_exponent
  //
  // Integer math cannot express a negative power directly, so a negative
  // power of five is moved to the other side of the inequality instead:
  //   lhs = exact_mantissa * 2**exact_exponent
  //   rhs = guess_mantissa * 5**(-exact_exponent) * 2**guess_exponent
  const bool nonnegative_power = exact_exponent >= 0;
  BigInt& lhs = exact_mantissa;
  BigInt rhs = nonnegative_power ? BigInt(guess_mantissa)
                                 : BigInt::FiveToTheNth(-exact_exponent);
  if (nonnegative_power) {
    lhs.MultiplyByFiveToTheNth(exact_exponent);
  } else {
    rhs.MultiplyBy(guess_mantissa);
  }

  // There are powers of 2 on both sides of the inequality; reduce this to a
  // single bit-shift applied to whichever side has the larger exponent.
  if (exact_exponent > guess_exponent) {
    lhs.ShiftLeft(exact_exponent - guess_exponent);
  } else {
    rhs.ShiftLeft(guess_exponent - exact_exponent);
  }

  const int comparison = Compare(lhs, rhs);
  if (comparison != 0) {
    // Strictly above the midpoint rounds up, strictly below rounds down.
    return comparison > 0;
  }
  // lhs == rhs: the decimal input is exactly between A and B.  Round towards
  // even -- round up only if the low bit of the initial `guess_mantissa` was
  // a 1.  That bit was moved one position to the left when the midpoint was
  // formed above, so it is now the second bit.
  return (guess_mantissa & 2) == 2;
}
// Builds a CalculatedFloat from a mantissa and exponent, applying these
// normalizations on the way:
//
// * If rounding has pushed the mantissa just past the allowed bit width
//   (it equals 2**kTargetMantissaBits), it is shifted right by one and the
//   exponent is incremented.
//
// * If the exponent is above kMaxExponent, the result is kOverflow.
//
// * If the mantissa is zero (a non-zero value that is not representable,
//   even as a subnormal), the result is kUnderflow.
template <typename FloatType>
CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) {
  const uint64_t one_bit_too_wide =
      uint64_t(1) << FloatTraits<FloatType>::kTargetMantissaBits;
  if (mantissa == one_bit_too_wide) {
    mantissa >>= 1;
    ++exponent;
  }

  CalculatedFloat result;
  if (exponent > FloatTraits<FloatType>::kMaxExponent) {
    result.exponent = kOverflow;
    return result;
  }
  if (mantissa == 0) {
    result.exponent = kUnderflow;
    return result;
  }
  result.mantissa = mantissa;
  result.exponent = exponent;
  return result;
}
// Converts a parsed hexadecimal float into a CalculatedFloat for FloatType.
//
// The parsed mantissa is shifted to the target mantissa width (or further,
// for subnormals), rounded to nearest, and the exponent is adjusted by the
// same shift.
template <typename FloatType>
CalculatedFloat CalculateFromParsedHexadecimal(
    const strings_internal::ParsedFloat& parsed_hex) {
  const uint64_t parsed_mantissa = parsed_hex.mantissa;
  const int parsed_exponent = parsed_hex.exponent;

  const int mantissa_width =
      64 - base_internal::CountLeadingZeros64(parsed_mantissa);
  const int shift =
      NormalizedShiftSize<FloatType>(mantissa_width, parsed_exponent);

  // ParseFloat handles rounding in the hexadecimal case, so `result_exact`
  // only exists to satisfy the ShiftRightAndRound() signature and is not
  // inspected afterwards.
  bool result_exact;
  const uint64_t rounded_mantissa = ShiftRightAndRound(
      parsed_mantissa, shift, /* input exact= */ true, &result_exact);

  return CalculatedFloatFromRawValues<FloatType>(rounded_mantissa,
                                                 parsed_exponent + shift);
}
// Converts a parsed decimal float into a CalculatedFloat for FloatType.
//
// The decimal exponent is turned into a binary mantissa/exponent pair via the
// power-of-10 tables and multiplied with the parsed mantissa in 128-bit
// arithmetic.  When that is not enough to decide the rounding direction,
// MustRoundUp() settles it with full-precision math.
template <typename FloatType>
CalculatedFloat CalculateFromParsedDecimal(
    const strings_internal::ParsedFloat& parsed_decimal) {
  const int decimal_exponent = parsed_decimal.exponent;

  // Large or small enough decimal exponents will always result in overflow
  // or underflow.  These checks also keep the table lookups below in bounds.
  CalculatedFloat result;
  if (Power10Underflow(decimal_exponent)) {
    result.exponent = kUnderflow;
    return result;
  }
  if (Power10Overflow(decimal_exponent)) {
    result.exponent = kOverflow;
    return result;
  }

  // Otherwise convert our power of 10 into a power of 2 times an integer
  // mantissa, and multiply this by our parsed decimal mantissa.
  uint128 wide_binary_mantissa = parsed_decimal.mantissa;
  wide_binary_mantissa *= Power10Mantissa(decimal_exponent);
  int binary_exponent = Power10Exponent(decimal_exponent);

  // Discard bits that are inaccurate due to truncation error.  The magic
  // `mantissa_width` constants below are justified in
  // https://abseil.io/about/design/charconv.  They represent the number of
  // bits in `wide_binary_mantissa` that are guaranteed to be unaffected by
  // error propagation.
  const bool digits_truncated = parsed_decimal.subrange_begin != nullptr;
  const bool mantissa_exact =
      !digits_truncated && Power10Exact(decimal_exponent);
  int mantissa_width;
  if (mantissa_exact) {
    // Product is exact: every bit of it can be trusted.
    mantissa_width = BitWidth(wide_binary_mantissa);
  } else {
    // 58 bits survive a truncated mantissa; 63 bits survive an exact mantissa
    // multiplied by a truncated power of ten.
    mantissa_width = digits_truncated ? 58 : 63;
    binary_exponent +=
        TruncateToBitWidth(mantissa_width, &wide_binary_mantissa);
  }

  // Shift into a FloatType-sized mantissa, and round to nearest.
  const int shift =
      NormalizedShiftSize<FloatType>(mantissa_width, binary_exponent);
  binary_exponent += shift;
  bool result_exact;
  uint64_t binary_mantissa = ShiftRightAndRound(wide_binary_mantissa, shift,
                                                mantissa_exact, &result_exact);

  // If the rounding direction could not be determined using int128 math, fall
  // back to full resolution math.
  if (!result_exact &&
      MustRoundUp(binary_mantissa, binary_exponent, parsed_decimal)) {
    binary_mantissa += 1;
  }

  return CalculatedFloatFromRawValues<FloatType>(binary_mantissa,
                                                 binary_exponent);
}
// Shared implementation behind the from_chars() overloads.
//
// Parses a floating point number from [first, last) according to `fmt_flags`
// and stores it in `value`.  In the returned from_chars_result, `ptr` points
// just past the matched text (or at the original `first` if nothing matched),
// and `ec` is std::errc() on success, std::errc::invalid_argument if no
// number could be parsed, or std::errc::result_out_of_range on overflow or
// underflow.
template <typename FloatType>
from_chars_result FromCharsImpl(const char* first, const char* last,
                                FloatType& value, chars_format fmt_flags) {
  from_chars_result result;
  result.ptr = first;  // replaced once a number has been matched
  result.ec = std::errc();

  // Consume an optional leading minus sign.
  const bool negative = (first != last && *first == '-');
  if (negative) {
    ++first;
  }

  // If the `hex` flag is *not* set, then we will accept a 0x prefix and try
  // to parse a hexadecimal float.
  const bool hex_flag_clear =
      (fmt_flags & chars_format::hex) == chars_format{};
  const bool has_hex_prefix = last - first >= 2 && *first == '0' &&
                              (first[1] == 'x' || first[1] == 'X');
  if (hex_flag_clear && has_hex_prefix) {
    const char* hex_first = first + 2;
    strings_internal::ParsedFloat hex_parse =
        strings_internal::ParseFloat<16>(hex_first, last, fmt_flags);
    const bool matched_hex_number =
        hex_parse.end != nullptr &&
        hex_parse.type == strings_internal::FloatType::kNumber;
    if (!matched_hex_number) {
      // Either we failed to parse a hex float after the "0x", or we read
      // "0xinf" or "0xnan" which we don't want to match.
      //
      // However, a string that begins with "0x" also begins with "0", which
      // is normally a valid match for the number zero.  So we want these
      // strings to match zero unless fmt_flags is `scientific`.  (This flag
      // means an exponent is required, which the string "0" does not have.)
      if (fmt_flags == chars_format::scientific) {
        result.ec = std::errc::invalid_argument;
      } else {
        result.ptr = first + 1;
        value = negative ? -0.0 : 0.0;
      }
      return result;
    }
    // We matched a value.
    result.ptr = hex_parse.end;
    if (!HandleEdgeCase(hex_parse, negative, &value)) {
      const CalculatedFloat calculated =
          CalculateFromParsedHexadecimal<FloatType>(hex_parse);
      EncodeResult(calculated, negative, &result, &value);
    }
    return result;
  }

  // Otherwise, the flags alone decide the number base.
  if ((fmt_flags & chars_format::hex) == chars_format::hex) {
    strings_internal::ParsedFloat hex_parse =
        strings_internal::ParseFloat<16>(first, last, fmt_flags);
    if (hex_parse.end == nullptr) {
      result.ec = std::errc::invalid_argument;
      return result;
    }
    result.ptr = hex_parse.end;
    if (!HandleEdgeCase(hex_parse, negative, &value)) {
      const CalculatedFloat calculated =
          CalculateFromParsedHexadecimal<FloatType>(hex_parse);
      EncodeResult(calculated, negative, &result, &value);
    }
    return result;
  }

  strings_internal::ParsedFloat decimal_parse =
      strings_internal::ParseFloat<10>(first, last, fmt_flags);
  if (decimal_parse.end == nullptr) {
    result.ec = std::errc::invalid_argument;
    return result;
  }
  result.ptr = decimal_parse.end;
  if (!HandleEdgeCase(decimal_parse, negative, &value)) {
    const CalculatedFloat calculated =
        CalculateFromParsedDecimal<FloatType>(decimal_parse);
    EncodeResult(calculated, negative, &result, &value);
  }
  return result;
}
} // namespace
// Parses a double from [first, last) using the format flags in `fmt`.
// All of the work is done by FromCharsImpl().
from_chars_result from_chars(const char* first, const char* last, double& value,
                             chars_format fmt) {
  return FromCharsImpl<double>(first, last, value, fmt);
}
// Parses a float from [first, last) using the format flags in `fmt`.
// All of the work is done by FromCharsImpl().
from_chars_result from_chars(const char* first, const char* last, float& value,
                             chars_format fmt) {
  return FromCharsImpl<float>(first, last, value, fmt);
}
namespace {
// Table of powers of 10, from kPower10TableMin to kPower10TableMax.
//
// kPower10MantissaTable[i - kPower10TableMin] stores the 64-bit mantissa (high
// bit always on), and kPower10ExponentTable[i - kPower10TableMin] stores the
// power-of-two exponent. For a given number i, this gives the unique mantissa
// and exponent such that mantissa * 2**exponent <= 10**i < (mantissa + 1) *
// 2**exponent.
const uint64_t kPower10MantissaTable[] = {
0xeef453d6923bd65aU, 0x9558b4661b6565f8U, 0xbaaee17fa23ebf76U,
0xe95a99df8ace6f53U, 0x91d8a02bb6c10594U, 0xb64ec836a47146f9U,
0xe3e27a444d8d98b7U, 0x8e6d8c6ab0787f72U, 0xb208ef855c969f4fU,
0xde8b2b66b3bc4723U, 0x8b16fb203055ac76U, 0xaddcb9e83c6b1793U,
0xd953e8624b85dd78U, 0x87d4713d6f33aa6bU, 0xa9c98d8ccb009506U,
0xd43bf0effdc0ba48U, 0x84a57695fe98746dU, 0xa5ced43b7e3e9188U,
0xcf42894a5dce35eaU, 0x818995ce7aa0e1b2U, 0xa1ebfb4219491a1fU,
0xca66fa129f9b60a6U, 0xfd00b897478238d0U, 0x9e20735e8cb16382U,
0xc5a890362fddbc62U, 0xf712b443bbd52b7bU, 0x9a6bb0aa55653b2dU,
0xc1069cd4eabe89f8U, 0xf148440a256e2c76U, 0x96cd2a865764dbcaU,
0xbc807527ed3e12bcU, 0xeba09271e88d976bU, 0x93445b8731587ea3U,
0xb8157268fdae9e4cU, 0xe61acf033d1a45dfU, 0x8fd0c16206306babU,
0xb3c4f1ba87bc8696U, 0xe0b62e2929aba83cU, 0x8c71dcd9ba0b4925U,
0xaf8e5410288e1b6fU, 0xdb71e91432b1a24aU, 0x892731ac9faf056eU,
0xab70fe17c79ac6caU, 0xd64d3d9db981787dU, 0x85f0468293f0eb4eU,
0xa76c582338ed2621U, 0xd1476e2c07286faaU, 0x82cca4db847945caU,
0xa37fce126597973cU, 0xcc5fc196fefd7d0cU, 0xff77b1fcbebcdc4fU,
0x9faacf3df73609b1U, 0xc795830d75038c1dU, 0xf97ae3d0d2446f25U,
0x9becce62836ac577U, 0xc2e801fb244576d5U, 0xf3a20279ed56d48aU,
0x9845418c345644d6U, 0xbe5691ef416bd60cU, 0xedec366b11c6cb8fU,
0x94b3a202eb1c3f39U, 0xb9e08a83a5e34f07U, 0xe858ad248f5c22c9U,
0x91376c36d99995beU, 0xb58547448ffffb2dU, 0xe2e69915b3fff9f9U,
0x8dd01fad907ffc3bU, 0xb1442798f49ffb4aU, 0xdd95317f31c7fa1dU,
0x8a7d3eef7f1cfc52U, 0xad1c8eab5ee43b66U, 0xd863b256369d4a40U,
0x873e4f75e2224e68U, 0xa90de3535aaae202U, 0xd3515c2831559a83U,
0x8412d9991ed58091U, 0xa5178fff668ae0b6U, 0xce5d73ff402d98e3U,
0x80fa687f881c7f8eU, 0xa139029f6a239f72U, 0xc987434744ac874eU,
0xfbe9141915d7a922U, 0x9d71ac8fada6c9b5U, 0xc4ce17b399107c22U,
0xf6019da07f549b2bU, 0x99c102844f94e0fbU, 0xc0314325637a1939U,
0xf03d93eebc589f88U, 0x96267c7535b763b5U, 0xbbb01b9283253ca2U,
0xea9c227723ee8bcbU, 0x92a1958a7675175fU, 0xb749faed14125d36U,
0xe51c79a85916f484U, 0x8f31cc0937ae58d2U, 0xb2fe3f0b8599ef07U,
0xdfbdcece67006ac9U, 0x8bd6a141006042bdU, 0xaecc49914078536dU,
0xda7f5bf590966848U, 0x888f99797a5e012dU, 0xaab37fd7d8f58178U,
0xd5605fcdcf32e1d6U, 0x855c3be0a17fcd26U, 0xa6b34ad8c9dfc06fU,
0xd0601d8efc57b08bU, 0x823c12795db6ce57U, 0xa2cb1717b52481edU,
0xcb7ddcdda26da268U, 0xfe5d54150b090b02U, 0x9efa548d26e5a6e1U,
0xc6b8e9b0709f109aU, 0xf867241c8cc6d4c0U, 0x9b407691d7fc44f8U,
0xc21094364dfb5636U, 0xf294b943e17a2bc4U, 0x979cf3ca6cec5b5aU,
0xbd8430bd08277231U, 0xece53cec4a314ebdU, 0x940f4613ae5ed136U,
0xb913179899f68584U, 0xe757dd7ec07426e5U, 0x9096ea6f3848984fU,
0xb4bca50b065abe63U, 0xe1ebce4dc7f16dfbU, 0x8d3360f09cf6e4bdU,
0xb080392cc4349decU, 0xdca04777f541c567U, 0x89e42caaf9491b60U,
0xac5d37d5b79b6239U, 0xd77485cb25823ac7U, 0x86a8d39ef77164bcU,
0xa8530886b54dbdebU, 0xd267caa862a12d66U, 0x8380dea93da4bc60U,
0xa46116538d0deb78U, 0xcd795be870516656U, 0x806bd9714632dff6U,
0xa086cfcd97bf97f3U, 0xc8a883c0fdaf7df0U, 0xfad2a4b13d1b5d6cU,
0x9cc3a6eec6311a63U, 0xc3f490aa77bd60fcU, 0xf4f1b4d515acb93bU,
0x991711052d8bf3c5U, 0xbf5cd54678eef0b6U, 0xef340a98172aace4U,
0x9580869f0e7aac0eU, 0xbae0a846d2195712U, 0xe998d258869facd7U,
0x91ff83775423cc06U, 0xb67f6455292cbf08U, 0xe41f3d6a7377eecaU,
0x8e938662882af53eU, 0xb23867fb2a35b28dU, 0xdec681f9f4c31f31U,
0x8b3c113c38f9f37eU, 0xae0b158b4738705eU, 0xd98ddaee19068c76U,
0x87f8a8d4cfa417c9U, 0xa9f6d30a038d1dbcU, 0xd47487cc8470652bU,
0x84c8d4dfd2c63f3bU, 0xa5fb0a17c777cf09U, 0xcf79cc9db955c2ccU,
0x81ac1fe293d599bfU, 0xa21727db38cb002fU, 0xca9cf1d206fdc03bU,
0xfd442e4688bd304aU, 0x9e4a9cec15763e2eU, 0xc5dd44271ad3cdbaU,
0xf7549530e188c128U, 0x9a94dd3e8cf578b9U, 0xc13a148e3032d6e7U,
0xf18899b1bc3f8ca1U, 0x96f5600f15a7b7e5U, 0xbcb2b812db11a5deU,
0xebdf661791d60f56U, 0x936b9fcebb25c995U, 0xb84687c269ef3bfbU,
0xe65829b3046b0afaU, 0x8ff71a0fe2c2e6dcU, 0xb3f4e093db73a093U,
0xe0f218b8d25088b8U, 0x8c974f7383725573U, 0xafbd2350644eeacfU,
0xdbac6c247d62a583U, 0x894bc396ce5da772U, 0xab9eb47c81f5114fU,
0xd686619ba27255a2U, 0x8613fd0145877585U, 0xa798fc4196e952e7U,
0xd17f3b51fca3a7a0U, 0x82ef85133de648c4U, 0xa3ab66580d5fdaf5U,
0xcc963fee10b7d1b3U, 0xffbbcfe994e5c61fU, 0x9fd561f1fd0f9bd3U,
0xc7caba6e7c5382c8U, 0xf9bd690a1b68637bU, 0x9c1661a651213e2dU,
0xc31bfa0fe5698db8U, 0xf3e2f893dec3f126U, 0x986ddb5c6b3a76b7U,
0xbe89523386091465U, 0xee2ba6c0678b597fU, 0x94db483840b717efU,
0xba121a4650e4ddebU, 0xe896a0d7e51e1566U, 0x915e2486ef32cd60U,
0xb5b5ada8aaff80b8U, 0xe3231912d5bf60e6U, 0x8df5efabc5979c8fU,
0xb1736b96b6fd83b3U, 0xddd0467c64bce4a0U, 0x8aa22c0dbef60ee4U,
0xad4ab7112eb3929dU, 0xd89d64d57a607744U, 0x87625f056c7c4a8bU,
0xa93af6c6c79b5d2dU, 0xd389b47879823479U, 0x843610cb4bf160cbU,
0xa54394fe1eedb8feU, 0xce947a3da6a9273eU, 0x811ccc668829b887U,
0xa163ff802a3426a8U, 0xc9bcff6034c13052U, 0xfc2c3f3841f17c67U,
0x9d9ba7832936edc0U, 0xc5029163f384a931U, 0xf64335bcf065d37dU,
0x99ea0196163fa42eU, 0xc06481fb9bcf8d39U, 0xf07da27a82c37088U,
0x964e858c91ba2655U, 0xbbe226efb628afeaU, 0xeadab0aba3b2dbe5U,
0x92c8ae6b464fc96fU, 0xb77ada0617e3bbcbU, 0xe55990879ddcaabdU,
0x8f57fa54c2a9eab6U, 0xb32df8e9f3546564U, 0xdff9772470297ebdU,
0x8bfbea76c619ef36U, 0xaefae51477a06b03U, 0xdab99e59958885c4U,
0x88b402f7fd75539bU, 0xaae103b5fcd2a881U, 0xd59944a37c0752a2U,
0x857fcae62d8493a5U, 0xa6dfbd9fb8e5b88eU, 0xd097ad07a71f26b2U,
0x825ecc24c873782fU, 0xa2f67f2dfa90563bU, 0xcbb41ef979346bcaU,
0xfea126b7d78186bcU, 0x9f24b832e6b0f436U, 0xc6ede63fa05d3143U,
0xf8a95fcf88747d94U, 0x9b69dbe1b548ce7cU, 0xc24452da229b021bU,
0xf2d56790ab41c2a2U, 0x97c560ba6b0919a5U, 0xbdb6b8e905cb600fU,
0xed246723473e3813U, 0x9436c0760c86e30bU, 0xb94470938fa89bceU,
0xe7958cb87392c2c2U, 0x90bd77f3483bb9b9U, 0xb4ecd5f01a4aa828U,
0xe2280b6c20dd5232U, 0x8d590723948a535fU, 0xb0af48ec79ace837U,
0xdcdb1b2798182244U, 0x8a08f0f8bf0f156bU, 0xac8b2d36eed2dac5U,
0xd7adf884aa879177U, 0x86ccbb52ea94baeaU, 0xa87fea27a539e9a5U,
0xd29fe4b18e88640eU, 0x83a3eeeef9153e89U, 0xa48ceaaab75a8e2bU,
0xcdb02555653131b6U, 0x808e17555f3ebf11U, 0xa0b19d2ab70e6ed6U,
0xc8de047564d20a8bU, 0xfb158592be068d2eU, 0x9ced737bb6c4183dU,
0xc428d05aa4751e4cU, 0xf53304714d9265dfU, 0x993fe2c6d07b7fabU,
0xbf8fdb78849a5f96U, 0xef73d256a5c0f77cU, 0x95a8637627989aadU,
0xbb127c53b17ec159U, 0xe9d71b689dde71afU, 0x9226712162ab070dU,
0xb6b00d69bb55c8d1U, 0xe45c10c42a2b3b05U, 0x8eb98a7a9a5b04e3U,
0xb267ed1940f1c61cU, 0xdf01e85f912e37a3U, 0x8b61313bbabce2c6U,
0xae397d8aa96c1b77U, 0xd9c7dced53c72255U, 0x881cea14545c7575U,
0xaa242499697392d2U, 0xd4ad2dbfc3d07787U, 0x84ec3c97da624ab4U,
0xa6274bbdd0fadd61U, 0xcfb11ead453994baU, 0x81ceb32c4b43fcf4U,
0xa2425ff75e14fc31U, 0xcad2f7f5359a3b3eU, 0xfd87b5f28300ca0dU,
0x9e74d1b791e07e48U, 0xc612062576589ddaU, 0xf79687aed3eec551U,
0x9abe14cd44753b52U, 0xc16d9a0095928a27U, 0xf1c90080baf72cb1U,
0x971da05074da7beeU, 0xbce5086492111aeaU, 0xec1e4a7db69561a5U,
0x9392ee8e921d5d07U, 0xb877aa3236a4b449U, 0xe69594bec44de15bU,
0x901d7cf73ab0acd9U, 0xb424dc35095cd80fU, 0xe12e13424bb40e13U,
0x8cbccc096f5088cbU, 0xafebff0bcb24aafeU, 0xdbe6fecebdedd5beU,
0x89705f4136b4a597U, 0xabcc77118461cefcU, 0xd6bf94d5e57a42bcU,
0x8637bd05af6c69b5U, 0xa7c5ac471b478423U, 0xd1b71758e219652bU,
0x83126e978d4fdf3bU, 0xa3d70a3d70a3d70aU, 0xccccccccccccccccU,
0x8000000000000000U, 0xa000000000000000U, 0xc800000000000000U,
0xfa00000000000000U, 0x9c40000000000000U, 0xc350000000000000U,
0xf424000000000000U, 0x9896800000000000U, 0xbebc200000000000U,
0xee6b280000000000U, 0x9502f90000000000U, 0xba43b74000000000U,
0xe8d4a51000000000U, 0x9184e72a00000000U, 0xb5e620f480000000U,
0xe35fa931a0000000U, 0x8e1bc9bf04000000U, 0xb1a2bc2ec5000000U,
0xde0b6b3a76400000U, 0x8ac7230489e80000U, 0xad78ebc5ac620000U,
0xd8d726b7177a8000U, 0x878678326eac9000U, 0xa968163f0a57b400U,
0xd3c21bcecceda100U, 0x84595161401484a0U, 0xa56fa5b99019a5c8U,
0xcecb8f27f4200f3aU, 0x813f3978f8940984U, 0xa18f07d736b90be5U,
0xc9f2c9cd04674edeU, 0xfc6f7c4045812296U, 0x9dc5ada82b70b59dU,
0xc5371912364ce305U, 0xf684df56c3e01bc6U, 0x9a130b963a6c115cU,
0xc097ce7bc90715b3U, 0xf0bdc21abb48db20U, 0x96769950b50d88f4U,
0xbc143fa4e250eb31U, 0xeb194f8e1ae525fdU, 0x92efd1b8d0cf37beU,
0xb7abc627050305adU, 0xe596b7b0c643c719U, 0x8f7e32ce7bea5c6fU,
0xb35dbf821ae4f38bU, 0xe0352f62a19e306eU, 0x8c213d9da502de45U,
0xaf298d050e4395d6U, 0xdaf3f04651d47b4cU, 0x88d8762bf324cd0fU,
0xab0e93b6efee0053U, 0xd5d238a4abe98068U, 0x85a36366eb71f041U,
0xa70c3c40a64e6c51U, 0xd0cf4b50cfe20765U, 0x82818f1281ed449fU,
0xa321f2d7226895c7U, 0xcbea6f8ceb02bb39U, 0xfee50b7025c36a08U,
0x9f4f2726179a2245U, 0xc722f0ef9d80aad6U, 0xf8ebad2b84e0d58bU,
0x9b934c3b330c8577U, 0xc2781f49ffcfa6d5U, 0xf316271c7fc3908aU,
0x97edd871cfda3a56U, 0xbde94e8e43d0c8ecU, 0xed63a231d4c4fb27U,
0x945e455f24fb1cf8U, 0xb975d6b6ee39e436U, 0xe7d34c64a9c85d44U,
0x90e40fbeea1d3a4aU, 0xb51d13aea4a488ddU, 0xe264589a4dcdab14U,
0x8d7eb76070a08aecU, 0xb0de65388cc8ada8U, 0xdd15fe86affad912U,
0x8a2dbf142dfcc7abU, 0xacb92ed9397bf996U, 0xd7e77a8f87daf7fbU,
0x86f0ac99b4e8dafdU, 0xa8acd7c0222311bcU, 0xd2d80db02aabd62bU,
0x83c7088e1aab65dbU, 0xa4b8cab1a1563f52U, 0xcde6fd5e09abcf26U,
0x80b05e5ac60b6178U, 0xa0dc75f1778e39d6U, 0xc913936dd571c84cU,
0xfb5878494ace3a5fU, 0x9d174b2dcec0e47bU, 0xc45d1df942711d9aU,
0xf5746577930d6500U, 0x9968bf6abbe85f20U, 0xbfc2ef456ae276e8U,
0xefb3ab16c59b14a2U, 0x95d04aee3b80ece5U, 0xbb445da9ca61281fU,
0xea1575143cf97226U, 0x924d692ca61be758U, 0xb6e0c377cfa2e12eU,
0xe498f455c38b997aU, 0x8edf98b59a373fecU, 0xb2977ee300c50fe7U,
0xdf3d5e9bc0f653e1U, 0x8b865b215899f46cU, 0xae67f1e9aec07187U,
0xda01ee641a708de9U, 0x884134fe908658b2U, 0xaa51823e34a7eedeU,
0xd4e5e2cdc1d1ea96U, 0x850fadc09923329eU, 0xa6539930bf6bff45U,
0xcfe87f7cef46ff16U, 0x81f14fae158c5f6eU, 0xa26da3999aef7749U,
0xcb090c8001ab551cU, 0xfdcb4fa002162a63U, 0x9e9f11c4014dda7eU,
0xc646d63501a1511dU, 0xf7d88bc24209a565U, 0x9ae757596946075fU,
0xc1a12d2fc3978937U, 0xf209787bb47d6b84U, 0x9745eb4d50ce6332U,
0xbd176620a501fbffU, 0xec5d3fa8ce427affU, 0x93ba47c980e98cdfU,
0xb8a8d9bbe123f017U, 0xe6d3102ad96cec1dU, 0x9043ea1ac7e41392U,
0xb454e4a179dd1877U, 0xe16a1dc9d8545e94U, 0x8ce2529e2734bb1dU,
0xb01ae745b101e9e4U, 0xdc21a1171d42645dU, 0x899504ae72497ebaU,
0xabfa45da0edbde69U, 0xd6f8d7509292d603U, 0x865b86925b9bc5c2U,
0xa7f26836f282b732U, 0xd1ef0244af2364ffU, 0x8335616aed761f1fU,
0xa402b9c5a8d3a6e7U, 0xcd036837130890a1U, 0x802221226be55a64U,
0xa02aa96b06deb0fdU, 0xc83553c5c8965d3dU, 0xfa42a8b73abbf48cU,
0x9c69a97284b578d7U, 0xc38413cf25e2d70dU, 0xf46518c2ef5b8cd1U,
0x98bf2f79d5993802U, 0xbeeefb584aff8603U, 0xeeaaba2e5dbf6784U,
0x952ab45cfa97a0b2U, 0xba756174393d88dfU, 0xe912b9d1478ceb17U,
0x91abb422ccb812eeU, 0xb616a12b7fe617aaU, 0xe39c49765fdf9d94U,
0x8e41ade9fbebc27dU, 0xb1d219647ae6b31cU, 0xde469fbd99a05fe3U,
0x8aec23d680043beeU, 0xada72ccc20054ae9U, 0xd910f7ff28069da4U,
0x87aa9aff79042286U, 0xa99541bf57452b28U, 0xd3fa922f2d1675f2U,
0x847c9b5d7c2e09b7U, 0xa59bc234db398c25U, 0xcf02b2c21207ef2eU,
0x8161afb94b44f57dU, 0xa1ba1ba79e1632dcU, 0xca28a291859bbf93U,
0xfcb2cb35e702af78U, 0x9defbf01b061adabU, 0xc56baec21c7a1916U,
0xf6c69a72a3989f5bU, 0x9a3c2087a63f6399U, 0xc0cb28a98fcf3c7fU,
0xf0fdf2d3f3c30b9fU, 0x969eb7c47859e743U, 0xbc4665b596706114U,
0xeb57ff22fc0c7959U, 0x9316ff75dd87cbd8U, 0xb7dcbf5354e9beceU,
0xe5d3ef282a242e81U, 0x8fa475791a569d10U, 0xb38d92d760ec4455U,
0xe070f78d3927556aU, 0x8c469ab843b89562U, 0xaf58416654a6babbU,
0xdb2e51bfe9d0696aU, 0x88fcf317f22241e2U, 0xab3c2fddeeaad25aU,
0xd60b3bd56a5586f1U, 0x85c7056562757456U, 0xa738c6bebb12d16cU,
0xd106f86e69d785c7U, 0x82a45b450226b39cU, 0xa34d721642b06084U,
0xcc20ce9bd35c78a5U, 0xff290242c83396ceU, 0x9f79a169bd203e41U,
0xc75809c42c684dd1U, 0xf92e0c3537826145U, 0x9bbcc7a142b17ccbU,
0xc2abf989935ddbfeU, 0xf356f7ebf83552feU, 0x98165af37b2153deU,
0xbe1bf1b059e9a8d6U, 0xeda2ee1c7064130cU, 0x9485d4d1c63e8be7U,
0xb9a74a0637ce2ee1U, 0xe8111c87c5c1ba99U, 0x910ab1d4db9914a0U,
0xb54d5e4a127f59c8U, 0xe2a0b5dc971f303aU, 0x8da471a9de737e24U,
0xb10d8e1456105dadU, 0xdd50f1996b947518U, 0x8a5296ffe33cc92fU,
0xace73cbfdc0bfb7bU, 0xd8210befd30efa5aU, 0x8714a775e3e95c78U,
0xa8d9d1535ce3b396U, 0xd31045a8341ca07cU, 0x83ea2b892091e44dU,
0xa4e4b66b68b65d60U, 0xce1de40642e3f4b9U, 0x80d2ae83e9ce78f3U,
0xa1075a24e4421730U, 0xc94930ae1d529cfcU, 0xfb9b7cd9a4a7443cU,
0x9d412e0806e88aa5U, 0xc491798a08a2ad4eU, 0xf5b5d7ec8acb58a2U,
0x9991a6f3d6bf1765U, 0xbff610b0cc6edd3fU, 0xeff394dcff8a948eU,
0x95f83d0a1fb69cd9U, 0xbb764c4ca7a4440fU, 0xea53df5fd18d5513U,
0x92746b9be2f8552cU, 0xb7118682dbb66a77U, 0xe4d5e82392a40515U,
0x8f05b1163ba6832dU, 0xb2c71d5bca9023f8U, 0xdf78e4b2bd342cf6U,
0x8bab8eefb6409c1aU, 0xae9672aba3d0c320U, 0xda3c0f568cc4f3e8U,
0x8865899617fb1871U, 0xaa7eebfb9df9de8dU, 0xd51ea6fa85785631U,
0x8533285c936b35deU, 0xa67ff273b8460356U, 0xd01fef10a657842cU,
0x8213f56a67f6b29bU, 0xa298f2c501f45f42U, 0xcb3f2f7642717713U,
0xfe0efb53d30dd4d7U, 0x9ec95d1463e8a506U, 0xc67bb4597ce2ce48U,
0xf81aa16fdc1b81daU, 0x9b10a4e5e9913128U, 0xc1d4ce1f63f57d72U,
0xf24a01a73cf2dccfU, 0x976e41088617ca01U, 0xbd49d14aa79dbc82U,
0xec9c459d51852ba2U, 0x93e1ab8252f33b45U, 0xb8da1662e7b00a17U,
0xe7109bfba19c0c9dU, 0x906a617d450187e2U, 0xb484f9dc9641e9daU,
0xe1a63853bbd26451U, 0x8d07e33455637eb2U, 0xb049dc016abc5e5fU,
0xdc5c5301c56b75f7U, 0x89b9b3e11b6329baU, 0xac2820d9623bf429U,
0xd732290fbacaf133U, 0x867f59a9d4bed6c0U, 0xa81f301449ee8c70U,
0xd226fc195c6a2f8cU, 0x83585d8fd9c25db7U, 0xa42e74f3d032f525U,
0xcd3a1230c43fb26fU, 0x80444b5e7aa7cf85U, 0xa0555e361951c366U,
0xc86ab5c39fa63440U, 0xfa856334878fc150U, 0x9c935e00d4b9d8d2U,
0xc3b8358109e84f07U, 0xf4a642e14c6262c8U, 0x98e7e9cccfbd7dbdU,
0xbf21e44003acdd2cU, 0xeeea5d5004981478U, 0x95527a5202df0ccbU,
0xbaa718e68396cffdU, 0xe950df20247c83fdU, 0x91d28b7416cdd27eU,
0xb6472e511c81471dU, 0xe3d8f9e563a198e5U, 0x8e679c2f5e44ff8fU,
};
// Table of signed 16-bit exponents, in strictly ascending order from -1200 to
// 960.  Consecutive entries differ by 3 or 4.
//
// NOTE(review): presumably entry i is the binary exponent that pairs with
// entry i of kPower10MantissaTable, so that mantissa * 2**exponent
// approximates a power of ten (the average step of ~3.32 matches log2(10)).
// The lookup code is outside this view -- confirm the indexing and keep both
// tables the same length.
const int16_t kPower10ExponentTable[] = {
-1200, -1196, -1193, -1190, -1186, -1183, -1180, -1176, -1173, -1170, -1166,
-1163, -1160, -1156, -1153, -1150, -1146, -1143, -1140, -1136, -1133, -1130,
-1127, -1123, -1120, -1117, -1113, -1110, -1107, -1103, -1100, -1097, -1093,
-1090, -1087, -1083, -1080, -1077, -1073, -1070, -1067, -1063, -1060, -1057,
-1053, -1050, -1047, -1043, -1040, -1037, -1034, -1030, -1027, -1024, -1020,
-1017, -1014, -1010, -1007, -1004, -1000, -997, -994, -990, -987, -984,
-980, -977, -974, -970, -967, -964, -960, -957, -954, -950, -947,
-944, -940, -937, -934, -931, -927, -924, -921, -917, -914, -911,
-907, -904, -901, -897, -894, -891, -887, -884, -881, -877, -874,
-871, -867, -864, -861, -857, -854, -851, -847, -844, -841, -838,
-834, -831, -828, -824, -821, -818, -814, -811, -808, -804, -801,
-798, -794, -791, -788, -784, -781, -778, -774, -771, -768, -764,
-761, -758, -754, -751, -748, -744, -741, -738, -735, -731, -728,
-725, -721, -718, -715, -711, -708, -705, -701, -698, -695, -691,
-688, -685, -681, -678, -675, -671, -668, -665, -661, -658, -655,
-651, -648, -645, -642, -638, -635, -632, -628, -625, -622, -618,
-615, -612, -608, -605, -602, -598, -595, -592, -588, -585, -582,
-578, -575, -572, -568, -565, -562, -558, -555, -552, -549, -545,
-542, -539, -535, -532, -529, -525, -522, -519, -515, -512, -509,
-505, -502, -499, -495, -492, -489, -485, -482, -479, -475, -472,
-469, -465, -462, -459, -455, -452, -449, -446, -442, -439, -436,
-432, -429, -426, -422, -419, -416, -412, -409, -406, -402, -399,
-396, -392, -389, -386, -382, -379, -376, -372, -369, -366, -362,
-359, -356, -353, -349, -346, -343, -339, -336, -333, -329, -326,
-323, -319, -316, -313, -309, -306, -303, -299, -296, -293, -289,
-286, -283, -279, -276, -273, -269, -266, -263, -259, -256, -253,
-250, -246, -243, -240, -236, -233, -230, -226, -223, -220, -216,
-213, -210, -206, -203, -200, -196, -193, -190, -186, -183, -180,
-176, -173, -170, -166, -163, -160, -157, -153, -150, -147, -143,
-140, -137, -133, -130, -127, -123, -120, -117, -113, -110, -107,
-103, -100, -97, -93, -90, -87, -83, -80, -77, -73, -70,
-67, -63, -60, -57, -54, -50, -47, -44, -40, -37, -34,
-30, -27, -24, -20, -17, -14, -10, -7, -4, 0, 3,
6, 10, 13, 16, 20, 23, 26, 30, 33, 36, 39,
43, 46, 49, 53, 56, 59, 63, 66, 69, 73, 76,
79, 83, 86, 89, 93, 96, 99, 103, 106, 109, 113,
116, 119, 123, 126, 129, 132, 136, 139, 142, 146, 149,
152, 156, 159, 162, 166, 169, 172, 176, 179, 182, 186,
189, 192, 196, 199, 202, 206, 209, 212, 216, 219, 222,
226, 229, 232, 235, 239, 242, 245, 249, 252, 255, 259,
262, 265, 269, 272, 275, 279, 282, 285, 289, 292, 295,
299, 302, 305, 309, 312, 315, 319, 322, 325, 328, 332,
335, 338, 342, 345, 348, 352, 355, 358, 362, 365, 368,
372, 375, 378, 382, 385, 388, 392, 395, 398, 402, 405,
408, 412, 415, 418, 422, 425, 428, 431, 435, 438, 441,
445, 448, 451, 455, 458, 461, 465, 468, 471, 475, 478,
481, 485, 488, 491, 495, 498, 501, 505, 508, 511, 515,
518, 521, 524, 528, 531, 534, 538, 541, 544, 548, 551,
554, 558, 561, 564, 568, 571, 574, 578, 581, 584, 588,
591, 594, 598, 601, 604, 608, 611, 614, 617, 621, 624,
627, 631, 634, 637, 641, 644, 647, 651, 654, 657, 661,
664, 667, 671, 674, 677, 681, 684, 687, 691, 694, 697,
701, 704, 707, 711, 714, 717, 720, 724, 727, 730, 734,
737, 740, 744, 747, 750, 754, 757, 760, 764, 767, 770,
774, 777, 780, 784, 787, 790, 794, 797, 800, 804, 807,
810, 813, 817, 820, 823, 827, 830, 833, 837, 840, 843,
847, 850, 853, 857, 860, 863, 867, 870, 873, 877, 880,
883, 887, 890, 893, 897, 900, 903, 907, 910, 913, 916,
920, 923, 926, 930, 933, 936, 940, 943, 946, 950, 953,
956, 960,
};
} // namespace
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,119 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_CHARCONV_H_
#define ABSL_STRINGS_CHARCONV_H_
#include <system_error> // NOLINT(build/c++11)
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Workalike compatibility version of std::chars_format from C++17.
//
// This is a bitmask enumeration which can be passed to absl::from_chars to
// configure the string-to-float conversion.  Each basic notation occupies its
// own bit; `general` is the union of `fixed` and `scientific`.
enum class chars_format {
  scientific = 1 << 0,
  fixed = 1 << 1,
  hex = 1 << 2,
  general = fixed | scientific,
};
// The return result of a string-to-number conversion.
//
// `ec` will be set to `invalid_argument` if a well-formed number was not found
// at the start of the input range, `result_out_of_range` if a well-formed
// number was found, but it was out of the representable range of the requested
// type, or to std::errc() otherwise.
//
// If a well-formed number was found, `ptr` is set to one past the sequence of
// characters that were successfully parsed. If none was found, `ptr` is set
// to the `first` argument to from_chars.
struct from_chars_result {
// One past the last character consumed, or `first` when nothing matched.
const char* ptr;
// std::errc() on success; otherwise the error condition described above.
std::errc ec;
};
// Workalike compatibility version of std::from_chars from C++17. Currently
// this only supports the `double` and `float` types.
//
// This interface incorporates the proposed resolutions for library issues
// DR 3080 and DR 3081. If these are adopted with different wording,
// Abseil's behavior will change to match the standard. (The behavior most
// likely to change is for DR 3081, which says what `value` will be set to in
// the case of overflow and underflow. Code that wants to avoid possible
// breaking changes in this area should not depend on `value` when the returned
// from_chars_result indicates a range error.)
//
// Searches the range [first, last) for the longest matching pattern beginning
// at `first` that represents a floating point number. If one is found, store
// the result in `value`.
//
// The matching pattern format is almost the same as that of strtod(), except
// that C locale is not respected, and an initial '+' character in the input
// range will never be matched.
//
// If `fmt` is set, it must be one of the enumerator values of the chars_format.
// (This is despite the fact that chars_format is a bitmask type.) If set to
// `scientific`, a matching number must contain an exponent. If set to `fixed`,
// then an exponent will never match. (For example, the string "1e5" will be
// parsed as "1".) If set to `hex`, then a hexadecimal float is parsed in the
// format that strtod() accepts, except that a "0x" prefix is NOT matched.
// (In particular, in `hex` mode, the input "0xff" results in the largest
// matching pattern "0".)
//
// Double-precision overload; `fmt` defaults to chars_format::general.
absl::from_chars_result from_chars(const char* first, const char* last,
double& value, // NOLINT
chars_format fmt = chars_format::general);
// Single-precision overload; `fmt` defaults to chars_format::general.
absl::from_chars_result from_chars(const char* first, const char* last,
float& value, // NOLINT
chars_format fmt = chars_format::general);
// std::chars_format is specified as a bitmask type, which means the following
// operations must be provided:
//
// Bitwise AND: converts both operands to int, ANDs them, and converts the
// result back to chars_format (i.e. the flags present in both operands).
inline constexpr chars_format operator&(chars_format lhs, chars_format rhs) {
return static_cast<chars_format>(static_cast<int>(lhs) &
static_cast<int>(rhs));
}
// Bitwise OR: converts both operands to int, ORs them, and converts the result
// back to chars_format (i.e. the flags present in either operand).
inline constexpr chars_format operator|(chars_format lhs, chars_format rhs) {
return static_cast<chars_format>(static_cast<int>(lhs) |
static_cast<int>(rhs));
}
// Bitwise XOR: converts both operands to int, XORs them, and converts the
// result back to chars_format (i.e. the flags present in exactly one operand).
inline constexpr chars_format operator^(chars_format lhs, chars_format rhs) {
return static_cast<chars_format>(static_cast<int>(lhs) ^
static_cast<int>(rhs));
}
// Bitwise complement: converts `arg` to int, inverts every bit, and converts
// the result back to chars_format.  The result generally does not correspond
// to any named enumerator.
inline constexpr chars_format operator~(chars_format arg) {
return static_cast<chars_format>(~static_cast<int>(arg));
}
// Compound bitwise-AND assignment: stores `lhs & rhs` in `lhs` and returns a
// reference to `lhs`.
inline chars_format& operator&=(chars_format& lhs, chars_format rhs) {
  return lhs = lhs & rhs;
}
// Compound bitwise-OR assignment: stores `lhs | rhs` in `lhs` and returns a
// reference to `lhs`.
inline chars_format& operator|=(chars_format& lhs, chars_format rhs) {
  return lhs = lhs | rhs;
}
// Compound bitwise-XOR assignment: stores `lhs ^ rhs` in `lhs` and returns a
// reference to `lhs`.
inline chars_format& operator^=(chars_format& lhs, chars_format rhs) {
  return lhs = lhs ^ rhs;
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CHARCONV_H_

View file

@ -0,0 +1,204 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/charconv.h"
#include <cstdlib>
#include <cstring>
#include <string>
#include "benchmark/benchmark.h"
namespace {
// Baseline: times the C library's strtod() on a short decimal string.
void BM_Strtod_Pi(benchmark::State& state) {
  const char* input = "3.14159";
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(std::strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_Pi);
// Times absl::from_chars (double) on the same short decimal string used by
// BM_Strtod_Pi.
void BM_Absl_Pi(benchmark::State& state) {
  const char* first = "3.14159";
  const char* last = first + std::strlen(first);
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(first);
    double parsed;
    absl::from_chars(first, last, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_Pi);
// Baseline: times the C library's strtof() (single precision, despite the
// "Strtod" in the benchmark name) on a short decimal string.
void BM_Strtod_Pi_float(benchmark::State& state) {
  const char* input = "3.14159";
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(std::strtof(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_Pi_float);
// Times absl::from_chars (float) on a short decimal string.
void BM_Absl_Pi_float(benchmark::State& state) {
  const char* first = "3.14159";
  const char* last = first + std::strlen(first);
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(first);
    float parsed;
    absl::from_chars(first, last, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_Pi_float);
// Baseline: times strtod() on a large-exponent input with an 18-digit
// mantissa.
void BM_Strtod_HardLarge(benchmark::State& state) {
  const char* input = "272104041512242479.e200";
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(std::strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_HardLarge);
// Times absl::from_chars (double) on the same large-exponent input used by
// BM_Strtod_HardLarge.
void BM_Absl_HardLarge(benchmark::State& state) {
  const char* first = "272104041512242479.e200";
  const char* last = first + std::strlen(first);
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(first);
    double parsed;
    absl::from_chars(first, last, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_HardLarge);
// Baseline: times strtod() on a large-negative-exponent input with a 17-digit
// mantissa.
void BM_Strtod_HardSmall(benchmark::State& state) {
  const char* input = "94080055902682397.e-242";
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(std::strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_HardSmall);
// Times absl::from_chars (double) on the same large-negative-exponent input
// used by BM_Strtod_HardSmall.
void BM_Absl_HardSmall(benchmark::State& state) {
  const char* first = "94080055902682397.e-242";
  const char* last = first + std::strlen(first);
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(first);
    double parsed;
    absl::from_chars(first, last, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_HardSmall);
// Baseline: times strtod() on a 200-digit integer mantissa (all '3's).
void BM_Strtod_HugeMantissa(benchmark::State& state) {
  const std::string mantissa(200, '3');
  const char* input = mantissa.c_str();
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(std::strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_HugeMantissa);
// Times absl::from_chars (double) on a 200-digit integer mantissa (all '3's).
void BM_Absl_HugeMantissa(benchmark::State& state) {
  const std::string mantissa(200, '3');
  const char* first = mantissa.c_str();
  // Derive the end from the string itself rather than repeating the length.
  const char* last = first + mantissa.size();
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(first);
    double parsed;
    absl::from_chars(first, last, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_HugeMantissa);
// Builds a decimal string of roughly `length` significant characters that
// lies close to a rounding boundary, for stressing string-to-double parsers.
//
// The result is the first `length` characters of the padded digit string
// below (including the leading "1."), followed by the suffix "1e-297".
// Lengths between 3 and 1000 are reasonable.
std::string MakeHardCase(int length) {
  // The number 1.1521...e-297 is exactly halfway between 12345 * 2**-1000 and
  // the next larger representable number.  These are its digits.
  static constexpr char kHalfwayDigits[] =
      "1."
      "152113937042223790993097181572444900347587985074226836242307364987727724"
      "831384300183638649152607195040591791364113930628852279348613864894524591"
      "272746490313676832900762939595690019745859128071117417798540258114233761"
      "012939937017879509401007964861774960297319002612457273148497158989073482"
      "171377406078223015359818300988676687994537274548940612510414856761641652"
      "513434981938564294004070500716200446656421722229202383105446378511678258"
      "370570631774499359748259931676320916632111681001853983492795053244971606"
      "922718923011680846577744433974087653954904214152517799883551075537146316"
      "168973685866425605046988661997658648354773076621610279716804960009043764"
      "038392994055171112475093876476783502487512538082706095923790634572014823"
      "78877699375152587890625";

  // Pad with 5000 zeros so that long prefixes remain available.
  std::string padded_digits(kHalfwayDigits);
  padded_digits.append(5000, '0');

  // Take the requested prefix, then append a trailing '1' digit and the
  // exponent to form the hard case.
  std::string hard_case = padded_digits.substr(0, length);
  hard_case += "1e-297";
  return hard_case;
}
// Baseline: times strtod() on MakeHardCase() inputs whose digit count is the
// benchmark's range argument.
void BM_Strtod_Big_And_Difficult(benchmark::State& state) {
  const std::string hard_case = MakeHardCase(static_cast<int>(state.range(0)));
  const char* input = hard_case.c_str();
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(std::strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_Big_And_Difficult)->Range(3, 5000);
// Times absl::from_chars (double) on MakeHardCase() inputs whose digit count
// is the benchmark's range argument.
void BM_Absl_Big_And_Difficult(benchmark::State& state) {
  const std::string hard_case = MakeHardCase(static_cast<int>(state.range(0)));
  const char* first = hard_case.c_str();
  const char* last = first + hard_case.size();
  for (auto _ : state) {
    // Keep the compiler from treating the input as a known constant.
    benchmark::DoNotOptimize(first);
    double parsed;
    absl::from_chars(first, last, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_Big_And_Difficult)->Range(3, 5000);
} // namespace
// ------------------------------------------------------------------------
// Benchmark Time CPU Iterations
// ------------------------------------------------------------------------
// BM_Strtod_Pi 96 ns 96 ns 6337454
// BM_Absl_Pi 35 ns 35 ns 20031996
// BM_Strtod_Pi_float 91 ns 91 ns 7745851
// BM_Absl_Pi_float 35 ns 35 ns 20430298
// BM_Strtod_HardLarge 133 ns 133 ns 5288341
// BM_Absl_HardLarge 181 ns 181 ns 3855615
// BM_Strtod_HardSmall 279 ns 279 ns 2517243
// BM_Absl_HardSmall 287 ns 287 ns 2458744
// BM_Strtod_HugeMantissa 433 ns 433 ns 1604293
// BM_Absl_HugeMantissa 160 ns 160 ns 4403671
// BM_Strtod_Big_And_Difficult/3 236 ns 236 ns 2942496
// BM_Strtod_Big_And_Difficult/8 232 ns 232 ns 2983796
// BM_Strtod_Big_And_Difficult/64 437 ns 437 ns 1591951
// BM_Strtod_Big_And_Difficult/512 1738 ns 1738 ns 402519
// BM_Strtod_Big_And_Difficult/4096 3943 ns 3943 ns 176128
// BM_Strtod_Big_And_Difficult/5000 4397 ns 4397 ns 157878
// BM_Absl_Big_And_Difficult/3 39 ns 39 ns 17799583
// BM_Absl_Big_And_Difficult/8 43 ns 43 ns 16096859
// BM_Absl_Big_And_Difficult/64 550 ns 550 ns 1259717
// BM_Absl_Big_And_Difficult/512 4167 ns 4167 ns 171414
// BM_Absl_Big_And_Difficult/4096 9160 ns 9159 ns 76297
// BM_Absl_Big_And_Difficult/5000 9738 ns 9738 ns 70140

View file

@ -0,0 +1,780 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/charconv.h"
#include <cstdlib>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/internal/pow10_helper.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
// Compiler capability switches for this test file.  Both are 0 when
// _MSC_FULL_VER is defined (i.e. when building with MSVC) and 1 otherwise.
//
// ABSL_COMPILER_DOES_EXACT_ROUNDING gates the tests below that compare parsed
// results against floating point literals converted by the compiler.
// NOTE(review): ABSL_STRTOD_HANDLES_NAN_CORRECTLY presumably gates comparisons
// against the C library's strtod() NaN handling; its use is outside this view.
#ifdef _MSC_FULL_VER
#define ABSL_COMPILER_DOES_EXACT_ROUNDING 0
#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 0
#else
#define ABSL_COMPILER_DOES_EXACT_ROUNDING 1
#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 1
#endif
namespace {
using absl::strings_internal::Pow10;
#if ABSL_COMPILER_DOES_EXACT_ROUNDING
// Checks that absl::from_chars accepts `str` as a double: the call must
// report no error, consume the entire string, and produce a value exactly
// equal to `expected_number`.
void TestDoubleParse(absl::string_view str, double expected_number) {
  SCOPED_TRACE(str);
  const char* const first = str.data();
  const char* const last = first + str.length();
  double actual_number = 0.0;
  const absl::from_chars_result result =
      absl::from_chars(first, last, actual_number);
  EXPECT_EQ(result.ec, std::errc());
  EXPECT_EQ(result.ptr, str.data() + str.length());
  EXPECT_EQ(actual_number, expected_number);
}
// Checks that absl::from_chars accepts `str` as a float: the call must report
// no error, consume the entire string, and produce a value exactly equal to
// `expected_number`.
void TestFloatParse(absl::string_view str, float expected_number) {
  SCOPED_TRACE(str);
  const char* const first = str.data();
  const char* const last = first + str.length();
  float actual_number = 0.0f;
  const absl::from_chars_result result =
      absl::from_chars(first, last, actual_number);
  EXPECT_EQ(result.ec, std::errc());
  EXPECT_EQ(result.ptr, str.data() + str.length());
  EXPECT_EQ(actual_number, expected_number);
}
// Tests that the given double or single precision floating point literal is
// parsed correctly by absl::from_chars.
//
// These convenience macros assume that the C++ compiler being used also does
// fully correct decimal-to-binary conversions.
//
// `number` must be an unsigned floating point literal.  It is used twice: once
// stringified (#number) as the text handed to the parser, and once as a
// literal evaluated by the compiler to obtain the expected value.  Each macro
// also checks the negated form by prefixing "-" to the text and negating the
// expected value.
#define FROM_CHARS_TEST_DOUBLE(number) \
{ \
TestDoubleParse(#number, number); \
TestDoubleParse("-" #number, -number); \
}
// Float variant: number##f pastes an `f` suffix onto the literal so the
// expected value is a single precision literal.
#define FROM_CHARS_TEST_FLOAT(number) \
{ \
TestFloatParse(#number, number##f); \
TestFloatParse("-" #number, -number##f); \
}
// Parses a fixed list of difficult decimal inputs (and their negations) and
// checks each result against the same literal as converted by the compiler.
// The list has four groups: double and float cases that round towards zero,
// then double and float cases that round away from zero.
TEST(FromChars, NearRoundingCases) {
// Cases from "A Program for Testing IEEE Decimal-Binary Conversion"
// by Vern Paxson.
// Forms that should round towards zero. (These are the hardest cases for
// each decimal mantissa size.)
FROM_CHARS_TEST_DOUBLE(5.e125);
FROM_CHARS_TEST_DOUBLE(69.e267);
FROM_CHARS_TEST_DOUBLE(999.e-026);
FROM_CHARS_TEST_DOUBLE(7861.e-034);
FROM_CHARS_TEST_DOUBLE(75569.e-254);
FROM_CHARS_TEST_DOUBLE(928609.e-261);
FROM_CHARS_TEST_DOUBLE(9210917.e080);
FROM_CHARS_TEST_DOUBLE(84863171.e114);
FROM_CHARS_TEST_DOUBLE(653777767.e273);
FROM_CHARS_TEST_DOUBLE(5232604057.e-298);
FROM_CHARS_TEST_DOUBLE(27235667517.e-109);
FROM_CHARS_TEST_DOUBLE(653532977297.e-123);
FROM_CHARS_TEST_DOUBLE(3142213164987.e-294);
FROM_CHARS_TEST_DOUBLE(46202199371337.e-072);
FROM_CHARS_TEST_DOUBLE(231010996856685.e-073);
FROM_CHARS_TEST_DOUBLE(9324754620109615.e212);
FROM_CHARS_TEST_DOUBLE(78459735791271921.e049);
FROM_CHARS_TEST_DOUBLE(272104041512242479.e200);
FROM_CHARS_TEST_DOUBLE(6802601037806061975.e198);
FROM_CHARS_TEST_DOUBLE(20505426358836677347.e-221);
FROM_CHARS_TEST_DOUBLE(836168422905420598437.e-234);
FROM_CHARS_TEST_DOUBLE(4891559871276714924261.e222);
// Single precision cases that should round towards zero.
FROM_CHARS_TEST_FLOAT(5.e-20);
FROM_CHARS_TEST_FLOAT(67.e14);
FROM_CHARS_TEST_FLOAT(985.e15);
FROM_CHARS_TEST_FLOAT(7693.e-42);
FROM_CHARS_TEST_FLOAT(55895.e-16);
FROM_CHARS_TEST_FLOAT(996622.e-44);
FROM_CHARS_TEST_FLOAT(7038531.e-32);
FROM_CHARS_TEST_FLOAT(60419369.e-46);
FROM_CHARS_TEST_FLOAT(702990899.e-20);
FROM_CHARS_TEST_FLOAT(6930161142.e-48);
FROM_CHARS_TEST_FLOAT(25933168707.e-13);
FROM_CHARS_TEST_FLOAT(596428896559.e20);
// Similarly, forms that should round away from zero.
FROM_CHARS_TEST_DOUBLE(9.e-265);
FROM_CHARS_TEST_DOUBLE(85.e-037);
FROM_CHARS_TEST_DOUBLE(623.e100);
FROM_CHARS_TEST_DOUBLE(3571.e263);
FROM_CHARS_TEST_DOUBLE(81661.e153);
FROM_CHARS_TEST_DOUBLE(920657.e-023);
FROM_CHARS_TEST_DOUBLE(4603285.e-024);
FROM_CHARS_TEST_DOUBLE(87575437.e-309);
FROM_CHARS_TEST_DOUBLE(245540327.e122);
FROM_CHARS_TEST_DOUBLE(6138508175.e120);
FROM_CHARS_TEST_DOUBLE(83356057653.e193);
FROM_CHARS_TEST_DOUBLE(619534293513.e124);
FROM_CHARS_TEST_DOUBLE(2335141086879.e218);
FROM_CHARS_TEST_DOUBLE(36167929443327.e-159);
FROM_CHARS_TEST_DOUBLE(609610927149051.e-255);
FROM_CHARS_TEST_DOUBLE(3743626360493413.e-165);
FROM_CHARS_TEST_DOUBLE(94080055902682397.e-242);
FROM_CHARS_TEST_DOUBLE(899810892172646163.e283);
FROM_CHARS_TEST_DOUBLE(7120190517612959703.e120);
FROM_CHARS_TEST_DOUBLE(25188282901709339043.e-252);
FROM_CHARS_TEST_DOUBLE(308984926168550152811.e-052);
FROM_CHARS_TEST_DOUBLE(6372891218502368041059.e064);
// Single precision cases that should round away from zero.
FROM_CHARS_TEST_FLOAT(3.e-23);
FROM_CHARS_TEST_FLOAT(57.e18);
FROM_CHARS_TEST_FLOAT(789.e-35);
FROM_CHARS_TEST_FLOAT(2539.e-18);
FROM_CHARS_TEST_FLOAT(76173.e28);
FROM_CHARS_TEST_FLOAT(887745.e-11);
FROM_CHARS_TEST_FLOAT(5382571.e-37);
FROM_CHARS_TEST_FLOAT(82381273.e-35);
FROM_CHARS_TEST_FLOAT(750486563.e-38);
FROM_CHARS_TEST_FLOAT(3752432815.e-39);
FROM_CHARS_TEST_FLOAT(75224575729.e-45);
FROM_CHARS_TEST_FLOAT(459926601011.e15);
}
#undef FROM_CHARS_TEST_DOUBLE
#undef FROM_CHARS_TEST_FLOAT
#endif
// Parses the whole of `s` with absl::from_chars and returns the resulting
// float.  The parse status is not inspected.
//
// The result starts at zero so that a well-defined value is returned even
// when from_chars does not store anything (for example when no number is
// matched); returning an uninitialized float would be undefined behavior.
float ToFloat(absl::string_view s) {
  float f = 0.0f;
  absl::from_chars(s.data(), s.data() + s.size(), f);
  return f;
}
// Parses the whole of `s` with absl::from_chars and returns the resulting
// double.  The parse status is not inspected.
//
// The result starts at zero so that a well-defined value is returned even
// when from_chars does not store anything (for example when no number is
// matched); returning an uninitialized double would be undefined behavior.
double ToDouble(absl::string_view s) {
  double d = 0.0;
  absl::from_chars(s.data(), s.data() + s.size(), d);
  return d;
}
// A duplication of the test cases in "NearRoundingCases" above, but with
// expected values expressed with integers, using ldexp/ldexpf. These test
// cases will work even on compilers that do not accurately round floating point
// literals.
//
// Each expectation has the form ldexp(m, e), i.e. the exact value m * 2**e,
// so no decimal-to-binary conversion by the compiler is involved.
TEST(FromChars, NearRoundingCasesExplicit) {
// Double precision cases matching the "round towards zero" group.
EXPECT_EQ(ToDouble("5.e125"), ldexp(6653062250012735, 365));
EXPECT_EQ(ToDouble("69.e267"), ldexp(4705683757438170, 841));
EXPECT_EQ(ToDouble("999.e-026"), ldexp(6798841691080350, -129));
EXPECT_EQ(ToDouble("7861.e-034"), ldexp(8975675289889240, -153));
EXPECT_EQ(ToDouble("75569.e-254"), ldexp(6091718967192243, -880));
EXPECT_EQ(ToDouble("928609.e-261"), ldexp(7849264900213743, -900));
EXPECT_EQ(ToDouble("9210917.e080"), ldexp(8341110837370930, 236));
EXPECT_EQ(ToDouble("84863171.e114"), ldexp(4625202867375927, 353));
EXPECT_EQ(ToDouble("653777767.e273"), ldexp(5068902999763073, 884));
EXPECT_EQ(ToDouble("5232604057.e-298"), ldexp(5741343011915040, -1010));
EXPECT_EQ(ToDouble("27235667517.e-109"), ldexp(6707124626673586, -380));
EXPECT_EQ(ToDouble("653532977297.e-123"), ldexp(7078246407265384, -422));
EXPECT_EQ(ToDouble("3142213164987.e-294"), ldexp(8219991337640559, -988));
EXPECT_EQ(ToDouble("46202199371337.e-072"), ldexp(5224462102115359, -246));
EXPECT_EQ(ToDouble("231010996856685.e-073"), ldexp(5224462102115359, -247));
EXPECT_EQ(ToDouble("9324754620109615.e212"), ldexp(5539753864394442, 705));
EXPECT_EQ(ToDouble("78459735791271921.e049"), ldexp(8388176519442766, 166));
EXPECT_EQ(ToDouble("272104041512242479.e200"), ldexp(5554409530847367, 670));
EXPECT_EQ(ToDouble("6802601037806061975.e198"), ldexp(5554409530847367, 668));
EXPECT_EQ(ToDouble("20505426358836677347.e-221"),
ldexp(4524032052079546, -722));
EXPECT_EQ(ToDouble("836168422905420598437.e-234"),
ldexp(5070963299887562, -760));
EXPECT_EQ(ToDouble("4891559871276714924261.e222"),
ldexp(6452687840519111, 757));
// Single precision cases matching the "round towards zero" group.
EXPECT_EQ(ToFloat("5.e-20"), ldexpf(15474250, -88));
EXPECT_EQ(ToFloat("67.e14"), ldexpf(12479722, 29));
EXPECT_EQ(ToFloat("985.e15"), ldexpf(14333636, 36));
EXPECT_EQ(ToFloat("7693.e-42"), ldexpf(10979816, -150));
EXPECT_EQ(ToFloat("55895.e-16"), ldexpf(12888509, -61));
EXPECT_EQ(ToFloat("996622.e-44"), ldexpf(14224264, -150));
EXPECT_EQ(ToFloat("7038531.e-32"), ldexpf(11420669, -107));
EXPECT_EQ(ToFloat("60419369.e-46"), ldexpf(8623340, -150));
EXPECT_EQ(ToFloat("702990899.e-20"), ldexpf(16209866, -61));
EXPECT_EQ(ToFloat("6930161142.e-48"), ldexpf(9891056, -150));
EXPECT_EQ(ToFloat("25933168707.e-13"), ldexpf(11138211, -32));
EXPECT_EQ(ToFloat("596428896559.e20"), ldexpf(12333860, 82));
// Double precision cases matching the "round away from zero" group.
EXPECT_EQ(ToDouble("9.e-265"), ldexp(8168427841980010, -930));
EXPECT_EQ(ToDouble("85.e-037"), ldexp(6360455125664090, -169));
EXPECT_EQ(ToDouble("623.e100"), ldexp(6263531988747231, 289));
EXPECT_EQ(ToDouble("3571.e263"), ldexp(6234526311072170, 833));
EXPECT_EQ(ToDouble("81661.e153"), ldexp(6696636728760206, 472));
EXPECT_EQ(ToDouble("920657.e-023"), ldexp(5975405561110124, -109));
EXPECT_EQ(ToDouble("4603285.e-024"), ldexp(5975405561110124, -110));
EXPECT_EQ(ToDouble("87575437.e-309"), ldexp(8452160731874668, -1053));
EXPECT_EQ(ToDouble("245540327.e122"), ldexp(4985336549131723, 381));
EXPECT_EQ(ToDouble("6138508175.e120"), ldexp(4985336549131723, 379));
EXPECT_EQ(ToDouble("83356057653.e193"), ldexp(5986732817132056, 625));
EXPECT_EQ(ToDouble("619534293513.e124"), ldexp(4798406992060657, 399));
EXPECT_EQ(ToDouble("2335141086879.e218"), ldexp(5419088166961646, 713));
EXPECT_EQ(ToDouble("36167929443327.e-159"), ldexp(8135819834632444, -536));
EXPECT_EQ(ToDouble("609610927149051.e-255"), ldexp(4576664294594737, -850));
EXPECT_EQ(ToDouble("3743626360493413.e-165"), ldexp(6898586531774201, -549));
EXPECT_EQ(ToDouble("94080055902682397.e-242"), ldexp(6273271706052298, -800));
EXPECT_EQ(ToDouble("899810892172646163.e283"), ldexp(7563892574477827, 947));
EXPECT_EQ(ToDouble("7120190517612959703.e120"), ldexp(5385467232557565, 409));
EXPECT_EQ(ToDouble("25188282901709339043.e-252"),
ldexp(5635662608542340, -825));
EXPECT_EQ(ToDouble("308984926168550152811.e-052"),
ldexp(5644774693823803, -157));
EXPECT_EQ(ToDouble("6372891218502368041059.e064"),
ldexp(4616868614322430, 233));
// Single precision cases matching the "round away from zero" group.
EXPECT_EQ(ToFloat("3.e-23"), ldexpf(9507380, -98));
EXPECT_EQ(ToFloat("57.e18"), ldexpf(12960300, 42));
EXPECT_EQ(ToFloat("789.e-35"), ldexpf(10739312, -130));
EXPECT_EQ(ToFloat("2539.e-18"), ldexpf(11990089, -72));
EXPECT_EQ(ToFloat("76173.e28"), ldexpf(9845130, 86));
EXPECT_EQ(ToFloat("887745.e-11"), ldexpf(9760860, -40));
EXPECT_EQ(ToFloat("5382571.e-37"), ldexpf(11447463, -124));
EXPECT_EQ(ToFloat("82381273.e-35"), ldexpf(8554961, -113));
EXPECT_EQ(ToFloat("750486563.e-38"), ldexpf(9975678, -120));
EXPECT_EQ(ToFloat("3752432815.e-39"), ldexpf(9975678, -121));
EXPECT_EQ(ToFloat("75224575729.e-45"), ldexpf(13105970, -137));
EXPECT_EQ(ToFloat("459926601011.e15"), ldexpf(12466336, 65));
}
// Shared test logic for a decimal string that lies exactly halfway between
// two adjacent target floats.
//
// `mantissa` and `exponent` describe the precise value midway between the two
// floating point numbers `expected_low` and `expected_high`.  The floating
// point representation to parse is `StrCat(mantissa, "e", exponent)`.
//
// Three inputs are parsed and checked:
//   * one just slightly less than the exact value, which must round down to
//     `expected_low`;
//   * one just slightly greater than the exact value, which must round up to
//     `expected_high`;
//   * the exact value itself, which must round to `expected_half` (which
//     must be either `expected_low` or `expected_high`).
//
// `mantissa` must be non-empty, since its final character is decremented to
// build the lower input.
template <typename FloatType>
void TestHalfwayValue(const std::string& mantissa, int exponent,
                      FloatType expected_low, FloatType expected_high,
                      FloatType expected_half) {
  // Parses all of `text`; the result stays 0 if from_chars stores nothing.
  const auto parse = [](const std::string& text) -> FloatType {
    FloatType parsed = 0;
    absl::from_chars(text.data(), text.data() + text.size(), parsed);
    return parsed;
  };

  // Just below the tie: lower the final digit by one, then add 1000 nines.
  std::string low_rep = mantissa;
  low_rep.back() -= 1;
  absl::StrAppend(&low_rep, std::string(1000, '9'), "e", exponent);
  const FloatType actual_low = parse(low_rep);
  EXPECT_EQ(expected_low, actual_low);

  // Just above the tie: add 1000 zeros followed by a single 1.
  const std::string high_rep =
      absl::StrCat(mantissa, std::string(1000, '0'), "1e", exponent);
  const FloatType actual_high = parse(high_rep);
  EXPECT_EQ(expected_high, actual_high);

  // Exactly on the tie.
  const std::string halfway_rep = absl::StrCat(mantissa, "e", exponent);
  const FloatType actual_half = parse(halfway_rep);
  EXPECT_EQ(expected_half, actual_half);
}
// Checks double rounding behavior around five exact halfway points: between
// zero and the smallest subnormal, between the first two subnormals, across
// the subnormal/normal boundary, between the first two normals, and between
// the two largest finite doubles.
TEST(FromChars, DoubleRounding) {
const double zero = 0.0;
// Neighbouring representable values are obtained by stepping with nextafter.
const double first_subnormal = nextafter(zero, 1.0);
const double second_subnormal = nextafter(first_subnormal, 1.0);
const double first_normal = DBL_MIN;
const double last_subnormal = nextafter(first_normal, 0.0);
const double second_normal = nextafter(first_normal, 1.0);
const double last_normal = DBL_MAX;
const double penultimate_normal = nextafter(last_normal, 0.0);
// Various test cases for numbers between two representable floats. Each
// call to TestHalfwayValue tests a number just below and just above the
// halfway point, as well as the number exactly between them.
// Test between zero and first_subnormal. Round-to-even tie rounds down.
TestHalfwayValue(
"2."
"470328229206232720882843964341106861825299013071623822127928412503377536"
"351043759326499181808179961898982823477228588654633283551779698981993873"
"980053909390631503565951557022639229085839244910518443593180284993653615"
"250031937045767824921936562366986365848075700158576926990370631192827955"
"855133292783433840935197801553124659726357957462276646527282722005637400"
"648549997709659947045402082816622623785739345073633900796776193057750674"
"017632467360096895134053553745851666113422376667860416215968046191446729"
"184030053005753084904876539171138659164623952491262365388187963623937328"
"042389101867234849766823508986338858792562830275599565752445550725518931"
"369083625477918694866799496832404970582102851318545139621383772282614543"
"7693412532098591327667236328125",
-324, zero, first_subnormal, zero);
// Test between first_subnormal and second_subnormal. Round-to-even tie
// rounds up.
TestHalfwayValue(
"7."
"410984687618698162648531893023320585475897039214871466383785237510132609"
"053131277979497545424539885696948470431685765963899850655339096945981621"
"940161728171894510697854671067917687257517734731555330779540854980960845"
"750095811137303474765809687100959097544227100475730780971111893578483867"
"565399878350301522805593404659373979179073872386829939581848166016912201"
"945649993128979841136206248449867871357218035220901702390328579173252022"
"052897402080290685402160661237554998340267130003581248647904138574340187"
"552090159017259254714629617513415977493871857473787096164563890871811984"
"127167305601704549300470526959016576377688490826798697257336652176556794"
"107250876433756084600398490497214911746308553955635418864151316847843631"
"3080237596295773983001708984375",
-324, first_subnormal, second_subnormal, second_subnormal);
// Test between last_subnormal and first_normal. Round-to-even tie rounds up.
TestHalfwayValue(
"2."
"225073858507201136057409796709131975934819546351645648023426109724822222"
"021076945516529523908135087914149158913039621106870086438694594645527657"
"207407820621743379988141063267329253552286881372149012981122451451889849"
"057222307285255133155755015914397476397983411801999323962548289017107081"
"850690630666655994938275772572015763062690663332647565300009245888316433"
"037779791869612049497390377829704905051080609940730262937128958950003583"
"799967207254304360284078895771796150945516748243471030702609144621572289"
"880258182545180325707018860872113128079512233426288368622321503775666622"
"503982534335974568884423900265498198385487948292206894721689831099698365"
"846814022854243330660339850886445804001034933970427567186443383770486037"
"86162277173854562306587467901408672332763671875",
-308, last_subnormal, first_normal, first_normal);
// Test between first_normal and second_normal. Round-to-even tie rounds
// down.
TestHalfwayValue(
"2."
"225073858507201630123055637955676152503612414573018013083228724049586647"
"606759446192036794116886953213985520549032000903434781884412325572184367"
"563347617020518175998922941393629966742598285899994830148971433555578567"
"693279306015978183162142425067962460785295885199272493577688320732492479"
"924816869232247165964934329258783950102250973957579510571600738343645738"
"494324192997092179207389919761694314131497173265255020084997973676783743"
"155205818804439163810572367791175177756227497413804253387084478193655533"
"073867420834526162513029462022730109054820067654020201547112002028139700"
"141575259123440177362244273712468151750189745559978653234255886219611516"
"335924167958029604477064946470184777360934300451421683607013647479513962"
"13837722826145437693412532098591327667236328125",
-308, first_normal, second_normal, first_normal);
// Test between penultimate_normal and last_normal. Round-to-even tie rounds
// down.
TestHalfwayValue(
"1."
"797693134862315608353258760581052985162070023416521662616611746258695532"
"672923265745300992879465492467506314903358770175220871059269879629062776"
"047355692132901909191523941804762171253349609463563872612866401980290377"
"995141836029815117562837277714038305214839639239356331336428021390916694"
"57927874464075218944",
308, penultimate_normal, last_normal, penultimate_normal);
}
// Same test cases as DoubleRounding, now with new and improved Much Smaller
// Precision!
//
// Each case hands TestHalfwayValue() a decimal literal together with a pair of
// adjacent floats and the float the parse is expected to produce.
// NOTE(review): TestHalfwayValue is defined outside this section; the argument
// meaning (digits, decimal exponent, lower neighbour, upper neighbour,
// expected result) is inferred from these call sites and the per-case
// comments -- confirm against its definition.
TEST(FromChars, FloatRounding) {
const float zero = 0.0;
// Smallest positive subnormal float, and the next float above it.
const float first_subnormal = nextafterf(zero, 1.0);
const float second_subnormal = nextafterf(first_subnormal, 1.0);
// FLT_MIN is the smallest positive normal float; its neighbours are the
// largest subnormal (toward 0) and the second-smallest normal (toward 1).
const float first_normal = FLT_MIN;
const float last_subnormal = nextafterf(first_normal, 0.0);
const float second_normal = nextafterf(first_normal, 1.0);
// FLT_MAX is the largest finite float; penultimate_normal is the finite float
// immediately below it.
const float last_normal = FLT_MAX;
const float penultimate_normal = nextafterf(last_normal, 0.0);
// Test between zero and first_subnormal. Round-to-even tie rounds down.
TestHalfwayValue(
"7."
"006492321624085354618647916449580656401309709382578858785341419448955413"
"42930300743319094181060791015625",
-46, zero, first_subnormal, zero);
// first_subnormal and second_subnormal. Round-to-even tie rounds up.
TestHalfwayValue(
"2."
"101947696487225606385594374934874196920392912814773657635602425834686624"
"028790902229957282543182373046875",
-45, first_subnormal, second_subnormal, second_subnormal);
// last_subnormal and first_normal. Round-to-even tie rounds up.
TestHalfwayValue(
"1."
"175494280757364291727882991035766513322858992758990427682963118425003064"
"9651730385585324256680905818939208984375",
-38, last_subnormal, first_normal, first_normal);
// first_normal and second_normal. Round-to-even tie rounds down.
TestHalfwayValue(
"1."
"175494420887210724209590083408724842314472120785184615334540294131831453"
"9442813071445925743319094181060791015625",
-38, first_normal, second_normal, first_normal);
// penultimate_normal and last_normal. Round-to-even rounds down.
TestHalfwayValue("3.40282336497324057985868971510891282432", 38,
penultimate_normal, last_normal, penultimate_normal);
}
// Underflow handling, per the specification in DR 3081. For an input whose
// magnitude is too small to represent, this test requires that:
//   * the whole number is consumed (`ptr` reaches the end of the input),
//   * `ec` is std::errc::result_out_of_range,
//   * the output keeps the sign of the input, and
//   * its magnitude does not exceed numeric_limits<T>::min().
// The output is pre-loaded with a value of the opposite sign so that a parse
// that leaves it untouched is detected.
TEST(FromChars, Underflow) {
  absl::from_chars_result result;

  // Negative input.
  {
    const std::string text = "-1e-1000";
    const char* const first = text.data();
    const char* const last = first + text.size();

    double as_double = 100.0;
    result = absl::from_chars(first, last, as_double);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(as_double));  // negative
    EXPECT_GE(as_double, -std::numeric_limits<double>::min());

    float as_float = 100.0;
    result = absl::from_chars(first, last, as_float);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(as_float));  // negative
    EXPECT_GE(as_float, -std::numeric_limits<float>::min());
  }

  // Positive input.
  {
    const std::string text = "1e-1000";
    const char* const first = text.data();
    const char* const last = first + text.size();

    double as_double = -100.0;
    result = absl::from_chars(first, last, as_double);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(as_double));  // positive
    EXPECT_LE(as_double, std::numeric_limits<double>::min());

    float as_float = -100.0;
    result = absl::from_chars(first, last, as_float);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(as_float));  // positive
    EXPECT_LE(as_float, std::numeric_limits<float>::min());
  }
}
// Overflow handling, per the specification in DR 3081. For an input whose
// magnitude is too large to represent, this test requires that:
//   * the whole number is consumed (`ptr` reaches the end of the input),
//   * `ec` is std::errc::result_out_of_range, and
//   * the output is exactly +/- numeric_limits<T>::max(), with the sign of
//     the input.
// The output is pre-loaded with a value of the opposite sign so that a parse
// that leaves it untouched is detected.
TEST(FromChars, Overflow) {
  absl::from_chars_result result;

  // Negative input.
  {
    const std::string text = "-1e1000";
    const char* const first = text.data();
    const char* const last = first + text.size();

    double as_double = 100.0;
    result = absl::from_chars(first, last, as_double);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(as_double));  // negative
    EXPECT_EQ(as_double, -std::numeric_limits<double>::max());

    float as_float = 100.0;
    result = absl::from_chars(first, last, as_float);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(as_float));  // negative
    EXPECT_EQ(as_float, -std::numeric_limits<float>::max());
  }

  // Positive input.
  {
    const std::string text = "1e1000";
    const char* const first = text.data();
    const char* const last = first + text.size();

    double as_double = -100.0;
    result = absl::from_chars(first, last, as_double);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(as_double));  // positive
    EXPECT_EQ(as_double, std::numeric_limits<double>::max());

    float as_float = -100.0;
    result = absl::from_chars(first, last, as_float);
    EXPECT_EQ(result.ptr, last);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(as_float));  // positive
    EXPECT_EQ(as_float, std::numeric_limits<float>::max());
  }
}
// Regression input: a hexadecimal float whose binary exponent (p99, written
// with many leading zeros) pushes the value out of float range. The parse
// must report result_out_of_range.
TEST(FromChars, RegressionTestsFromFuzzer) {
  const absl::string_view input = "0x21900000p00000000099";
  const char* const first = input.data();
  const char* const last = first + input.size();
  float parsed;
  const auto outcome = absl::from_chars(first, last, parsed);
  EXPECT_EQ(outcome.ec, std::errc::result_out_of_range);
}
// Checks the `ptr` member of the result: it must point one past the number
// that was scanned, even when that number is not representable, and must stay
// at the start of the input when no number could be scanned at all.
TEST(FromChars, ReturnValuePtr) {
  double parsed;
  absl::from_chars_result outcome;

  // Representable number followed by junk: only "3.14" (4 chars) is consumed.
  const std::string normal = "3.14@#$%@#$%";
  const char* const normal_begin = normal.data();
  outcome =
      absl::from_chars(normal_begin, normal_begin + normal.size(), parsed);
  EXPECT_EQ(outcome.ec, std::errc());
  EXPECT_EQ(outcome.ptr - normal_begin, 4);

  // Out-of-range number followed by junk: "1e1000" (6 chars) is still
  // consumed in full.
  const std::string overflow = "1e1000@#$%@#$%";
  const char* const overflow_begin = overflow.data();
  outcome = absl::from_chars(overflow_begin, overflow_begin + overflow.size(),
                             parsed);
  EXPECT_EQ(outcome.ec, std::errc::result_out_of_range);
  EXPECT_EQ(outcome.ptr - overflow_begin, 6);

  // No number at all: nothing is consumed.
  const std::string garbage = "#$%@#$%";
  const char* const garbage_begin = garbage.data();
  outcome =
      absl::from_chars(garbage_begin, garbage_begin + garbage.size(), parsed);
  EXPECT_EQ(outcome.ec, std::errc::invalid_argument);
  EXPECT_EQ(outcome.ptr - garbage_begin, 0);
}
// Cross-checks absl::from_chars() against strtod() for doubles: for a sweep of
// 7-digit mantissas (stepping by 501) and every decimal exponent in
// [-300, 300), both must produce exactly the same value.
//
// This test assumes the platform's strtod() uses perfect round_to_nearest
// rounding.
TEST(FromChars, TestVersusStrtod) {
  constexpr int kFirstMantissa = 1000000;
  constexpr int kLastMantissa = 9999999;
  constexpr int kMantissaStride = 501;
  for (int mantissa = kFirstMantissa; mantissa <= kLastMantissa;
       mantissa += kMantissaStride) {
    for (int exponent = -300; exponent < 300; ++exponent) {
      const std::string candidate = absl::StrCat(mantissa, "e", exponent);
      const double reference = strtod(candidate.c_str(), nullptr);
      double parsed = 0;
      const char* const first = candidate.data();
      absl::from_chars(first, first + candidate.size(), parsed);
      // ASSERT (not EXPECT) so the sweep stops at the first mismatch.
      ASSERT_EQ(reference, parsed) << candidate;
    }
  }
}
// Cross-checks absl::from_chars() against strtof() for floats: for a sweep of
// 7-digit mantissas (stepping by 501) and every decimal exponent in
// [-43, 32), both must produce exactly the same value.
//
// This test assumes the platform's strtof() uses perfect round_to_nearest
// rounding.
TEST(FromChars, TestVersusStrtof) {
  constexpr int kFirstMantissa = 1000000;
  constexpr int kLastMantissa = 9999999;
  constexpr int kMantissaStride = 501;
  for (int mantissa = kFirstMantissa; mantissa <= kLastMantissa;
       mantissa += kMantissaStride) {
    for (int exponent = -43; exponent < 32; ++exponent) {
      const std::string candidate = absl::StrCat(mantissa, "e", exponent);
      const float strtof_value = strtof(candidate.c_str(), nullptr);
      float parsed = 0;
      const char* const first = candidate.data();
      absl::from_chars(first, first + candidate.size(), parsed);
      // ASSERT (not EXPECT) so the sweep stops at the first mismatch.
      ASSERT_EQ(strtof_value, parsed) << candidate;
    }
  }
}
// Returns true when `a` and `b` have exactly the same object representation,
// compared byte by byte. Unlike operator== (and therefore EXPECT_EQ), this
// reports two NaNs with the same bits as equal, and +0.0 / -0.0 as different.
template <typename Float>
bool Identical(Float a, Float b) {
  const int byte_difference = memcmp(&a, &b, sizeof(a));
  return byte_difference == 0;
}
// Check that NaNs are parsed correctly. The spec requires that
// std::from_chars on "NaN(123abc)" return the same value as std::nan("123abc").
// How such an n-char-sequence affects the generated NaN is unspecified, so we
// just test for symmetry with std::nan and strtod here.
//
// (In Linux, this parses the value as a number and stuffs that number into the
// free bits of a quiet NaN.)
//
// For every payload the test also parses the negated spelling and checks that
// it differs from the positive NaN only in its sign bit.
TEST(FromChars, NaNDoubles) {
  for (const std::string n_char_sequence :
       {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000",
        "8000000000000", "abc123", "legal_but_unexpected",
        "99999999999999999999999", "_"}) {
    const std::string positive_text =
        absl::StrCat("nan(", n_char_sequence, ")");
    SCOPED_TRACE(positive_text);

    double positive_nan;
    absl::from_chars(positive_text.data(),
                     positive_text.data() + positive_text.size(),
                     positive_nan);
    const double reference_nan = std::nan(n_char_sequence.c_str());
    EXPECT_TRUE(Identical(positive_nan, reference_nan));

    // Also check that we match strtod()'s behavior.  This test assumes that
    // the platform has a compliant strtod().
#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY
    const double strtod_nan = strtod(positive_text.c_str(), nullptr);
    EXPECT_TRUE(Identical(positive_nan, strtod_nan));
#endif  // ABSL_STRTOD_HANDLES_NAN_CORRECTLY

    // Check that we can parse a negative NaN
    const std::string negative_text = "-" + positive_text;
    double negative_nan;
    absl::from_chars(negative_text.data(),
                     negative_text.data() + negative_text.size(),
                     negative_nan);
    EXPECT_TRUE(std::signbit(negative_nan));
    EXPECT_FALSE(Identical(negative_nan, positive_nan));
    // Flipping the sign of the positive result must yield the negative one.
    positive_nan = std::copysign(positive_nan, -1.0);
    EXPECT_TRUE(Identical(negative_nan, positive_nan));
  }
}
// Float counterpart of the NaNDoubles test: "nan(<n-char-sequence>)" parsed by
// absl::from_chars must be bit-identical to std::nanf(<n-char-sequence>) (and
// to strtof() where the platform handles NaNs correctly), and the negated
// spelling must differ from it only in the sign bit.
TEST(FromChars, NaNFloats) {
  for (std::string n_char_sequence :
       {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000",
        "8000000000000", "abc123", "legal_but_unexpected",
        "99999999999999999999999", "_"}) {
    std::string input = absl::StrCat("nan(", n_char_sequence, ")");
    SCOPED_TRACE(input);
    float from_chars_float;
    absl::from_chars(input.data(), input.data() + input.size(),
                     from_chars_float);
    float std_nan_float = std::nanf(n_char_sequence.c_str());
    EXPECT_TRUE(Identical(from_chars_float, std_nan_float));

    // Also check that we match strtof()'s behavior.  This test assumes that
    // the platform has a compliant strtof().
#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY
    float strtof_float = strtof(input.c_str(), nullptr);
    EXPECT_TRUE(Identical(from_chars_float, strtof_float));
#endif  // ABSL_STRTOD_HANDLES_NAN_CORRECTLY

    // Check that we can parse a negative NaN
    std::string negative_input = "-" + input;
    float negative_from_chars_float;
    absl::from_chars(negative_input.data(),
                     negative_input.data() + negative_input.size(),
                     negative_from_chars_float);
    EXPECT_TRUE(std::signbit(negative_from_chars_float));
    EXPECT_FALSE(Identical(negative_from_chars_float, from_chars_float));
    // Use the (float, float) overload of std::copysign. Passing a double sign
    // argument would select the double overload and narrow the resulting NaN
    // back to float, and that round trip is not guaranteed to preserve the
    // exact bit pattern that Identical() compares.
    from_chars_float = std::copysign(from_chars_float, -1.0f);
    EXPECT_TRUE(Identical(negative_from_chars_float, from_chars_float));
  }
}
// Returns the stride that follows `step`: the current stride plus a quarter of
// it (rounded down by the shift) plus one. For non-negative `step` the result
// is strictly larger, so repeated application grows roughly geometrically.
int NextStep(int step) {
  const int quarter = step >> 2;
  return step + quarter + 1;
}
// Runs absl::from_chars over a family of inputs indexed by an integer and
// checks both the in-range results and the out-of-range error reporting.
//
// input_generator(i) produces the text to parse for index i;
// expected_generator(i) produces the Float that text should convert to.
//
// lower_bound and upper_bound are the smallest and largest indices for which
// the conversion is expected to succeed. Four sweeps are made, each with a
// stride that grows via NextStep() so that indices near the starting point are
// sampled densely and distant ones sparsely:
//   1. upward from lower_bound (stopping before upper_bound): must succeed
//      and equal expected_generator(i);
//   2. downward from upper_bound (stopping before lower_bound): likewise;
//   3. downward from lower_bound - 1 to about -1000000: must report
//      result_out_of_range and yield a value below 1.0;
//   4. upward from upper_bound + 1 to about 1000000: must report
//      result_out_of_range and yield a value above 1.0.
template <typename Float>
void TestOverflowAndUnderflow(
    const std::function<std::string(int)>& input_generator,
    const std::function<Float(int)>& expected_generator, int lower_bound,
    int upper_bound) {
  // Sweep 1: legal values near lower_bound.
  for (int index = lower_bound, step = 1; index < upper_bound;
       index += step, step = NextStep(step)) {
    const std::string text = input_generator(index);
    SCOPED_TRACE(text);
    const Float expected = expected_generator(index);
    Float actual;
    const auto outcome =
        absl::from_chars(text.data(), text.data() + text.size(), actual);
    EXPECT_EQ(outcome.ec, std::errc());
    EXPECT_EQ(expected, actual)
        << absl::StrFormat("%a vs %a", expected, actual);
  }

  // Sweep 2: legal values near upper_bound.
  for (int index = upper_bound, step = 1; index > lower_bound;
       index -= step, step = NextStep(step)) {
    const std::string text = input_generator(index);
    SCOPED_TRACE(text);
    const Float expected = expected_generator(index);
    Float actual;
    const auto outcome =
        absl::from_chars(text.data(), text.data() + text.size(), actual);
    EXPECT_EQ(outcome.ec, std::errc());
    EXPECT_EQ(expected, actual)
        << absl::StrFormat("%a vs %a", expected, actual);
  }

  // Sweep 3: underflow values below lower_bound.
  for (int index = lower_bound - 1, step = 1; index > -1000000;
       index -= step, step = NextStep(step)) {
    const std::string text = input_generator(index);
    SCOPED_TRACE(text);
    Float actual;
    const auto outcome =
        absl::from_chars(text.data(), text.data() + text.size(), actual);
    EXPECT_EQ(outcome.ec, std::errc::result_out_of_range);
    EXPECT_LT(actual, 1.0);  // check for underflow
  }

  // Sweep 4: overflow values above upper_bound.
  for (int index = upper_bound + 1, step = 1; index < 1000000;
       index += step, step = NextStep(step)) {
    const std::string text = input_generator(index);
    SCOPED_TRACE(text);
    Float actual;
    const auto outcome =
        absl::from_chars(text.data(), text.data() + text.size(), actual);
    EXPECT_EQ(outcome.ec, std::errc::result_out_of_range);
    EXPECT_GT(actual, 1.0);  // check for overflow
  }
}
// Overflow/underflow detection for hexadecimal doubles of the form
// "0x1.0p<exponent>".
//
// The largest representable double is 0x1.fffffffffffffp+1023, and the
// smallest representable subnormal is 0x0.0000000000001p-1022, which equals
// 0x1p-1074. Binary exponents in [-1074, 1023] must therefore convert
// exactly, and anything outside that range must be reported as out of range.
TEST(FromChars, HexdecimalDoubleLimits) {
  TestOverflowAndUnderflow<double>(
      [](int exponent) { return absl::StrCat("0x1.0p", exponent); },
      [](int exponent) { return std::ldexp(1.0, exponent); },
      /*lower_bound=*/-1074, /*upper_bound=*/1023);
}
// Overflow/underflow detection for hexadecimal floats of the form
// "0x1.0p<exponent>".
//
// The largest representable float is 0x1.fffffep+127, and the smallest
// representable subnormal is 0x0.000002p-126, which equals 0x1p-149. Binary
// exponents in [-149, 127] must therefore convert exactly, and anything
// outside that range must be reported as out of range.
TEST(FromChars, HexdecimalFloatLimits) {
  TestOverflowAndUnderflow<float>(
      [](int exponent) { return absl::StrCat("0x1.0p", exponent); },
      [](int exponent) { return std::ldexp(1.0f, exponent); },
      /*lower_bound=*/-149, /*upper_bound=*/127);
}
// Overflow/underflow detection for decimal doubles of the form
// "1.0e<exponent>".
//
// The largest representable double is about 1.8e308, and the smallest
// representable subnormal is about 5e-324, so '1e-324' rounds away from the
// smallest representable positive value. Decimal exponents in [-323, 308] are
// therefore the ones expected to convert successfully.
TEST(FromChars, DecimalDoubleLimits) {
  TestOverflowAndUnderflow<double>(
      [](int exponent) { return absl::StrCat("1.0e", exponent); },
      [](int exponent) { return Pow10(exponent); },
      /*lower_bound=*/-323, /*upper_bound=*/308);
}
// Overflow/underflow detection for decimal floats of the form
// "1.0e<exponent>".
//
// The largest representable float is about 3.4e38, and the smallest
// representable subnormal is about 1.45e-45, so '1e-45' rounds towards the
// smallest representable positive value. Decimal exponents in [-45, 38] are
// therefore the ones expected to convert successfully.
TEST(FromChars, DecimalFloatLimits) {
  TestOverflowAndUnderflow<float>(
      [](int exponent) { return absl::StrCat("1.0e", exponent); },
      [](int exponent) { return Pow10(exponent); },
      /*lower_bound=*/-45, /*upper_bound=*/38);
}
} // namespace

File diff suppressed because it is too large Load diff

1338
third_party/abseil_cpp/absl/strings/cord.h vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,60 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_
#define ABSL_STRINGS_CORD_TEST_HELPERS_H_
#include "absl/strings/cord.h"
namespace absl {
ABSL_NAMESPACE_BEGIN

// Builds a Cord out of the strings in `c`, in iteration order, in a way that
// keeps every string in its own segment. This lets code be unit tested
// against multi-segment Cord inputs.
//
// Example:
//
//   absl::Cord c = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"});
//   EXPECT_FALSE(c.GetFlat(&unused));
//
// How the fragmentation is achieved is an implementation detail: each element
// is currently copied into its own heap-allocated std::string and wrapped with
// MakeCordFromExternal(), whose releaser deletes that copy. A construction
// that yields a multi-segment Cord today may yield a flat one once new
// optimizations are added to the Cord class; MakeFragmentedCord will, however,
// always be updated to keep returning a multi-segment Cord.
template <typename Container>
Cord MakeFragmentedCord(const Container& c) {
  Cord result;
  for (const auto& piece : c) {
    // The copy is owned by the releaser lambda handed to the Cord below.
    auto* owned_copy = new std::string(piece);
    Cord segment = absl::MakeCordFromExternal(
        *owned_copy, [owned_copy](absl::string_view) { delete owned_copy; });
    // Put everything accumulated so far in front of the new segment, then
    // adopt the combined Cord as the running result.
    segment.Prepend(result);
    result = segment;
  }
  return result;
}

// Convenience overload so that a braced list of string literals can be passed
// directly; forwards to the container version above.
inline Cord MakeFragmentedCord(std::initializer_list<absl::string_view> list) {
  using PieceList = std::initializer_list<absl::string_view>;
  return MakeFragmentedCord<PieceList>(list);
}

ABSL_NAMESPACE_END
}  // namespace absl
#endif // ABSL_STRINGS_CORD_TEST_HELPERS_H_

View file

@ -0,0 +1,949 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/escaping.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <string>
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/strings/internal/char_map.h"
#include "absl/strings/internal/escaping.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/internal/utf8.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Value for the leave_nulls_escaped argument to CUnescapeInternal(). Passing
// false makes an escape sequence that encodes NUL (e.g. "\0" or "\x00") decode
// to a real NUL byte; passing true would instead copy such an escape sequence
// through to the output unchanged.
constexpr bool kUnescapeNulls = false;
// Returns true iff `c` is one of the characters '0' through '7'.
inline bool is_octal_digit(char c) {
  return c >= '0' && c <= '7';
}
// Converts a single hexadecimal digit character to its value in [0, 15].
// `c` must satisfy absl::ascii_isxdigit (checked by assert only).
inline int hex_digit_to_int(char c) {
  static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
                "Character set must be ASCII.");
  assert(absl::ascii_isxdigit(c));
  const int code = static_cast<unsigned char>(c);
  // In ASCII the low nibble of '0'..'9' is already the digit value. Letters
  // ('A' = 0x41, 'a' = 0x61) have low nibbles 1..6, so adding 9 before masking
  // maps them to 10..15.
  const int adjusted = (code > '9') ? code + 9 : code;
  return adjusted & 0xf;
}
inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) {
if (c >= 0xD800 && c <= 0xDFFF) {
if (error) {
*error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
src);
}
return true;
}
return false;
}
// ----------------------------------------------------------------------
// CUnescapeInternal()
//    Implements both CUnescape() and CUnescapeForNullTerminatedString().
//
//    Unescapes C escape sequences and is the reverse of CEscape().
//
//    If 'source' is valid, stores the unescaped string and its size in
//    'dest' and 'dest_len' respectively, and returns true.  Otherwise
//    returns false and optionally stores the error description in
//    'error'.  Set 'error' to nullptr to disable error reporting.  On
//    failure '*dest_len' is not written, and 'dest' may already hold a
//    partially unescaped prefix.
//
//    If 'leave_nulls_escaped' is true, escape sequences whose value is
//    zero (\0, \x00, \u0000, \U00000000 and their variants) are copied
//    to the output in escaped form instead of being decoded to a NUL
//    byte.
//
//    'dest' should point to a buffer that is at least as big as 'source'.
//    'source' and 'dest' may be the same.
//
//    Recognized escapes: \a \b \f \n \r \t \v \\ \? \' \" , one to three
//    octal digits, \x or \X followed by one or more hex digits (value must
//    not exceed 0xff), \u followed by exactly 4 hex digits and \U followed
//    by exactly 8 hex digits (both emitted as UTF-8; surrogates and values
//    above 0x10FFFF are rejected).
//
//    NOTE: any changes to this function must also be reflected in the older
//    UnescapeCEscapeSequences().
// ----------------------------------------------------------------------
bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
                       char* dest, ptrdiff_t* dest_len, std::string* error) {
  // Nothing to decode.  Returning here also avoids forming `end - 1` below
  // from the (possibly null) data pointer of an empty view.
  if (source.empty()) {
    *dest_len = 0;
    return true;
  }

  char* d = dest;
  const char* p = source.data();
  const char* end = p + source.size();
  const char* last_byte = end - 1;

  // Small optimization for case where source = dest and there's no escaping
  while (p == d && p < end && *p != '\\') p++, d++;

  while (p < end) {
    if (*p != '\\') {
      *d++ = *p++;
    } else {
      if (++p > last_byte) {  // skip past the '\\'
        if (error) *error = "String cannot end with \\";
        return false;
      }
      switch (*p) {
        case 'a':  *d++ = '\a';  break;
        case 'b':  *d++ = '\b';  break;
        case 'f':  *d++ = '\f';  break;
        case 'n':  *d++ = '\n';  break;
        case 'r':  *d++ = '\r';  break;
        case 't':  *d++ = '\t';  break;
        case 'v':  *d++ = '\v';  break;
        case '\\': *d++ = '\\';  break;
        case '?':  *d++ = '\?';  break;    // \?  Who knew?
        case '\'': *d++ = '\'';  break;
        case '"':  *d++ = '\"';  break;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
          // octal digit: 1 to 3 digits
          const char* octal_start = p;
          unsigned int ch = *p - '0';
          if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
          if (p < last_byte && is_octal_digit(p[1]))
            ch = ch * 8 + *++p - '0';      // now points at last digit
          if (ch > 0xff) {
            if (error) {
              *error = "Value of \\" +
                       std::string(octal_start, p + 1 - octal_start) +
                       " exceeds 0xff";
            }
            return false;
          }
          if ((ch == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character.  When
            // unescaping in place the output range can overlap the input
            // range, so memmove (not memcpy) is required.
            const ptrdiff_t octal_size = p + 1 - octal_start;
            *d++ = '\\';
            memmove(d, octal_start, octal_size);
            d += octal_size;
            break;
          }
          *d++ = static_cast<char>(ch);
          break;
        }
        case 'x':
        case 'X': {
          if (p >= last_byte) {
            if (error) *error = "String cannot end with \\x";
            return false;
          } else if (!absl::ascii_isxdigit(p[1])) {
            if (error) *error = "\\x cannot be followed by a non-hex digit";
            return false;
          }
          unsigned int ch = 0;
          const char* hex_start = p;
          // Arbitrarily many hex digits are consumed, but the value is only
          // accumulated while it still fits in a byte.  Once it exceeds 0xff
          // it is left saturated, so a long digit run cannot wrap around the
          // unsigned int and slip past the range check below.
          while (p < last_byte && absl::ascii_isxdigit(p[1])) {
            const unsigned int digit =
                static_cast<unsigned int>(hex_digit_to_int(*++p));
            if (ch <= 0xFF) ch = (ch << 4) + digit;
          }
          if (ch > 0xFF) {
            if (error) {
              *error = "Value of \\" +
                       std::string(hex_start, p + 1 - hex_start) +
                       " exceeds 0xff";
            }
            return false;
          }
          if ((ch == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character (memmove: the
            // ranges may overlap when unescaping in place).
            const ptrdiff_t hex_size = p + 1 - hex_start;
            *d++ = '\\';
            memmove(d, hex_start, hex_size);
            d += hex_size;
            break;
          }
          *d++ = static_cast<char>(ch);
          break;
        }
        case 'u': {
          // \uhhhh => convert 4 hex digits to UTF-8
          char32_t rune = 0;
          const char* hex_start = p;
          if (p + 4 >= end) {
            if (error) {
              *error = "\\u must be followed by 4 hex digits: \\" +
                       std::string(hex_start, p + 1 - hex_start);
            }
            return false;
          }
          for (int i = 0; i < 4; ++i) {
            // Look one char ahead.
            if (absl::ascii_isxdigit(p[1])) {
              rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
            } else {
              if (error) {
                *error = "\\u must be followed by 4 hex digits: \\" +
                         std::string(hex_start, p + 1 - hex_start);
              }
              return false;
            }
          }
          if ((rune == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character (memmove: the
            // ranges may overlap when unescaping in place).
            *d++ = '\\';
            memmove(d, hex_start, 5);  // u0000
            d += 5;
            break;
          }
          if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
            return false;
          }
          d += strings_internal::EncodeUTF8Char(d, rune);
          break;
        }
        case 'U': {
          // \Uhhhhhhhh => convert 8 hex digits to UTF-8
          char32_t rune = 0;
          const char* hex_start = p;
          if (p + 8 >= end) {
            if (error) {
              *error = "\\U must be followed by 8 hex digits: \\" +
                       std::string(hex_start, p + 1 - hex_start);
            }
            return false;
          }
          for (int i = 0; i < 8; ++i) {
            // Look one char ahead.
            if (absl::ascii_isxdigit(p[1])) {
              // Don't change rune until we're sure this
              // is within the Unicode limit, but do advance p.
              uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
              if (newrune > 0x10FFFF) {
                if (error) {
                  *error = "Value of \\" +
                           std::string(hex_start, p + 1 - hex_start) +
                           " exceeds Unicode limit (0x10FFFF)";
                }
                return false;
              } else {
                rune = newrune;
              }
            } else {
              if (error) {
                *error = "\\U must be followed by 8 hex digits: \\" +
                         std::string(hex_start, p + 1 - hex_start);
              }
              return false;
            }
          }
          if ((rune == 0) && leave_nulls_escaped) {
            // Copy the escape sequence for the null character (memmove: the
            // ranges may overlap when unescaping in place).
            *d++ = '\\';
            memmove(d, hex_start, 9);  // U00000000
            d += 9;
            break;
          }
          if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
            return false;
          }
          d += strings_internal::EncodeUTF8Char(d, rune);
          break;
        }
        default: {
          if (error) *error = std::string("Unknown escape sequence: \\") + *p;
          return false;
        }
      }
      p++;                                 // read past letter we escaped
    }
  }
  *dest_len = d - dest;
  return true;
}
// ----------------------------------------------------------------------
// CUnescapeInternal()
//
//    std::string-output flavour of the function above.  'dest' is first
//    resized to source.size() and used as the output buffer; on success
//    it is trimmed to the unescaped length.  On failure false is returned
//    and 'dest' is left at the resized length with unspecified contents.
//    'source' and 'dest' may be the same.
// ----------------------------------------------------------------------
bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
                       std::string* dest, std::string* error) {
  strings_internal::STLStringResizeUninitialized(dest, source.size());

  ptrdiff_t unescaped_len;
  const bool ok = CUnescapeInternal(source, leave_nulls_escaped, &(*dest)[0],
                                    &unescaped_len, error);
  if (ok) {
    // Drop the unused tail of the buffer.
    dest->erase(unescaped_len);
  }
  return ok;
}
// ----------------------------------------------------------------------
// CEscape()
// CHexEscape()
// Utf8SafeCEscape()
// Utf8SafeCHexEscape()
//    Escapes 'src' using C-style escape sequences.  This is useful for
//    preparing query flags.  The 'Hex' version uses hexadecimal rather than
//    octal sequences.  The 'Utf8Safe' version does not touch UTF-8 bytes.
//
//    Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
//
//    CEscapeInternal() is the shared implementation: 'use_hex' selects
//    \xNN over three-digit octal for non-printable bytes, and 'utf8_safe'
//    makes bytes >= 0x80 pass through unescaped.
// ----------------------------------------------------------------------
std::string CEscapeInternal(absl::string_view src, bool use_hex,
                            bool utf8_safe) {
  std::string dest;
  bool prev_was_hex_escape = false;  // true if last output char was \xNN.

  for (unsigned char c : src) {
    bool emitted_hex_escape = false;
    switch (c) {
      case '\n': dest.append("\\n"); break;
      case '\r': dest.append("\\r"); break;
      case '\t': dest.append("\\t"); break;
      case '\"': dest.append("\\\""); break;
      case '\'': dest.append("\\'"); break;
      case '\\': dest.append("\\\\"); break;
      default: {
        const bool passthrough_high_byte = utf8_safe && c >= 0x80;
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        const bool needs_escape =
            !absl::ascii_isprint(c) ||
            (prev_was_hex_escape && absl::ascii_isxdigit(c));
        if (passthrough_high_byte || !needs_escape) {
          dest.push_back(c);
        } else if (use_hex) {
          dest.append("\\x");
          dest.push_back(numbers_internal::kHexChar[c / 16]);
          dest.push_back(numbers_internal::kHexChar[c % 16]);
          emitted_hex_escape = true;
        } else {
          // Three octal digits; each index is in 0..7.
          dest.append("\\");
          dest.push_back(numbers_internal::kHexChar[c / 64]);
          dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]);
          dest.push_back(numbers_internal::kHexChar[c % 8]);
        }
        break;
      }
    }
    prev_was_hex_escape = emitted_hex_escape;
  }

  return dest;
}
// Lookup table indexed by unsigned byte value, giving the number of output
// characters that the octal-based escaper (CEscapedLength() /
// CEscapeAndAppendInternal()) produces for that byte:
//   1 - the byte is copied unchanged,
//   2 - a backslash followed by one character (\t, \n, \r, \", \', \\),
//   4 - a backslash followed by three octal digits.
/* clang-format off */
constexpr char c_escaped_len[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", '
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '0'..'9'
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A'..'O'
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, // 'P'..'Z', '\'
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a'..'o'
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, // 'p'..'z', DEL
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
/* clang-format on */
// Returns how many characters the C-style escaped form of 'src' occupies, by
// summing the per-byte lengths from c_escaped_len.  This matches the escaper
// that writes non-printable characters as octal sequences and gives UTF-8
// bytes no special treatment.
inline size_t CEscapedLength(absl::string_view src) {
  size_t total = 0;
  for (size_t i = 0; i < src.size(); ++i) {
    total += c_escaped_len[static_cast<unsigned char>(src[i])];
  }
  return total;
}
// Appends the C-escaped form of 'src' to '*dest'.  Non-printable bytes are
// written as a backslash plus three octal digits; \n, \r, \t, ", ' and \ are
// written as two-character escapes; everything else is copied unchanged.
// The output size is computed up front from c_escaped_len so that 'dest' is
// grown exactly once.
void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
  const size_t escaped_len = CEscapedLength(src);
  if (escaped_len == src.size()) {
    // Nothing needs escaping: append the input verbatim.
    dest->append(src.data(), src.size());
    return;
  }

  const size_t old_size = dest->size();
  strings_internal::STLStringResizeUninitialized(dest, old_size + escaped_len);
  char* out = &(*dest)[old_size];

  for (unsigned char c : src) {
    switch (c_escaped_len[c]) {
      case 1:
        *out++ = c;
        break;
      case 2: {
        // Two-character escape: backslash plus a mnemonic letter for the
        // whitespace controls, or the character itself for quotes/backslash.
        char second;
        switch (c) {
          case '\n': second = 'n'; break;
          case '\r': second = 'r'; break;
          case '\t': second = 't'; break;
          default: second = static_cast<char>(c); break;  // '"', '\'', '\\'
        }
        *out++ = '\\';
        *out++ = second;
        break;
      }
      default:
        // Backslash followed by three octal digits.
        *out++ = '\\';
        *out++ = '0' + c / 64;
        *out++ = '0' + (c % 64) / 8;
        *out++ = '0' + c % 8;
        break;
    }
  }
}
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
size_t szdest, const signed char* unbase64,
size_t* len) {
static const char kPad64Equals = '=';
static const char kPad64Dot = '.';
size_t destidx = 0;
int decode = 0;
int state = 0;
unsigned int ch = 0;
unsigned int temp = 0;
// If "char" is signed by default, using *src as an array index results in
// accessing negative array elements. Treat the input as a pointer to
// unsigned char to avoid this.
const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
// The GET_INPUT macro gets the next input character, skipping
// over any whitespace, and stopping when we reach the end of the
// string or when we read any non-data character. The arguments are
// an arbitrary identifier (used as a label for goto) and the number
// of data bytes that must remain in the input to avoid aborting the
// loop.
#define GET_INPUT(label, remain) \
label: \
--szsrc; \
ch = *src++; \
decode = unbase64[ch]; \
if (decode < 0) { \
if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
state = 4 - remain; \
break; \
}
// if dest is null, we're just checking to see if it's legal input
// rather than producing output. (I suspect this could just be done
// with a regexp...). We duplicate the loop so this test can be
// outside it instead of in every iteration.
if (dest) {
// This loop consumes 4 input bytes and produces 3 output bytes
// per iteration. We can't know at the start that there is enough
// data left in the string for a full iteration, so the loop may
// break out in the middle; if so 'state' will be set to the
// number of input bytes read.
while (szsrc >= 4) {
// We'll start by optimistically assuming that the next four
// bytes of the string (src[0..3]) are four good data bytes
// (that is, no nulls, whitespace, padding chars, or illegal
// chars). We need to test src[0..2] for nulls individually
// before constructing temp to preserve the property that we
// never read past a null in the string (no matter how long
// szsrc claims the string is).
if (!src[0] || !src[1] || !src[2] ||
((temp = ((unsigned(unbase64[src[0]]) << 18) |
(unsigned(unbase64[src[1]]) << 12) |
(unsigned(unbase64[src[2]]) << 6) |
(unsigned(unbase64[src[3]])))) &
0x80000000)) {
// Iff any of those four characters was bad (null, illegal,
// whitespace, padding), then temp's high bit will be set
// (because unbase64[] is -1 for all bad characters).
//
// We'll back up and resort to the slower decoder, which knows
// how to handle those cases.
GET_INPUT(first, 4);
temp = decode;
GET_INPUT(second, 3);
temp = (temp << 6) | decode;
GET_INPUT(third, 2);
temp = (temp << 6) | decode;
GET_INPUT(fourth, 1);
temp = (temp << 6) | decode;
} else {
// We really did have four good data bytes, so advance four
// characters in the string.
szsrc -= 4;
src += 4;
}
// temp has 24 bits of input, so write that out as three bytes.
if (destidx + 3 > szdest) return false;
dest[destidx + 2] = temp;
temp >>= 8;
dest[destidx + 1] = temp;
temp >>= 8;
dest[destidx] = temp;
destidx += 3;
}
} else {
while (szsrc >= 4) {
if (!src[0] || !src[1] || !src[2] ||
((temp = ((unsigned(unbase64[src[0]]) << 18) |
(unsigned(unbase64[src[1]]) << 12) |
(unsigned(unbase64[src[2]]) << 6) |
(unsigned(unbase64[src[3]])))) &
0x80000000)) {
GET_INPUT(first_no_dest, 4);
GET_INPUT(second_no_dest, 3);
GET_INPUT(third_no_dest, 2);
GET_INPUT(fourth_no_dest, 1);
} else {
szsrc -= 4;
src += 4;
}
destidx += 3;
}
}
#undef GET_INPUT
// if the loop terminated because we read a bad character, return
// now.
if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
!absl::ascii_isspace(ch))
return false;
if (ch == kPad64Equals || ch == kPad64Dot) {
// if we stopped by hitting an '=' or '.', un-read that character -- we'll
// look at it again when we count to check for the proper number of
// equals signs at the end.
++szsrc;
--src;
} else {
// This loop consumes 1 input byte per iteration. It's used to
// clean up the 0-3 input bytes remaining when the first, faster
// loop finishes. 'temp' contains the data from 'state' input
// characters read by the first loop.
while (szsrc > 0) {
--szsrc;
ch = *src++;
decode = unbase64[ch];
if (decode < 0) {
if (absl::ascii_isspace(ch)) {
continue;
} else if (ch == kPad64Equals || ch == kPad64Dot) {
// back up one character; we'll read it again when we check
// for the correct number of pad characters at the end.
++szsrc;
--src;
break;
} else {
return false;
}
}
// Each input character gives us six bits of output.
temp = (temp << 6) | decode;
++state;
if (state == 4) {
// If we've accumulated 24 bits of output, write that out as
// three bytes.
if (dest) {
if (destidx + 3 > szdest) return false;
dest[destidx + 2] = temp;
temp >>= 8;
dest[destidx + 1] = temp;
temp >>= 8;
dest[destidx] = temp;
}
destidx += 3;
state = 0;
temp = 0;
}
}
}
// Process the leftover data contained in 'temp' at the end of the input.
int expected_equals = 0;
switch (state) {
case 0:
// Nothing left over; output is a multiple of 3 bytes.
break;
case 1:
// Bad input; we have 6 bits left over.
return false;
case 2:
// Produce one more output byte from the 12 input bits we have left.
if (dest) {
if (destidx + 1 > szdest) return false;
temp >>= 4;
dest[destidx] = temp;
}
++destidx;
expected_equals = 2;
break;
case 3:
// Produce two more output bytes from the 18 input bits we have left.
if (dest) {
if (destidx + 2 > szdest) return false;
temp >>= 2;
dest[destidx + 1] = temp;
temp >>= 8;
dest[destidx] = temp;
}
destidx += 2;
expected_equals = 1;
break;
default:
// state should have no other values at this point.
ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
state);
}
// The remainder of the string should be all whitespace, mixed with
// exactly 0 equals signs, or exactly 'expected_equals' equals
// signs. (Always accepting 0 equals signs is an Abseil extension
// not covered in the RFC, as is accepting dot as the pad character.)
int equals = 0;
while (szsrc > 0) {
if (*src == kPad64Equals || *src == kPad64Dot)
++equals;
else if (!absl::ascii_isspace(*src))
return false;
--szsrc;
++src;
}
const bool ok = (equals == 0 || equals == expected_equals);
if (ok) *len = destidx;
return ok;
}
// The arrays below were generated by the following code
// #include <sys/time.h>
// #include <stdlib.h>
// #include <string.h>
// main()
// {
// static const char Base64[] =
// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// char* pos;
// int idx, i, j;
// printf(" ");
// for (i = 0; i < 255; i += 8) {
// for (j = i; j < i + 8; j++) {
// pos = strchr(Base64, j);
// if ((pos == nullptr) || (j == 0))
// idx = -1;
// else
// idx = pos - Base64;
// if (idx == -1)
// printf(" %2d, ", idx);
// else
// printf(" %2d/*%c*/,", idx, j);
// }
// printf("\n ");
// }
// }
//
// where the value of "Base64[]" was replaced by one of the base-64 conversion
// tables from the functions below.
/* clang-format off */
// Reverse lookup table for the standard base64 alphabet
// ("A-Z", "a-z", "0-9", '+', '/'). It has 256 entries and is indexed by the
// input byte value: the entry is the 6-bit value (0..63) that the byte
// encodes, or -1 when the byte is not part of the alphabet (this includes
// NUL, whitespace and the pad characters). The decoder relies on the -1
// entries to detect bytes that need special handling.
// Note: the entry for 'H' is written as the octal literal 07, which is 7.
constexpr signed char kUnBase64[] = {
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */,
52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
-1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1,
-1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
// Bytes 0x80..0xFF are never valid base64 characters.
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1
};
// Reverse lookup table for the web-safe base64 alphabet. Same layout as
// kUnBase64 (256 entries indexed by input byte, -1 for bytes outside the
// alphabet), except that '-' maps to 62 and '_' maps to 63, while '+' and '/'
// are rejected.
constexpr signed char kUnWebSafeBase64[] = {
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, 62/*-*/, -1, -1,
52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
-1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/,
-1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
// Bytes 0x80..0xFF are never valid base64 characters.
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1
};
/* clang-format on */
// Encoding alphabet for the web-safe base64 variant: the character at index i
// encodes the 6-bit value i. It differs from the standard alphabet only in
// positions 62 and 63, which hold '-' and '_'. It is handed to
// strings_internal::Base64EscapeInternal() by the WebSafeBase64Escape()
// overloads in this file.
constexpr char kWebSafeBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
// String-producing front end for the pointer-based Base64UnescapeInternal().
//
// Decodes the `slen` bytes at `src` into `*dest` using the reverse lookup
// table `unbase64`. On success `*dest` holds exactly the decoded bytes and
// the function returns true; on failure `*dest` is emptied and the function
// returns false.
template <typename String>
bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
                            const signed char* unbase64) {
  // Size the scratch buffer up front. Base64 packs 3 bytes into every 4
  // characters; the characters left over after the last full group are
  // counted one-for-one as slack (see the base64 RFC:
  // http://tools.ietf.org/html/rfc3548).
  const size_t dest_len = 3 * (slen / 4) + (slen % 4);
  strings_internal::STLStringResizeUninitialized(dest, dest_len);

  // Decode straight into the string's own storage.
  auto* const out = &(*dest)[0];
  size_t len;
  if (Base64UnescapeInternal(src, slen, out, dest_len, unbase64, &len)) {
    // Padding can make the decoded length shorter than the buffer; trim it.
    assert(len <= dest_len);
    dest->erase(len);
    return true;
  }

  dest->clear();
  return false;
}
/* clang-format off */
// Maps a byte value to the numeric value of the hex digit it represents
// ('0'..'9' -> 0..9, 'A'..'F' and 'a'..'f' -> 10..15). Every byte that is not
// a hex digit maps to 0 rather than to an error marker, which is what makes
// the table "lenient": callers using it cannot tell '0' from invalid input.
constexpr char kHexValueLenient[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9'
0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f'
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* clang-format on */
// This is a templated function so that T can be either a char*
// or a string. This works because we use the [] operator to access
// individual characters at a time.
// Converts `num` pairs of hex digits starting at `from` into `num` bytes
// written to `to[0..num)`. `T` only needs to support `to[i]` assignment, so it
// may be a `char*` or a reference to a string that already has `num` elements.
//
// The conversion is lenient: a character that is not a hex digit contributes
// the value 0 (see kHexValueLenient). The caller must guarantee that `from`
// has at least `2 * num` readable bytes.
template <typename T>
void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) {
  // Index with the same type as `num`: an `int` counter would overflow
  // (undefined behaviour) for inputs larger than INT_MAX / 2 pairs.
  for (ptrdiff_t i = 0; i < num; ++i) {
    // `& 0xFF` keeps the table index non-negative when `char` is signed.
    const int high_nibble = kHexValueLenient[from[i * 2] & 0xFF];
    const int low_nibble = kHexValueLenient[from[i * 2 + 1] & 0xFF];
    to[i] = static_cast<char>((high_nibble << 4) + low_nibble);
  }
}
// This is a templated function so that T can be either a char* or a
// std::string.
// Writes the two-character hex form of each of the `num` bytes at `src` into
// `dest`, starting at `dest[0]`. `T` may be a `char*` or a string reference;
// in either case the destination must already have room for `2 * num`
// characters.
template <typename T>
void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) {
  auto out = &dest[0];
  const unsigned char* const last = src + num;
  for (const unsigned char* in = src; in != last; ++in) {
    // kHexTable stores the digit pair for byte value b at offset 2 * b.
    const char* digits = &numbers_internal::kHexTable[*in * 2];
    out = std::copy(digits, digits + 2, out);
  }
}
} // namespace
// ----------------------------------------------------------------------
// CUnescape()
//
// See CUnescapeInternal() for implementation details.
// ----------------------------------------------------------------------
// Public entry point: forwards to CUnescapeInternal() with the
// kUnescapeNulls setting and reports its result unchanged.
bool CUnescape(absl::string_view source, std::string* dest,
               std::string* error) {
  const bool succeeded =
      CUnescapeInternal(source, kUnescapeNulls, dest, error);
  return succeeded;
}
// Returns the C-escaped form of `src`, built by appending to an initially
// empty string via CEscapeAndAppendInternal().
std::string CEscape(absl::string_view src) {
  std::string escaped;
  CEscapeAndAppendInternal(src, &escaped);
  return escaped;
}
// Hex-escaping flavour of CEscape().
// NOTE(review): judging by the sibling wrappers, the two flags passed to
// CEscapeInternal() are (use hex escapes, pass UTF-8 through) -- confirm
// against its declaration.
std::string CHexEscape(absl::string_view src) {
  constexpr bool kFirstFlag = true;
  constexpr bool kSecondFlag = false;
  return CEscapeInternal(src, kFirstFlag, kSecondFlag);
}
// UTF-8-preserving flavour of CEscape().
// NOTE(review): judging by the sibling wrappers, the two flags passed to
// CEscapeInternal() are (use hex escapes, pass UTF-8 through) -- confirm
// against its declaration.
std::string Utf8SafeCEscape(absl::string_view src) {
  constexpr bool kFirstFlag = false;
  constexpr bool kSecondFlag = true;
  return CEscapeInternal(src, kFirstFlag, kSecondFlag);
}
// UTF-8-preserving, hex-escaping flavour of CEscape().
// NOTE(review): judging by the sibling wrappers, the two flags passed to
// CEscapeInternal() are (use hex escapes, pass UTF-8 through) -- confirm
// against its declaration.
std::string Utf8SafeCHexEscape(absl::string_view src) {
  constexpr bool kFirstFlag = true;
  constexpr bool kSecondFlag = true;
  return CEscapeInternal(src, kFirstFlag, kSecondFlag);
}
// ----------------------------------------------------------------------
// Base64Unescape() - base64 decoder
// Base64Escape() - base64 encoder
// WebSafeBase64Unescape() - Google's variation of base64 decoder
// WebSafeBase64Escape() - Google's variation of base64 encoder
//
// Check out
// http://tools.ietf.org/html/rfc2045 for formal description, but what we
// care about is that...
// Take the encoded stuff in groups of 4 characters and turn each
// character into a code 0 to 63 thus:
// A-Z map to 0 to 25
// a-z map to 26 to 51
// 0-9 map to 52 to 61
// +(- for WebSafe) maps to 62
// /(_ for WebSafe) maps to 63
// There will be four numbers, all less than 64 which can be represented
// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
// Arrange the 6 digit binary numbers into three bytes as such:
// aaaaaabb bbbbcccc ccdddddd
// Equals signs (one or two) are used at the end of the encoded block to
// indicate that the text was not an integer multiple of three bytes long.
// ----------------------------------------------------------------------
// Decodes standard-alphabet base64 text from `src` into `*dest` using the
// kUnBase64 reverse lookup table. Returns whatever the internal decoder
// reports.
bool Base64Unescape(absl::string_view src, std::string* dest) {
  const char* const encoded = src.data();
  const size_t encoded_len = src.size();
  return Base64UnescapeInternal(encoded, encoded_len, dest, kUnBase64);
}
// Decodes web-safe base64 text ('-' and '_' in place of '+' and '/') from
// `src` into `*dest` using the kUnWebSafeBase64 reverse lookup table. Returns
// whatever the internal decoder reports.
bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) {
  const char* const encoded = src.data();
  const size_t encoded_len = src.size();
  return Base64UnescapeInternal(encoded, encoded_len, dest, kUnWebSafeBase64);
}
// Encodes `src` into `*dest` with the standard base64 alphabet. The `true`
// argument corresponds to the padded output promised by the header comment.
void Base64Escape(absl::string_view src, std::string* dest) {
  const auto* const bytes = reinterpret_cast<const unsigned char*>(src.data());
  strings_internal::Base64EscapeInternal(bytes, src.size(), dest, true,
                                         strings_internal::kBase64Chars);
}
// Encodes `src` into `*dest` with the web-safe alphabet. The `false` argument
// corresponds to the unpadded output promised by the header comment.
void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
  const auto* const bytes = reinterpret_cast<const unsigned char*>(src.data());
  strings_internal::Base64EscapeInternal(bytes, src.size(), dest, false,
                                         kWebSafeBase64Chars);
}
// Value-returning form of Base64Escape(): standard alphabet, same `true`
// (padding) argument as the out-parameter overload.
std::string Base64Escape(absl::string_view src) {
  const auto* const bytes = reinterpret_cast<const unsigned char*>(src.data());
  std::string encoded;
  strings_internal::Base64EscapeInternal(bytes, src.size(), &encoded, true,
                                         strings_internal::kBase64Chars);
  return encoded;
}
// Value-returning form of WebSafeBase64Escape(): web-safe alphabet, same
// `false` (no padding) argument as the out-parameter overload.
std::string WebSafeBase64Escape(absl::string_view src) {
  const auto* const bytes = reinterpret_cast<const unsigned char*>(src.data());
  std::string encoded;
  strings_internal::Base64EscapeInternal(bytes, src.size(), &encoded, false,
                                         kWebSafeBase64Chars);
  return encoded;
}
// Converts the hex text in `from` into `from.size() / 2` bytes. A trailing
// unpaired character (odd-length input) is ignored because of the integer
// division.
std::string HexStringToBytes(absl::string_view from) {
  const auto byte_count = from.size() / 2;
  std::string bytes;
  strings_internal::STLStringResizeUninitialized(&bytes, byte_count);
  absl::HexStringToBytesInternal<std::string&>(from.data(), bytes, byte_count);
  return bytes;
}
// Returns the hex text for the bytes in `from`; the result is exactly
// `2 * from.size()` characters long.
std::string BytesToHexString(absl::string_view from) {
  const auto* const bytes = reinterpret_cast<const unsigned char*>(from.data());
  std::string hex;
  strings_internal::STLStringResizeUninitialized(&hex, 2 * from.size());
  absl::BytesToHexStringInternal<std::string&>(bytes, hex, from.size());
  return hex;
}
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,164 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: escaping.h
// -----------------------------------------------------------------------------
//
// This header file contains string utilities involved in escaping and
// unescaping strings in various ways.
#ifndef ABSL_STRINGS_ESCAPING_H_
#define ABSL_STRINGS_ESCAPING_H_
#include <cstddef>
#include <string>
#include <vector>
#include "absl/base/macros.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// CUnescape()
//
// Unescapes a `source` string and copies it into `dest`, rewriting C-style
// escape sequences (https://en.cppreference.com/w/cpp/language/escape) into
// their proper code point equivalents, returning `true` if successful.
//
// The following unescape sequences can be handled:
//
// * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents
// * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must
// resolve to a single byte or an error will occur. E.g. values greater than
// 0xff will produce an error.
// * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary
// number of following digits are allowed, the unescaped value must resolve
// to a single byte or an error will occur. E.g. '\x0045' is equivalent to
// '\x45', but '\x1234' will produce an error.
// * Unicode escape sequences ('\unnnn' for exactly four hex digits or
// '\Unnnnnnnn' for exactly eight hex digits), which will be encoded in
// UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and
// 0x99).
//
// If any errors are encountered, this function returns `false`, leaving the
// `dest` output parameter in an unspecified state, and stores the first
// encountered error in `error`. To disable error reporting, set `error` to
// `nullptr` or use the overload with no error reporting below.
//
// Example:
//
// std::string s = "foo\\rbar\\nbaz\\t";
// std::string unescaped_s;
// if (!absl::CUnescape(s, &unescaped_s)) {
// ...
// }
// EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t");
bool CUnescape(absl::string_view source, std::string* dest, std::string* error);
// Overload of `CUnescape()` with no error reporting: forwards to the
// three-argument form with a null `error` pointer.
inline bool CUnescape(absl::string_view source, std::string* dest) {
  std::string* const no_error_sink = nullptr;
  return CUnescape(source, dest, no_error_sink);
}
// CEscape()
//
// Escapes a 'src' string using C-style escape sequences
// (https://en.cppreference.com/w/cpp/language/escape), escaping other
// non-printable/non-whitespace bytes as octal sequences (e.g. "\377").
//
// Example:
//
// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
// std::string escaped_s = absl::CEscape(s);
// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n");
std::string CEscape(absl::string_view src);
// CHexEscape()
//
// Escapes a 'src' string using C-style escape sequences, escaping
// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g.
// "\xFF").
//
// Example:
//
// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
// std::string escaped_s = absl::CHexEscape(s);
// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n");
std::string CHexEscape(absl::string_view src);
// Utf8SafeCEscape()
//
// Escapes a 'src' string using C-style escape sequences, escaping bytes as
// octal sequences, and passing through UTF-8 characters without conversion.
// I.e., when encountering any bytes with their high bit set, this function
// will not escape those values, whether or not they are valid UTF-8.
std::string Utf8SafeCEscape(absl::string_view src);
// Utf8SafeCHexEscape()
//
// Escapes a 'src' string using C-style escape sequences, escaping bytes as
// hexadecimal sequences, and passing through UTF-8 characters without
// conversion.
std::string Utf8SafeCHexEscape(absl::string_view src);
// Base64Unescape()
//
// Converts a `src` string encoded in Base64 to its binary equivalent, writing
// it to a `dest` buffer, returning `true` on success. If `src` contains invalid
// characters, `dest` is cleared and the function returns `false`.
bool Base64Unescape(absl::string_view src, std::string* dest);
// WebSafeBase64Unescape()
//
// Converts a `src` string encoded in Base64 to its binary equivalent, writing
// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'.
// If `src` contains invalid characters, `dest` is cleared and the function
// returns `false`.
bool WebSafeBase64Unescape(absl::string_view src, std::string* dest);
// Base64Escape()
//
// Encodes a `src` string into a base64-encoded string, with padding characters.
// This function conforms with RFC 4648 section 4 (base64).
void Base64Escape(absl::string_view src, std::string* dest);
std::string Base64Escape(absl::string_view src);
// WebSafeBase64Escape()
//
// Encodes a `src` string into a base64-like string, using '-' instead of '+'
// and '_' instead of '/', and without padding. This function conforms with RFC
// 4648 section 5 (base64url).
void WebSafeBase64Escape(absl::string_view src, std::string* dest);
std::string WebSafeBase64Escape(absl::string_view src);
// HexStringToBytes()
//
// Converts an ASCII hex string into bytes, returning binary data of length
// `from.size()/2`.
std::string HexStringToBytes(absl::string_view from);
// BytesToHexString()
//
// Converts binary data into an ASCII text string, returning a string of size
// `2*from.size()`.
std::string BytesToHexString(absl::string_view from);
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_ESCAPING_H_

View file

@ -0,0 +1,94 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/escaping.h"
#include <cstdio>
#include <cstring>
#include <random>
#include "benchmark/benchmark.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/escaping_test_common.h"
namespace {
// Measures CUnescape() on an input made of 50 consecutive "\x55" hex escapes.
void BM_CUnescapeHexString(benchmark::State& state) {
  constexpr int kEscapeCount = 50;
  std::string src;
  for (int remaining = kEscapeCount; remaining > 0; --remaining) {
    src += "\\x55";
  }

  // The output string is reused across iterations.
  std::string dest;
  for (auto _ : state) {
    absl::CUnescape(src, &dest);
  }
}
BENCHMARK(BM_CUnescapeHexString);
// Measures the out-parameter WebSafeBase64Escape() overload on a large input.
void BM_WebSafeBase64Escape_string(benchmark::State& state) {
  // Input: ten back-to-back copies of every plaintext in the shared base64
  // test corpus.
  std::string raw;
  for (int pass = 0; pass != 10; ++pass) {
    for (const auto& entry : absl::strings_internal::base64_strings()) {
      raw += std::string(entry.plaintext);
    }
  }

  // The timed region is just the encode call.
  std::string escaped;
  for (auto _ : state) {
    absl::WebSafeBase64Escape(raw, &escaped);
  }

  // Decode the last result and compare it with the input. Besides being a
  // sanity check, consuming `escaped` here keeps the compiler from discarding
  // the loop above.
  std::string round_trip;
  absl::WebSafeBase64Unescape(escaped, &round_trip);
  ABSL_RAW_CHECK(round_trip == raw, "");
}
BENCHMARK(BM_WebSafeBase64Escape_string);
// Used for the CEscape benchmarks. Each constant is a short seed string that
// CEscapeBenchmarkHelper() repeats until the requested length is reached; the
// three differ in how many of their bytes CEscape() has to rewrite.
// Printable ASCII only.
const char kStringValueNoEscape[] = "1234567890";
// Mostly printable, with one newline and one high-bit byte.
const char kStringValueSomeEscaped[] = "123\n56789\xA1";
// Mostly high-bit and control bytes, with only 'y' and 'z' printable.
const char kStringValueMostEscaped[] = "\xA1\xA2\ny\xA4\xA5\xA6z\b\r";
// Shared body of the BM_CEscape_* benchmarks.
//
// Builds an input by appending `string_value` until the input is at least
// `max_len` bytes long, then times absl::CEscape() on it.
//
//   state         benchmark state driving the timed loop.
//   string_value  NUL-terminated seed string to repeat; must be non-empty
//                 when `max_len` is positive, or the fill loop never ends.
//   max_len       minimum input length in bytes; values <= 0 yield an empty
//                 input.
void CEscapeBenchmarkHelper(benchmark::State& state, const char* string_value,
                            int max_len) {
  // Convert once to the string's size type so the loop condition does not mix
  // signed and unsigned operands (a negative `max_len` would otherwise wrap
  // to a huge bound).
  const std::string::size_type target_len =
      max_len > 0 ? static_cast<std::string::size_type>(max_len) : 0;

  std::string src;
  while (src.size() < target_len) {
    absl::StrAppend(&src, string_value);
  }

  for (auto _ : state) {
    // Keep the otherwise-unused result observable so the call cannot be
    // optimized away.
    benchmark::DoNotOptimize(absl::CEscape(src));
  }
}
// CEscape() benchmark over input built from kStringValueNoEscape; the input
// length comes from the benchmark's first range argument (1 .. 1 << 14).
void BM_CEscape_NoEscape(benchmark::State& state) {
  const auto requested_len = state.range(0);
  CEscapeBenchmarkHelper(state, kStringValueNoEscape, requested_len);
}
BENCHMARK(BM_CEscape_NoEscape)->Range(1, 1 << 14);
// CEscape() benchmark over input built from kStringValueSomeEscaped; the
// input length comes from the benchmark's first range argument (1 .. 1 << 14).
void BM_CEscape_SomeEscaped(benchmark::State& state) {
  const auto requested_len = state.range(0);
  CEscapeBenchmarkHelper(state, kStringValueSomeEscaped, requested_len);
}
BENCHMARK(BM_CEscape_SomeEscaped)->Range(1, 1 << 14);
// CEscape() benchmark over input built from kStringValueMostEscaped; the
// input length comes from the benchmark's first range argument (1 .. 1 << 14).
void BM_CEscape_MostEscaped(benchmark::State& state) {
  const auto requested_len = state.range(0);
  CEscapeBenchmarkHelper(state, kStringValueMostEscaped, requested_len);
}
BENCHMARK(BM_CEscape_MostEscaped)->Range(1, 1 << 14);
} // namespace

View file

@ -0,0 +1,664 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/escaping.h"
#include <array>
#include <cstdio>
#include <cstring>
#include <memory>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/fixed_array.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/internal/escaping_test_common.h"
namespace {
// One test vector: a string in its escaped form paired with the raw bytes it
// corresponds to. Initialised positionally as {escaped, unescaped}.
struct epair {
std::string escaped;  // text containing escape sequences
std::string unescaped;  // the raw bytes that `escaped` represents
};
// Round-trips a set of awkward strings through every escaping flavour and
// back through CUnescape(), then exhaustively round-trips every possible
// two-byte string through CHexEscape().
TEST(CEscape, EscapeAndUnescape) {
const std::string inputs[] = {
std::string("foo\nxx\r\b\0023"),  // "\002" followed by the digit '3'
std::string(""),
std::string("abc"),
std::string("\1chad_rules"),
std::string("\1arnar_drools"),
std::string("xxxx\r\t'\"\\"),
std::string("\0xx\0", 4),  // explicit length keeps the embedded NULs
std::string("\x01\x31"),
std::string("abc\xb\x42\141bc"),
std::string("123\1\x31\x32\x33"),
std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
std::string(
"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
};
// Do this four times, once per escaping flavour: octal, hex, UTF-8-safe
// octal, and UTF-8-safe hex.
for (int kind = 0; kind < 4; kind++) {
for (const std::string& original : inputs) {
std::string escaped;
switch (kind) {
case 0:
escaped = absl::CEscape(original);
break;
case 1:
escaped = absl::CHexEscape(original);
break;
case 2:
escaped = absl::Utf8SafeCEscape(original);
break;
case 3:
escaped = absl::Utf8SafeCHexEscape(original);
break;
}
// Two-argument overload (no error reporting).
std::string unescaped_str;
EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str));
EXPECT_EQ(unescaped_str, original);
unescaped_str.erase();
// Three-argument overload: must succeed and leave `error` empty.
std::string error;
EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str, &error));
EXPECT_EQ(error, "");
// Check in-place unescaping
std::string s = escaped;
EXPECT_TRUE(absl::CUnescape(s, &s));
ASSERT_EQ(s, original);
}
}
// Check that all possible two character strings can be escaped then
// unescaped successfully.
for (int char0 = 0; char0 < 256; char0++) {
for (int char1 = 0; char1 < 256; char1++) {
char chars[2];
chars[0] = char0;
chars[1] = char1;
std::string s(chars, 2);
std::string escaped = absl::CHexEscape(s);
std::string unescaped;
EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
EXPECT_EQ(s, unescaped);
}
}
}
// Checks the exact output of each escaping function against hand-written
// expectations. Every table entry is {expected escaped form, raw input}.
TEST(CEscape, BasicEscaping) {
// Expectations for CEscape(): unprintable bytes become octal escapes.
epair oct_values[] = {
{"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
{"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
"'full of \"sound\" and \"fury\"'"},
{"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
{"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"}
};
// Expectations for CHexEscape(): unprintable bytes become hex escapes.
epair hex_values[] = {
{"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
{"I\\\'ve just seen a \\\"face\\\"",
"I've just seen a \"face\""},
{"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
{"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"}
};
// Expectations for Utf8SafeCEscape(): high-bit bytes pass through untouched.
epair utf8_oct_values[] = {
{"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
"\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
{"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
"\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
{"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
{"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
"\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
};
// Expectations for Utf8SafeCHexEscape().
epair utf8_hex_values[] = {
{"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
"\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
{"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
"\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
{"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
"\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
{"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
"\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
};
// Run each table through its matching escaping function.
for (const epair& val : oct_values) {
std::string escaped = absl::CEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
for (const epair& val : hex_values) {
std::string escaped = absl::CHexEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
for (const epair& val : utf8_oct_values) {
std::string escaped = absl::Utf8SafeCEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
for (const epair& val : utf8_hex_values) {
std::string escaped = absl::Utf8SafeCHexEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
}
// Checks CUnescape() on Unicode escapes (expected output is the UTF-8
// encoding of the code point) and on a list of malformed or out-of-range
// escapes that must be rejected.
TEST(Unescape, BasicFunction) {
// {escaped input, expected UTF-8 bytes}
epair tests[] =
{{"", ""},
{"\\u0030", "0"},
{"\\u00A3", "\xC2\xA3"},
{"\\u22FD", "\xE2\x8B\xBD"},
{"\\U00010000", "\xF0\x90\x80\x80"},
{"\\U0010FFFD", "\xF4\x8F\xBF\xBD"}};
for (const epair& val : tests) {
std::string out;
EXPECT_TRUE(absl::CUnescape(val.escaped, &out));
EXPECT_EQ(out, val.unescaped);
}
std::string bad[] = {"\\u1", // too short
"\\U1", // too short
"\\Uffffff", // exceeds 0x10ffff (largest Unicode)
"\\U00110000", // exceeds 0x10ffff (largest Unicode)
"\\uD835", // surrogate character (D800-DFFF)
"\\U0000DD04", // surrogate character (D800-DFFF)
"\\777", // exceeds 0xff
"\\xABCD"}; // exceeds 0xff
for (const std::string& e : bad) {
// The error-reporting overload must fail and describe the problem.
std::string error;
std::string out;
EXPECT_FALSE(absl::CUnescape(e, &out, &error));
EXPECT_FALSE(error.empty());
// The overload without error reporting must fail as well.
out.erase();
EXPECT_FALSE(absl::CUnescape(e, &out));
}
}
// Fixture for the CUnescape null-handling tests. It provides three escaped
// inputs containing several null escapes each (octal, hex and Unicode
// spellings) and a string that the tests use as the CUnescape() destination.
class CUnescapeTest : public testing::Test {
protected:
static const char kStringWithMultipleOctalNulls[];
static const char kStringWithMultipleHexNulls[];
static const char kStringWithMultipleUnicodeNulls[];
// Destination buffer passed to CUnescape() by each test.
std::string result_string_;
};
// Escaped input mixing one-, two- and three-digit octal null escapes with a
// literal '0' character and newline escapes.
const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
"\\0\\n" // null escape \0 plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\00\\12" // null escape \00 plus octal newline code
"\\000"; // null escape \000
// This has the same ingredients as kStringWithMultipleOctalNulls
// but with \x hex escapes instead of octal escapes.
const char CUnescapeTest::kStringWithMultipleHexNulls[] =
"\\x0\\n" // null escape \x0 plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\x00\\xa" // null escape \x00 plus hex newline code
"\\x000"; // null escape \x000
// Escaped input using the \u and \U spellings of the null code point.
const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] =
"\\u0000\\n" // short-form (4-digit) null escape plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\U00000000"; // long-form (8-digit) null escape
// The one-digit octal escape "\0" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharOctalNull) {
std::string original_string = "\\0";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// The two-digit octal escape "\00" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharOctalNull) {
std::string original_string = "\\00";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// The three-digit octal escape "\000" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharOctalNull) {
std::string original_string = "\\000";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// The one-digit hex escape "\x0" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharHexNull) {
std::string original_string = "\\x0";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// The two-digit hex escape "\x00" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharHexNull) {
std::string original_string = "\\x00";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// The three-digit hex escape "\x000" must decode to a single NUL byte (all
// three digits belong to the same escape).
TEST_F(CUnescapeTest, Unescapes3CharHexNull) {
std::string original_string = "\\x000";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// The four-digit Unicode escape "\u0000" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) {
std::string original_string = "\\u0000";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// The eight-digit Unicode escape "\U00000000" must decode to a single NUL
// byte.
TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) {
std::string original_string = "\\U00000000";
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// Explicit length: a plain "\0" literal would construct an empty string.
EXPECT_EQ(std::string("\0", 1), result_string_);
}
// Several octal null escapes of different widths in one input must each
// decode to a NUL byte without swallowing the characters around them.
TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) {
std::string original_string(kStringWithMultipleOctalNulls);
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// All escapes, including newlines and null escapes, should have been
// converted to the equivalent characters.
// The expected value is 7 bytes: NUL '\n' '0' '\n' NUL '\n' NUL.
EXPECT_EQ(std::string("\0\n"
"0\n"
"\0\n"
"\0",
7),
result_string_);
}
// Several hex null escapes of different widths in one input must each decode
// to a NUL byte without swallowing the characters around them.
TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) {
std::string original_string(kStringWithMultipleHexNulls);
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// The expected value is 7 bytes: NUL '\n' '0' '\n' NUL '\n' NUL.
EXPECT_EQ(std::string("\0\n"
"0\n"
"\0\n"
"\0",
7),
result_string_);
}
// Both Unicode spellings of the null code point in one input must each decode
// to a NUL byte.
TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) {
std::string original_string(kStringWithMultipleUnicodeNulls);
EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
// The expected value is 5 bytes: NUL '\n' '0' '\n' NUL.
EXPECT_EQ(std::string("\0\n"
"0\n"
"\0",
5),
result_string_);
}
static struct {
absl::string_view plaintext;
absl::string_view cyphertext;
} const base64_tests[] = {
// Empty string.
{{"", 0}, {"", 0}},
{{nullptr, 0},
{"", 0}}, // if length is zero, plaintext ptr must be ignored!
// Basic bit patterns;
// values obtained with "echo -n '...' | uuencode -m test"
{{"\000", 1}, "AA=="},
{{"\001", 1}, "AQ=="},
{{"\002", 1}, "Ag=="},
{{"\004", 1}, "BA=="},
{{"\010", 1}, "CA=="},
{{"\020", 1}, "EA=="},
{{"\040", 1}, "IA=="},
{{"\100", 1}, "QA=="},
{{"\200", 1}, "gA=="},
{{"\377", 1}, "/w=="},
{{"\376", 1}, "/g=="},
{{"\375", 1}, "/Q=="},
{{"\373", 1}, "+w=="},
{{"\367", 1}, "9w=="},
{{"\357", 1}, "7w=="},
{{"\337", 1}, "3w=="},
{{"\277", 1}, "vw=="},
{{"\177", 1}, "fw=="},
{{"\000\000", 2}, "AAA="},
{{"\000\001", 2}, "AAE="},
{{"\000\002", 2}, "AAI="},
{{"\000\004", 2}, "AAQ="},
{{"\000\010", 2}, "AAg="},
{{"\000\020", 2}, "ABA="},
{{"\000\040", 2}, "ACA="},
{{"\000\100", 2}, "AEA="},
{{"\000\200", 2}, "AIA="},
{{"\001\000", 2}, "AQA="},
{{"\002\000", 2}, "AgA="},
{{"\004\000", 2}, "BAA="},
{{"\010\000", 2}, "CAA="},
{{"\020\000", 2}, "EAA="},
{{"\040\000", 2}, "IAA="},
{{"\100\000", 2}, "QAA="},
{{"\200\000", 2}, "gAA="},
{{"\377\377", 2}, "//8="},
{{"\377\376", 2}, "//4="},
{{"\377\375", 2}, "//0="},
{{"\377\373", 2}, "//s="},
{{"\377\367", 2}, "//c="},
{{"\377\357", 2}, "/+8="},
{{"\377\337", 2}, "/98="},
{{"\377\277", 2}, "/78="},
{{"\377\177", 2}, "/38="},
{{"\376\377", 2}, "/v8="},
{{"\375\377", 2}, "/f8="},
{{"\373\377", 2}, "+/8="},
{{"\367\377", 2}, "9/8="},
{{"\357\377", 2}, "7/8="},
{{"\337\377", 2}, "3/8="},
{{"\277\377", 2}, "v/8="},
{{"\177\377", 2}, "f/8="},
{{"\000\000\000", 3}, "AAAA"},
{{"\000\000\001", 3}, "AAAB"},
{{"\000\000\002", 3}, "AAAC"},
{{"\000\000\004", 3}, "AAAE"},
{{"\000\000\010", 3}, "AAAI"},
{{"\000\000\020", 3}, "AAAQ"},
{{"\000\000\040", 3}, "AAAg"},
{{"\000\000\100", 3}, "AABA"},
{{"\000\000\200", 3}, "AACA"},
{{"\000\001\000", 3}, "AAEA"},
{{"\000\002\000", 3}, "AAIA"},
{{"\000\004\000", 3}, "AAQA"},
{{"\000\010\000", 3}, "AAgA"},
{{"\000\020\000", 3}, "ABAA"},
{{"\000\040\000", 3}, "ACAA"},
{{"\000\100\000", 3}, "AEAA"},
{{"\000\200\000", 3}, "AIAA"},
{{"\001\000\000", 3}, "AQAA"},
{{"\002\000\000", 3}, "AgAA"},
{{"\004\000\000", 3}, "BAAA"},
{{"\010\000\000", 3}, "CAAA"},
{{"\020\000\000", 3}, "EAAA"},
{{"\040\000\000", 3}, "IAAA"},
{{"\100\000\000", 3}, "QAAA"},
{{"\200\000\000", 3}, "gAAA"},
{{"\377\377\377", 3}, "////"},
{{"\377\377\376", 3}, "///+"},
{{"\377\377\375", 3}, "///9"},
{{"\377\377\373", 3}, "///7"},
{{"\377\377\367", 3}, "///3"},
{{"\377\377\357", 3}, "///v"},
{{"\377\377\337", 3}, "///f"},
{{"\377\377\277", 3}, "//+/"},
{{"\377\377\177", 3}, "//9/"},
{{"\377\376\377", 3}, "//7/"},
{{"\377\375\377", 3}, "//3/"},
{{"\377\373\377", 3}, "//v/"},
{{"\377\367\377", 3}, "//f/"},
{{"\377\357\377", 3}, "/+//"},
{{"\377\337\377", 3}, "/9//"},
{{"\377\277\377", 3}, "/7//"},
{{"\377\177\377", 3}, "/3//"},
{{"\376\377\377", 3}, "/v//"},
{{"\375\377\377", 3}, "/f//"},
{{"\373\377\377", 3}, "+///"},
{{"\367\377\377", 3}, "9///"},
{{"\357\377\377", 3}, "7///"},
{{"\337\377\377", 3}, "3///"},
{{"\277\377\377", 3}, "v///"},
{{"\177\377\377", 3}, "f///"},
// Random numbers: values obtained with
//
// #! /bin/bash
// dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
// od -N $1 -t o1 /tmp/bar.random
// uuencode -m test < /tmp/bar.random
//
// where $1 is the number of bytes (2, 3)
{{"\243\361", 2}, "o/E="},
{{"\024\167", 2}, "FHc="},
{{"\313\252", 2}, "y6o="},
{{"\046\041", 2}, "JiE="},
{{"\145\236", 2}, "ZZ4="},
{{"\254\325", 2}, "rNU="},
{{"\061\330", 2}, "Mdg="},
{{"\245\032", 2}, "pRo="},
{{"\006\000", 2}, "BgA="},
{{"\375\131", 2}, "/Vk="},
{{"\303\210", 2}, "w4g="},
{{"\040\037", 2}, "IB8="},
{{"\261\372", 2}, "sfo="},
{{"\335\014", 2}, "3Qw="},
{{"\233\217", 2}, "m48="},
{{"\373\056", 2}, "+y4="},
{{"\247\232", 2}, "p5o="},
{{"\107\053", 2}, "Rys="},
{{"\204\077", 2}, "hD8="},
{{"\276\211", 2}, "vok="},
{{"\313\110", 2}, "y0g="},
{{"\363\376", 2}, "8/4="},
{{"\251\234", 2}, "qZw="},
{{"\103\262", 2}, "Q7I="},
{{"\142\312", 2}, "Yso="},
{{"\067\211", 2}, "N4k="},
{{"\220\001", 2}, "kAE="},
{{"\152\240", 2}, "aqA="},
{{"\367\061", 2}, "9zE="},
{{"\133\255", 2}, "W60="},
{{"\176\035", 2}, "fh0="},
{{"\032\231", 2}, "Gpk="},
{{"\013\007\144", 3}, "Cwdk"},
{{"\030\112\106", 3}, "GEpG"},
{{"\047\325\046", 3}, "J9Um"},
{{"\310\160\022", 3}, "yHAS"},
{{"\131\100\237", 3}, "WUCf"},
{{"\064\342\134", 3}, "NOJc"},
{{"\010\177\004", 3}, "CH8E"},
{{"\345\147\205", 3}, "5WeF"},
{{"\300\343\360", 3}, "wOPw"},
{{"\061\240\201", 3}, "MaCB"},
{{"\225\333\044", 3}, "ldsk"},
{{"\215\137\352", 3}, "jV/q"},
{{"\371\147\160", 3}, "+Wdw"},
{{"\030\320\051", 3}, "GNAp"},
{{"\044\174\241", 3}, "JHyh"},
{{"\260\127\037", 3}, "sFcf"},
{{"\111\045\033", 3}, "SSUb"},
{{"\202\114\107", 3}, "gkxH"},
{{"\057\371\042", 3}, "L/ki"},
{{"\223\247\244", 3}, "k6ek"},
{{"\047\216\144", 3}, "J45k"},
{{"\203\070\327", 3}, "gzjX"},
{{"\247\140\072", 3}, "p2A6"},
{{"\124\115\116", 3}, "VE1O"},
{{"\157\162\050", 3}, "b3Io"},
{{"\357\223\004", 3}, "75ME"},
{{"\052\117\156", 3}, "Kk9u"},
{{"\347\154\000", 3}, "52wA"},
{{"\303\012\142", 3}, "wwpi"},
{{"\060\035\362", 3}, "MB3y"},
{{"\130\226\361", 3}, "WJbx"},
{{"\173\013\071", 3}, "ews5"},
{{"\336\004\027", 3}, "3gQX"},
{{"\357\366\234", 3}, "7/ac"},
{{"\353\304\111", 3}, "68RJ"},
{{"\024\264\131", 3}, "FLRZ"},
{{"\075\114\251", 3}, "PUyp"},
{{"\315\031\225", 3}, "zRmV"},
{{"\154\201\276", 3}, "bIG+"},
{{"\200\066\072", 3}, "gDY6"},
{{"\142\350\267", 3}, "Yui3"},
{{"\033\000\166", 3}, "GwB2"},
{{"\210\055\077", 3}, "iC0/"},
{{"\341\037\124", 3}, "4R9U"},
{{"\161\103\152", 3}, "cUNq"},
{{"\270\142\131", 3}, "uGJZ"},
{{"\337\076\074", 3}, "3z48"},
{{"\375\106\362", 3}, "/Uby"},
{{"\227\301\127", 3}, "l8FX"},
{{"\340\002\234", 3}, "4AKc"},
{{"\121\064\033", 3}, "UTQb"},
{{"\157\134\143", 3}, "b1xj"},
{{"\247\055\327", 3}, "py3X"},
{{"\340\142\005", 3}, "4GIF"},
{{"\060\260\143", 3}, "MLBj"},
{{"\075\203\170", 3}, "PYN4"},
{{"\143\160\016", 3}, "Y3AO"},
{{"\313\013\063", 3}, "ywsz"},
{{"\174\236\135", 3}, "fJ5d"},
{{"\103\047\026", 3}, "QycW"},
{{"\365\005\343", 3}, "9QXj"},
{{"\271\160\223", 3}, "uXCT"},
{{"\362\255\172", 3}, "8q16"},
{{"\113\012\015", 3}, "SwoN"},
// various lengths, generated by this python script:
//
// from string import lowercase as lc
// for i in range(27):
// print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
// lc[:i].encode('base64').strip())
{{"", 0}, {"", 0}},
{"a", "YQ=="},
{"ab", "YWI="},
{"abc", "YWJj"},
{"abcd", "YWJjZA=="},
{"abcde", "YWJjZGU="},
{"abcdef", "YWJjZGVm"},
{"abcdefg", "YWJjZGVmZw=="},
{"abcdefgh", "YWJjZGVmZ2g="},
{"abcdefghi", "YWJjZGVmZ2hp"},
{"abcdefghij", "YWJjZGVmZ2hpag=="},
{"abcdefghijk", "YWJjZGVmZ2hpams="},
{"abcdefghijkl", "YWJjZGVmZ2hpamts"},
{"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
{"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
{"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
{"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
{"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
{"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
{"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
{"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
{"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
{"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
{"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
{"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
{"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
{"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
};
// Exercises the Base64 and web-safe Base64 escape/unescape functions, using
// StringType as the destination string type.
//
// Three things are checked:
//   1. every entry of base64_tests round-trips through both the standard and
//      the web-safe codecs;
//   2. the long inputs from base64_strings() encode to the expected web-safe
//      text;
//   3. malformed input is rejected by both decoders and leaves the output
//      string empty.
template <typename StringType>
void TestEscapeAndUnescape() {
  // Check the short strings; this tests the math (and boundaries)
  for (const auto& tc : base64_tests) {
    // The destination is pre-filled to verify that escaping overwrites,
    // rather than appends to, whatever was there before.
    StringType encoded("this junk should be ignored");
    absl::Base64Escape(tc.plaintext, &encoded);
    EXPECT_EQ(encoded, tc.cyphertext);
    EXPECT_EQ(absl::Base64Escape(tc.plaintext), tc.cyphertext);

    StringType decoded("this junk should be ignored");
    EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
    EXPECT_EQ(decoded, tc.plaintext);

    // Derive the expected web-safe form from the standard one: '+' becomes
    // '-', '/' becomes '_', and everything from the first '=' on is dropped.
    StringType websafe(tc.cyphertext);
    // size_t index: avoids comparing a signed int against the unsigned size.
    for (size_t c = 0; c < websafe.size(); ++c) {
      if ('+' == websafe[c]) websafe[c] = '-';
      if ('/' == websafe[c]) websafe[c] = '_';
      if ('=' == websafe[c]) {
        websafe.resize(c);
        break;
      }
    }

    encoded = "this junk should be ignored";
    absl::WebSafeBase64Escape(tc.plaintext, &encoded);
    EXPECT_EQ(encoded, websafe);
    EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe);

    // Let's try the string version of the decoder
    decoded = "this junk should be ignored";
    EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
    EXPECT_EQ(decoded, tc.plaintext);
  }

  // Now try the long strings, this tests the streaming
  for (const auto& tc : absl::strings_internal::base64_strings()) {
    StringType buffer;
    absl::WebSafeBase64Escape(tc.plaintext, &buffer);
    EXPECT_EQ(tc.cyphertext, buffer);
    EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), tc.cyphertext);
  }

  // Verify the behavior when decoding bad data
  {
    absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4),
                                    absl::string_view("abc.\0", 5)};
    for (absl::string_view bad_data : data_set) {
      StringType buf;
      EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf));
      EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf));
      EXPECT_TRUE(buf.empty());
    }
  }
}
// Runs the shared Base64 / web-safe Base64 checks in TestEscapeAndUnescape
// with std::string as the destination string type.
TEST(Base64, EscapeAndUnescape) {
TestEscapeAndUnescape<std::string>();
}
// Round-trips a 3,000,000,000-byte string of 'x' through Base64Escape and
// Base64Unescape. The DISABLED_ prefix keeps it out of the default test run.
TEST(Base64, DISABLED_HugeData) {
  const size_t kSize = size_t(3) * 1000 * 1000 * 1000;
  static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
  const size_t kGroups = kSize / 3;
  const std::string huge(kSize, 'x');

  std::string escaped;
  absl::Base64Escape(huge, &escaped);

  // Build the expected encoding: each "xxx" triple encodes to "eHh4", so the
  // result is that quartet repeated once per input triple.
  std::string expected_encoding;
  expected_encoding.reserve(kGroups * 4);
  for (size_t remaining = kGroups; remaining > 0; --remaining) {
    expected_encoding += "eHh4";
  }
  EXPECT_EQ(expected_encoding, escaped);

  std::string unescaped;
  EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped));
  EXPECT_EQ(huge, unescaped);
}
// Checks HexStringToBytes on mixed-case hex (including views into larger
// buffers) and that BytesToHexString produces lowercase hex.
TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) {
  const std::string hex_mixed = "0123456789abcdefABCDEF";
  const std::string hex_only_lower = "0123456789abcdefabcdef";
  const std::string bytes_expected =
      "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF";

  // Whole-string conversion.
  const std::string bytes_result = absl::HexStringToBytes(hex_mixed);
  EXPECT_EQ(bytes_expected, bytes_result);

  // The hex digits are followed by a '?' in the buffer; the view stops just
  // short of it.
  const std::string prefix_valid = hex_mixed + "?";
  const absl::string_view prefix_view(prefix_valid.data(),
                                      prefix_valid.size() - 1);
  const std::string prefix_valid_result = absl::HexStringToBytes(prefix_view);
  EXPECT_EQ(bytes_expected, prefix_valid_result);

  // The hex digits are surrounded by '?' characters; the view covers only the
  // digits in the middle.
  const std::string infix_valid = "?" + hex_mixed + "???";
  const absl::string_view infix_view(infix_valid.data() + 1, hex_mixed.size());
  const std::string infix_valid_result = absl::HexStringToBytes(infix_view);
  EXPECT_EQ(bytes_expected, infix_valid_result);

  // Converting back yields lowercase hex digits only.
  const std::string hex_result = absl::BytesToHexString(bytes_expected);
  EXPECT_EQ(hex_only_lower, hex_result);
}
} // namespace

View file

@ -0,0 +1,156 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Character Map Class
//
// A fast, bit-vector map for 8-bit unsigned characters.
// This class is useful for non-character purposes as well.
#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// A set of 8-bit character values, stored as a 256-bit vector split across
// four 64-bit words. Character value c lives at bit (c % 64) of word (c / 64).
//
// Maps can be built at run time from character buffers, or at compile time
// through the constexpr factories (Char, FromString, Range) and the constexpr
// set operators (&, |, ~).
class Charmap {
 public:
  // Creates an empty map.
  constexpr Charmap() : m_() {}

  // Initializes with the first `len` chars of `str`. Note that NUL is not
  // treated as a terminator, but rather as a char to be set in the map.
  // A `len` of zero or less adds nothing.
  Charmap(const char* str, int len) : m_() {
    // Counting down while positive (instead of `while (len--)`) keeps a
    // negative length from walking off the end of the buffer.
    for (; len > 0; --len) SetChar(*str++);
  }

  // Initializes with a given NUL-terminated char*. NUL is treated as a
  // terminator and will not be in the charmap.
  explicit Charmap(const char* str) : m_() {
    while (*str) SetChar(*str++);
  }

  // Returns true if `c` is a member of this map.
  constexpr bool contains(unsigned char c) const {
    return (m_[c / 64] >> (c % 64)) & 0x1;
  }

  // Returns true if and only if a character exists in both maps.
  bool IntersectsWith(const Charmap& c) const {
    return !(*this & c).IsZero();
  }

  // Returns true if the map contains no characters at all.
  bool IsZero() const {
    for (uint64_t word : m_) {
      if (word != 0) return false;
    }
    return true;
  }

  // Containing only a single specified char.
  static constexpr Charmap Char(char x) {
    return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // Containing all the chars in the C-string 's'.
  // Note that this is expensively recursive because of the C++11 constexpr
  // formulation. Use only in constexpr initializers.
  static constexpr Charmap FromString(const char* s) {
    return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1));
  }

  // Containing all the chars in the closed interval [lo,hi].
  static constexpr Charmap Range(char lo, char hi) {
    return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Set intersection: chars present in both `a` and `b`.
  friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
                   a.m_[3] & b.m_[3]);
  }

  // Set union: chars present in `a`, in `b`, or in both.
  friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
                   a.m_[3] | b.m_[3]);
  }

  // Set complement: every char value that is not in `a`.
  friend constexpr Charmap operator~(const Charmap& a) {
    return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
  }

 private:
  // Builds a map directly from its four words; b0 holds chars 0-63, b1 holds
  // 64-127, b2 holds 128-191 and b3 holds 192-255.
  constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // The bits of word `word` that fall inside the closed range [lo, hi]:
  // everything below hi + 1, minus everything below lo.
  static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi,
                                         uint64_t word) {
    return OpenRangeFromZeroForWord(hi + 1, word) &
           ~OpenRangeFromZeroForWord(lo, word);
  }

  // All the chars in the specified word of the range [0, upper).
  // The two early cases (word entirely above / entirely below `upper`) also
  // guarantee that the final shift amount is never a full 64 bits.
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    return (upper <= 64 * word)
               ? 0
               : (upper >= 64 * (word + 1))
                     ? ~static_cast<uint64_t>(0)
                     : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
  }

  // The single-bit mask for `x` if it belongs to word `word`, otherwise 0.
  static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) {
    return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0;
  }

  // Adds `c` to the map.
  void SetChar(unsigned char c) {
    m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
  }

  uint64_t m_[4];
};
// Mirror the char-classifying predicates in <cctype>
// Each helper returns, as a constexpr Charmap, the set of character values
// for the correspondingly named class.

// 'A' through 'Z'.
constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }
// 'a' through 'z'.
constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }
// '0' through '9'.
constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }
// Union of the lowercase and uppercase letters.
constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); }
// Union of the digits and the letters.
constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); }
// The digits plus 'A'-'F' and 'a'-'f'.
constexpr Charmap XDigitCharmap() {
return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f');
}
// Values 0x20 (space) through 0x7e ('~').
constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }
// Tab, newline, vertical tab, form feed, carriage return and space.
constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }
// Values 0x00 through 0x7f that are not in PrintCharmap(), i.e. 0x00-0x1f
// and 0x7f.
constexpr Charmap CntrlCharmap() {
return Charmap::Range(0, 0x7f) & ~PrintCharmap();
}
// Tab and space only.
constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }
// PrintCharmap() with the members of SpaceCharmap() removed.
constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); }
// GraphCharmap() with the letters and digits removed.
constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); }
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_

View file

@ -0,0 +1,61 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cstdint>
#include "benchmark/benchmark.h"
namespace {
// Builds the fixed Charmap used by the benchmark. Membership is driven by
// eight 32-bit words, each made of one nibble pattern repeated eight times;
// character value v is a member when bit (v % 32) of word (v / 32) is set.
absl::strings_internal::Charmap MakeBenchmarkMap() {
  using absl::strings_internal::Charmap;

  const uint32_t kNibbles[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc};
  uint32_t words[8];
  for (int w = 0; w < 8; ++w) {
    // Multiplying by 0x11111111 copies the nibble into every nibble slot.
    words[w] = kNibbles[w] * static_cast<uint32_t>(0x11111111UL);
  }

  Charmap result;
  for (uint32_t word = 0; word < 8; ++word) {
    for (uint32_t bit = 0; bit < 32; ++bit) {
      const bool selected = ((words[word] >> bit) & 1) != 0;
      if (selected) {
        result = result | Charmap::Char(word * 32 + bit);
      }
    }
  }
  return result;
}
// Micro-benchmark for Charmap::contains.
//
// Each benchmark iteration performs ten lookups against the map built by
// MakeBenchmarkMap() and accumulates the results in `ops`.
void BM_Contains(benchmark::State& state) {
// Loop-body replicated 10 times to increase time per iteration.
// Argument continuously changed to avoid generating common subexpressions.
const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap();
// `c` is an unsigned char, so incrementing it past 255 wraps back to 0 and
// the lookups keep cycling through every character value.
unsigned char c = 0;
int ops = 0;
for (auto _ : state) {
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
}
// Hand the accumulated count to the benchmark library so the lookups above
// are not removed as dead code.
benchmark::DoNotOptimize(ops);
}
BENCHMARK(BM_Contains);
// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith;
// their running time is data-dependent and it is not worth characterizing
// "typical" data.
} // namespace

View file

@ -0,0 +1,172 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cctype>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
// Compile-time map containing every character value: the complement of a
// default-constructed (empty) Charmap.
constexpr absl::strings_internal::Charmap everything_map =
~absl::strings_internal::Charmap();
// Compile-time empty map.
constexpr absl::strings_internal::Charmap nothing_map{};
// Covers the runtime constructors, contains(), IntersectsWith() and IsZero().
TEST(Charmap, AllTests) {
  using absl::strings_internal::Charmap;

  // The full map holds every value and the empty maps hold none, NUL
  // included.
  const Charmap also_nothing_map("", 0);
  ASSERT_TRUE(everything_map.contains('\0'));
  ASSERT_FALSE(nothing_map.contains('\0'));
  ASSERT_FALSE(also_nothing_map.contains('\0'));
  for (int value = 1; value < 256; ++value) {
    const unsigned char ch = static_cast<unsigned char>(value);
    ASSERT_TRUE(everything_map.contains(ch));
    ASSERT_FALSE(nothing_map.contains(ch));
    ASSERT_FALSE(also_nothing_map.contains(ch));
  }

  // Only the first five chars ("&@#@^") are read, which is four distinct
  // values.
  const Charmap symbols("&@#@^!@?", 5);
  ASSERT_TRUE(symbols.contains('&'));
  ASSERT_TRUE(symbols.contains('@'));
  ASSERT_TRUE(symbols.contains('#'));
  ASSERT_TRUE(symbols.contains('^'));
  ASSERT_FALSE(symbols.contains('!'));
  ASSERT_FALSE(symbols.contains('?'));
  int cnt = 0;
  for (int value = 1; value < 256; ++value) {
    if (symbols.contains(static_cast<unsigned char>(value))) ++cnt;
  }
  ASSERT_EQ(cnt, 4);

  // The length-taking constructor reads past an embedded NUL; the C-string
  // constructor stops at it.
  const Charmap lets("^abcde", 3);
  const Charmap lets2("fghij\0klmnop", 10);
  const Charmap lets3("fghij\0klmnop");
  ASSERT_TRUE(lets2.contains('k'));
  ASSERT_FALSE(lets3.contains('k'));

  // `symbols` and `lets` share '^'; `lets` and `lets2` share nothing.
  ASSERT_TRUE(symbols.IntersectsWith(lets));
  ASSERT_FALSE(lets2.IntersectsWith(lets));
  ASSERT_TRUE(lets.IntersectsWith(symbols));
  ASSERT_FALSE(lets.IntersectsWith(lets2));

  ASSERT_TRUE(nothing_map.IsZero());
  ASSERT_FALSE(lets.IsZero());
}
namespace {
// Returns the members of `m` as a string, in increasing order of character
// value.
std::string Members(const absl::strings_internal::Charmap& m) {
  std::string members;
  for (int value = 0; value < 256; ++value) {
    const unsigned char ch = static_cast<unsigned char>(value);
    if (m.contains(ch)) {
      members.push_back(static_cast<char>(ch));
    }
  }
  return members;
}
// Returns the characters from `lo` up to and including `hi`, in order.
// No lo <= hi precondition is assumed: the walk simply increments (wrapping
// past 255 back to 0) until it lands on `hi`.
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
  std::string out(1, static_cast<char>(lo));
  for (unsigned char cur = lo; cur != hi;) {
    ++cur;
    out.push_back(static_cast<char>(cur));
  }
  return out;
}
} // namespace
// Verifies that Charmap values can be built in constexpr context through the
// factories and set operators, and that they hold the expected members.
TEST(Charmap, Constexpr) {
  using absl::strings_internal::Charmap;

  constexpr Charmap kEmpty = nothing_map;
  EXPECT_THAT(Members(kEmpty), "");

  constexpr Charmap kA = Charmap::Char('A');
  EXPECT_THAT(Members(kA), "A");

  constexpr Charmap kAZ = Charmap::Range('A', 'Z');
  EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  constexpr Charmap kIdentifier = Charmap::Range('0', '9') |
                                  Charmap::Range('A', 'Z') |
                                  Charmap::Range('a', 'z') |
                                  Charmap::Char('_');
  EXPECT_THAT(Members(kIdentifier),
              "0123456789"
              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
              "_"
              "abcdefghijklmnopqrstuvwxyz");

  constexpr Charmap kAll = everything_map;
  for (size_t i = 0; i < 256; ++i) {
    EXPECT_TRUE(kAll.contains(i)) << i;
  }

  // Repeated letters collapse; members come back sorted by character value.
  constexpr Charmap kHello = Charmap::FromString("Hello, world!");
  EXPECT_THAT(Members(kHello), " !,Hdelorw");

  // test negation and intersection
  constexpr Charmap kABC = Charmap::Range('A', 'Z') & ~Charmap::Range('D', 'Z');
  EXPECT_THAT(Members(kABC), "ABC");
}
// Checks Charmap::Range for every (lo, hi) pair with lo <= hi drawn from a
// list of boundary values.
TEST(Charmap, Range) {
  // Testing all 256 * 256 pairs takes too long, so only probe values near
  // the boundaries most likely to cause trouble.
  const std::vector<size_t> poi = {0,   1,   2,   3,   4,   7,   8,   9,  15,
                                   16,  17,  30,  31,  32,  33,  63,  64, 65,
                                   127, 128, 129, 223, 224, 225, 254, 255};
  for (size_t lo_index = 0; lo_index < poi.size(); ++lo_index) {
    const size_t lo = poi[lo_index];
    SCOPED_TRACE(lo);
    for (size_t hi_index = lo_index; hi_index < poi.size(); ++hi_index) {
      const size_t hi = poi[hi_index];
      SCOPED_TRACE(hi);
      EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(lo, hi)),
                  ClosedRangeString(lo, hi));
    }
  }
}
// Collapses an int (such as the result of a <cctype> predicate) to a bool:
// zero is false, anything else is true.
bool AsBool(int x) { return x != 0; }
// For every value 0-255, each *Charmap() helper must agree with the
// <cctype> predicate it mirrors.
TEST(CharmapCtype, Match) {
  namespace si = absl::strings_internal;
  for (int c = 0; c < 256; ++c) {
    SCOPED_TRACE(c);
    SCOPED_TRACE(static_cast<char>(c));
    EXPECT_EQ(AsBool(std::isupper(c)), si::UpperCharmap().contains(c));
    EXPECT_EQ(AsBool(std::islower(c)), si::LowerCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isdigit(c)), si::DigitCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isalpha(c)), si::AlphaCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isalnum(c)), si::AlnumCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isxdigit(c)), si::XDigitCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isprint(c)), si::PrintCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isspace(c)), si::SpaceCharmap().contains(c));
    EXPECT_EQ(AsBool(std::iscntrl(c)), si::CntrlCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isblank(c)), si::BlankCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isgraph(c)), si::GraphCharmap().contains(c));
    EXPECT_EQ(AsBool(std::ispunct(c)), si::PunctCharmap().contains(c));
  }
}
} // namespace

View file

@ -0,0 +1,359 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <algorithm>
#include <cassert>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// Table containing some large powers of 5, for fast computation.
//
// The two constants below describe the layout of the kLargePowersOfFive
// table.

// Constant step size for entries in the kLargePowersOfFive table. Each entry
// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep
// (or 5**27).
//
// In other words, the Nth entry in the table is 5**(27*N).
//
// 5**27 is the largest power of 5 that fits in 64 bits.
constexpr int kLargePowerOfFiveStep = 27;
// The largest legal index into the kLargePowersOfFive table.
//
// In other words, the largest precomputed power of 5 is 5**(27*20).
constexpr int kLargestPowerOfFiveIndex = 20;
// Table of powers of (5**27), up to (5**27)**20 == 5**540.
//
// Used to generate large powers of 5 while limiting the number of repeated
// multiplications required.
//
// clang-format off
const uint32_t kLargePowersOfFive[] = {
// 5**27 (i=1), start=0, end=2
0xfa10079dU, 0x6765c793U,
// 5**54 (i=2), start=2, end=6
0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U,
// 5**81 (i=3), start=6, end=12
0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U,
// 5**108 (i=4), start=12, end=20
0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU,
0x20d3846fU, 0x06d00f73U,
// 5**135 (i=5), start=20, end=30
0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U,
0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU,
// 5**162 (i=6), start=30, end=42
0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU,
0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU,
// 5**189 (i=7), start=42, end=56
0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U,
0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U,
0x94151217U, 0x0072e9f7U,
// 5**216 (i=8), start=56, end=72
0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU,
0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU,
0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U,
// 5**243 (i=9), start=72, end=90
0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U,
0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U,
0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U,
// 5**270 (i=10), start=90, end=110
0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U,
0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U,
0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U,
0x0d39e796U, 0x00079250U,
// 5**297 (i=11), start=110, end=132
0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U,
0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U,
0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U,
0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U,
// 5**324 (i=12), start=132, end=156
0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U,
0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU,
0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U,
0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U,
// 5**351 (i=13), start=156, end=182
0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU,
0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U,
0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U,
0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU,
0x859a4940U, 0x00007fb6U,
// 5**378 (i=14), start=182, end=210
0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U,
0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU,
0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU,
0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U,
0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U,
// 5**405 (i=15), start=210, end=240
0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U,
0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U,
0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U,
0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU,
0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U,
// 5**432 (i=16), start=240, end=272
0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU,
0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U,
0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U,
0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU,
0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U,
0x3364ea62U, 0x0000086aU,
// 5**459 (i=17), start=272, end=306
0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU,
0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U,
0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U,
0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U,
0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU,
0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U,
// 5**486 (i=18), start=306, end=342
0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU,
0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U,
0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU,
0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U,
0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU,
0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU,
// 5**513 (i=19), start=342, end=380
0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U,
0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU,
0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU,
0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U,
0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U,
0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU,
0xf0046d27U, 0x0000008dU,
// 5**540 (i=20), start=380, end=420
0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU,
0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U,
0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U,
0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U,
0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U,
0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU,
0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U,
};
// clang-format on
// Returns a pointer to the first word of the big integer (5**27)**i inside
// kLargePowersOfFive. i must be between 1 and 20, inclusive.
//
// Entry k occupies 2*k words, so entry i begins after 2*(1 + ... + (i-1)),
// which is i*(i-1), words.
const uint32_t* LargePowerOfFiveData(int i) {
  const int first_word = i * (i - 1);
  return kLargePowersOfFive + first_word;
}
// Returns how many words the big integer (5**27)**i occupies: two words per
// step. i must be between 1 and 20, inclusive.
int LargePowerOfFiveSize(int i) {
  const int kWordsPerStep = 2;
  return kWordsPerStep * i;
}
} // namespace
// Powers of five indexed by exponent: kFiveToNth[n] == 5**n for n in
// [0, 13]. 5**13 is the last entry because 5**14 no longer fits in 32 bits.
ABSL_DLL const uint32_t kFiveToNth[14] = {
1, 5, 25, 125, 625, 3125, 15625,
78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125,
};
// Powers of ten indexed by exponent: kTenToNth[n] == 10**n for n in [0, 9].
// 10**9 is the last entry because 10**10 no longer fits in 32 bits.
ABSL_DLL const uint32_t kTenToNth[10] = {
1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
};
// Loads the mantissa described by `fp` into this BigUnsigned (after resetting
// it to zero) and returns the exponent that goes with the loaded value.
//
// If `fp` carries a digit subrange, the digits are re-read through
// ReadDigits() (limited by `significant_digits`) and the result is
// fp.literal_exponent plus the adjustment ReadDigits() reports. Otherwise
// fp.mantissa is stored directly and fp.exponent is returned.
template <int max_words>
int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp,
                                              int significant_digits) {
  SetToZero();
  assert(fp.type == FloatType::kNumber);

  if (fp.subrange_begin != nullptr) {
    const int exponent_adjust =
        ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits);
    return fp.literal_exponent + exponent_adjust;
  }

  // We already exactly parsed the mantissa, so no more work is necessary:
  // split it into its low and high 32-bit halves.
  words_[0] = fp.mantissa & 0xffffffffu;
  words_[1] = fp.mantissa >> 32;
  // The size is the index of the highest nonzero word plus one; it stays at
  // the value SetToZero() left when the mantissa is zero.
  if (words_[1]) {
    size_ = 2;
  } else if (words_[0]) {
    size_ = 1;
  }
  return fp.exponent;
}
// Parses the decimal number in [begin, end), which may contain one decimal
// point, into *this as an integer, keeping at most `significant_digits`
// digits.
//
// Leading zeroes and trailing zeroes are stripped before any digit is read.
// Returns the power-of-ten adjustment that relates the stored integer to the
// parsed text: it is increased for integer digits that were dropped and
// decreased for every fractional digit that was folded into the integer.
template <int max_words>
int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end,
int significant_digits) {
assert(significant_digits <= Digits10() + 1);
SetToZero();
bool after_decimal_point = false;
// Discard any leading zeroes before the decimal point
while (begin < end && *begin == '0') {
++begin;
}
int dropped_digits = 0;
// Discard any trailing zeroes. These may or may not be after the decimal
// point.
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
if (begin < end && *std::prev(end) == '.') {
// If the string ends in '.', either before or after dropping zeroes, then
// drop the decimal point and look for more digits to drop.
dropped_digits = 0;
--end;
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
} else if (dropped_digits) {
// We dropped digits, and aren't sure if they're before or after the decimal
// point. Figure that out now.
const char* dp = std::find(begin, end, '.');
if (dp != end) {
// The dropped trailing digits were after the decimal point, so don't
// count them.
dropped_digits = 0;
}
}
// Any non-fraction digits we dropped need to be accounted for in our exponent
// adjustment.
int exponent_adjust = dropped_digits;
// Digits are accumulated in `queued` and folded into *this in batches of
// kMaxSmallPowerOfTen, so each batch costs one MultiplyBy() and one
// AddWithCarry() rather than one of each per digit.
uint32_t queued = 0;
int digits_queued = 0;
for (; begin != end && significant_digits > 0; ++begin) {
if (*begin == '.') {
after_decimal_point = true;
continue;
}
if (after_decimal_point) {
// For each fractional digit we emit in our parsed integer, adjust our
// decimal exponent to compensate.
--exponent_adjust;
}
int digit = (*begin - '0');
--significant_digits;
if (significant_digits == 0 && std::next(begin) != end &&
(digit == 0 || digit == 5)) {
// If this is the very last significant digit, but insignificant digits
// remain, we know that the last of those remaining insignificant digits
// is nonzero. (If it wasn't, we would have stripped it before we got
// here.) So if this final digit is a 0 or 5, adjust it upward by 1.
//
// This adjustment is what allows incredibly large mantissas ending in
// 500000...000000000001 to correctly round up, rather than to nearest.
++digit;
}
queued = 10 * queued + digit;
++digits_queued;
if (digits_queued == kMaxSmallPowerOfTen) {
MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]);
AddWithCarry(0, queued);
queued = digits_queued = 0;
}
}
// Encode any remaining digits.
if (digits_queued) {
MultiplyBy(kTenToNth[digits_queued]);
AddWithCarry(0, queued);
}
// If any insignificant digits remain, we will drop them. But if we have not
// yet read the decimal point, then we have to adjust the exponent to account
// for the dropped digits.
if (begin < end && !after_decimal_point) {
// This call to std::find will result in a pointer either to the decimal
// point, or to the end of our buffer if there was none.
//
// Either way, [begin, decimal_point) will contain the set of dropped digits
// that require an exponent adjustment.
const char* decimal_point = std::find(begin, end, '.');
exponent_adjust += (decimal_point - begin);
}
return exponent_adjust;
}
// Returns 5**n for non-negative n, truncated to max_words words when the
// value does not fit.
//
// The result is seeded from the table of large powers of five whenever n is
// at least kLargePowerOfFiveStep: the largest usable table entry is taken
// repeatedly (copied on the first pass, multiplied in afterwards), and the
// remaining small exponent is applied with MultiplyByFiveToTheNth().
template <int max_words>
/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth(
    int n) {
  BigUnsigned answer(1u);

  // Seed from the table of large powers, if possible.
  bool first_pass = true;
  while (n >= kLargePowerOfFiveStep) {
    const int big_power =
        std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
    const uint32_t* const power_words = LargePowerOfFiveData(big_power);
    const int power_size = LargePowerOfFiveSize(big_power);
    if (first_pass) {
      // Just copy, rather than multiplying by 1.
      //
      // A table entry holds 2 * big_power words, which can be more than
      // max_words for the smaller instantiation.  Copy only the low words that
      // fit so that overflow truncates instead of writing past words_ and
      // leaving size_ larger than max_words.
      const int copied_size = std::min(power_size, max_words);
      std::copy(power_words, power_words + copied_size, answer.words_);
      answer.size_ = copied_size;
      first_pass = false;
    } else {
      // MultiplyBy() bounds its steps by max_words, so a wide operand is
      // handled there without further clamping.
      answer.MultiplyBy(power_size, power_words);
    }
    n -= kLargePowerOfFiveStep * big_power;
  }
  answer.MultiplyByFiveToTheNth(n);
  return answer;
}
// Computes word `step` of the in-place product of *this (whose size before the
// multiplication started was `original_size`) and the operand
// [other_words, other_words + other_size).
//
// Every pair of indices (i, j) with i + j == step contributes
// words_[i] * other_words[j].  The low 32 bits of the running sum become the
// new words_[step]; everything above bit 31 is collected separately and added,
// with carry, starting at words_[step + 1].
template <int max_words>
void BigUnsigned<max_words>::MultiplyStep(int original_size,
                                          const uint32_t* other_words,
                                          int other_size, int step) {
  int lhs_index = std::min(original_size - 1, step);
  int rhs_index = step - lhs_index;
  uint64_t low_word = 0;
  uint64_t high_sum = 0;
  while (lhs_index >= 0 && rhs_index < other_size) {
    uint64_t partial = words_[lhs_index];
    partial *= other_words[rhs_index];
    low_word += partial;
    // Move whatever spilled past 32 bits into the carry accumulator.
    high_sum += (low_word >> 32);
    low_word &= 0xffffffff;
    --lhs_index;
    ++rhs_index;
  }
  AddWithCarry(step + 1, high_sum);
  words_[step] = static_cast<uint32_t>(low_word & 0xffffffff);
  if (low_word > 0 && size_ <= step) {
    size_ = step + 1;
  }
}
// Renders this integer in decimal.  A scratch copy is divided by 10 until it
// is exhausted, which yields the digits least significant first; they are
// emitted in the opposite order.  The value 0 is rendered as "0".
template <int max_words>
std::string BigUnsigned<max_words>::ToString() const {
  BigUnsigned<max_words> remaining = *this;
  std::string reversed_digits;
  while (remaining.size() > 0) {
    const int digit = remaining.DivMod<10>();
    reversed_digits.push_back('0' + digit);
  }
  if (reversed_digits.empty()) {
    return std::string(1, '0');
  }
  return std::string(reversed_digits.rbegin(), reversed_digits.rend());
}
// Explicit instantiation definitions for the two values of max_words that the
// class accepts (see the static_assert in BigUnsigned).  They provide the
// member functions that are defined in this file rather than in the header.
template class BigUnsigned<4>;
template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,423 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/config.h"
#include "absl/strings/ascii.h"
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The largest power that 5 can be raised to, and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfFive = 13;
// The largest power that 10 can be raised to, and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfTen = 9;
// Lookup tables of small powers, indexed by exponent: one entry for every
// exponent from 0 up to and including the corresponding kMaxSmallPowerOf*
// constant.
ABSL_DLL extern const uint32_t
kFiveToNth[kMaxSmallPowerOfFive + 1];
ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
// Large, fixed-width unsigned integer.
//
// Exact rounding for decimal-to-binary floating point conversion requires very
// large integer math, but a design goal of absl::from_chars is to avoid
// allocating memory. The integer precision needed for decimal-to-binary
// conversions is large but bounded, so a huge fixed-width integer class
// suffices.
//
// This is an intentionally limited big integer class. Only needed operations
// are implemented. All storage lives in an array data member, and all
// arithmetic is done in-place, to avoid requiring separate storage for operand
// and result.
//
// This is an internal class. Some methods live in the .cc file, and are
// instantiated only for the values of max_words we need.
template <int max_words>
class BigUnsigned {
 public:
  static_assert(max_words == 4 || max_words == 84,
                "unsupported max_words value");

  // Constructs the value 0.
  BigUnsigned() : size_(0), words_{} {}

  // Constructs a BigUnsigned holding `v`, stored as a low and a high 32-bit
  // word.
  explicit constexpr BigUnsigned(uint64_t v)
      : size_((v >> 32) ? 2 : v ? 1 : 0),
        words_{static_cast<uint32_t>(v & 0xffffffffu),
               static_cast<uint32_t>(v >> 32)} {}

  // Constructs a BigUnsigned from the given string_view containing a decimal
  // value.  If the input string is not a decimal integer, constructs a 0
  // instead.
  explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
    // Check for valid input, returning a 0 otherwise.  This is reasonable
    // behavior only because this constructor is for unit tests.
    if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
        sv.empty()) {
      return;
    }
    int exponent_adjust =
        ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
    if (exponent_adjust > 0) {
      MultiplyByTenToTheNth(exponent_adjust);
    }
  }

  // Loads the mantissa value of a previously-parsed float.
  //
  // Returns the associated decimal exponent.  The value of the parsed float is
  // exactly *this * 10**exponent.
  int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);

  // Returns the number of decimal digits of precision this type provides.  All
  // numbers with this many decimal digits or fewer are representable by this
  // type.
  //
  // Analogous to std::numeric_limits<BigUnsigned>::digits10.
  static constexpr int Digits10() {
    // 9975007/1035508 is very slightly less than log10(2**32).
    return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
  }

  // Shifts left by the given number of bits.  Bits shifted past the top word
  // are discarded.  Non-positive counts leave the value unchanged.
  void ShiftLeft(int count) {
    if (count > 0) {
      const int word_shift = count / 32;
      if (word_shift >= max_words) {
        SetToZero();
        return;
      }
      size_ = (std::min)(size_ + word_shift, max_words);
      count %= 32;
      if (count == 0) {
        // Whole-word shift: move the words up without touching their bits.
        std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
      } else {
        // Work from the top down so that every source word is read before it
        // is overwritten.
        for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
          words_[i] = (words_[i - word_shift] << count) |
                      (words_[i - word_shift - 1] >> (32 - count));
        }
        words_[word_shift] = words_[0] << count;
        // Grow size_ if necessary.
        if (size_ < max_words && words_[size_]) {
          ++size_;
        }
      }
      std::fill(words_, words_ + word_shift, 0u);
    }
  }

  // Multiplies by v in-place.  Carry out of the top word is discarded.
  void MultiplyBy(uint32_t v) {
    if (size_ == 0 || v == 1) {
      return;
    }
    if (v == 0) {
      SetToZero();
      return;
    }
    const uint64_t factor = v;
    uint64_t window = 0;
    for (int i = 0; i < size_; ++i) {
      window += factor * words_[i];
      words_[i] = window & 0xffffffff;
      window >>= 32;
    }
    // If carry bits remain and there's space for them, grow size_.
    if (window && size_ < max_words) {
      words_[size_] = window & 0xffffffff;
      ++size_;
    }
  }

  // Multiplies by the 64-bit value v in-place, using the cheaper 32-bit path
  // when the high word of v is zero.
  void MultiplyBy(uint64_t v) {
    uint32_t words[2];
    words[0] = static_cast<uint32_t>(v);
    words[1] = static_cast<uint32_t>(v >> 32);
    if (words[1] == 0) {
      MultiplyBy(words[0]);
    } else {
      MultiplyBy(2, words);
    }
  }

  // Multiplies in place by 5 to the power of n.  n must be non-negative.
  void MultiplyByFiveToTheNth(int n) {
    while (n >= kMaxSmallPowerOfFive) {
      MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
      n -= kMaxSmallPowerOfFive;
    }
    if (n > 0) {
      MultiplyBy(kFiveToNth[n]);
    }
  }

  // Multiplies in place by 10 to the power of n.  n must be non-negative.
  void MultiplyByTenToTheNth(int n) {
    if (n > kMaxSmallPowerOfTen) {
      // For large n, raise to a power of 5, then shift left by the same amount.
      // (10**n == 5**n * 2**n.)  This requires fewer multiplications overall.
      MultiplyByFiveToTheNth(n);
      ShiftLeft(n);
    } else if (n > 0) {
      // We can do this more quickly for very small N by using a single
      // multiplication.
      MultiplyBy(kTenToNth[n]);
    }
  }

  // Returns the value of 5**n, for non-negative n.  This implementation uses
  // a lookup table, and is faster than seeding a BigUnsigned with 1 and calling
  // MultiplyByFiveToTheNth().
  static BigUnsigned FiveToTheNth(int n);

  // Multiplies by another BigUnsigned, in-place.
  template <int M>
  void MultiplyBy(const BigUnsigned<M>& other) {
    MultiplyBy(other.size(), other.words());
  }

  // Resets the value to 0, clearing every word that may have been in use.
  void SetToZero() {
    std::fill(words_, words_ + size_, 0u);
    size_ = 0;
  }

  // Returns the value of the nth word of this BigUnsigned.  This is
  // range-checked, and returns 0 on out-of-bounds accesses.
  uint32_t GetWord(int index) const {
    if (index < 0 || index >= size_) {
      return 0;
    }
    return words_[index];
  }

  // Returns this integer as a decimal string.  This is not used in the decimal-
  // to-binary conversion; it is intended to aid in testing.
  std::string ToString() const;

  // Number of words that may hold significant data (see size_ below).
  int size() const { return size_; }
  // Pointer to the least significant word.
  const uint32_t* words() const { return words_; }

 private:
  // Reads the number between [begin, end), possibly containing a decimal point,
  // into this BigUnsigned.
  //
  // Callers are required to ensure [begin, end) contains a valid number, with
  // one or more decimal digits and at most one decimal point.  This routine
  // will behave unpredictably if these preconditions are not met.
  //
  // Only the first `significant_digits` digits are read.  Digits beyond this
  // limit are "sticky": If the final significant digit is 0 or 5, and if any
  // dropped digit is nonzero, then that final significant digit is adjusted up
  // to 1 or 6.  This adjustment allows for precise rounding.
  //
  // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
  // account for the decimal point and for dropped significant digits.  After
  // this function returns,
  //   actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
  int ReadDigits(const char* begin, const char* end, int significant_digits);

  // Performs a step of big integer multiplication.  This computes the full
  // (64-bit-wide) values that should be added at the given index (step), and
  // adds to that location in-place.
  //
  // Because our math all occurs in place, we must multiply starting from the
  // highest word working downward.  (This is a bit more expensive due to the
  // extra carries involved.)
  //
  // This must be called in steps, for each word to be calculated, starting from
  // the high end and working down to 0.  The first value of `step` should be
  //   `std::min(original_size + other.size_ - 2, max_words - 1)`.
  // The reason for this expression is that multiplying the i'th word from one
  // multiplicand and the j'th word of another multiplicand creates a
  // two-word-wide value to be stored at the (i+j)'th element.  The highest
  // word indices we will access are `original_size - 1` from this object, and
  // `other.size_ - 1` from our operand.  Therefore,
  // `original_size + other.size_ - 2` is the first step we should calculate,
  // but limited on an upper bound by max_words.
  // Working from high-to-low ensures that we do not overwrite the portions of
  // the initial value of *this which are still needed for later steps.
  //
  // Once called with step == 0, *this contains the result of the
  // multiplication.
  //
  // `original_size` is the size_ of *this before the first call to
  // MultiplyStep().  `other_words` and `other_size` are the contents of our
  // operand.  `step` is the step to perform, as described above.
  void MultiplyStep(int original_size, const uint32_t* other_words,
                    int other_size, int step);

  // Multiplies in place by the integer stored, least significant word first,
  // in [other_words, other_words + other_size).
  void MultiplyBy(int other_size, const uint32_t* other_words) {
    if (other_size == 0) {
      // An operand with no words is the value 0.  The step loop below would
      // start at original_size - 2 and therefore never clear the top word of
      // *this, so handle this case explicitly.
      SetToZero();
      return;
    }
    const int original_size = size_;
    const int first_step =
        (std::min)(original_size + other_size - 2, max_words - 1);
    for (int step = first_step; step >= 0; --step) {
      MultiplyStep(original_size, other_words, other_size, step);
    }
  }

  // Adds a 32-bit value to the index'th word, with carry.
  void AddWithCarry(int index, uint32_t value) {
    if (value) {
      while (index < max_words && value > 0) {
        words_[index] += value;
        // carry if we overflowed in this word:
        if (value > words_[index]) {
          value = 1;
          ++index;
        } else {
          value = 0;
        }
      }
      size_ = (std::min)(max_words, (std::max)(index + 1, size_));
    }
  }

  // Adds a 64-bit value at the index'th word, with carry: the low half goes to
  // words_[index] and the high half (plus any carry) to words_[index + 1].
  void AddWithCarry(int index, uint64_t value) {
    if (value && index < max_words) {
      uint32_t high = value >> 32;
      uint32_t low = value & 0xffffffff;
      words_[index] += low;
      if (words_[index] < low) {
        ++high;
        if (high == 0) {
          // Carry from the low word caused our high word to overflow.
          // Short circuit here to do the right thing.
          AddWithCarry(index + 2, static_cast<uint32_t>(1));
          return;
        }
      }
      if (high > 0) {
        AddWithCarry(index + 1, high);
      } else {
        // Normally 32-bit AddWithCarry() sets size_, but since we don't call
        // it when `high` is 0, do it ourselves here.
        size_ = (std::min)(max_words, (std::max)(index + 1, size_));
      }
    }
  }

  // Divide this in place by a constant divisor.  Returns the remainder of the
  // division.
  template <uint32_t divisor>
  uint32_t DivMod() {
    uint64_t accumulator = 0;
    for (int i = size_ - 1; i >= 0; --i) {
      accumulator <<= 32;
      accumulator += words_[i];
      // The accumulator entered this iteration holding a remainder smaller
      // than divisor, so accumulator / divisor will never overflow a uint32_t
      // in this loop.
      words_[i] = static_cast<uint32_t>(accumulator / divisor);
      accumulator = accumulator % divisor;
    }
    // Drop high words that the division reduced to zero.
    while (size_ > 0 && words_[size_ - 1] == 0) {
      --size_;
    }
    return static_cast<uint32_t>(accumulator);
  }

  // The number of elements in words_ that may carry significant values.
  // All elements beyond this point are 0.
  //
  // When size_ is 0, this BigUnsigned stores the value 0.
  // When size_ is nonzero, it is *not* guaranteed that words_[size_ - 1] is
  // nonzero.  This can occur due to overflow truncation.
  // In particular, x.size_ != y.size_ does *not* imply x != y.
  int size_;
  // The value, least significant word first.
  uint32_t words_[max_words];
};
// Three-way comparison of two big integers, which may have different widths.
//
// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.  Words are
// examined from the most significant position either operand may use down to
// word 0; GetWord() supplies 0 for positions an operand does not have.
template <int N, int M>
int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  int index = (std::max)(lhs.size(), rhs.size());
  while (index-- > 0) {
    const uint32_t left = lhs.GetWord(index);
    const uint32_t right = rhs.GetWord(index);
    if (left != right) {
      return left < right ? -1 : 1;
    }
  }
  return 0;
}
// Two big integers are equal when every word position holds the same value;
// GetWord() supplies 0 for positions beyond an operand's size.
template <int N, int M>
bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int word_count = (std::max)(lhs.size(), rhs.size());
  int index = 0;
  while (index < word_count && lhs.GetWord(index) == rhs.GetWord(index)) {
    ++index;
  }
  // Reaching word_count means no mismatching word was found.
  return index >= word_count;
}
// Negation of operator==.
template <int N, int M>
bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const bool same_value = (lhs == rhs);
  return !same_value;
}
// True when lhs is strictly smaller than rhs.  Compare() only ever returns
// -1, 0 or 1, so a negative result means exactly -1.
template <int N, int M>
bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int ordering = Compare(lhs, rhs);
  return ordering < 0;
}
// True when lhs is strictly greater than rhs, expressed through operator<
// with the operands swapped.
template <int N, int M>
bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const bool rhs_is_smaller = (rhs < lhs);
  return rhs_is_smaller;
}
// True unless rhs is strictly smaller than lhs.
template <int N, int M>
bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  if (rhs < lhs) {
    return false;
  }
  return true;
}
// True unless lhs is strictly smaller than rhs.
template <int N, int M>
bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  if (lhs < rhs) {
    return false;
  }
  return true;
}
// Streams the decimal representation of `num` (as produced by ToString()) to
// `os`.  For testing purposes only.
template <int N>
std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
  os << num.ToString();
  return os;
}
// Explicit instantiation declarations for the sizes of BigUnsigned that we
// are using.  These are the same two sizes the class's static_assert accepts.
// As noted on the class, some member functions are defined in the .cc file and
// are instantiated there for these sizes only.
//
// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
// still bigger than an int128, and 84 is a large value we will want to use
// in the from_chars implementation.
//
// Comments justifying the use of 84 belong in the from_chars implementation,
// and will be added in a follow-up CL.
extern template class BigUnsigned<4>;
extern template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_

View file

@ -0,0 +1,205 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <string>
#include "gtest/gtest.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Exercises ShiftLeft(): an exact value, truncation at the top of both
// supported widths, and agreement between one large shift and many
// single-bit shifts.
TEST(BigUnsigned, ShiftLeft) {
  {
    // 3 * 2**100 must equal the value parsed from its decimal expansion.
    BigUnsigned<4> shifted(3u);
    shifted.ShiftLeft(100);
    EXPECT_EQ(shifted, BigUnsigned<4>("3802951800684688204490109616128"));
  }
  {
    // Overflow must be truncated.  15 occupies 4 bits and BigUnsigned<4> has
    // 128 bits, so shifting by 125 drops the top bit of 15 and leaves the same
    // value as 7 << 125.  3 << 125 keeps all of its bits and must differ.
    BigUnsigned<4> fifteen(15u);
    BigUnsigned<4> seven(7u);
    BigUnsigned<4> three(3u);
    fifteen.ShiftLeft(125);
    seven.ShiftLeft(125);
    three.ShiftLeft(125);
    EXPECT_EQ(fifteen, seven);
    EXPECT_NE(fifteen, three);
  }
  {
    // The same truncation check at the top of the 84-word integer.
    BigUnsigned<84> fifteen(15u);
    BigUnsigned<84> seven(7u);
    BigUnsigned<84> three(3u);
    fifteen.ShiftLeft(84 * 32 - 3);
    seven.ShiftLeft(84 * 32 - 3);
    three.ShiftLeft(84 * 32 - 3);
    EXPECT_EQ(fifteen, seven);
    EXPECT_NE(fifteen, three);
  }
  {
    // Shifting one bit at a time must agree with a single shift by the same
    // total, for every total up to the full width (to catch corner cases).
    const std::string seed = "1234567890123456789012345678901234567890";
    BigUnsigned<84> incremental(seed);
    for (int total_shift = 1; total_shift <= 84 * 32; ++total_shift) {
      incremental.ShiftLeft(1);
      BigUnsigned<84> at_once(seed);
      at_once.ShiftLeft(total_shift);
      EXPECT_EQ(incremental, at_once);
    }
    // By now every bit has been shifted off the top.
    EXPECT_EQ(incremental, BigUnsigned<84>(0u));
  }
}
// Builds 100! with repeated 32-bit multiplications and compares it with the
// value parsed from its decimal expansion.
TEST(BigUnsigned, MultiplyByUint32) {
  const BigUnsigned<84> factorial_100(
      "933262154439441526816992388562667004907159682643816214685929638952175999"
      "932299156089414639761565182862536979208272237582511852109168640000000000"
      "00000000000000");
  BigUnsigned<84> product(1u);
  uint32_t factor = 1;
  while (factor <= 100) {
    product.MultiplyBy(factor);
    ++factor;
  }
  EXPECT_EQ(product, BigUnsigned<84>(factorial_100));
}
// Exercises multiplication of one BigUnsigned by another.
TEST(BigUnsigned, MultiplyByBigUnsigned) {
  {
    // Split the factors of 200! into the odd ones and the even ones, build
    // each partial product separately, and multiply the two together.
    const BigUnsigned<84> factorial_200(
        "7886578673647905035523632139321850622951359776871732632947425332443594"
        "4996340334292030428401198462390417721213891963883025764279024263710506"
        "1926624952829931113462857270763317237396988943922445621451664240254033"
        "2918641312274282948532775242424075739032403212574055795686602260319041"
        "7032406235170085879617892222278962370389737472000000000000000000000000"
        "0000000000000000000000000");
    BigUnsigned<84> evens(1u);
    BigUnsigned<84> odds(1u);
    for (uint32_t odd_factor = 1; odd_factor < 200; odd_factor += 2) {
      const uint32_t even_factor = odd_factor + 1;
      odds.MultiplyBy(odd_factor);
      evens.MultiplyBy(even_factor);
    }
    evens.MultiplyBy(odds);
    EXPECT_EQ(evens, factorial_200);
  }
  {
    // Multiply 3 * 10**a by 2 * 10**b for a grid of exponents and expect
    // 6 * 10**(a + b).
    for (int a = 0; a < 700; a += 25) {
      SCOPED_TRACE(a);
      BigUnsigned<84> three_times_power("3" + std::string(a, '0'));
      for (int b = 0; b < (700 - a); b += 25) {
        SCOPED_TRACE(b);
        BigUnsigned<84> two_times_power("2" + std::string(b, '0'));
        BigUnsigned<84> expected_product("6" + std::string(a + b, '0'));
        two_times_power.MultiplyBy(three_times_power);
        EXPECT_EQ(two_times_power, expected_product);
      }
    }
  }
}
// Checks that multiplication overflow truncates predictably.
TEST(BigUnsigned, MultiplyByOverflow) {
  {
    // 2**128 - 1 has every bit of a BigUnsigned<4> set.  Modulo 2**128 it is
    // congruent to -1, so its square modulo 2**128 must be 1.
    BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455");
    all_bits_on.MultiplyBy(all_bits_on);
    EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u));
  }
  {
    // Multiplying a large value by 2**50 must give the same (truncated)
    // result as shifting it left by 50 bits.
    BigUnsigned<4> shifted("12345678901234567890123456789012345678");
    BigUnsigned<4> multiplied("12345678901234567890123456789012345678");
    BigUnsigned<4> two_to_fiftieth(1u);
    two_to_fiftieth.ShiftLeft(50);
    shifted.ShiftLeft(50);
    multiplied.MultiplyBy(two_to_fiftieth);
    EXPECT_EQ(shifted, multiplied);
  }
}
// Cross-checks the ways of obtaining powers of five against each other, for
// exponents up to and including those that overflow 84 words.
TEST(BigUnsigned, FiveToTheNth) {
  {
    // MultiplyByFiveToTheNth(i) must agree with multiplying by 5 i times.
    for (int exponent = 0; exponent < 1160; ++exponent) {
      SCOPED_TRACE(exponent);
      BigUnsigned<84> batched(123u);
      BigUnsigned<84> one_at_a_time(123u);
      batched.MultiplyByFiveToTheNth(exponent);
      for (int count = 0; count < exponent; count++) {
        one_at_a_time.MultiplyBy(5u);
      }
      EXPECT_EQ(batched, one_at_a_time);
    }
  }
  {
    // The table-driven static FiveToTheNth(i) must agree with multiplying 1
    // in place by 5**i.
    for (int exponent = 0; exponent < 1160; ++exponent) {
      SCOPED_TRACE(exponent);
      BigUnsigned<84> in_place(1u);
      in_place.MultiplyByFiveToTheNth(exponent);
      BigUnsigned<84> from_table = BigUnsigned<84>::FiveToTheNth(exponent);
      EXPECT_EQ(in_place, from_table);
    }
  }
}
// Checks MultiplyByTenToTheNth() two independent ways.
TEST(BigUnsigned, TenToTheNth) {
  {
    // MultiplyByTenToTheNth(i) must agree with multiplying by 10 i times.
    for (int exponent = 0; exponent < 800; ++exponent) {
      SCOPED_TRACE(exponent);
      BigUnsigned<84> batched(123u);
      BigUnsigned<84> one_at_a_time(123u);
      batched.MultiplyByTenToTheNth(exponent);
      for (int count = 0; count < exponent; count++) {
        one_at_a_time.MultiplyBy(10u);
      }
      EXPECT_EQ(batched, one_at_a_time);
    }
  }
  {
    // Alternate approach using the decimal parser: 135 * 10**i must equal the
    // value parsed from "135" followed by i zeroes.
    for (int exponent = 0; exponent < 200; ++exponent) {
      SCOPED_TRACE(exponent);
      BigUnsigned<84> multiplied(135u);
      multiplied.MultiplyByTenToTheNth(exponent);
      BigUnsigned<84> parsed("135" + std::string(exponent, '0'));
      EXPECT_EQ(multiplied, parsed);
    }
  }
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,504 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/charconv.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include "absl/strings/internal/memutil.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
// This number was chosen for multiple reasons.
//
// (a) First, for whatever integer type we choose to represent the mantissa, we
// want to choose the largest possible number of decimal digits for that integer
// type. We are using uint64_t, which can express any 19-digit unsigned
// integer.
//
// (b) Second, we need to parse enough digits that the binary value of any
// mantissa we capture has more bits of resolution than the mantissa
// representation in the target float. Our algorithm requires at least 3 bits
// of headway, but 19 decimal digits give a little more than that.
//
// The following static assertions verify the above comments:
// Maximum number of significant decimal mantissa digits read by
// ParseFloat<10>.
constexpr int kDecimalMantissaDigitsMax = 19;
static_assert(std::numeric_limits<uint64_t>::digits10 ==
kDecimalMantissaDigitsMax,
"(a) above");
// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact");
static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact");
// The lowest valued 19-digit decimal mantissa we can read still contains
// sufficient information to reconstruct a binary mantissa.
//
// The left-hand literal is 10**18, the smallest 19-digit value; the shift
// amount is the 53-bit double mantissa plus the 3 bits of headway from (b).
static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above");
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
//
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
// to maximize the number of scanned hex digits to improve our conversion. What
// is required is to scan two more bits than the mantissa can represent, so that
// we always round correctly.
//
// (One extra bit does not suffice to perform correct rounding, since a number
// exactly halfway between two representable floats has unique rounding rules,
// so we need to differentiate between a "halfway between" number and a "closer
// to the larger value" number.)
// Maximum number of significant hexadecimal mantissa digits read by
// ParseFloat<16>.
constexpr int kHexadecimalMantissaDigitsMax = 15;
// The minimum number of significant bits that will be read from
// kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since
// the most significant digit can be a "1", which only contributes a single
// significant bit.
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
4 * kHexadecimalMantissaDigitsMax - 3;
// The guaranteed precision must exceed the double mantissa width plus the two
// extra bits needed for correct rounding.
static_assert(kGuaranteedHexadecimalMantissaBitPrecision >
std::numeric_limits<double>::digits + 2,
"kHexadecimalMantissaDigitsMax too small");
// We also impose a limit on the number of significant digits we will read from
// an exponent, to avoid having to deal with integer overflow. We use 9 for
// this purpose.
//
// If we read a 9 digit exponent, the end result of the conversion will
// necessarily be infinity or zero, depending on the sign of the exponent.
// Therefore we can just drop extra digits on the floor without any extra
// logic.
// Maximum number of significant exponent digits that will be read.
constexpr int kDecimalExponentDigitsMax = 9;
// Every decimal value with that many digits must be representable in an int.
static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax,
"int type too small");
// To avoid incredibly large inputs causing integer overflow for our exponent,
// we impose an arbitrary but very large limit on the number of significant
// digits we will accept. The implementation refuses to match a string with
// more consecutive significant mantissa digits than this.
// Longest accepted run of consecutive significant decimal mantissa digits
// (fifty million).
constexpr int kDecimalDigitLimit = 50000000;
// Corresponding limit for hexadecimal digit inputs. This is one fourth the
// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires
// a binary exponent adjustment of 4.
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;
// The largest exponent we can read is 999999999 (per
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
// comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point. (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
// Decimal case: the largest readable exponent (999999999) plus the largest
// adjustment from dropped digits, where each dropped digit counts for 1.
static_assert(999999999 + 2 * kDecimalDigitLimit <
std::numeric_limits<int>::max(),
"int type too small");
// Hexadecimal case: the same bound, where each dropped digit adjusts the
// binary exponent by 4.
static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) <
std::numeric_limits<int>::max(),
"int type too small");
// Reports whether the format flags permit an exponent (e.g., the "e100" in
// "1.5e100").  An exponent is permitted whenever `scientific` is set, and also
// whenever `fixed` is not set.
bool AllowExponent(chars_format flags) {
  const bool scientific_set =
      (flags & chars_format::scientific) == chars_format::scientific;
  if (scientific_set) {
    return true;
  }
  const bool fixed_set = (flags & chars_format::fixed) == chars_format::fixed;
  return !fixed_set;
}
// Reports whether the format flags make an exponent mandatory, which is the
// case exactly when `scientific` is set and `fixed` is not.
bool RequireExponent(chars_format flags) {
  const bool scientific_set =
      (flags & chars_format::scientific) == chars_format::scientific;
  if (!scientific_set) {
    return false;
  }
  const bool fixed_set = (flags & chars_format::fixed) == chars_format::fixed;
  return !fixed_set;
}
// Maps every unsigned char value to the hexadecimal digit value of that
// character ('0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15), or to -1 when the
// character is not a hexadecimal digit.  Laid out with one row per sixteen
// character codes.
const int8_t kAsciiToInt[256] = {
    // 0x00 - 0x0f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x30 - 0x3f: '0' through '9', then six non-digits
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4f: '@', then 'A' through 'F'
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x60 - 0x6f: '`', then 'a' through 'f'
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x80 - 0x8f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x90 - 0x9f
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xa0 - 0xaf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xb0 - 0xbf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xc0 - 0xcf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xd0 - 0xdf
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xe0 - 0xef
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xf0 - 0xff
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
// The templates below are only declared here.  Each one is given explicit
// specializations for base 10 and base 16 further down in this file.

// Returns true if `ch` is a digit in the given base
template <int base>
bool IsDigit(char ch);
// Converts a valid `ch` to its digit value in the given base.
template <int base>
unsigned ToDigit(char ch);
// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);
// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();
// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();
// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();
// Base 10: accepts exactly the characters '0' through '9'.
template <>
bool IsDigit<10>(char ch) {
  return '0' <= ch && ch <= '9';
}
// Base 16: accepts any character whose kAsciiToInt entry is non-negative.
template <>
bool IsDigit<16>(char ch) {
  const auto table_index = static_cast<unsigned char>(ch);
  return kAsciiToInt[table_index] >= 0;
}
// Base 10: the digit value is the character's offset from '0'. The caller is
// expected to have validated `ch` as a digit first.
template <>
unsigned ToDigit<10>(char ch) {
  return static_cast<unsigned>(ch - '0');
}
// Base 16: the digit value is read from the kAsciiToInt lookup table. The
// caller is expected to have validated `ch` as a digit first.
template <>
unsigned ToDigit<16>(char ch) {
  const int8_t table_value = kAsciiToInt[static_cast<unsigned char>(ch)];
  return static_cast<unsigned>(table_value);
}
// Base 10: the exponent is introduced by 'e' or 'E'.
template <>
bool IsExponentCharacter<10>(char ch) {
  switch (ch) {
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}
// Base 16: the exponent is introduced by 'p' or 'P'.
template <>
bool IsExponentCharacter<16>(char ch) {
  switch (ch) {
    case 'p':
    case 'P':
      return true;
    default:
      return false;
  }
}
// Base 10: the significant-digit cap is kDecimalMantissaDigitsMax.
template <>
constexpr int MantissaDigitsMax<10>() {
return kDecimalMantissaDigitsMax;
}
// Base 16: the significant-digit cap is kHexadecimalMantissaDigitsMax.
template <>
constexpr int MantissaDigitsMax<16>() {
return kHexadecimalMantissaDigitsMax;
}
// Base 10: the accepted run length is bounded by kDecimalDigitLimit.
template <>
constexpr int DigitLimit<10>() {
return kDecimalDigitLimit;
}
// Base 16: the accepted run length is bounded by kHexadecimalDigitLimit.
template <>
constexpr int DigitLimit<16>() {
return kHexadecimalDigitLimit;
}
// Base 10: each dropped decimal digit shifts the (base-10) exponent by 1.
template <>
constexpr int DigitMagnitude<10>() {
return 1;
}
// Base 16: each dropped hex digit is worth 4 bits, so it shifts the (base-2)
// exponent by 4.
template <>
constexpr int DigitMagnitude<16>() {
return 4;
}
// Accumulates the digits found at the front of [begin, end) into *out, in the
// given base, and returns how many characters were consumed.
//
// At most `max_digits` digits contribute to *out. Further digits are still
// consumed, but only inspected: if any of them is nonzero,
// *dropped_nonzero_digit is set to true (when the pointer is non-null);
// otherwise it is left untouched.
//
// When *out is zero on entry, leading '0' characters are consumed without
// counting against `max_digits`, since they cannot cause overflow.
//
// If no digits are matched, returns 0 and leaves the value of *out unchanged.
//
// No overflow protection is applied to *out; the caller must pick `max_digits`
// so that type T cannot overflow.
template <int base, typename T>
std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits,
                          T* out, bool* dropped_nonzero_digit) {
  if (base == 10) {
    assert(max_digits <= std::numeric_limits<T>::digits10);
  } else if (base == 16) {
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
  }

  const char* cursor = begin;
  T value = *out;

  // Leading zeros are free, but only while nothing has been accumulated yet.
  if (!value) {
    while (cursor != end && *cursor == '0') {
      ++cursor;
    }
  }

  // Phase 1: digits that are folded into the accumulated value.
  const char* significant_limit = end;
  if (end - cursor > max_digits) {
    significant_limit = cursor + max_digits;
  }
  for (; cursor < significant_limit && IsDigit<base>(*cursor); ++cursor) {
    // Overflow is not guarded against (max_digits rules it out); the asserts
    // only exist to catch a bad max_digits in debug builds.
    const T digit = static_cast<T>(ToDigit<base>(*cursor));
    assert(value * base >= value);
    value *= base;
    assert(value + digit >= value);
    value += digit;
  }

  // Phase 2: surplus digits are consumed but only checked for being nonzero.
  bool saw_nonzero_surplus = false;
  for (; cursor < end && IsDigit<base>(*cursor); ++cursor) {
    if (*cursor != '0') {
      saw_nonzero_surplus = true;
    }
  }
  if (saw_nonzero_surplus && dropped_nonzero_digit != nullptr) {
    *dropped_nonzero_digit = true;
  }

  *out = value;
  return cursor - begin;
}
// Reports whether `v` may appear inside the parenthesized sequence that can
// follow a NaN: an underscore, an ASCII digit, or an ASCII letter.
bool IsNanChar(char v) {
  if (v == '_') {
    return true;
  }
  const bool is_digit = '0' <= v && v <= '9';
  const bool is_lower = 'a' <= v && v <= 'z';
  const bool is_upper = 'A' <= v && v <= 'Z';
  return is_digit || is_lower || is_upper;
}
// Looks for a strtod()-style infinity or NaN at the start of [begin, end).
//
// On a match, fills in `out->type` and `out->end` (and, for a NaN followed by
// a well-formed parenthesized sequence, `out->subrange_begin` /
// `out->subrange_end`) and returns true. On a mismatch, returns false without
// modifying `out`.
bool ParseInfinityOrNan(const char* begin, const char* end,
                        strings_internal::ParsedFloat* out) {
  const auto available = end - begin;
  if (available < 3) {
    return false;
  }

  const char first = *begin;

  if (first == 'i' || first == 'I') {
    // An infinity is spelled "inf" or "infinity", case insensitive.
    if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
      return false;
    }
    const bool is_long_form =
        available >= 8 &&
        strings_internal::memcasecmp(begin + 3, "inity", 5) == 0;
    out->type = strings_internal::FloatType::kInfinity;
    out->end = begin + (is_long_form ? 8 : 3);
    return true;
  }

  if (first == 'n' || first == 'N') {
    // A NaN is spelled "nan", case insensitive, optionally followed by a
    // parenthesized run of zero or more characters from [a-zA-Z0-9_].
    if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
      return false;
    }
    const char* const after_nan = begin + 3;
    out->type = strings_internal::FloatType::kNan;
    out->end = after_nan;

    if (after_nan < end && *after_nan == '(') {
      const char* const payload_begin = after_nan + 1;
      const char* payload_end = payload_begin;
      while (payload_end < end && IsNanChar(*payload_end)) {
        ++payload_end;
      }
      // Only accept the parenthesized part if it is properly closed;
      // otherwise the match stops right after "nan".
      if (payload_end < end && *payload_end == ')') {
        out->subrange_begin = payload_begin;
        out->subrange_end = payload_end;
        out->end = payload_end + 1;
      }
    }
    return true;
  }

  return false;
}
} // namespace
namespace strings_internal {
// Scans [begin, end) for a floating point literal in the given base (10 or 16)
// and returns its decomposed form.
//
// On success `result.end` points one past the last consumed character and
// `result.type` says whether a number, an infinity, or a NaN was matched. On
// failure the returned ParsedFloat is left with `end == nullptr`.
//
// `format_flags` decides whether an exponent is allowed and/or required (see
// AllowExponent / RequireExponent). No sign and no "0x" prefix is consumed
// here.
//
// Digit counts are kept as std::size_t and compared against limits of the same
// type; they are narrowed to int only after the DigitLimit bound check has
// passed, so no comparison or subtraction mixes signedness.
template <int base>
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
                                         chars_format format_flags) {
  strings_internal::ParsedFloat result;

  // Exit early if we're given an empty range.
  if (begin == end) return result;

  // Handle the infinity and NaN cases.
  if (ParseInfinityOrNan(begin, end, &result)) {
    return result;
  }

  // Limits for this base, converted once to the type ConsumeDigits returns.
  const std::size_t digit_limit = static_cast<std::size_t>(DigitLimit<base>());
  const std::size_t mantissa_digits_max =
      static_cast<std::size_t>(MantissaDigitsMax<base>());

  const char* const mantissa_begin = begin;
  while (begin < end && *begin == '0') {
    ++begin;  // skip leading zeros
  }
  uint64_t mantissa = 0;

  int exponent_adjustment = 0;
  bool mantissa_is_inexact = false;
  const std::size_t pre_decimal_digits = ConsumeDigits<base>(
      begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
  begin += pre_decimal_digits;
  int digits_left;
  if (pre_decimal_digits >= digit_limit) {
    // refuse to parse pathological inputs
    return result;
  } else if (pre_decimal_digits > mantissa_digits_max) {
    // We dropped some non-fraction digits on the floor.  Adjust our exponent
    // to compensate.
    exponent_adjustment =
        static_cast<int>(pre_decimal_digits - mantissa_digits_max);
    digits_left = 0;
  } else {
    digits_left = static_cast<int>(mantissa_digits_max - pre_decimal_digits);
  }

  if (begin < end && *begin == '.') {
    ++begin;
    if (mantissa == 0) {
      // If we haven't seen any nonzero digits yet, keep skipping zeros.  We
      // have to adjust the exponent to reflect the changed place value.
      const char* const begin_zeros = begin;
      while (begin < end && *begin == '0') {
        ++begin;
      }
      const std::size_t zeros_skipped =
          static_cast<std::size_t>(begin - begin_zeros);
      if (zeros_skipped >= digit_limit) {
        // refuse to parse pathological inputs
        return result;
      }
      exponent_adjustment -= static_cast<int>(zeros_skipped);
    }
    const std::size_t post_decimal_digits = ConsumeDigits<base>(
        begin, end, digits_left, &mantissa, &mantissa_is_inexact);
    begin += post_decimal_digits;

    // Since `mantissa` is an integer, each significant digit we read after
    // the decimal point requires an adjustment to the exponent. "1.23e0" will
    // be stored as `mantissa` == 123 and `exponent` == -2 (that is,
    // "123e-2").
    if (post_decimal_digits >= digit_limit) {
      // refuse to parse pathological inputs
      return result;
    } else if (post_decimal_digits > static_cast<std::size_t>(digits_left)) {
      // Only `digits_left` of the fraction digits made it into the mantissa.
      exponent_adjustment -= digits_left;
    } else {
      exponent_adjustment -= static_cast<int>(post_decimal_digits);
    }
  }

  // If we've found no mantissa whatsoever, this isn't a number.
  if (mantissa_begin == begin) {
    return result;
  }
  // A bare "." doesn't count as a mantissa either.
  if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
    return result;
  }

  if (mantissa_is_inexact) {
    // We dropped significant digits on the floor.  Handle this appropriately.
    if (base == 10) {
      // If we truncated significant decimal digits, store the full range of the
      // mantissa for future big integer math for exact rounding.
      result.subrange_begin = mantissa_begin;
      result.subrange_end = begin;
    } else if (base == 16) {
      // If we truncated hex digits, reflect this fact by setting the low
      // ("sticky") bit.  This allows for correct rounding in all cases.
      mantissa |= 1;
    }
  }
  result.mantissa = mantissa;

  const char* const exponent_begin = begin;
  result.literal_exponent = 0;
  bool found_exponent = false;
  if (AllowExponent(format_flags) && begin < end &&
      IsExponentCharacter<base>(*begin)) {
    bool negative_exponent = false;
    ++begin;
    if (begin < end && *begin == '-') {
      negative_exponent = true;
      ++begin;
    } else if (begin < end && *begin == '+') {
      ++begin;
    }
    const char* const exponent_digits_begin = begin;
    // Exponent is always expressed in decimal, even for hexadecimal floats.
    begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
                               &result.literal_exponent, nullptr);
    if (begin == exponent_digits_begin) {
      // there were no digits where we expected an exponent.  We failed to read
      // an exponent and should not consume the 'e' after all.  Rewind 'begin'.
      found_exponent = false;
      begin = exponent_begin;
    } else {
      found_exponent = true;
      if (negative_exponent) {
        result.literal_exponent = -result.literal_exponent;
      }
    }
  }

  if (!found_exponent && RequireExponent(format_flags)) {
    // Provided flags required an exponent, but none was found.  This results
    // in a failure to scan.
    return result;
  }

  // Success!
  result.type = strings_internal::FloatType::kNumber;
  if (result.mantissa > 0) {
    result.exponent = result.literal_exponent +
                      (DigitMagnitude<base>() * exponent_adjustment);
  } else {
    // A zero mantissa is reported with a zero exponent regardless of input.
    result.exponent = 0;
  }
  result.end = begin;
  return result;
}
// Explicit instantiations of ParseFloat for the two bases used in this file
// (10 and 16).
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
chars_format format_flags);
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,99 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/charconv.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Enum indicating whether a parsed float is a number or special value
// (infinity or NaN).
enum class FloatType { kNumber, kInfinity, kNan };
// The decomposed parts of a parsed `float` or `double`.
struct ParsedFloat {
// Representation of the parsed mantissa, with the decimal point adjusted to
// make it an integer.
//
// During decimal scanning, this contains 19 significant digits worth of
// mantissa value. If digits beyond this point are found, they
// are truncated, and if any of these dropped digits are nonzero, then
// `mantissa` is inexact, and the full mantissa is stored in [subrange_begin,
// subrange_end).
//
// During hexadecimal scanning, this contains 15 significant hex digits worth
// of mantissa value. Digits beyond this point are sticky -- they are
// truncated, but if any dropped digits are nonzero, the low bit of mantissa
// will be set. (This allows for precise rounding, and avoids the need
// to store the full mantissa in [subrange_begin, subrange_end).)
uint64_t mantissa = 0;
// Floating point exponent. This reflects any decimal point adjustments and
// any truncated digits from the mantissa. The absolute value of the parsed
// number is represented by mantissa * (base ** exponent), where base==10 for
// decimal floats, and base==2 for hexadecimal floats.
int exponent = 0;
// The literal exponent value scanned from the input, or 0 if none was
// present. This does not reflect any adjustments applied to mantissa.
int literal_exponent = 0;
// The type of number scanned.
FloatType type = FloatType::kNumber;
// When non-null, [subrange_begin, subrange_end) marks a range of characters
// that require further processing. The meaning is dependent on float type.
// If type == kNumber and this is set, this is a "wide input": the input
// mantissa contained more than 19 digits. The range contains the full
// mantissa. It plus `literal_exponent` need to be examined to find the best
// floating point match.
// If type == kNan and this is set, the range marks the contents of a
// matched parenthesized character region after the NaN.
const char* subrange_begin = nullptr;
const char* subrange_end = nullptr;
// One-past-the-end of the successfully parsed region, or nullptr if no
// matching pattern was found.
const char* end = nullptr;
};
// Read the floating point number in the provided range, and populate
// ParsedFloat accordingly.
//
// format_flags is a bitmask value specifying what patterns this API will match.
// `scientific` and `fixed` are honored per std::from_chars rules
// ([utility.from.chars], C++17): if exactly one of these bits is set, then an
// exponent is required, or disallowed, respectively.
//
// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is
// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat.
template <int base>
ParsedFloat ParseFloat(const char* begin, const char* end,
absl::chars_format format_flags);
// The two supported instantiations are declared `extern` here.
extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
absl::chars_format format_flags);
extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
absl::chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_

View file

@ -0,0 +1,357 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include <string>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/str_cat.h"
using absl::chars_format;
using absl::strings_internal::FloatType;
using absl::strings_internal::ParsedFloat;
using absl::strings_internal::ParseFloat;
namespace {
// Check that a given string input is parsed to the expected type, mantissa
// and exponent.
//
// Input string `s` must contain a '$' character.  It marks the end of the
// characters that should be consumed by the match.  It is stripped from the
// input to ParseFloat.
//
// If input string `s` contains '[' and ']' characters, these mark the region
// of characters that should be marked as the "subrange".  For NaNs, this is
// the location of the extended NaN string.  For numbers, this is the location
// of the full, over-large mantissa.  The brackets are stripped as well.
//
// `expected_mantissa` and `expected_exponent` are only checked when the parsed
// type is kNumber.  `expected_literal_exponent` is only checked when it
// differs from the sentinel default of -999.
template <int base>
void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
                       FloatType expected_type, uint64_t expected_mantissa,
                       int expected_exponent,
                       int expected_literal_exponent = -999) {
  SCOPED_TRACE(s);

  int begin_subrange = -1;
  int end_subrange = -1;
  // If s contains '[' and ']', then strip these characters and set the subrange
  // indices appropriately.  The ']' is searched for only after the '[' has
  // been removed, so both indices refer to the stripped string.
  const std::string::size_type open_bracket_pos = s.find('[');
  if (open_bracket_pos != std::string::npos) {
    begin_subrange = static_cast<int>(open_bracket_pos);
    s.erase(open_bracket_pos, 1);
    const std::string::size_type close_bracket_pos = s.find(']');
    ABSL_RAW_CHECK(close_bracket_pos != std::string::npos,
                   "Test input contains [ without matching ]");
    end_subrange = static_cast<int>(close_bracket_pos);
    s.erase(close_bracket_pos, 1);
  }

  const std::string::size_type expected_characters_matched = s.find('$');
  ABSL_RAW_CHECK(expected_characters_matched != std::string::npos,
                 "Input string must contain $");
  s.erase(expected_characters_matched, 1);

  ParsedFloat parsed =
      ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);

  EXPECT_NE(parsed.end, nullptr);
  if (parsed.end == nullptr) {
    return;  // The following tests are not useful if we fully failed to parse
  }
  EXPECT_EQ(parsed.type, expected_type);
  if (begin_subrange == -1) {
    EXPECT_EQ(parsed.subrange_begin, nullptr);
    EXPECT_EQ(parsed.subrange_end, nullptr);
  } else {
    EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange);
    EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange);
  }
  if (parsed.type == FloatType::kNumber) {
    EXPECT_EQ(parsed.mantissa, expected_mantissa);
    EXPECT_EQ(parsed.exponent, expected_exponent);
    if (expected_literal_exponent != -999) {
      EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent);
    }
  }
  // Compare like with like: both sides are std::string::size_type, so the
  // expectation does not mix signed and unsigned operands.
  const auto characters_matched =
      static_cast<std::string::size_type>(parsed.end - s.data());
  EXPECT_EQ(characters_matched, expected_characters_matched);
}
// Check that a given string input is parsed to the expected mantissa and
// exponent.
//
// Input string `s` must contain a '$' character. It marks the end of the
// characters that were consumed by the match.
template <int base>
void ExpectNumber(std::string s, absl::chars_format format_flags,
uint64_t expected_mantissa, int expected_exponent,
int expected_literal_exponent = -999) {
ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber,
expected_mantissa, expected_exponent,
expected_literal_exponent);
}
// Check that a given string input is parsed to the given special value.
//
// The input is run through both the base-10 and the base-16 parser, since
// infinities and NaNs have identical representations in both modes.
void ExpectSpecial(const std::string& s, absl::chars_format format_flags,
                   FloatType type) {
  // Mantissa and exponent are not checked by ExpectParsedFloat unless the
  // parsed type is kNumber, so placeholder zeros are passed for them.
  constexpr uint64_t kPlaceholderMantissa = 0;
  constexpr int kPlaceholderExponent = 0;
  ExpectParsedFloat<10>(s, format_flags, type, kPlaceholderMantissa,
                        kPlaceholderExponent);
  ExpectParsedFloat<16>(s, format_flags, type, kPlaceholderMantissa,
                        kPlaceholderExponent);
}
// Check that a given input string is not matched by ParseFloat, i.e. that the
// returned ParsedFloat has a null `end`.
//
// Unlike ExpectParsedFloat, the input is passed to ParseFloat verbatim; no
// marker characters are stripped.
template <int base>
void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) {
  const char* const first = s.data();
  const char* const last = first + s.size();
  const ParsedFloat parsed = ParseFloat<base>(first, last, format_flags);
  EXPECT_EQ(parsed.end, nullptr);
}
// Covers ordinary inputs in both bases: equivalent spellings of one value,
// trailing characters that must not be consumed, full-width mantissas, and
// the various spellings of zero. In each input '$' marks where the match is
// expected to end.
TEST(ParseFloat, SimpleValue) {
// Test that various forms of floating point numbers all parse correctly.
ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3);
ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8);
// ParseFloat does not attempt to drop trailing zeroes; they remain part of
// the mantissa.
ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900,
-5);
ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general,
0x1234abcdef000, -20);
// Ensure non-matching characters after a number are ignored, even when they
// look like potentially matching characters.
ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789,
-3);
ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general,
0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8);
// Ensure we can read a full resolution mantissa without overflow.
ExpectNumber<10>("9999999999999999999$", chars_format::general,
9999999999999999999u, 0);
ExpectNumber<16>("fffffffffffffff$", chars_format::general,
0xfffffffffffffffu, 0);
// Check that zero is consistently read: the mantissa and exponent are both
// reported as 0 no matter how the zero is spelled.
ExpectNumber<10>("0$", chars_format::general, 0, 0);
ExpectNumber<16>("0$", chars_format::general, 0, 0);
ExpectNumber<10>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0.00000000000000000000000000000000e123456$",
chars_format::general, 0, 0);
ExpectNumber<16>("0.00000000000000000000000000000000p123456$",
chars_format::general, 0, 0);
}
TEST(ParseFloat, LargeDecimalMantissa) {
// After 19 significant decimal digits in the mantissa, ParsedFloat will
// truncate additional digits. We need to test that:
// 1) the truncation to 19 digits happens
// 2) the returned exponent reflects the dropped significant digits
// 3) a correct literal_exponent is set
//
// If and only if a nonzero digit is found after the first 19 digits, the
// entirety of the mantissa is marked as the subrange, in case the exact value
// is needed to make a rounding decision. The [ and ] characters below denote
// where such a subregion is expected to be marked by ParseFloat. They are not
// part of the input.
// No subrange is expected when every dropped digit is zero.
ExpectNumber<10>("100000000000000000000000000$", chars_format::general,
1000000000000000000,
/* adjusted exponent */ 8);
ExpectNumber<10>("123456789123456789100000000$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8);
ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
// Leading zeroes should not count towards the 19 significant digit limit
ExpectNumber<10>("[00000000123456789123456789123456789]$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("00000000123456789123456789100000000$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8);
// Truncated digits after the decimal point should not cause a further
// exponent adjustment.
ExpectNumber<10>("1.234567891234567891e123$", chars_format::general,
1234567891234567891, 105);
ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 105,
/* literal exponent */ 123);
// Ensure we truncate, and not round. (The from_chars algorithm we use
// depends on our guess missing low, if it misses, so we need the rounding
// error to be downward.)
ExpectNumber<10>("[1999999999999999999999]$", chars_format::general,
1999999999999999999,
/* adjusted exponent */ 3,
/* literal exponent */ 0);
}
TEST(ParseFloat, LargeHexadecimalMantissa) {
// After 15 significant hex digits in the mantissa, ParsedFloat will treat
// additional digits as sticky. We need to test that:
// 1) The truncation to 15 digits happens
// 2) The returned exponent reflects the dropped significant digits
// 3) If a nonzero digit is dropped, the low bit of mantissa is set.
//
// Each dropped hex digit adjusts the (base-2) exponent by 4, so the 15
// dropped digits below give an expected exponent of 60.
ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general,
0x123456789abcdef, 60);
// Leading zeroes should not count towards the 15 significant digit limit
ExpectNumber<16>("000000123456789abcdef123456789abcdef$",
chars_format::general, 0x123456789abcdef, 60);
// Truncated digits after the radix point should not cause a further
// exponent adjustment.
ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general,
0x123456789abcdef, 44);
ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$",
chars_format::general, 0x123456789abcdef, 44);
// Test sticky digit behavior. The low bit should be set iff any dropped
// digit is nonzero (note the kept digits end in ...dee, and the expected
// mantissa ends in ...def whenever the sticky bit is set).
ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general,
0x123456789abcdee, 60);
}
TEST(ParseFloat, ScientificVsFixed) {
  // In fixed mode, an exponent is never matched (but the remainder of the
  // number will be matched.)
  ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8);
  ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3);
  ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36);
  ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8);

  // In scientific mode, numbers don't match *unless* they have an exponent.
  //
  // The failing inputs are otherwise valid numbers, so the only possible
  // reason for rejection is the missing exponent. ExpectFailedParse passes its
  // input to ParseFloat verbatim, so no '$' marker is used; a leading '-'
  // is also avoided, since ParseFloat does not consume a sign and would
  // reject such an input before the exponent rule is ever consulted.
  ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3);
  ExpectFailedParse<10>("123456.789", chars_format::scientific);
  ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef,
                   -8);
  ExpectFailedParse<16>("1234abcd.ef", chars_format::scientific);
}
TEST(ParseFloat, Infinity) {
  // Inputs that only begin like an infinity must be rejected in both bases.
  for (const char* rejected : {"in", "inx"}) {
    ExpectFailedParse<10>(rejected, chars_format::general);
    ExpectFailedParse<16>(rejected, chars_format::general);
  }

  // Accepted spellings, case insensitive. '$' marks where the match is
  // expected to end: after "inf" unless the full word "infinity" is present.
  const char* const kInfinityInputs[] = {
      "inf$",       "Inf$",      "INF$",
      "inf$inite",  "iNfInItY$", "infinity$!!!",
  };
  for (const char* input : kInfinityInputs) {
    ExpectSpecial(input, chars_format::general, FloatType::kInfinity);
  }
}
TEST(ParseFloat, NaN) {
  // Inputs that only begin like a NaN must be rejected in both bases.
  for (const char* rejected : {"na", "nah"}) {
    ExpectFailedParse<10>(rejected, chars_format::general);
    ExpectFailedParse<16>(rejected, chars_format::general);
  }

  // In every input, '$' marks where the match is expected to end. The
  // characters [ and ] delimit the expected matched subgroup; they are not
  // part of the input passed to ParseFloat.
  const char* const kNanInputs[] = {
      // Plain NaN, case insensitive, optionally followed by unrelated text.
      "nan$",
      "NaN$",
      "nAn$",
      "NAN$",
      "NaN$aNaNaNaNaBatman!",
      // A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to
      // appear after a NaN.  Check that this is allowed, and that the correct
      // characters are grouped.
      "nan([0xabcdef])$",
      "nan([0xabcdef])$...",
      "nan([0xabcdef])$)...",
      "nan([])$",
      "nan([aAzZ09_])$",
      // If the subgroup contains illegal characters, don't match it at all.
      "nan$(bad-char)",
      // Also cope with a missing close paren.
      "nan$(0xabcdef",
  };
  for (const char* input : kNanInputs) {
    ExpectSpecial(input, chars_format::general, FloatType::kNan);
  }
}
} // namespace

View file

@ -0,0 +1,150 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Wraps std::atomic for reference counting. A freshly constructed Refcount
// holds a count of 1.
class Refcount {
 public:
  Refcount() : count_{1} {}
  ~Refcount() = default;

  // Increments the reference count by 1. Imposes no memory ordering.
  void Increment() { count_.fetch_add(1, std::memory_order_relaxed); }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count by 1. A count of exactly 1
  // is left untouched.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.
  bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0);
    // Short-circuit: when this is the last reference, skip the atomic
    // read-modify-write altogether.
    return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1;
  }

  // Same as Decrement but expect that refcount is greater than 1. Unlike
  // Decrement, this always performs the decrement.
  bool DecrementExpectHighRefcount() {
    int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel);
    assert(refcount > 0);
    return refcount != 1;
  }

  // Returns the current reference count using acquire semantics.
  int32_t Get() const { return count_.load(std::memory_order_acquire); }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object.
  //
  // This is a pure read, so it is const like Get().
  bool IsOne() const { return count_.load(std::memory_order_acquire) == 1; }

 private:
  std::atomic<int32_t> count_;
};
// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte. Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRepConcat;
struct CordRepSubstring;
struct CordRepExternal;
// Common header shared by every Cord node type. The concrete node structs
// (CordRepConcat, CordRepSubstring, CordRepExternal) derive from it and are
// told apart by `tag` rather than by a vtable.
struct CordRep {
// The following three fields have to be less than 32 bytes since
// that is the smallest supported flat node size.
size_t length;
Refcount refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
// Otherwise, the node type is CordRepFlat and the tag is the encoded size.
uint8_t tag;
char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep
// Downcasting helpers to the concrete node types. They are only declared
// here; their definitions are not in this part of the file.
inline CordRepConcat* concat();
inline const CordRepConcat* concat() const;
inline CordRepSubstring* substring();
inline const CordRepSubstring* substring() const;
inline CordRepExternal* external();
inline const CordRepExternal* external() const;
};
// Node with two children, `left` and `right`. The node's depth is kept in
// the first byte of the inherited `data` array.
struct CordRepConcat : public CordRep {
  CordRep* left;
  CordRep* right;

  // Reads the depth back out of data[0].
  uint8_t depth() const {
    const char stored = data[0];
    return static_cast<uint8_t>(stored);
  }

  // Stores `depth` into data[0].
  void set_depth(uint8_t depth) {
    const char encoded = static_cast<char>(depth);
    data[0] = encoded;
  }
};
// Node that refers to a sub-range of another node (`child`).
struct CordRepSubstring : public CordRep {
size_t start; // Starting offset of substring in child
CordRep* child; // Node the substring is taken from
};
// TODO(strel): replace the following logic (and related functions in cord.cc)
// with container_internal::Layout.
// Alignment that CordRepExternal is declared with, chosen so that the
// type-erased releaser stored after it lands on a suitably aligned address.
// Uses the default operator-new alignment when the compiler advertises it and
// falls back to alignof(max_align_t) otherwise.
constexpr size_t ExternalRepAlignment() {
#ifndef __STDCPP_DEFAULT_NEW_ALIGNMENT__
  return alignof(max_align_t);
#else
  return __STDCPP_DEFAULT_NEW_ALIGNMENT__;
#endif
}
// Type for function pointer that will invoke and destroy the type-erased
// releaser function object. Accepts a pointer to the releaser and the
// `string_view` that were passed in to `NewExternalRep` below. The return value
// is the size of the `Releaser` type.
using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view);
// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
// The struct is over-aligned to ExternalRepAlignment() for that reason.
struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep {
// NOTE(review): presumably the start of the externally owned bytes -- confirm
// against the code that creates external reps.
const char* base;
// Pointer to function that knows how to call and destroy the releaser.
ExternalReleaserInvoker releaser_invoker;
};
// TODO(strel): look into removing, it doesn't seem like anything relies on this
static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), "");
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

View file

@ -0,0 +1,180 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/escaping.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
const char kBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Returns the number of characters produced by base64-encoding `input_len`
// bytes, with or without trailing '=' padding.
//
// Base64 turns every complete group of three input bytes into four output
// characters. Per RFC 3548 (https://tools.ietf.org/html/rfc3548), when fewer
// than 24 bits remain at the end of the input, zero bits are added on the
// right to form an integral number of 6-bit groups, and '=' is used as
// padding. Since the input is a whole number of octets, only three cases
// arise for the final quantum:
//   (1) a multiple of 24 bits: the output is a multiple of four characters
//       with no padding;
//   (2) exactly 8 bits: two characters, followed by two '=' if padding;
//   (3) exactly 16 bits: three characters, followed by one '=' if padding.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  const size_t whole_groups = input_len / 3;
  const size_t leftover_bytes = input_len % 3;

  size_t len = whole_groups * 4;
  switch (leftover_bytes) {
    case 1:
      // Case (2): two data characters, optionally rounded up to four.
      len += do_padding ? 4 : 2;
      break;
    case 2:
      // Case (3): three data characters, optionally rounded up to four.
      len += do_padding ? 4 : 3;
      break;
    default:
      // Case (1): nothing beyond the whole groups.
      break;
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
// Base64-encodes the `szsrc` bytes at `src` into `dest`, which has room for
// `szdest` characters, using the 64-character alphabet `base64`. When
// `do_padding` is true, a trailing partial group is padded with '=' to four
// characters.
//
// Returns the number of characters written to `dest`, or 0 if `dest` is too
// small (0 is also the result for empty input). No terminating NUL is written.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64,
bool do_padding) {
static const char kPad64 = '=';
// Up-front rejection: every 3 input bytes need at least 4 output characters.
if (szsrc * 4 > szdest * 3) return 0;
char* cur_dest = dest;
const unsigned char* cur_src = src;
char* const limit_dest = dest + szdest;
const unsigned char* const limit_src = src + szsrc;
// Three bytes of data encodes to four characters of cyphertext.
// So we can pump through three-byte chunks atomically.
if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
while (cur_src < limit_src - 3) { // While we have >= 32 bits.
// Load 4 bytes big-endian and drop the lowest byte, leaving the next three
// input bytes in the low 24 bits.
uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
cur_dest[0] = base64[in >> 18];
in &= 0x3FFFF;
cur_dest[1] = base64[in >> 12];
in &= 0xFFF;
cur_dest[2] = base64[in >> 6];
in &= 0x3F;
cur_dest[3] = base64[in];
cur_dest += 4;
cur_src += 3;
}
}
// To save time, we didn't update szdest or szsrc in the loop. So do it now.
szdest = limit_dest - cur_dest;
szsrc = limit_src - cur_src;
/* now deal with the tail (<=3 bytes) */
switch (szsrc) {
case 0:
// Nothing left; nothing more to do.
break;
case 1: {
// One byte left: this encodes to two characters, and (optionally)
// two pad characters to round out the four-character cypherblock.
if (szdest < 2) return 0;
uint32_t in = cur_src[0];
cur_dest[0] = base64[in >> 2];
in &= 0x3;
cur_dest[1] = base64[in << 4];
cur_dest += 2;
szdest -= 2;
if (do_padding) {
if (szdest < 2) return 0;
cur_dest[0] = kPad64;
cur_dest[1] = kPad64;
cur_dest += 2;
szdest -= 2;
}
break;
}
case 2: {
// Two bytes left: this encodes to three characters, and (optionally)
// one pad character to round out the four-character cypherblock.
if (szdest < 3) return 0;
uint32_t in = absl::big_endian::Load16(cur_src);
cur_dest[0] = base64[in >> 10];
in &= 0x3FF;
cur_dest[1] = base64[in >> 4];
in &= 0x00F;
cur_dest[2] = base64[in << 2];
cur_dest += 3;
szdest -= 3;
if (do_padding) {
if (szdest < 1) return 0;
cur_dest[0] = kPad64;
cur_dest += 1;
szdest -= 1;
}
break;
}
case 3: {
// Three bytes left: same as in the big loop above. We can't do this in
// the loop because the loop above always reads 4 bytes, and the fourth
// byte is past the end of the input.
if (szdest < 4) return 0;
uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
cur_dest[0] = base64[in >> 18];
in &= 0x3FFFF;
cur_dest[1] = base64[in >> 12];
in &= 0xFFF;
cur_dest[2] = base64[in >> 6];
in &= 0x3F;
cur_dest[3] = base64[in];
cur_dest += 4;
szdest -= 4;
break;
}
default:
// Should not be reached: the while loop before this switch statement
// only stops once at most three input bytes remain.
ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
break;
}
return (cur_dest - dest);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,58 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_H_
#include <cassert>
#include "absl/strings/internal/resize_uninitialized.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
ABSL_CONST_INIT extern const char kBase64Chars[];
// Calculates how long a string will be when it is base64 encoded given its
// length and whether or not the result should be padded.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);
// Base64-encodes `src` using the alphabet provided in `base64` and writes the
// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
// until its length is a multiple of 4. Returns the number of characters
// written to `dest`, or 0 if `dest` is too small to hold the result.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64, bool do_padding);
// Base64-encodes the `szsrc` bytes at `src` using the alphabet provided in
// `base64_chars` and stores the result in `*dest`. `*dest` is first resized to
// the computed encoded length and then written in place. If `do_padding` is
// true, the output is padded with '=' chars until its length is a multiple
// of 4.
template <typename String>
void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
                          bool do_padding, const char* base64_chars) {
  // Size the destination up front so the encoder can write straight into it.
  const size_t expected_len =
      CalculateBase64EscapedLenInternal(szsrc, do_padding);
  STLStringResizeUninitialized(dest, expected_len);

  const size_t written_len = Base64EscapeInternal(
      src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);

  // The encoder must produce exactly the length predicted above.
  assert(expected_len == written_len);
  dest->erase(written_len);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_

View file

@ -0,0 +1,133 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test contains common things needed by both escaping_test.cc and
// escaping_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#include <array>
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// One base64 test vector: an input string paired with its expected encoding.
struct base64_testcase {
absl::string_view plaintext; // Unencoded input
absl::string_view cyphertext; // Expected encoded form of `plaintext`
};
// Returns the fixed set of base64 test vectors shared by escaping_test.cc and
// escaping_benchmark.cc. The array is built once, on first use, and the same
// reference is returned on every call.
//
// The cyphertexts use the websafe alphabet ('-' and '_' in place of '+' and
// '/') and carry no '=' padding.
inline const std::array<base64_testcase, 5>& base64_strings() {
  static const std::array<base64_testcase, 5> testcase{{
      // Some google quotes
      // Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
      // (Note that we're testing the websafe encoding, though, so if
      // you add messages, be sure to run "tr -- '+/' '-_'" on the output)
      { "I was always good at math and science, and I never realized "
        "that was unusual or somehow undesirable. So one of the things "
        "I care a lot about is helping to remove that stigma, "
        "to show girls that you can be feminine, you can like the things "
        "that girls like, but you can also be really good at technology. "
        "You can be really good at building things."
        " - Marissa Meyer, Newsweek, 2010-12-22" "\n",
        "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
        "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
        "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
        "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
        "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
        "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
        "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
        "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
        "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },
      { "Typical first year for a new cluster: "
        "~0.5 overheating "
        "~1 PDU failure "
        "~1 rack-move "
        "~1 network rewiring "
        "~20 rack failures "
        "~5 racks go wonky "
        "~8 network maintenances "
        "~12 router reloads "
        "~3 router failures "
        "~dozens of minor 30-second blips for dns "
        "~1000 individual machine failures "
        "~thousands of hard drive failures "
        "slow disks, bad memory, misconfigured machines, flaky machines, etc."
        " - Jeff Dean, The Joys of Real Hardware" "\n",
        "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
        "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
        "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
        "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
        "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
        "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
        "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
        "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
        "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
        "ZWFsIEhhcmR3YXJlCg" },
      // The two spaces after "Google." are intentional: the cyphertext below
      // encodes ".  " ("LiAg"), so the plaintext must contain both spaces.
      { "I'm the head of the webspam team at Google.  "
        "That means that if you type your name into Google and get porn back, "
        "it's my fault. Unless you're a porn star, in which case porn is a "
        "completely reasonable response."
        " - Matt Cutts, Google Plus" "\n",
        "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
        "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
        "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
        "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
        "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
        "IEdvb2dsZSBQbHVzCg" },
      { "It will still be a long time before machines approach human "
        "intelligence. "
        "But luckily, machines don't actually have to be intelligent; "
        "they just have to fake it. Access to a wealth of information, "
        "combined with a rudimentary decision-making capacity, "
        "can often be almost as useful. Of course, the results are better yet "
        "when coupled with intelligence. A reference librarian with access to "
        "a good search engine is a formidable tool."
        " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004"
        "\n",
        "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
        "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
        "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
        "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
        "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
        "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
        "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
        "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
        "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
        "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
        "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
        "NAo" },
      // Degenerate edge case
      { "",
        "" },
  }};
  return testcase;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_

View file

@ -0,0 +1,112 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Compares the first `len` bytes of `s1` and `s2`, ignoring ASCII case.
// Returns 0 when they are equal under absl::ascii_tolower; otherwise returns
// the difference between the first pair of lowercased bytes that differ,
// with both bytes taken as unsigned char.
int memcasecmp(const char* s1, const char* s2, size_t len) {
  const unsigned char* lhs = reinterpret_cast<const unsigned char*>(s1);
  const unsigned char* rhs = reinterpret_cast<const unsigned char*>(s2);
  const unsigned char* const lhs_end = lhs + len;
  for (; lhs != lhs_end; ++lhs, ++rhs) {
    const int left = int{static_cast<unsigned char>(absl::ascii_tolower(*lhs))};
    const int right =
        int{static_cast<unsigned char>(absl::ascii_tolower(*rhs))};
    if (left != right) return left - right;
  }
  return 0;
}
// Returns a malloc()-allocated copy of the `slen` bytes at `s`, or nullptr if
// the allocation fails. The caller releases the copy with free().
char* memdup(const char* s, size_t slen) {
  char* const duplicate = reinterpret_cast<char*>(malloc(slen));
  if (duplicate != nullptr) {
    memcpy(duplicate, s, slen);
  }
  return duplicate;
}
// Returns a pointer to the last byte among the first `slen` bytes of `s` that
// compares equal to `c`, or nullptr if there is none (including when `slen`
// is 0).
//
// The scan walks an index down from `slen` rather than a pointer down past
// `s`: forming a pointer before the start of the buffer is undefined
// behavior, which the previous `e >= s` loop did on every miss and whenever
// `slen` was 0.
char* memrchr(const char* s, int c, size_t slen) {
  for (size_t remaining = slen; remaining > 0; --remaining) {
    const char* candidate = s + (remaining - 1);
    if (*candidate == c) return const_cast<char*>(candidate);
  }
  return nullptr;
}
// Returns the length of the longest prefix of the `slen` bytes at `s` that
// consists only of bytes found in the NUL-terminated set `accept`. The result
// is `slen` when every byte is accepted.
//
// The length check happens before each byte is read, so the function never
// touches s[slen]. The previous implementation read the byte first and
// checked the length afterwards, reading one byte past the buffer whenever
// the whole input was accepted (or `slen` was 0).
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t prefix_len = 0;
  for (; prefix_len < slen; ++prefix_len) {
    const char current = s[prefix_len];
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != current) ++candidate;
    if (*candidate == '\0') break;  // `current` is not in `accept`.
  }
  return prefix_len;
}
// Returns the length of the longest prefix of the `slen` bytes at `s` that
// contains none of the bytes in the NUL-terminated set `reject`. The result
// is `slen` when no byte of `s` is rejected.
size_t memcspn(const char* s, size_t slen, const char* reject) {
  for (size_t index = 0; index < slen; ++index) {
    const char current = s[index];
    for (const char* candidate = reject; *candidate != '\0'; ++candidate) {
      if (*candidate == current) return index;
    }
  }
  return slen;
}
// Returns a pointer to the first of the `slen` bytes at `s` that appears in
// the NUL-terminated set `accept`, or nullptr if none does.
char* mempbrk(const char* s, size_t slen, const char* accept) {
  const char* const end = s + slen;
  for (const char* cursor = s; cursor != end; ++cursor) {
    for (const char* candidate = accept; *candidate != '\0'; ++candidate) {
      if (*candidate == *cursor) return const_cast<char*>(cursor);
    }
  }
  return nullptr;
}
// Finds the first occurrence of the `neelen`-byte needle inside the
// `haylen`-byte haystack, using memchr to locate candidate positions and
// memcmp to verify them. Returns a pointer to the match, `phaystack` itself
// for an empty needle, or nullptr when there is no match.
//
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
                     size_t neelen) {
  if (neelen == 0) {
    return phaystack;  // even if haylen is 0
  }
  if (neelen > haylen) return nullptr;

  // One past the last position at which a full needle could still start.
  const char* const scan_end = phaystack + haylen - neelen + 1;
  const char* cursor = phaystack;
  for (;;) {
    // memchr returns void* on Posix-compliant systems and const void* on
    // Windows, so a static cast is used to get back to const char*.
    const char* const candidate = static_cast<const char*>(
        memchr(cursor, pneedle[0], scan_end - cursor));
    if (candidate == nullptr) return nullptr;
    if (memcmp(candidate, pneedle, neelen) == 0) return candidate;
    cursor = candidate + 1;
  }
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,148 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// These routines provide mem versions of standard C string routines,
// such as strpbrk. They function exactly the same as the str versions,
// so if you wonder what they are, replace the word "mem" by
// "str" and check out the man page. I could return void*, as the
// strutil.h mem*() routines tend to do, but I return char* instead
// since this is by far the most common way these functions are called.
//
// The difference between the mem and str versions is the mem version
// takes a pointer and a length, rather than a '\0'-terminated string.
// The memcase* routines defined here assume the locale is "C"
// (they use absl::ascii_tolower instead of tolower).
//
// These routines are based on the BSD library.
//
// Here's a list of routines from string.h, and their mem analogues.
// Functions in lowercase are defined in string.h; those in UPPERCASE
// are defined here:
//
// strlen --
// strcat strncat MEMCAT
// strcpy strncpy memcpy
// -- memccpy (very cool function, btw)
// -- memmove
// -- memset
// strcmp strncmp memcmp
// strcasecmp strncasecmp MEMCASECMP
// strchr memchr
// strcoll --
// strxfrm --
// strdup strndup MEMDUP
// strrchr MEMRCHR
// strspn MEMSPN
// strcspn MEMCSPN
// strpbrk MEMPBRK
// strstr MEMSTR MEMMEM
// (g)strcasestr MEMCASESTR MEMCASEMEM
// strtok --
// strprefix MEMPREFIX (strprefix is from strutil.h)
// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
// -- MEMIS
// -- MEMCASEIS
// strcount MEMCOUNT (strcount is from strutil.h)
#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#include <cstddef>
#include <cstring>
#include "absl/base/port.h" // disable some warnings on Windows
#include "absl/strings/ascii.h" // for absl::ascii_tolower
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Copies the `srclen` bytes at `src` to `dest + destlen`, i.e. directly after
// the first `destlen` bytes of `dest`. Returns a pointer to the start of the
// appended bytes (`dest + destlen`), not `dest` itself.
inline char* memcat(char* dest, size_t destlen, const char* src,
                    size_t srclen) {
  char* const tail = dest + destlen;
  void* const copied = memcpy(tail, src, srclen);
  return reinterpret_cast<char*>(copied);
}
int memcasecmp(const char* s1, const char* s2, size_t len);
char* memdup(const char* s, size_t slen);
char* memrchr(const char* s, int c, size_t slen);
size_t memspn(const char* s, size_t slen, const char* accept);
size_t memcspn(const char* s, size_t slen, const char* reject);
char* mempbrk(const char* s, size_t slen, const char* accept);
// Internal helper behind memstr/memcasestr/memmem/memcasemem; not meant to be
// called directly.
//
// Scans the `haylen`-byte haystack for the first occurrence of the
// `neelen`-byte needle. When `case_sensitive` is false, both sides are passed
// through absl::ascii_tolower before being compared. Returns a pointer to the
// start of the match, `haystack` itself for an empty needle, or nullptr when
// there is no match.
template <bool case_sensitive>
const char* int_memmatch(const char* haystack, size_t haylen,
                         const char* needle, size_t neelen) {
  if (neelen == 0) {
    return haystack;  // even if haylen is 0
  }

  // Applies the case folding selected by the template argument.
  const auto fold = [](char ch) -> char {
    return case_sensitive
               ? ch
               : absl::ascii_tolower(static_cast<unsigned char>(ch));
  };

  size_t matched = 0;  // Number of needle bytes matched so far.
  for (size_t pos = 0; pos < haylen; ++pos) {
    const char hay = fold(haystack[pos]);
    const char nee = fold(needle[matched]);
    if (hay == nee) {
      ++matched;
      if (matched == neelen) {
        return haystack + pos + 1 - neelen;
      }
    } else if (matched != 0) {
      // must back up haystack in case a prefix matched (find "aab" in "aaab")
      pos -= matched;  // for loop will advance one more
      matched = 0;
    }
  }
  return nullptr;
}
// Public entry points built on int_memmatch.
//
// Case-sensitive search for the NUL-terminated `pneedle` within the first
// `haylen` bytes of `phaystack`.
inline const char* memstr(const char* phaystack, size_t haylen,
                          const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch<true>(phaystack, haylen, pneedle, needle_len);
}
// ASCII case-insensitive search for the NUL-terminated `pneedle` within the
// first `haylen` bytes of `phaystack`.
inline const char* memcasestr(const char* phaystack, size_t haylen,
                              const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch<false>(phaystack, haylen, pneedle, needle_len);
}
// Case-sensitive search for the `needlelen`-byte `pneedle` within the first
// `haylen` bytes of `phaystack`.
inline const char* memmem(const char* phaystack, size_t haylen,
                          const char* pneedle, size_t needlelen) {
  constexpr bool kCaseSensitive = true;
  return int_memmatch<kCaseSensitive>(phaystack, haylen, pneedle, needlelen);
}
// ASCII case-insensitive search for the `needlelen`-byte `pneedle` within the
// first `haylen` bytes of `phaystack`.
inline const char* memcasemem(const char* phaystack, size_t haylen,
                              const char* pneedle, size_t needlelen) {
  constexpr bool kCaseSensitive = false;
  return int_memmatch<kCaseSensitive>(phaystack, haylen, pneedle, needlelen);
}
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
size_t neelen);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_

View file

@ -0,0 +1,323 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <algorithm>
#include <cstdlib>
#include "benchmark/benchmark.h"
#include "absl/strings/ascii.h"
// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
// That gives us:
// - an easy search: 'b'
// - a medium search: 'ab'. That means every letter is a possible match.
// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haystack)
// We benchmark case-sensitive and case-insensitive versions of
// three memmem implementations:
// - memmem() from memutil.h
// - search() from STL
// - memmatch(), a custom implementation using memchr and memcmp.
// Here are sample results:
//
// Run on (12 X 3800 MHz CPU s)
// CPU Caches:
// L1 Data 32K (x6)
// L1 Instruction 32K (x6)
// L2 Unified 256K (x6)
// L3 Unified 15360K (x1)
// ----------------------------------------------------------------
// Benchmark Time CPU Iterations
// ----------------------------------------------------------------
// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s
// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s
// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s
// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s
// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s
// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s
// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s
// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s
// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s
// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s
// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s
// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s
// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s
// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s
// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s
// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s
// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s
// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s
// BM_MemmemStartup 5 ns 5 ns 150821500
// BM_SearchStartup 5 ns 5 ns 150644203
// BM_MemmatchStartup 7 ns 7 ns 97068802
//
// Conclusions:
//
// The following recommendations are based on the sample results above. However,
// we have found that the performance of STL search can vary significantly
// depending on compiler and standard library implementation. We recommend you
// run the benchmarks for yourself on relevant platforms.
//
// If you need case-insensitive, STL search is slightly better than memmem for
// all cases.
//
// Case-sensitive is more subtle:
// Custom memmatch is _very_ fast at scanning, so if you have very few possible
// matches in your haystack, that's the way to go. Performance drops
// significantly with more matches.
//
// STL search is slightly faster than memmem in the medium and pathological
// benchmarks. However, the performance of memmem is currently more dependable
// across platforms and build configurations.
namespace {
constexpr int kHaystackSize = 10000;
constexpr int64_t kHaystackSize64 = kHaystackSize;
// Allocates the benchmark haystack: kHaystackSize - 1 copies of 'a' followed
// by a single 'b'. The buffer is not NUL-terminated and is never freed.
const char* MakeHaystack() {
  char* const buffer = new char[kHaystackSize];
  std::fill_n(buffer, kHaystackSize - 1, 'a');
  buffer[kHaystackSize - 1] = 'b';
  return buffer;
}
const char* const kHaystack = MakeHaystack();
// Easy search with memmem() from memutil.h: the one-byte needle "b" only
// occurs as the last byte of kHaystack. Reports one full haystack of bytes
// processed per iteration.
void BM_Memmem(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmem);
// Medium search with memmem(): for the needle "ab", every 'a' in the haystack
// is a possible match start.
void BM_MemmemMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemMedium);
// Pathological search with memmem(): the needle is the second half of the
// haystack itself (a long run of 'a' ending in 'b').
void BM_MemmemPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmem(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemPathological);
// Easy search with the case-insensitive memcasemem() from memutil.h: the
// needle "b" only occurs as the last byte of kHaystack.
void BM_Memcasemem(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasemem);
// Medium search with memcasemem(): for the needle "ab", every 'a' in the
// haystack is a possible match start.
void BM_MemcasememMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememMedium);
// Pathological search with memcasemem(): the needle is the second half of
// the haystack itself.
void BM_MemcasememPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememPathological);
// Equality predicate for the case-insensitive std::search benchmarks: two
// characters are equal when their absl::ascii_tolower forms match.
bool case_eq(const char a, const char b) {
  const char lowered_a = absl::ascii_tolower(a);
  const char lowered_b = absl::ascii_tolower(b);
  return lowered_a == lowered_b;
}
// Easy search with std::search: the needle is the final byte of the haystack
// (the lone 'b').
void BM_Search(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Search);
// Medium search with std::search: the needle is the final two bytes of the
// haystack ("ab").
void BM_SearchMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 2,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchMedium);
// Pathological search with std::search: the needle is the second half of the
// haystack itself.
void BM_SearchPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchPathological);
// Easy case-insensitive search: std::search with the case_eq predicate,
// looking for the final byte of the haystack.
void BM_Searchcase(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Searchcase);
// Medium case-insensitive search: std::search with the case_eq predicate,
// looking for the final two bytes of the haystack ("ab").
void BM_SearchcaseMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 2,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcaseMedium);
// Pathological case-insensitive search: std::search with the case_eq
// predicate, looking for the second half of the haystack itself.
void BM_SearchcasePathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcasePathological);
// Case-insensitive counterpart of memchr: returns a pointer to the first of
// the `slen` bytes at `s` whose absl::ascii_tolower form equals that of `c`,
// or nullptr if there is none.
char* memcasechr(const char* s, int c, size_t slen) {
  const int target = absl::ascii_tolower(c);
  for (const char* const end = s + slen; s != end; ++s) {
    if (absl::ascii_tolower(*s) == target) return const_cast<char*>(s);
  }
  return nullptr;
}
// Case-insensitive counterpart of memmatch(): memcasechr finds candidate
// positions by the needle's first byte and memcasecmp verifies them. Returns a
// pointer to the first match, `phaystack` for an empty needle, or nullptr when
// there is no match.
const char* memcasematch(const char* phaystack, size_t haylen,
                         const char* pneedle, size_t neelen) {
  if (neelen == 0) {
    return phaystack;  // even if haylen is 0
  }
  if (neelen > haylen) return nullptr;

  // One past the last position at which a full needle could still start.
  const char* const scan_end = phaystack + haylen - neelen + 1;
  const char* cursor = phaystack;
  for (;;) {
    const char* const candidate = static_cast<char*>(
        memcasechr(cursor, pneedle[0], scan_end - cursor));
    if (candidate == nullptr) return nullptr;
    if (absl::strings_internal::memcasecmp(candidate, pneedle, neelen) == 0) {
      return candidate;
    }
    cursor = candidate + 1;
  }
}
// Benchmarks absl::strings_internal::memmatch searching the haystack for the
// one-byte needle "b".
void BM_Memmatch(benchmark::State& state) {
  const char* const needle = "b";
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmatch(kHaystack, kHaystackSize, needle, 1));
  }
  // Report throughput as one full haystack per iteration.
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmatch);
// Benchmarks absl::strings_internal::memmatch searching the haystack for the
// two-byte needle "ab".
void BM_MemmatchMedium(benchmark::State& state) {
  const char* const needle = "ab";
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmatch(kHaystack, kHaystackSize, needle, 2));
  }
  // Report throughput as one full haystack per iteration.
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchMedium);
// Benchmarks absl::strings_internal::memmatch with the second half of the
// haystack as the needle.
void BM_MemmatchPathological(benchmark::State& state) {
  const auto needle = kHaystack + kHaystackSize / 2;
  const auto needle_len = kHaystackSize - kHaystackSize / 2;
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::strings_internal::memmatch(
        kHaystack, kHaystackSize, needle, needle_len));
  }
  // Report throughput as one full haystack per iteration.
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchPathological);
// Benchmarks the local memcasematch() helper searching the haystack for the
// one-byte needle "b".
void BM_Memcasematch(benchmark::State& state) {
  const char* const needle = "b";
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        memcasematch(kHaystack, kHaystackSize, needle, 1));
  }
  // Report throughput as one full haystack per iteration.
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasematch);
// Benchmarks the local memcasematch() helper searching the haystack for the
// two-byte needle "ab".
void BM_MemcasematchMedium(benchmark::State& state) {
  const char* const needle = "ab";
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        memcasematch(kHaystack, kHaystackSize, needle, 2));
  }
  // Report throughput as one full haystack per iteration.
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchMedium);
// Benchmarks the local memcasematch() helper with the second half of the
// haystack as the needle.
void BM_MemcasematchPathological(benchmark::State& state) {
  const auto needle = kHaystack + kHaystackSize / 2;
  const auto needle_len = kHaystackSize - kHaystackSize / 2;
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        memcasematch(kHaystack, kHaystackSize, needle, needle_len));
  }
  // Report throughput as one full haystack per iteration.
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchPathological);
// Benchmarks absl::strings_internal::memmem on a tiny input: the last 10
// bytes of the haystack are searched for the haystack's final byte.
void BM_MemmemStartup(benchmark::State& state) {
  const auto tail = kHaystack + kHaystackSize - 10;
  const auto last_byte = kHaystack + kHaystackSize - 1;
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmem(tail, 10, last_byte, 1));
  }
}
BENCHMARK(BM_MemmemStartup);
// Benchmarks std::search on a tiny input: the last 10 bytes of the haystack
// are searched for the haystack's final byte.
void BM_SearchStartup(benchmark::State& state) {
  const auto hay_end = kHaystack + kHaystackSize;
  const auto tail = kHaystack + kHaystackSize - 10;
  const auto last_byte = kHaystack + kHaystackSize - 1;
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::search(tail, hay_end, last_byte, hay_end));
  }
}
BENCHMARK(BM_SearchStartup);
// Benchmarks absl::strings_internal::memmatch on a tiny input: the last 10
// bytes of the haystack are searched for the haystack's final byte.
void BM_MemmatchStartup(benchmark::State& state) {
  const auto tail = kHaystack + kHaystackSize - 10;
  const auto last_byte = kHaystack + kHaystackSize - 1;
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmatch(tail, 10, last_byte, 1));
  }
}
BENCHMARK(BM_MemmatchStartup);
} // namespace

View file

@ -0,0 +1,179 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit test for memutil.cc
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
#include "gtest/gtest.h"
#include "absl/strings/ascii.h"
namespace {
// Scans the `slen` bytes starting at `s` and returns a pointer to the first
// one whose absl::ascii_tolower() form equals that of `c`, or nullptr when no
// byte matches. The result is non-const even though the input is const.
static char* memcasechr(const char* s, int c, size_t slen) {
  const int wanted = absl::ascii_tolower(c);
  const char* const limit = s + slen;
  for (const char* cur = s; cur != limit; ++cur) {
    if (absl::ascii_tolower(*cur) == wanted) {
      return const_cast<char*>(cur);
    }
  }
  return nullptr;
}
// Case-insensitive substring search over explicit-length buffers.
//
// Returns a pointer to the first position in [phaystack, phaystack + haylen)
// where the `neelen` bytes of `pneedle` compare equal under
// absl::strings_internal::memcasecmp, or nullptr if there is none. An empty
// needle matches at `phaystack`.
static const char* memcasematch(const char* phaystack, size_t haylen,
                                const char* pneedle, size_t neelen) {
  if (neelen == 0) {
    return phaystack;  // even if haylen is 0
  }
  if (neelen > haylen) return nullptr;
  // One past the last position at which a full-length match could start.
  const char* const last_start = phaystack + haylen - neelen + 1;
  for (const char* cursor = phaystack;;) {
    // Jump to the next byte that matches the needle's first character.
    const char* candidate =
        memcasechr(cursor, pneedle[0], last_start - cursor);
    if (candidate == nullptr) return nullptr;
    if (absl::strings_internal::memcasecmp(candidate, pneedle, neelen) == 0) {
      return candidate;
    }
    cursor = candidate + 1;
  }
}
// Single catch-all test that exercises the absl::strings_internal memutil
// helpers (memcat, memcasecmp, memdup, memrchr, memspn, memcspn, mempbrk,
// memmem, memcasemem and memmatch) on small fixed inputs.
TEST(MemUtilTest, AllTests) {
// check memutil functions
char a[1000];
// Assemble "hello there" in `a` with two memcat calls. The second argument
// looks like the current length of the destination -- TODO confirm against
// memutil.h.
absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
// memcasecmp: equal ignoring case over all 11 bytes; -1 when the last byte
// differs ('e' vs 'f'); equal again once that byte is excluded; and equal
// for a zero-length comparison.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
sizeof("hello there") - 1),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 1),
-1);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 2),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
// memdup: only checks that the result can be handed to free(); the copied
// bytes themselves are not inspected.
char* p = absl::strings_internal::memdup("hello", 5);
free(p);
// memrchr: with the full length the hit is the final 'e' (preceded by 'r');
// with one byte fewer it is the 'e' that follows 'h'; an absent character
// yields nullptr.
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 1);
EXPECT_TRUE(p && p[-1] == 'r');
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 2);
EXPECT_TRUE(p && p[-1] == 'h');
p = absl::strings_internal::memrchr("hello there", 'u',
sizeof("hello there") - 1);
EXPECT_TRUE(p == nullptr);
// memspn: the expectations below treat the result as the length of the
// leading run of bytes that all belong to the given set.
int len = absl::strings_internal::memspn("hello there",
sizeof("hello there") - 1, "hole");
EXPECT_EQ(len, sizeof("hello") - 1);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"u");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 1, "trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
// The trailing '!' lies outside the given length, so the whole 11-byte
// prefix is expected to be spanned.
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 2, "trole h!");
EXPECT_EQ(len, sizeof("hello there!") - 2);
// memcspn: the expectations below treat the result as the length of the
// leading run of bytes that are NOT in the given set.
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "leho");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "u");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, " ");
EXPECT_EQ(len, 5);
// mempbrk: expected to point at the first byte that belongs to the set, or
// to be nullptr when no byte within the given length does.
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"leho");
EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"nu");
EXPECT_TRUE(p == nullptr);
// The '!' sits just past the given length, so it must not be found.
p = absl::strings_internal::mempbrk("hello there!",
sizeof("hello there!") - 2, "!");
EXPECT_TRUE(p == nullptr);
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
" t ");
EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
// memmem: exact substring search. The needle length is passed explicitly,
// so e.g. ("0xx", 1) searches for just "0".
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memcasemem: the same scenarios with mixed-case data, expecting matches
// regardless of letter case.
{
const char kHaystack[] = "aBcDeFgHiJ";
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memmatch: the same scenarios and expected results as the memmem block.
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
nullptr);
}
}
} // namespace

View file

@ -0,0 +1,184 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains common things needed by numbers_test.cc,
// numbers_legacy_test.cc and numbers_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#include <array>
#include <cstdint>
#include <limits>
#include <string>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Formats `value` in the given `base` into `*destination`.
//
// `*destination` is always cleared first. Returns false (leaving it empty)
// unless 2 <= base <= 36. Digits above 9 are written as 'A'..'Z', and a
// negative value gets a leading '-'.
template <typename IntType>
inline bool Itoa(IntType value, int base, std::string* destination) {
  destination->clear();
  const bool base_supported = base >= 2 && base <= 36;
  if (!base_supported) return false;

  if (value == 0) {
    destination->assign(1, '0');
    return true;
  }

  const bool is_negative = value < 0;
  // Digits come out least-significant first; collect them and flip at the end.
  std::string reversed;
  for (IntType rest = value; rest != 0;) {
    const IntType quotient = rest / base;
    const auto multiple = quotient * base;
    // |rest - multiple| without std::abs, which misbehaves when IntType is
    // unsigned.
    const int digit = static_cast<int>(rest > multiple ? rest - multiple
                                                       : multiple - rest);
    reversed.push_back(
        static_cast<char>(digit < 10 ? '0' + digit : 'A' + digit - 10));
    rest = quotient;
  }
  if (is_negative) reversed.push_back('-');

  destination->assign(reversed.rbegin(), reversed.rend());
  return true;
}
// One row of the table returned by strtouint32_test_cases().
// NOTE(review): the field descriptions below are inferred from the field
// names and the table rows; confirm against the tests that consume the table.
struct uint32_test_case {
const char* str;  // input text; the table's last row uses nullptr here
bool expect_ok;  // presumably whether the conversion should succeed
int base; // base to pass to the conversion function
uint32_t expected;  // presumably the value to compare the parsed result with
};
// Returns the shared table of 27 uint32 parsing fixtures. The table is a
// function-local static, so every call returns a reference to the same array.
// Each row is {str, expect_ok, base, expected}.
// NOTE(review): for rows with expect_ok == false the `expected` column is not
// uniformly 0 (e.g. a parsed prefix or the uint32 maximum); presumably the
// consumers compare against it on failure as well -- confirm before editing.
inline const std::array<uint32_test_case, 27>& strtouint32_test_cases() {
static const std::array<uint32_test_case, 27> test_cases{{
{"0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{"0x34234324", true, 16, 0x34234324},
{"34234324", true, 16, 0x34234324},
{"0", true, 16, 0},
{" \t\n 0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
{" \t\n 72717222", true, 8, 072717222},
{" \t\n 072717222", true, 8, 072717222},
{" \t\n 072717228", false, 8, 07271722},
{"0", true, 0, 0},
// Base-10 version.
{"34234324", true, 0, 34234324},
{"4294967295", true, 0, (std::numeric_limits<uint32_t>::max)()},
{"34234324 \n\t", true, 10, 34234324},
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0}, // would be valid hex, but prefix is missing
{"34234324a", false, 0, 34234324},
{"34234.3", false, 0, 34234},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"4294967296", false, 0, (std::numeric_limits<uint32_t>::max)()},
{"0x100000000", false, 0, (std::numeric_limits<uint32_t>::max)()},
// Final row has a null input string (presumably a null-input case or an
// end marker -- confirm with the consuming tests).
{nullptr, false, 0, 0},
}};
return test_cases;
}
// One row of the table returned by strtouint64_test_cases().
// NOTE(review): the field descriptions below are inferred from the field
// names and the table rows; confirm against the tests that consume the table.
struct uint64_test_case {
const char* str;  // input text; the table's last row uses nullptr here
bool expect_ok;  // presumably whether the conversion should succeed
int base;  // numeric base for the conversion; the table also uses 0
uint64_t expected;  // presumably the value to compare the parsed result with
};
// Returns the shared table of 34 uint64 parsing fixtures. The table is a
// function-local static, so every call returns a reference to the same array.
// Each row is {str, expect_ok, base, expected}.
// NOTE(review): for rows with expect_ok == false the `expected` column is
// sometimes 0 and sometimes the uint64 maximum; presumably the consumers
// compare against it on failure as well -- confirm before editing.
inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() {
static const std::array<uint64_test_case, 34> test_cases{{
{"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"0", true, 16, 0},
{"000", true, 0, 0},
{"0", true, 0, 0},
{" \t\n 0xffffffffffffffff", true, 16,
(std::numeric_limits<uint64_t>::max)()},
{"012345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12845670123456701234", false, 8, 0},
// Base-10 version.
{"34234324487834466", true, 0, int64_t{34234324487834466}},
{" \t\n 18446744073709551615", true, 0,
(std::numeric_limits<uint64_t>::max)()},
{"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
{"0", true, 0, 0},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0},
{"34234324487834466a", false, 0, 0},
{"34234487834466.3", false, 0, 0},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"18446744073709551616", false, 10, 0},
{"18446744073709551616", false, 0, 0},
{"0x10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x10000000000000000", false, 0, (std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 0,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x1234", true, 16, 0x1234},
// Base-10 string version.
{"1234", true, 0, 1234},
// Final row has a null input string (presumably a null-input case or an
// end marker -- confirm with the consuming tests).
{nullptr, false, 0, 0},
}};
return test_cases;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_

View file

@ -0,0 +1,36 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Stream-buffer hook for a single character: appends `c` to the target
// string unless it is the EOF marker. Always returns 1. Asserts that a target
// string is set.
OStringStream::Buf::int_type OStringStream::overflow(int c) {
  assert(s_);
  const bool is_eof =
      Buf::traits_type::eq_int_type(c, Buf::traits_type::eof());
  if (!is_eof) {
    s_->push_back(static_cast<char>(c));
  }
  return 1;
}
// Stream-buffer hook for a block of characters: appends the `n` characters
// at `s` to the target string and reports all `n` as written. Asserts that a
// target string is set.
std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) {
  assert(s_);
  std::string& target = *s_;
  target.append(s, n);
  return n;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,89 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#include <cassert>
#include <ostream>
#include <streambuf>
#include <string>
#include "absl/base/port.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The same as std::ostringstream but appends to a user-specified std::string,
// and is faster. It is ~70% faster to create, ~50% faster to write to, and
// completely free to extract the result std::string.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42 << ' ' << 3.14; // appends to `s`
//
// The stream object doesn't have to be named. Starting from C++11 operator<<
// works with rvalues of std::ostream.
//
// std::string s;
// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s`
//
// OStringStream is faster to create than std::ostringstream but it's still
// relatively slow. Avoid creating multiple streams where a single stream will
// do.
//
// Creates unnecessary instances of OStringStream: slow.
//
// std::string s;
// OStringStream(&s) << 42;
// OStringStream(&s) << ' ';
// OStringStream(&s) << 3.14;
//
// Creates a single instance of OStringStream and reuses it: fast.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42;
// strm << ' ';
// strm << 3.14;
//
// Note: flush() has no effect. No reason to call it.
// The class is its own stream buffer: it derives privately from
// std::basic_streambuf<char> and hands `this` to the std::ostream base.
class OStringStream : private std::basic_streambuf<char>, public std::ostream {
public:
// The argument can be null, in which case you'll need to call str(p) with a
// non-null argument before you can write to the stream.
//
// The destructor of OStringStream doesn't use the std::string. It's OK to
// destroy the std::string before the stream.
explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {}
// Returns the string currently written to (may be null).
std::string* str() { return s_; }
const std::string* str() const { return s_; }
// Redirects subsequent writes to `s`. Only the stored pointer changes; the
// previously targeted string is left untouched.
void str(std::string* s) { s_ = s; }
private:
using Buf = std::basic_streambuf<char>;
// Stream-buffer overrides; only declared here, implemented out of line.
Buf::int_type overflow(int c) override;
std::streamsize xsputn(const char* s, std::streamsize n) override;
// Target string. Stored as a raw pointer that this class never deletes.
std::string* s_;
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_

View file

@ -0,0 +1,106 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <sstream>
#include <string>
#include "benchmark/benchmark.h"
namespace {
// Template argument for the benchmarks below; selects what is done with the
// accumulated output at the end of each iteration.
enum StringType {
kNone,  // leave the output where it is
kStdString,  // copy the output into a fresh std::string
};
// Benchmarks for std::ostringstream.
//
// state.range(0) is the number of writes per iteration and state.range(1) the
// size in bytes of each write. When kOutput is kStdString the accumulated
// text is also extracted with str() at the end of every iteration.
template <StringType kOutput>
void BM_StdStream(benchmark::State& state) {
  const int num_writes = state.range(0);
  const int bytes_per_write = state.range(1);
  const std::string payload(bytes_per_write, 'x');
  for (auto _ : state) {
    std::ostringstream strm;
    benchmark::DoNotOptimize(strm);
    for (int written = 0; written != num_writes; ++written) {
      strm << payload;
    }
    if (kOutput == kStdString) {
      std::string s = strm.str();
      benchmark::DoNotOptimize(s);
    }
  }
}

// Create the stream, optionally write to it, then destroy it.
BENCHMARK_TEMPLATE(BM_StdStream, kNone)
    ->ArgPair(0, 0)
    ->ArgPair(1, 16)   // 16 bytes is small enough for SSO
    ->ArgPair(1, 256)  // 256 bytes requires heap allocation
    ->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
BENCHMARK_TEMPLATE(BM_StdStream, kStdString)
    ->ArgPair(1, 16)   // 16 bytes is small enough for SSO
    ->ArgPair(1, 256)  // 256 bytes requires heap allocation
    ->ArgPair(1024, 256);
// Benchmarks for OStringStream.
//
// state.range(0) is the number of writes per iteration and state.range(1) the
// size in bytes of each write. When kOutput is kStdString the target string
// is also copied at the end of every iteration.
template <StringType kOutput>
void BM_CustomStream(benchmark::State& state) {
  const int num_writes = state.range(0);
  const int bytes_per_write = state.range(1);
  const std::string payload(bytes_per_write, 'x');
  for (auto _ : state) {
    std::string out;
    absl::strings_internal::OStringStream strm(&out);
    benchmark::DoNotOptimize(strm);
    for (int written = 0; written != num_writes; ++written) {
      strm << payload;
    }
    if (kOutput == kStdString) {
      std::string s = out;
      benchmark::DoNotOptimize(s);
    }
  }
}

// Create the stream, optionally write to it, then destroy it.
BENCHMARK_TEMPLATE(BM_CustomStream, kNone)
    ->ArgPair(0, 0)
    ->ArgPair(1, 16)   // 16 bytes is small enough for SSO
    ->ArgPair(1, 256)  // 256 bytes requires heap allocation
    ->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
// It's not useful in practice to extract std::string from OStringStream; we
// measure it for completeness.
BENCHMARK_TEMPLATE(BM_CustomStream, kStdString)
    ->ArgPair(1, 16)   // 16 bytes is small enough for SSO
    ->ArgPair(1, 256)  // 256 bytes requires heap allocation
    ->ArgPair(1024, 256);
} // namespace

View file

@ -0,0 +1,102 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <memory>
#include <ostream>
#include <string>
#include <type_traits>
#include "gtest/gtest.h"
namespace {
// Compile-time check that OStringStream derives from std::ostream.
TEST(OStringStream, IsOStream) {
  using absl::strings_internal::OStringStream;
  static_assert(std::is_base_of<std::ostream, OStringStream>::value, "");
}
// Construction and destruction must not modify or depend on the target
// string.
TEST(OStringStream, ConstructDestroy) {
  using absl::strings_internal::OStringStream;

  // A stream can be created without a target.
  {
    OStringStream strm(nullptr);
    EXPECT_EQ(nullptr, strm.str());
  }

  // Creating and destroying a stream leaves the target's contents alone.
  {
    std::string s = "abc";
    {
      OStringStream strm(&s);
      EXPECT_EQ(&s, strm.str());
    }
    EXPECT_EQ("abc", s);
  }

  // The target string may be destroyed before the stream.
  {
    std::unique_ptr<std::string> s(new std::string);
    OStringStream strm(s.get());
    s.reset();
  }
}
// str() reports the current target through both const and non-const access,
// and str(p) retargets the stream (including to nullptr).
TEST(OStringStream, Str) {
  using absl::strings_internal::OStringStream;

  std::string s1;
  OStringStream strm(&s1);
  const OStringStream& c_strm(strm);

  static_assert(std::is_same<decltype(strm.str()), std::string*>(), "");
  static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), "");

  EXPECT_EQ(&s1, strm.str());
  EXPECT_EQ(&s1, c_strm.str());

  // Retargeting to the same string changes nothing.
  strm.str(&s1);
  EXPECT_EQ(&s1, strm.str());
  EXPECT_EQ(&s1, c_strm.str());

  // Retargeting to a different string.
  std::string s2;
  strm.str(&s2);
  EXPECT_EQ(&s2, strm.str());
  EXPECT_EQ(&s2, c_strm.str());

  // Detaching the stream from any string.
  strm.str(nullptr);
  EXPECT_EQ(nullptr, strm.str());
  EXPECT_EQ(nullptr, c_strm.str());
}
// Writes through a named stream append to the existing contents of the
// target string, and the text is still there after the stream is destroyed.
// NOTE(review): the suite name is spelled "OStreamStream" here rather than
// "OStringStream"; left as is so the registered test name does not change.
TEST(OStreamStream, WriteToLValue) {
  using absl::strings_internal::OStringStream;

  std::string s = "abc";
  {
    OStringStream strm(&s);
    EXPECT_EQ("abc", s);

    strm << "";
    EXPECT_EQ("abc", s);

    strm << 42;
    EXPECT_EQ("abc42", s);

    strm << 'x' << 'y';
    EXPECT_EQ("abc42xy", s);
  }
  EXPECT_EQ("abc42xy", s);
}
// Writes through temporary (unnamed) streams append to the target string.
// NOTE(review): the suite name is spelled "OStreamStream" here rather than
// "OStringStream"; left as is so the registered test name does not change.
TEST(OStreamStream, WriteToRValue) {
  using absl::strings_internal::OStringStream;

  std::string s = "abc";

  OStringStream(&s) << "";
  EXPECT_EQ("abc", s);

  OStringStream(&s) << 42;
  EXPECT_EQ("abc42", s);

  OStringStream(&s) << 'x' << 'y';
  EXPECT_EQ("abc42xy", s);
}
} // namespace

View file

@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// The exact value of 1e23 falls precisely halfway between two representable
// doubles. Furthermore, the rounding rules we prefer (break ties by rounding
// to the nearest even) dictate in this case that the number should be rounded
// down, but this is not completely specified for floating-point literals in
// C++. (It just says to use the default rounding mode of the standard
// library.) We ensure the result we want by using a number that has an
// unambiguous correctly rounded answer.
//
// The literal below is 1e23 - 1e7, i.e. slightly below the halfway point, so
// it is no longer a tie and its nearest double is the lower of the two
// neighbours of 1e23.
constexpr double k1e23 = 9999999999999999e7;
// Table of powers of ten written as floating-point literals. The first entry
// is 0.0, followed by 1e-323 up to 1e+308 in increasing order, so the entry
// for 10^exp sits at index exp + 324. The slot for 10^23 uses the k1e23
// constant instead of a literal.
constexpr double kPowersOfTen[] = {
0.0, 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316,
1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307,
1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298,
1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289,
1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280,
1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262,
1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253,
1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244,
1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235,
1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226,
1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217,
1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208,
1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199,
1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190,
1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172,
1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163,
1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154,
1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145,
1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136,
1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127,
1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118,
1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109,
1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100,
1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91,
1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82,
1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73,
1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64,
1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55,
1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46,
1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37,
1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19,
1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8,
1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17,
1e+18, 1e+19, 1e+20, 1e+21, 1e+22, k1e23, 1e+24, 1e+25, 1e+26,
1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35,
1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44,
1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53,
1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62,
1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71,
1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89,
1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98,
1e+99, 1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107,
1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116,
1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125,
1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134,
1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143,
1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152,
1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161,
1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170,
1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188,
1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197,
1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206,
1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215,
1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224,
1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233,
1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241, 1e+242,
1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251,
1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260,
1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278,
1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287,
1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296,
1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305,
1e+306, 1e+307, 1e+308,
};
} // namespace
// Looks up 10^exp in kPowersOfTen. Exponents above 308 give INFINITY and
// exponents below -324 give 0.0; everything in between is read from the
// table at index exp + 324.
double Pow10(int exp) {
  if (exp > 308) return INFINITY;
  if (exp < -324) return 0.0;
  return kPowersOfTen[exp + 324];
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,40 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test helper library contains a table of powers of 10, to guarantee
// precise values are computed across the full range of doubles. We can't rely
// on the pow() function, because not all standard libraries ship a version
// that is precise.
#ifndef ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#define ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#include <vector>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Computes the precise value of 10^exp. (I.e. the nearest representable
// double to the exact value, rounding to nearest-even in the (single) case of
// being exactly halfway between.)
//
// Out-of-range exponents do not fail: the result is 0.0 for exp <= -324 and
// positive infinity for exp > 308.
double Pow10(int exp);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_

View file

@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// One expected-value entry for the Pow10 test table.
// NOTE(review): going by the field comments, the expected double is
// presumably significand * 2^radix -- confirm against the test body.
struct TestCase {
int power; // Testing Pow10(power)
uint64_t significand; // Raw bits of the expected value
int radix; // significand is adjusted by 2^radix
};
// Verifies Pow10() against a table of hand-picked expected values. Each
// expected value is rebuilt exactly from an integer significand and a binary
// exponent, so the comparison does not depend on how the compiler rounds
// decimal floating-point literals.
TEST(Pow10HelperTest, Works) {
// The logic in pow10_helper.cc is so simple that theoretically we don't even
// need a test. However, we're paranoid and believe that there may be
// compilers that don't round floating-point literals correctly, even though
// it is specified by the standard. We check various edge cases, just to be
// sure.
constexpr TestCase kTestCases[] = {
// Subnormals
// (All use radix -1074, so the significand column is the value expressed in
// units of 2^-1074.)
{-323, 0x2, -1074},
{-322, 0x14, -1074},
{-321, 0xca, -1074},
{-320, 0x7e8, -1074},
{-319, 0x4f10, -1074},
{-318, 0x316a2, -1074},
{-317, 0x1ee257, -1074},
{-316, 0x134d761, -1074},
{-315, 0xc1069cd, -1074},
{-314, 0x78a42205, -1074},
{-313, 0x4b6695433, -1074},
{-312, 0x2f201d49fb, -1074},
{-311, 0x1d74124e3d1, -1074},
{-310, 0x12688b70e62b, -1074},
{-309, 0xb8157268fdaf, -1074},
{-308, 0x730d67819e8d2, -1074},
// Values that are very close to rounding the other way.
// Comment shows difference of significand from the true value.
{-307, 0x11fa182c40c60d, -1072}, // -.4588
{-290, 0x18f2b061aea072, -1016}, // .4854
{-276, 0x11BA03F5B21000, -969}, // .4709
{-259, 0x1899C2F6732210, -913}, // .4830
{-252, 0x1D53844EE47DD1, -890}, // -.4743
{-227, 0x1E5297287C2F45, -807}, // -.4708
{-198, 0x1322E220A5B17E, -710}, // -.4714
{-195, 0x12B010D3E1CF56, -700}, // .4928
{-192, 0x123FF06EEA847A, -690}, // .4968
{-163, 0x1708D0F84D3DE7, -594}, // -.4977
{-145, 0x13FAAC3E3FA1F3, -534}, // -.4785
{-111, 0x133D4032C2C7F5, -421}, // .4774
{-106, 0x1D5B561574765B, -405}, // -.4869
{-104, 0x16EF5B40C2FC77, -398}, // -.4741
{-88, 0x197683DF2F268D, -345}, // -.4738
{-86, 0x13E497065CD61F, -338}, // .4736
{-76, 0x17288E1271F513, -305}, // -.4761
{-63, 0x1A53FC9631D10D, -262}, // .4929
{-30, 0x14484BFEEBC2A0, -152}, // .4758
{-21, 0x12E3B40A0E9B4F, -122}, // -.4916
{-5, 0x14F8B588E368F1, -69}, // .4829
{23, 0x152D02C7E14AF6, 24}, // -.5000 (exactly, round-to-even)
{29, 0x1431E0FAE6D721, 44}, // -.4870
{34, 0x1ED09BEAD87C03, 60}, // -.4721
{70, 0x172EBAD6DDC73D, 180}, // .4733
{105, 0x1BE7ABD3781ECA, 296}, // -.4850
{126, 0x17A2ECC414A03F, 366}, // -.4999
{130, 0x1CDA62055B2D9E, 379}, // .4855
{165, 0x115D847AD00087, 496}, // -.4913
{172, 0x14B378469B6732, 519}, // .4818
{187, 0x1262DFEEBBB0F9, 569}, // -.4805
{210, 0x18557F31326BBB, 645}, // -.4992
{212, 0x1302CB5E6F642A, 652}, // -.4838
{215, 0x1290BA9A38C7D1, 662}, // -.4881
{236, 0x1F736F9B3494E9, 731}, // .4707
{244, 0x176EC98994F489, 758}, // .4924
{250, 0x1658E3AB795204, 778}, // -.4963
{252, 0x117571DDF6C814, 785}, // .4873
{254, 0x1B4781EAD1989E, 791}, // -.4887
{260, 0x1A03FDE214CAF1, 811}, // .4784
{284, 0x1585041B2C477F, 891}, // .4798
{304, 0x1D2A1BE4048F90, 957}, // -.4987
// Out-of-range values
// Too-small powers are expected to produce exactly zero. Too-large powers
// are expected to match ldexp(1, 2000), which is far beyond the double
// exponent range (presumably +infinity on IEEE 754 platforms).
{-324, 0x0, 0},
{-325, 0x0, 0},
{-326, 0x0, 0},
{309, 1, 2000},
{310, 1, 2000},
{311, 1, 2000},
};
// Exact equality is intended here: Pow10() must return the correctly rounded
// double, not merely a close one. On failure both values are printed in
// hexadecimal floating-point (%a) so individual bits can be compared.
for (const TestCase& test_case : kTestCases) {
EXPECT_EQ(Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix))
<< absl::StrFormat("Failure for Pow10(%d): %a vs %a", test_case.power,
Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix));
}
}
} // namespace
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,73 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#include <string>
#include <type_traits>
#include <utility>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h" // for void_t
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Primary template: selected when `string_type` has no usable
// __resize_default_init member (see the partial specialization below).
//
// HasMember is std::false_type, and Resize() falls back to the ordinary
// string_type::resize(), which value-initializes any newly added characters.
template <typename string_type, typename = void>
struct ResizeUninitializedTraits {
using HasMember = std::false_type;
static void Resize(string_type* s, size_t new_size) { s->resize(new_size); }
};
// __resize_default_init is provided by libc++ >= 8.0
//
// Partial specialization selected (via SFINAE on absl::void_t) when the
// expression `std::declval<string_type&>().__resize_default_init(237)` is
// well-formed. The literal 237 is only a placeholder argument for the
// detection expression; it is never evaluated.
//
// HasMember is std::true_type, and Resize() forwards to
// __resize_default_init() instead of resize().
template <typename string_type>
struct ResizeUninitializedTraits<
string_type, absl::void_t<decltype(std::declval<string_type&>()
.__resize_default_init(237))> > {
using HasMember = std::true_type;
static void Resize(string_type* s, size_t new_size) {
s->__resize_default_init(new_size);
}
};
// Returns true if the std::string implementation supports a resize where
// the new characters added to the std::string are left untouched.
//
// The pointer argument is used only to deduce `string_type`; it is never
// dereferenced. The result is a compile-time constant taken from
// ResizeUninitializedTraits<string_type>::HasMember.
//
// (A better name might be "STLStringSupportsUninitializedResize", alluding to
// STLStringResizeUninitialized() below.)
template <typename string_type>
inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
return ResizeUninitializedTraits<string_type>::HasMember::value;
}
// Like s->resize(new_size), except any new characters added to "*s" as a
// result of resizing may be left uninitialized, rather than being filled with
// '\0' bytes. Typically used when code is then going to overwrite the backing
// store of the std::string with known data.
//
// Dispatches through ResizeUninitializedTraits, so string types without
// __resize_default_init() simply get a regular resize().
template <typename string_type, typename = void>
inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
ResizeUninitializedTraits<string_type>::Resize(s, new_size);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_

View file

@ -0,0 +1,82 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/resize_uninitialized.h"
#include "gtest/gtest.h"
namespace {
// Number of resize() calls observed on either mock string type.
int resize_call_count = 0;

// Minimal string stand-in that only offers resize(). It stores nothing: it
// always reports size zero, hands out one shared dummy character for every
// index, and merely counts how often resize() is invoked.
struct resizable_string {
  size_t size() const { return 0; }

  char& operator[](size_t) {
    static char dummy = '\0';
    return dummy;
  }

  void resize(size_t) { ++resize_call_count; }
};
// Number of __resize_default_init() calls observed.
int resize_default_init_call_count = 0;

// Minimal string stand-in that offers both resize() and
// __resize_default_init(). Like resizable_string it stores nothing; each
// method only bumps its own call counter so the test can tell which one the
// library picked.
struct resize_default_init_string {
  size_t size() const { return 0; }

  char& operator[](size_t) {
    static char dummy = '\0';
    return dummy;
  }

  void resize(size_t) { ++resize_call_count; }

  void __resize_default_init(size_t) { ++resize_default_init_call_count; }
};
// Checks that STLStringResizeUninitialized() uses __resize_default_init()
// when the string type provides it, and plain resize() otherwise, and that
// STLStringSupportsNontrashingResize() reports the matching answer without
// calling either method.
TEST(ResizeUninit, WithAndWithout) {
resize_call_count = 0;
resize_default_init_call_count = 0;
{
// Type with resize() only: expect the fallback path.
resizable_string rs;
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_FALSE(
absl::strings_internal::STLStringSupportsNontrashingResize(&rs));
// Querying support must not touch the string.
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
absl::strings_internal::STLStringResizeUninitialized(&rs, 237);
EXPECT_EQ(resize_call_count, 1);
EXPECT_EQ(resize_default_init_call_count, 0);
}
// Reset the counters so the second scenario starts from a clean slate.
resize_call_count = 0;
resize_default_init_call_count = 0;
{
// Type with __resize_default_init(): expect it to be preferred over
// resize().
resize_default_init_string rus;
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_TRUE(
absl::strings_internal::STLStringSupportsNontrashingResize(&rus));
// Querying support must not touch the string.
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
absl::strings_internal::STLStringResizeUninitialized(&rus, 237);
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 1);
}
}
} // namespace

View file

@ -0,0 +1,248 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug
// wrappers of STL containers.
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#define ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#include <array>
#include <bitset>
#include <deque>
#include <forward_list>
#include <list>
#include <map>
#include <set>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "absl/meta/type_traits.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// IsSpecialization<C, T> is true when the decayed type of C is exactly some
// instantiation T<Args...> of the class template T, and false otherwise.
//
// Only templates whose parameters are all types can be matched this way;
// templates with non-type parameters are handled by dedicated traits such as
// IsArrayImpl and IsBitsetImpl below.
template <typename C, template <typename...> class T>
struct IsSpecializationImpl : std::false_type {};
// Matches when the first argument has the form T<Args...> for the same T.
template <template <typename...> class T, typename... Args>
struct IsSpecializationImpl<T<Args...>, T> : std::true_type {};
// Public entry point: strips references and cv-qualifiers from C first.
template <typename C, template <typename...> class T>
using IsSpecialization = IsSpecializationImpl<absl::decay_t<C>, T>;
// IsArray<C> is true when the decayed type of C is a std::array<T, N>.
template <typename C>
struct IsArrayImpl : std::false_type {};
// Matches any template of the shape A<type, size_t>, then confirms that the
// matched type really is std::array<T, N>.
template <template <typename, size_t> class A, typename T, size_t N>
struct IsArrayImpl<A<T, N>> : std::is_same<A<T, N>, std::array<T, N>> {};
template <typename C>
using IsArray = IsArrayImpl<absl::decay_t<C>>;
// IsBitset<C> is true when the decayed type of C is a std::bitset<N>.
template <typename C>
struct IsBitsetImpl : std::false_type {};
// Matches any template of the shape B<size_t>, then confirms that the matched
// type really is std::bitset<N>.
template <template <size_t> class B, size_t N>
struct IsBitsetImpl<B<N>> : std::is_same<B<N>, std::bitset<N>> {};
template <typename C>
using IsBitset = IsBitsetImpl<absl::decay_t<C>>;
// True when the decayed type of C is itself one of the listed standard
// containers: array, bitset, deque, forward_list, list, map, multimap, set,
// multiset, unordered_map, unordered_multimap, unordered_set,
// unordered_multiset or vector.
template <typename C>
struct IsSTLContainer
: absl::disjunction<
IsArray<C>, IsBitset<C>, IsSpecialization<C, std::deque>,
IsSpecialization<C, std::forward_list>,
IsSpecialization<C, std::list>, IsSpecialization<C, std::map>,
IsSpecialization<C, std::multimap>, IsSpecialization<C, std::set>,
IsSpecialization<C, std::multiset>,
IsSpecialization<C, std::unordered_map>,
IsSpecialization<C, std::unordered_multimap>,
IsSpecialization<C, std::unordered_set>,
IsSpecialization<C, std::unordered_multiset>,
IsSpecialization<C, std::vector>> {};
// IsBaseOfSpecialization<C, T> asks: if the template T is instantiated with
// the nested member types that C advertises (value_type, allocator_type,
// ...), is C a base class of that instantiation? Note that std::is_base_of
// also yields true when both types are the same class.
//
// The primary template is the fallback used when C lacks the nested types
// required by every partial specialization below.
template <typename C, template <typename...> class T, typename = void>
struct IsBaseOfSpecializationImpl : std::false_type {};
// IsBaseOfSpecializationImpl needs multiple partial specializations to SFINAE
// on the existence of container dependent types and plug them into the STL
// template.
// Two-parameter templates: T<value_type, allocator_type>.
template <typename C, template <typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::value_type, typename C::allocator_type>> {};
// Three-parameter templates: T<key_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::key_compare,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::key_compare,
typename C::allocator_type>> {};
// Four-parameter templates with an ordering:
// T<key_type, mapped_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>> {
};
// Four-parameter templates with hashing:
// T<key_type, hasher, key_equal, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>> {
};
// Five-parameter templates:
// T<key_type, mapped_type, hasher, key_equal, allocator_type>.
template <typename C,
template <typename, typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// Public entry point: strips references and cv-qualifiers from C first.
template <typename C, template <typename...> class T>
using IsBaseOfSpecialization = IsBaseOfSpecializationImpl<absl::decay_t<C>, T>;
// IsBaseOfArray<C> is true when the decayed type of C has the shape A<T, N>
// and is a base class of (or the same class as) std::array<T, N>.
template <typename C>
struct IsBaseOfArrayImpl : std::false_type {};
template <template <typename, size_t> class A, typename T, size_t N>
struct IsBaseOfArrayImpl<A<T, N>> : std::is_base_of<A<T, N>, std::array<T, N>> {
};
template <typename C>
using IsBaseOfArray = IsBaseOfArrayImpl<absl::decay_t<C>>;
// IsBaseOfBitset<C> is true when the decayed type of C has the shape B<N> and
// is a base class of (or the same class as) std::bitset<N>.
template <typename C>
struct IsBaseOfBitsetImpl : std::false_type {};
template <template <size_t> class B, size_t N>
struct IsBaseOfBitsetImpl<B<N>> : std::is_base_of<B<N>, std::bitset<N>> {};
template <typename C>
using IsBaseOfBitset = IsBaseOfBitsetImpl<absl::decay_t<C>>;
// True when the decayed type of C is a base class of at least one of the
// listed standard containers, where each container template is instantiated
// with the nested types that C itself exposes. Because std::is_base_of is
// also true for identical class types, this holds for the standard containers
// themselves too.
template <typename C>
struct IsBaseOfSTLContainer
: absl::disjunction<IsBaseOfArray<C>, IsBaseOfBitset<C>,
IsBaseOfSpecialization<C, std::deque>,
IsBaseOfSpecialization<C, std::forward_list>,
IsBaseOfSpecialization<C, std::list>,
IsBaseOfSpecialization<C, std::map>,
IsBaseOfSpecialization<C, std::multimap>,
IsBaseOfSpecialization<C, std::set>,
IsBaseOfSpecialization<C, std::multiset>,
IsBaseOfSpecialization<C, std::unordered_map>,
IsBaseOfSpecialization<C, std::unordered_multimap>,
IsBaseOfSpecialization<C, std::unordered_set>,
IsBaseOfSpecialization<C, std::unordered_multiset>,
IsBaseOfSpecialization<C, std::vector>> {};
// IsConvertibleToSpecialization<C, T> asks: if the template T is instantiated
// with the nested member types that C advertises, is C implicitly convertible
// to that instantiation?
//
// The primary template is the fallback used when C lacks the nested types
// required by every partial specialization below.
template <typename C, template <typename...> class T, typename = void>
struct IsConvertibleToSpecializationImpl : std::false_type {};
// IsConvertibleToSpecializationImpl needs multiple partial specializations to
// SFINAE on the existence of container dependent types and plug them into the
// STL template.
// Two-parameter templates: T<value_type, allocator_type>.
template <typename C, template <typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::value_type, typename C::allocator_type>> {};
// Three-parameter templates: T<key_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::key_compare,
typename C::allocator_type>>
: std::is_convertible<C, T<typename C::key_type, typename C::key_compare,
typename C::allocator_type>> {};
// Four-parameter templates with an ordering:
// T<key_type, mapped_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>> {};
// Four-parameter templates with hashing:
// T<key_type, hasher, key_equal, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::key_type, typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// Five-parameter templates:
// T<key_type, mapped_type, hasher, key_equal, allocator_type>.
template <typename C,
template <typename, typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>>
: std::is_convertible<C, T<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// Public entry point: strips references and cv-qualifiers from C first.
template <typename C, template <typename...> class T>
using IsConvertibleToSpecialization =
IsConvertibleToSpecializationImpl<absl::decay_t<C>, T>;
// IsConvertibleToArray<C> is true when the decayed type of C has the shape
// A<T, N> and is implicitly convertible to std::array<T, N>.
template <typename C>
struct IsConvertibleToArrayImpl : std::false_type {};
template <template <typename, size_t> class A, typename T, size_t N>
struct IsConvertibleToArrayImpl<A<T, N>>
: std::is_convertible<A<T, N>, std::array<T, N>> {};
template <typename C>
using IsConvertibleToArray = IsConvertibleToArrayImpl<absl::decay_t<C>>;
// IsConvertibleToBitset<C> is true when the decayed type of C has the shape
// B<N> and is implicitly convertible to std::bitset<N>.
template <typename C>
struct IsConvertibleToBitsetImpl : std::false_type {};
template <template <size_t> class B, size_t N>
struct IsConvertibleToBitsetImpl<B<N>>
: std::is_convertible<B<N>, std::bitset<N>> {};
template <typename C>
using IsConvertibleToBitset = IsConvertibleToBitsetImpl<absl::decay_t<C>>;
// True when the decayed type of C is implicitly convertible to at least one
// of the listed standard containers, where each container template is
// instantiated with the nested types that C itself exposes.
template <typename C>
struct IsConvertibleToSTLContainer
: absl::disjunction<
IsConvertibleToArray<C>, IsConvertibleToBitset<C>,
IsConvertibleToSpecialization<C, std::deque>,
IsConvertibleToSpecialization<C, std::forward_list>,
IsConvertibleToSpecialization<C, std::list>,
IsConvertibleToSpecialization<C, std::map>,
IsConvertibleToSpecialization<C, std::multimap>,
IsConvertibleToSpecialization<C, std::set>,
IsConvertibleToSpecialization<C, std::multiset>,
IsConvertibleToSpecialization<C, std::unordered_map>,
IsConvertibleToSpecialization<C, std::unordered_multimap>,
IsConvertibleToSpecialization<C, std::unordered_set>,
IsConvertibleToSpecialization<C, std::unordered_multiset>,
IsConvertibleToSpecialization<C, std::vector>> {};
// True only when all three conditions hold for C:
//   1. C is not itself one of the recognized standard containers,
//   2. C is a base class of one of them, and
//   3. C is implicitly convertible to one of them.
// Condition 1 is what makes the relationship "strict": IsBaseOfSTLContainer
// alone would also accept the standard containers themselves.
template <typename C>
struct IsStrictlyBaseOfAndConvertibleToSTLContainer
: absl::conjunction<absl::negation<IsSTLContainer<C>>,
IsBaseOfSTLContainer<C>,
IsConvertibleToSTLContainer<C>> {};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_

View file

@ -0,0 +1,474 @@
//
// POSIX spec:
// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html
//
#include "absl/strings/internal/str_format/arg.h"
#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <string>
#include <type_traits>
#include "absl/base/port.h"
#include "absl/strings/internal/str_format/float_conversion.h"
#include "absl/strings/numbers.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Shrinks the remaining padding budget in *capacity by the length of `s`,
// never letting it drop below zero.
void ReducePadding(string_view s, size_t *capacity) {
  const size_t consumed = s.size();
  *capacity = Excess(consumed, *capacity);
}
// Shrinks the remaining padding budget in *capacity by `n` characters, never
// letting it drop below zero.
void ReducePadding(size_t n, size_t *capacity) {
  const size_t remaining = Excess(n, *capacity);
  *capacity = remaining;
}
// Like std::make_unsigned, but also defined for absl::int128 and
// absl::uint128 (both map to absl::uint128), which get explicit
// specializations here instead of going through std::make_unsigned.
template <typename T>
struct MakeUnsigned : std::make_unsigned<T> {};
template <>
struct MakeUnsigned<absl::int128> {
using type = absl::uint128;
};
template <>
struct MakeUnsigned<absl::uint128> {
using type = absl::uint128;
};
// Like std::is_signed, with explicit answers for the 128-bit types:
// absl::int128 is signed, absl::uint128 is not.
template <typename T>
struct IsSigned : std::is_signed<T> {};
template <>
struct IsSigned<absl::int128> : std::true_type {};
template <>
struct IsSigned<absl::uint128> : std::false_type {};
// Integral digit printer.
// Call one of the PrintAs* routines after construction once.
// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results.
//
// The digits are written into an internal fixed-size buffer; start_ and size_
// describe the printed range. Neither member is initialized by the
// constructor, so the accessors must not be used before a PrintAs* call.
class IntDigits {
public:
// Print the unsigned integer as octal.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsOct(T v) {
static_assert(!IsSigned<T>::value, "");
// Fill the buffer from the back, emitting one octal digit (3 bits) at a
// time. The do/while guarantees at least one digit, so zero prints as "0".
char *p = storage_ + sizeof(storage_);
do {
*--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7));
v >>= 3;
} while (v);
start_ = p;
size_ = storage_ + sizeof(storage_) - p;
}
// Print the signed or unsigned integer as decimal.
// Supports all integral types.
template <typename T>
void PrintAsDec(T v) {
static_assert(std::is_integral<T>::value, "");
// Unlike the other printers, this one writes forward from the start of the
// buffer. The subtraction relies on FastIntToBuffer returning a pointer
// just past the last character it wrote.
start_ = storage_;
size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_;
}
// Decimal printing for int128: prints the magnitude as uint128 and asks for
// a leading '-' when the value is negative.
void PrintAsDec(int128 v) {
auto u = static_cast<uint128>(v);
bool add_neg = false;
if (v < 0) {
add_neg = true;
// Negate in the unsigned domain.
u = uint128{} - u;
}
PrintAsDec(u, add_neg);
}
// Decimal printing for uint128. When `add_neg` is true a '-' is placed in
// front of the digits.
void PrintAsDec(uint128 v, bool add_neg = false) {
// This function can be sped up if needed. We can call FastIntToBuffer
// twice, or fix FastIntToBuffer to support uint128.
// Fill the buffer from the back, two decimal digits per iteration.
char *p = storage_ + sizeof(storage_);
do {
p -= 2;
numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p);
v /= 100;
} while (v);
if (p[0] == '0') {
// We printed one too many digits.
++p;
}
if (add_neg) {
*--p = '-';
}
size_ = storage_ + sizeof(storage_) - p;
start_ = p;
}
// Print the unsigned integer as hex using lowercase.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsHexLower(T v) {
static_assert(!IsSigned<T>::value, "");
// Fill the buffer from the back, one byte (two hex digits) per iteration,
// copying each digit pair from kHexTable.
char *p = storage_ + sizeof(storage_);
do {
p -= 2;
constexpr const char* table = numbers_internal::kHexTable;
std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2);
// A one-byte value is fully printed after a single iteration; leave the
// loop before shifting it by its full width.
if (sizeof(T) == 1) break;
v >>= 8;
} while (v);
if (p[0] == '0') {
// We printed one too many hex digits.
++p;
}
start_ = p;
size_ = storage_ + sizeof(storage_) - p;
}
// Print the unsigned integer as hex using uppercase.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsHexUpper(T v) {
static_assert(!IsSigned<T>::value, "");
char *p = storage_ + sizeof(storage_);
// kHexTable is only lowercase, so do it manually for uppercase.
// One hex digit (4 bits) per iteration, written back to front.
do {
*--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15];
v >>= 4;
} while (v);
start_ = p;
size_ = storage_ + sizeof(storage_) - p;
}
// The printed value including the '-' sign if available.
// For inputs of value `0`, this will return "0"
string_view with_neg_and_zero() const { return {start_, size_}; }
// The printed value not including the '-' sign.
// For inputs of value `0`, this will return "".
string_view without_neg_or_zero() const {
static_assert('-' < '0', "The check below verifies both.");
// A first character of '-' (sign) or '0' (the value zero) is skipped; any
// other leading character is a significant digit and is kept.
size_t advance = start_[0] <= '0' ? 1 : 0;
return {start_ + advance, size_ - advance};
}
// True when the printed text starts with a '-' sign.
bool is_negative() const { return start_[0] == '-'; }
private:
// First printed character; points into storage_ after a PrintAs* call.
const char *start_;
// Number of printed characters starting at start_.
size_t size_;
// Max size: 128 bit value as octal -> 43 digits, plus sign char
char storage_[128 / 3 + 1 + 1];
};
// Returns the base prefix ("0x" or "0X") to print in front of the digits, or
// an empty view when no prefix applies.
//
// Note: octal ('o') conversions never get a prefix from here; for them the
// '#' flag is specified to adjust the precision instead.
string_view BaseIndicator(const IntDigits &as_digits,
                          const FormatConversionSpecImpl conv) {
  const auto c = conv.conversion_char();
  const bool is_pointer = c == FormatConversionCharInternal::p;
  const bool is_upper_hex = c == FormatConversionCharInternal::X;
  const bool is_hex =
      is_pointer || is_upper_hex || c == FormatConversionCharInternal::x;

  // Only hexadecimal-style conversions carry a prefix.
  if (!is_hex) return {};

  // %p always shows the prefix; %x / %X only with the '#' (alt) flag.
  if (!is_pointer && !conv.has_alt_flag()) return {};

  // From the POSIX description of '#' flag:
  //   "For x or X conversion specifiers, a non-zero result shall have
  //   0x (or 0X) prefixed to it."
  if (as_digits.without_neg_or_zero().empty()) return {};

  return is_upper_hex ? "0X" : "0x";
}
// Returns the text for the sign column of a signed decimal conversion
// (%d / %i): "-" for negative values, otherwise "+" when the show-pos flag is
// set, otherwise " " when the sign-col flag is set. Every other case,
// including all other conversion characters, yields an empty view.
string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) {
  const auto c = conv.conversion_char();
  const bool is_signed_decimal = c == FormatConversionCharInternal::d ||
                                 c == FormatConversionCharInternal::i;
  if (!is_signed_decimal) return {};

  if (neg) return "-";
  if (conv.has_show_pos_flag()) return "+";
  if (conv.has_sign_col_flag()) return " ";
  return {};
}
// Writes the single character `v` to `sink`, padded with spaces up to the
// requested width. Padding goes after the character when the left flag is
// set and before it otherwise. Always returns true.
bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv,
                     FormatSinkImpl *sink) {
  // A negative width means "no width requested".
  const auto width = conv.width();
  size_t padding = width >= 0 ? static_cast<size_t>(width) : 0;

  // The character itself occupies one column of the field.
  ReducePadding(1, &padding);

  if (conv.has_left_flag()) {
    sink->Append(1, v);
    sink->Append(padding, ' ');
  } else {
    sink->Append(padding, ' ');
    sink->Append(1, v);
  }
  return true;
}
bool ConvertIntImplInnerSlow(const IntDigits &as_digits,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
// Print as a sequence of Substrings:
// [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces]
size_t fill = 0;
if (conv.width() >= 0) fill = conv.width();
string_view formatted = as_digits.without_neg_or_zero();
ReducePadding(formatted, &fill);
string_view sign = SignColumn(as_digits.is_negative(), conv);
ReducePadding(sign, &fill);
string_view base_indicator = BaseIndicator(as_digits, conv);
ReducePadding(base_indicator, &fill);
int precision = conv.precision();
bool precision_specified = precision >= 0;
if (!precision_specified)
precision = 1;
if (conv.has_alt_flag() &&
conv.conversion_char() == FormatConversionCharInternal::o) {
// From POSIX description of the '#' (alt) flag:
// "For o conversion, it increases the precision (if necessary) to
// force the first digit of the result to be zero."
if (formatted.empty() || *formatted.begin() != '0') {
int needed = static_cast<int>(formatted.size()) + 1;
precision = std::max(precision, needed);
}
}
size_t num_zeroes = Excess(formatted.size(), precision);
ReducePadding(num_zeroes, &fill);
size_t num_left_spaces = !conv.has_left_flag() ? fill : 0;
size_t num_right_spaces = conv.has_left_flag() ? fill : 0;
// From POSIX description of the '0' (zero) flag:
// "For d, i, o, u, x, and X conversion specifiers, if a precision
// is specified, the '0' flag is ignored."
if (!precision_specified && conv.has_zero_flag()) {
num_zeroes += num_left_spaces;
num_left_spaces = 0;
}
sink->Append(num_left_spaces, ' ');
sink->Append(sign);
sink->Append(base_indicator);
sink->Append(num_zeroes, '0');
sink->Append(formatted);
sink->Append(num_right_spaces, ' ');
return true;
}
// Formats the integral value `v` according to `conv` and appends the result to
// `sink`.
//
// The conversion character selects the representation: 'c' prints a single
// character, 'o'/'x'/'X'/'u' print the value reinterpreted as the matching
// unsigned type, 'd'/'i' print it as a (possibly signed) decimal, and the
// floating-point characters convert `v` to double and defer to
// ConvertFloatImpl(). Any other conversion character is assumed not to reach
// this function.
template <typename T>
bool ConvertIntArg(T v, const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
using U = typename MakeUnsigned<T>::type;
IntDigits as_digits;
// This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes
// it to complain about a switch/case type mismatch, even though both are
// FormatConversionChar. Likely this is because at this point
// FormatConversionChar is declared, but not defined.
switch (static_cast<uint8_t>(conv.conversion_char())) {
case static_cast<uint8_t>(FormatConversionCharInternal::c):
return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink);
case static_cast<uint8_t>(FormatConversionCharInternal::o):
as_digits.PrintAsOct(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::x):
as_digits.PrintAsHexLower(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::X):
as_digits.PrintAsHexUpper(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::u):
as_digits.PrintAsDec(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::d):
case static_cast<uint8_t>(FormatConversionCharInternal::i):
as_digits.PrintAsDec(v);
break;
case static_cast<uint8_t>(FormatConversionCharInternal::a):
case static_cast<uint8_t>(FormatConversionCharInternal::e):
case static_cast<uint8_t>(FormatConversionCharInternal::f):
case static_cast<uint8_t>(FormatConversionCharInternal::g):
case static_cast<uint8_t>(FormatConversionCharInternal::A):
case static_cast<uint8_t>(FormatConversionCharInternal::E):
case static_cast<uint8_t>(FormatConversionCharInternal::F):
case static_cast<uint8_t>(FormatConversionCharInternal::G):
return ConvertFloatImpl(static_cast<double>(v), conv, sink);
default:
ABSL_INTERNAL_ASSUME(false);
}
// Fast path: a basic conversion spec needs no padding or prefix handling, so
// the printed digits (including any '-' sign) can be emitted as they are.
if (conv.is_basic()) {
sink->Append(as_digits.with_neg_and_zero());
return true;
}
return ConvertIntImplInnerSlow(as_digits, conv, sink);
}
// Formats the floating-point value `v` via ConvertFloatImpl(). Returns false
// without writing anything when the conversion character is not a
// floating-point one; otherwise returns whatever ConvertFloatImpl() reports.
template <typename T>
bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv,
                     FormatSinkImpl *sink) {
  if (!FormatConversionCharIsFloat(conv.conversion_char())) {
    return false;
  }
  return ConvertFloatImpl(v, conv, sink);
}
// Appends the string `v` to `sink`. A basic conversion spec copies the text
// verbatim and reports success; anything else is delegated to
// FormatSinkImpl::PutPaddedString() together with the width, precision and
// left-justification flag, and its result is returned.
inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
                             FormatSinkImpl *sink) {
  if (!conv.is_basic()) {
    return sink->PutPaddedString(v, conv.width(), conv.precision(),
                                 conv.has_left_flag());
  }
  sink->Append(v);
  return true;
}
} // namespace
// ==================== Strings ====================
// Both overloads forward to ConvertStringArg() and wrap its boolean outcome
// in the result type.
StringConvertResult FormatConvertImpl(const std::string &v,
                                      const FormatConversionSpecImpl conv,
                                      FormatSinkImpl *sink) {
  const bool converted = ConvertStringArg(v, conv, sink);
  return {converted};
}

StringConvertResult FormatConvertImpl(string_view v,
                                      const FormatConversionSpecImpl conv,
                                      FormatSinkImpl *sink) {
  const bool converted = ConvertStringArg(v, conv, sink);
  return {converted};
}
// C strings. Accepts both %s and %p: %p prints the pointer value itself,
// anything else prints the pointed-to characters. A null pointer is treated
// as an empty string.
ArgConvertResult<FormatConversionCharSetUnion(
    FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv,
                  FormatSinkImpl *sink) {
  if (conv.conversion_char() == FormatConversionCharInternal::p) {
    return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
  }

  size_t len = 0;
  if (v != nullptr) {
    const int precision = conv.precision();
    if (precision < 0) {
      // No precision: the string extends to its NUL terminator.
      len = std::strlen(v);
    } else {
      // If precision is set, we look for the NUL-terminator on the valid
      // range only, so at most `precision` characters are examined.
      len = std::find(v, v + precision, '\0') - v;
    }
  }
  return {ConvertStringArg(string_view(v, len), conv, sink)};
}
// ==================== Raw pointers ====================
// A null pointer prints the fixed text "(nil)". Any other pointer is printed
// as lowercase hexadecimal through the general integer formatter.
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
    VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
  if (v.value != 0) {
    IntDigits digits;
    digits.PrintAsHexLower(v.value);
    return {ConvertIntImplInnerSlow(digits, conv, sink)};
  }
  sink->Append("(nil)");
  return {true};
}
// ==================== Floats ====================
// Each overload forwards to ConvertFloatArg() and wraps its boolean outcome
// in the result type.
FloatingConvertResult FormatConvertImpl(float v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertFloatArg(v, conv, sink);
  return {converted};
}

FloatingConvertResult FormatConvertImpl(double v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertFloatArg(v, conv, sink);
  return {converted};
}

FloatingConvertResult FormatConvertImpl(long double v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertFloatArg(v, conv, sink);
  return {converted};
}
// ==================== Chars ====================
// Character types are formatted through the integer path; each overload
// forwards to ConvertIntArg() and wraps its boolean outcome.
IntegralConvertResult FormatConvertImpl(char v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(signed char v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned char v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}
// ==================== Ints ====================
// One overload per built-in integer type plus the 128-bit Abseil types. Each
// forwards to ConvertIntArg() and wraps its boolean outcome.
IntegralConvertResult FormatConvertImpl(short v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned short v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(int v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(long long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(unsigned long long v,  // NOLINT
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(absl::int128 v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}

IntegralConvertResult FormatConvertImpl(absl::uint128 v,
                                        const FormatConversionSpecImpl conv,
                                        FormatSinkImpl *sink) {
  const bool converted = ConvertIntArg(v, conv, sink);
  return {converted};
}
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_();
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,474 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#include <string.h>
#include <wchar.h>
#include <cstdio>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class Cord;
class FormatCountCapture;
class FormatSink;
namespace str_format_internal {
class FormatConversionSpec;
// Detection trait: derives from std::true_type when the unqualified call
// `AbslFormatConvert(const T&, const FormatConversionSpec&, FormatSink*)` is a
// well-formed expression, and from std::false_type otherwise.
// Primary template: used when the partial specialization below is discarded.
template <typename T, typename = void>
struct HasUserDefinedConvert : std::false_type {};
// Specialization selected only if the AbslFormatConvert call inside decltype
// compiles; otherwise substitution fails and the primary template is used.
template <typename T>
struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert(
std::declval<const T&>(),
std::declval<const FormatConversionSpec&>(),
std::declval<FormatSink*>()))>>
: std::true_type {};
template <typename T>
class StreamedWrapper;
// If 'v' can be converted (in the printf sense) according to 'conv',
// then convert it, appending to `sink` and return `true`.
// Otherwise fail and return `false`.
// Raw pointers.
// Holds a pointer as an integer (`uintptr_t`) so that pointers of any type can
// share a single representation.
struct VoidPtr {
// Defaulted with no member initializer: default-initialization leaves `value`
// indeterminate.
VoidPtr() = default;
// Implicit conversion from any `T*` for which
// `reinterpret_cast<uintptr_t>(T*)` is well-formed (the defaulted non-type
// template parameter removes the constructor otherwise). A null pointer is
// stored as 0.
template <typename T,
decltype(reinterpret_cast<uintptr_t>(std::declval<T*>())) = 0>
VoidPtr(T* ptr) // NOLINT
: value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {}
// The pointer value as an integer.
uintptr_t value;
};
// Return type of the FormatConvertImpl overloads. The template argument `C`
// carries a FormatConversionCharSet in the type itself (see ExtractCharSet);
// the only run-time state is `value`.
template <FormatConversionCharSet C>
struct ArgConvertResult {
// Result flag of the conversion; overloads in this file return {true} on
// success and {false} on failure.
bool value;
};
// Returns the FormatConversionCharSet encoded in the type of an
// ArgConvertResult. The argument's value is ignored; only its type matters,
// which lets the function be evaluated in constant expressions.
template <FormatConversionCharSet C>
constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) {
return C;
}
using StringConvertResult =
ArgConvertResult<FormatConversionCharSetInternal::s>;
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Strings.
StringConvertResult FormatConvertImpl(const std::string& v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
StringConvertResult FormatConvertImpl(string_view v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// String conversion for absl::Cord.
//
// The overload is a template constrained to `AbslCord == absl::Cord` so that
// its body is only instantiated where Cord is a complete type.
//
// Emits at most `conv.precision()` characters of `value` (all of them when the
// precision is negative), padded with spaces up to `conv.width()` characters.
// Padding goes on the right when the left flag is set, on the left otherwise.
// Always returns {true}.
template <class AbslCord, typename std::enable_if<std::is_same<
                              AbslCord, absl::Cord>::value>::type* = nullptr>
StringConvertResult FormatConvertImpl(const AbslCord& value,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* sink) {
  // Number of characters still to be copied from the cord.
  size_t chars_left = value.size();
  const int max_chars = conv.precision();
  if (max_chars >= 0) {
    chars_left = (std::min)(chars_left, static_cast<size_t>(max_chars));
  }

  // A negative width means no minimum field width was requested.
  const int min_width = conv.width();
  const size_t requested_width =
      min_width >= 0 ? static_cast<size_t>(min_width) : 0;
  const size_t padding = Excess(chars_left, requested_width);
  const bool pad_on_right = conv.has_left_flag();

  if (!pad_on_right && padding > 0) sink->Append(padding, ' ');

  for (string_view chunk : value.Chunks()) {
    // substr() clamps to the chunk length, so this trims only the final chunk
    // that would otherwise exceed the remaining budget.
    chunk = chunk.substr(0, chars_left);
    chars_left -= chunk.size();
    sink->Append(chunk);
    if (chars_left == 0) break;
  }

  if (pad_on_right && padding > 0) sink->Append(padding, ' ');
  return {true};
}
using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::c,
FormatConversionCharSetInternal::kNumeric,
FormatConversionCharSetInternal::kStar)>;
using FloatingConvertResult =
ArgConvertResult<FormatConversionCharSetInternal::kFloating>;
// Floats.
FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(long double v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Chars.
IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(signed char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Ints.
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(uint128 v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Conversion for `bool`. Written as a template constrained to exactly `bool`
// so that only arguments whose deduced type is `bool` select this overload.
// The value is cast to `int` and handled by the `int` overload.
template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0>
IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return FormatConvertImpl(static_cast<int>(v), conv, sink);
}
// We provide this function to help the checker, but it is never defined.
// FormatArgImpl will use the underlying Convert functions instead.
template <typename T>
typename std::enable_if<std::is_enum<T>::value &&
!HasUserDefinedConvert<T>::value,
IntegralConvertResult>::type
FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Conversion for StreamedWrapper<T>: writes the wrapped object to a
// std::ostringstream with operator<< and then formats the resulting text with
// the std::string overload. Returns {false} if the stream is in a failed
// state after the insertion.
template <typename T>
StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* out) {
  std::ostringstream stream;
  stream << v.v_;
  if (stream.fail()) {
    return {false};
  }
  const std::string text = stream.str();
  return str_format_internal::FormatConvertImpl(text, conv, out);
}
// Implements the conversion for FormatCountCapture.
//
// FormatCountCapture is only forward-declared at this point, so the body must
// not be checked until the type is complete. Making the member function a
// template and reaching the argument through a type that depends on `T`
// postpones that check to instantiation time.
struct FormatCountCaptureHelper {
  // Stores the number of characters written to `sink` so far into the `int`
  // that `v` points at. Succeeds only for the `n` conversion character; for
  // any other conversion nothing is written and {false} is returned.
  template <class T = int>
  static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper(
      const FormatCountCapture& v, FormatConversionSpecImpl conv,
      FormatSinkImpl* sink) {
    // Same type as FormatCountCapture, but dependent on T.
    using DelayedCapture =
        absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>;

    const bool is_count_conversion =
        conv.conversion_char() ==
        str_format_internal::FormatConversionCharInternal::n;
    if (is_count_conversion) {
      const DelayedCapture& capture = v;
      *capture.p_ = static_cast<int>(sink->size());
    }
    return {is_count_conversion};
  }
};
// Conversion for FormatCountCapture. A template (with a defaulted parameter)
// so that the call into FormatCountCaptureHelper is not instantiated until
// this overload is actually used; all work is done by ConvertHelper.
template <class T = int>
ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl(
const FormatCountCapture& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return FormatCountCaptureHelper::ConvertHelper(v, conv, sink);
}
// Helper friend struct to hide implementation details from the public API of
// FormatArgImpl.
// Every member is a template on `Arg` and only touches `arg.data_` and
// `arg.dispatcher_`.
struct FormatArgImplFriend {
// Asks the argument for its value as an `int`, stored in `*out`. Returns the
// dispatcher's result (false when the argument has no int conversion).
template <typename Arg>
static bool ToInt(Arg arg, int* out) {
// A value initialized FormatConversionSpecImpl has a `none` conv, which
// tells the dispatcher to run the `int` conversion.
return arg.dispatcher_(arg.data_, {}, out);
}
// Formats the argument according to `conv`, appending to `out`. Returns the
// dispatcher's result.
template <typename Arg>
static bool Convert(Arg arg, FormatConversionSpecImpl conv,
FormatSinkImpl* out) {
return arg.dispatcher_(arg.data_, conv, out);
}
// Exposes the dispatcher function pointer so tests can check which
// arguments share the same dispatcher.
template <typename Arg>
static typename Arg::Dispatcher GetVTablePtrForTest(Arg arg) {
return arg.dispatcher_;
}
};
// Returns the FormatConversionCharSet associated with `Arg`. It is read from
// the return type of the FormatConvertImpl overload that would be chosen for
// a `const Arg&`; the overload is only named inside decltype, never called.
template <typename Arg>
constexpr FormatConversionCharSet ArgumentToConv() {
return absl::str_format_internal::ExtractCharSet(
decltype(str_format_internal::FormatConvertImpl(
std::declval<const Arg&>(),
std::declval<const FormatConversionSpecImpl&>(),
std::declval<FormatSinkImpl*>())){});
}
// A type-erased handle to a format argument.
// The object consists of a small `Data` union (the value itself or a pointer
// to it) and a function pointer (`dispatcher_`) that knows the original type.
class FormatArgImpl {
private:
// Size in bytes of the inline buffer used for arguments stored by value.
enum { kInlinedSpace = 8 };
using VoidPtr = str_format_internal::VoidPtr;
// Storage for one argument: a pointer to the caller's object (plain or
// volatile-qualified) or the raw bytes of a small value.
union Data {
const void* ptr;
const volatile void* volatile_ptr;
char buf[kInlinedSpace];
};
// Type-specific entry point. `out` is untyped because Dispatch<T> interprets
// it either as an `int*` or as a `FormatSinkImpl*`, depending on the spec.
using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out);
// True for types that fit in the inline buffer and are integral, floating
// point, pointers, or VoidPtr.
template <typename T>
struct store_by_value
: std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) &&
(std::is_integral<T>::value ||
std::is_floating_point<T>::value ||
std::is_pointer<T>::value ||
std::is_same<VoidPtr, T>::value)> {};
enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue };
// Chooses how a `T` is stored: volatile types by volatile pointer, types
// accepted by store_by_value by value, everything else by pointer.
template <typename T>
struct storage_policy
: std::integral_constant<StoragePolicy,
(std::is_volatile<T>::value
? ByVolatilePointer
: (store_by_value<T>::value ? ByValue
: ByPointer))> {
};
// To reduce the number of vtables we will decay values before hand.
// Anything with a user-defined Convert will get its own vtable.
// For everything else:
// - Decay char* and char arrays into `const char*`
// - Decay any other pointer to `const void*`
// - Decay all enums to their underlying type.
// - Decay function pointers to void*.
// Primary template: picks `const char*`, then `VoidPtr`, based on
// convertibility; otherwise keeps `const T&`.
template <typename T, typename = void>
struct DecayType {
static constexpr bool kHasUserDefined =
str_format_internal::HasUserDefinedConvert<T>::value;
using type = typename std::conditional<
!kHasUserDefined && std::is_convertible<T, const char*>::value,
const char*,
typename std::conditional<!kHasUserDefined &&
std::is_convertible<T, VoidPtr>::value,
VoidPtr, const T&>::type>::type;
};
// Enums without a user-defined conversion decay to their underlying type.
template <typename T>
struct DecayType<T,
typename std::enable_if<
!str_format_internal::HasUserDefinedConvert<T>::value &&
std::is_enum<T>::value>::type> {
using type = typename std::underlying_type<T>::type;
};
public:
// Wraps `value`. The value is first converted to its decayed type `D`; the
// static_assert checks that a decayed type other than `const T&` is one that
// is stored by value. For types stored by pointer only the address of
// `value` is recorded (see Manager), so `value` must stay alive for as long
// as this FormatArgImpl is used.
template <typename T>
explicit FormatArgImpl(const T& value) {
using D = typename DecayType<T>::type;
static_assert(
std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue,
"Decayed types must be stored by value");
Init(static_cast<D>(value));
}
private:
friend struct str_format_internal::FormatArgImplFriend;
// Manager<T> writes a `T` into a Data (SetValue) and reads it back (Value),
// with one specialization per storage policy.
template <typename T, StoragePolicy = storage_policy<T>::value>
struct Manager;
// ByPointer: Data holds the address of the caller's object.
template <typename T>
struct Manager<T, ByPointer> {
static Data SetValue(const T& value) {
Data data;
data.ptr = std::addressof(value);
return data;
}
static const T& Value(Data arg) { return *static_cast<const T*>(arg.ptr); }
};
// ByVolatilePointer: same as ByPointer, but through the volatile-qualified
// union member.
template <typename T>
struct Manager<T, ByVolatilePointer> {
static Data SetValue(const T& value) {
Data data;
data.volatile_ptr = &value;
return data;
}
static const T& Value(Data arg) {
return *static_cast<const T*>(arg.volatile_ptr);
}
};
// ByValue: the object's bytes are copied into the inline buffer and copied
// back out into a fresh `T` on access.
template <typename T>
struct Manager<T, ByValue> {
static Data SetValue(const T& value) {
Data data;
memcpy(data.buf, &value, sizeof(value));
return data;
}
static T Value(Data arg) {
T value;
memcpy(&value, arg.buf, sizeof(T));
return value;
}
};
// Stores `value` and selects the Dispatch instantiation for its type.
template <typename T>
void Init(const T& value) {
data_ = Manager<T>::SetValue(value);
dispatcher_ = &Dispatch<T>;
}
// Converts `val` to `int`, clamping to the limits of `int`. The comparison is
// done in int64_t for signed T and in uint64_t otherwise.
template <typename T>
static int ToIntVal(const T& val) {
using CommonType = typename std::conditional<std::is_signed<T>::value,
int64_t, uint64_t>::type;
if (static_cast<CommonType>(val) >
static_cast<CommonType>((std::numeric_limits<int>::max)())) {
return (std::numeric_limits<int>::max)();
} else if (std::is_signed<T>::value &&
static_cast<CommonType>(val) <
static_cast<CommonType>((std::numeric_limits<int>::min)())) {
return (std::numeric_limits<int>::min)();
}
return static_cast<int>(val);
}
// ToInt is selected by tag dispatch on (is_integral<T>, is_enum<T>).
// Integral types: clamp the stored value into `*out`.
template <typename T>
static bool ToInt(Data arg, int* out, std::true_type /* is_integral */,
std::false_type) {
*out = ToIntVal(Manager<T>::Value(arg));
return true;
}
// Enum types: convert to the underlying type first, then clamp.
template <typename T>
static bool ToInt(Data arg, int* out, std::false_type,
std::true_type /* is_enum */) {
*out = ToIntVal(static_cast<typename std::underlying_type<T>::type>(
Manager<T>::Value(arg)));
return true;
}
// Any other type has no int conversion.
template <typename T>
static bool ToInt(Data, int*, std::false_type, std::false_type) {
return false;
}
// The function stored in `dispatcher_`. With a `kNone` conversion char,
// `out` is treated as an `int*` and the int conversion is run. Otherwise
// `out` is treated as a `FormatSinkImpl*`: the call fails if the conversion
// char is not in the set accepted for `T`, else FormatConvertImpl is invoked
// and its `.value` returned.
template <typename T>
static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) {
// A `none` conv indicates that we want the `int` conversion.
if (ABSL_PREDICT_FALSE(spec.conversion_char() ==
FormatConversionCharInternal::kNone)) {
return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(),
std::is_enum<T>());
}
if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(),
spec.conversion_char()))) {
return false;
}
return str_format_internal::FormatConvertImpl(
Manager<T>::Value(arg), spec,
static_cast<FormatSinkImpl*>(out))
.value;
}
Data data_;
Dispatcher dispatcher_;
};
// Expands to an explicit instantiation of FormatArgImpl::Dispatch<T>, prefixed
// by `E`. With `E` = `extern` this is an explicit instantiation declaration;
// with an empty `E` it is an explicit instantiation definition.
#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \
E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \
void*)
// Applies ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_ to every type in the list
// below, passing the macro's own arguments through as the prefix.
#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(signed char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(short, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned short, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__)
// Declares (with `extern`) the Dispatch<T> instantiations for the listed
// types, so that including this header does not instantiate them implicitly.
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern);
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_

View file

@ -0,0 +1,114 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
#include "absl/strings/internal/str_format/arg.h"
#include <ostream>
#include <string>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Fixture for the FormatArgImpl tests. It holds no state; it only supplies an
// enum type and a function returning `const char*` for use as test arguments.
class FormatArgImplTest : public ::testing::Test {
public:
// Unscoped enum used to exercise the enum path of ToInt (kBlue == 2).
enum Color { kRed, kGreen, kBlue };
// Returns a string literal as a `const char*` (as opposed to a char array).
static const char *hi() { return "hi"; }
};
// Checks FormatArgImplFriend::ToInt for integral, boolean, enum and
// non-integral arguments.
TEST_F(FormatArgImplTest, ToInt) {
int out = 0;
// Plain ints and a char are returned unchanged.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out));
EXPECT_EQ(1, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(-1), &out));
EXPECT_EQ(-1, out);
EXPECT_TRUE(
FormatArgImplFriend::ToInt(FormatArgImpl(static_cast<char>(64)), &out));
EXPECT_EQ(64, out);
// A wider type whose value fits in int is returned unchanged.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>(123456)), &out)); // NOLINT
EXPECT_EQ(123456, out);
// Values above INT_MAX are clamped to INT_MAX.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>( // NOLINT
std::numeric_limits<int>::max()) +
1),
&out));
EXPECT_EQ(std::numeric_limits<int>::max(), out);
// Values below INT_MIN are clamped to INT_MIN.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<long long>( // NOLINT
std::numeric_limits<int>::min()) -
10),
&out));
EXPECT_EQ(std::numeric_limits<int>::min(), out);
// bool converts to 0 / 1.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(false), &out));
EXPECT_EQ(0, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(true), &out));
EXPECT_EQ(1, out);
// Floating point values, pointers and strings have no int conversion.
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(2.2), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(3.2f), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<int *>(nullptr)), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out));
// Enums convert through their numeric value.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out));
EXPECT_EQ(2, out);
}
// Declared without a bound on purpose: at this point `kMyArray` has type
// "array of unknown bound of const char". The definition follows the tests.
extern const char kMyArray[];
// Char arrays of any length (including one of unknown bound) must use the
// same dispatcher as a `const char*` argument.
TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) {
const char* a = "";
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("A")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("ABC")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray)));
}
// Object pointers (including pointers to volatile) and function pointers must
// all use the same dispatcher as a `void*` argument.
TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) {
auto expected = FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<void *>(nullptr)));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<int *>(nullptr))),
expected);
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<volatile int *>(nullptr))),
expected);
// A capture-less lambda converted to a plain function pointer.
auto p = static_cast<void (*)()>([] {});
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(p)),
expected);
}
// Formats `kMyArray` with a `%s`-style spec (no flags, width and precision
// set to -1) while its array bound is still unknown to this translation unit
// position, and checks the full string is produced.
TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) {
std::string s;
FormatSinkImpl sink(&s);
FormatConversionSpecImpl conv;
FormatConversionSpecImplFriend::SetConversionChar(
FormatConversionCharInternal::s, &conv);
FormatConversionSpecImplFriend::SetFlags(Flags(), &conv);
FormatConversionSpecImplFriend::SetWidth(-1, &conv);
FormatConversionSpecImplFriend::SetPrecision(-1, &conv);
EXPECT_TRUE(
FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink));
// The sink is flushed explicitly before `s` is inspected.
sink.Flush();
EXPECT_EQ("ABCDE", s);
}
// Definition of the array declared (without a bound) above the tests.
const char kMyArray[] = "ABCDE";
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,245 @@
#include "absl/strings/internal/str_format/bind.h"
#include <cerrno>
#include <limits>
#include <sstream>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Reads the argument at 1-based `position` in `pack` as an `int` and stores
// it in `*value`. Returns false when `position` is past the end of `pack` or
// when the argument has no int conversion. `position` must be positive.
inline bool BindFromPosition(int position, int* value,
                             absl::Span<const FormatArgImpl> pack) {
  assert(position > 0);
  const size_t one_based = static_cast<size_t>(position);
  if (one_based <= pack.size()) {
    // Positions count from 1, the pack is indexed from 0.
    return FormatArgImplFriend::ToInt(pack[position - 1], value);
  }
  return false;
}
// Holds a view of the argument pack and resolves unbound conversions against
// it. Only the span is stored; the arguments themselves are not copied.
class ArgContext {
public:
explicit ArgContext(absl::Span<const FormatArgImpl> pack) : pack_(pack) {}
// Fill 'bound' with the results of applying the context's argument pack
// to the specified 'unbound'. We synthesize a BoundConversion by
// lining up a UnboundConversion with a user argument. We also
// resolve any '*' specifiers for width and precision, so after
// this call, 'bound' has all the information it needs to be formatted.
// Returns false on failure.
bool Bind(const UnboundConversion* unbound, BoundConversion* bound);
private:
// The arguments available to Bind, addressed by 1-based position.
absl::Span<const FormatArgImpl> pack_;
};
// Resolves `unbound` against the argument pack and writes the outcome to
// `bound`: the argument to format, the conversion char, the flags, and the
// width/precision (read from other arguments when given as '*').
// Returns false, leaving `bound` untouched, if the argument position or a
// width/precision position cannot be resolved.
inline bool ArgContext::Bind(const UnboundConversion* unbound,
                             BoundConversion* bound) {
  // Positions are 1-based. Comparing as size_t also rejects non-positive
  // positions, because `arg_position - 1` then converts to a huge value.
  const int arg_position = unbound->arg_position;
  if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false;
  const FormatArgImpl* const arg = &pack_[arg_position - 1];

  if (unbound->flags.basic) {
    // Basic conversion: the flags are taken as-is and width and precision are
    // both set to -1.
    FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound);
    FormatConversionSpecImplFriend::SetWidth(-1, bound);
    FormatConversionSpecImplFriend::SetPrecision(-1, bound);
  } else {
    int width = unbound->width.value();
    bool negative_width = false;
    if (unbound->width.is_from_arg()) {
      const bool width_ok =
          BindFromPosition(unbound->width.get_from_arg(), &width, pack_);
      if (!width_ok) return false;
      negative_width = width < 0;
      if (negative_width) {
        // "A negative field width is taken as a '-' flag followed by a
        // positive field width."
        // Clamp first so that negating the value cannot overflow.
        width = -std::max(width, -std::numeric_limits<int>::max());
      }
    }

    int precision = unbound->precision.value();
    if (unbound->precision.is_from_arg() &&
        !BindFromPosition(unbound->precision.get_from_arg(), &precision,
                          pack_)) {
      return false;
    }

    FormatConversionSpecImplFriend::SetWidth(width, bound);
    FormatConversionSpecImplFriend::SetPrecision(precision, bound);
    Flags flags = unbound->flags;
    if (negative_width) flags.left = true;
    FormatConversionSpecImplFriend::SetFlags(flags, bound);
  }

  FormatConversionSpecImplFriend::SetConversionChar(unbound->conv, bound);
  bound->set_arg(arg);
  return true;
}
// Adapts a `Converter` to the consumer interface used while walking a format
// string: literal text is passed straight to the converter, and each
// conversion is first bound to its argument and then handed to the converter.
template <typename Converter>
class ConverterConsumer {
 public:
  ConverterConsumer(Converter converter, absl::Span<const FormatArgImpl> pack)
      : converter_(converter), arg_context_(pack) {}

  // Forwards literal text to the converter. Always returns true.
  bool Append(string_view s) {
    converter_.Append(s);
    return true;
  }

  // Binds `conv` against the argument pack and, on success, lets the
  // converter format it. Returns false if binding or conversion fails.
  bool ConvertOne(const UnboundConversion& conv, string_view conv_string) {
    BoundConversion bound;
    const bool is_bound = arg_context_.Bind(&conv, &bound);
    if (is_bound) {
      return converter_.ConvertOne(bound, conv_string);
    }
    return false;
  }

 private:
  Converter converter_;
  ArgContext arg_context_;
};
template <typename Converter>
bool ConvertAll(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args, Converter converter) {
if (format.has_parsed_conversion()) {
return format.parsed_conversion()->ProcessFormat(
ConverterConsumer<Converter>(converter, args));
} else {
return ParseFormatString(format.str(),
ConverterConsumer<Converter>(converter, args));
}
}
// Converter that performs real formatting: literal text and converted
// arguments are both appended to the sink given at construction. The sink is
// held by pointer and is not owned.
class DefaultConverter {
public:
explicit DefaultConverter(FormatSinkImpl* sink) : sink_(sink) {}
// Appends literal text to the sink.
void Append(string_view s) const { sink_->Append(s); }
// Formats the bound argument into the sink; the textual form of the
// conversion is ignored. Returns the argument's conversion result.
bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const {
return FormatArgImplFriend::Convert(*bound.arg(), bound, sink_);
}
private:
FormatSinkImpl* sink_;
};
// Converter that, instead of formatting each argument as requested, appends a
// description of the bound conversion of the form
//   {<arg formatted with %d>:<flags>[<width>][.<precision>]<conversion char>}
// Literal text is copied through unchanged. The sink is not owned.
class SummarizingConverter {
 public:
  explicit SummarizingConverter(FormatSinkImpl* sink) : sink_(sink) {}

  // Appends literal text to the sink.
  void Append(string_view s) const { sink_->Append(s); }

  // Appends the summary of `bound` to the sink. Always returns true.
  bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const {
    UntypedFormatSpecImpl spec("%d");
    std::ostringstream summary;
    summary << "{";
    summary << Streamable(spec, {*bound.arg()});
    summary << ":";
    summary << FormatConversionSpecImplFriend::FlagsToString(bound);
    // Width and precision are printed only when non-negative.
    if (bound.width() >= 0) {
      summary << bound.width();
    }
    if (bound.precision() >= 0) {
      summary << "." << bound.precision();
    }
    summary << bound.conversion_char();
    summary << "}";
    Append(summary.str());
    return true;
  }

 private:
  FormatSinkImpl* sink_;
};
} // namespace
// Binds `props` against `pack`, writing the result to `bound`. Thin wrapper
// that builds a temporary ArgContext over `pack` and calls its Bind();
// returns Bind()'s result.
bool BindWithPack(const UnboundConversion* props,
absl::Span<const FormatArgImpl> pack,
BoundConversion* bound) {
return ArgContext(pack).Bind(props, bound);
}
std::string Summarize(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
typedef SummarizingConverter Converter;
std::string out;
{
// inner block to destroy sink before returning out. It ensures a last
// flush.
FormatSinkImpl sink(&out);
if (!ConvertAll(format, args, Converter(&sink))) {
return "";
}
}
return out;
}
// Formats `args` according to `format`, writing the output to `raw_sink`
// through a FormatSinkImpl. Returns false if the format cannot be applied to
// the arguments.
bool FormatUntyped(FormatRawSinkImpl raw_sink,
                   const UntypedFormatSpecImpl format,
                   absl::Span<const FormatArgImpl> args) {
  FormatSinkImpl sink(raw_sink);
  return ConvertAll(format, args, DefaultConverter(&sink));
}
std::ostream& Streamable::Print(std::ostream& os) const {
if (!FormatUntyped(&os, format_, args_)) os.setstate(std::ios::failbit);
return os;
}
std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
size_t orig = out->size();
if (ABSL_PREDICT_FALSE(!FormatUntyped(out, format, args))) {
out->erase(orig);
}
return *out;
}
std::string FormatPack(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
std::string out;
if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) {
out.clear();
}
return out;
}
// Formats `args` into the stdio stream `output`.
// Returns the number of characters written, or -1 with `errno` set:
//   EINVAL        - the format could not be applied to the arguments;
//   sink.error()  - the error recorded by the file sink;
//   EFBIG         - the character count does not fit in an `int`.
int FprintF(std::FILE* output, const UntypedFormatSpecImpl format,
            absl::Span<const FormatArgImpl> args) {
  FILERawSink sink(output);

  // Determine the first failure, in the same priority order as listed above.
  int failure = 0;
  if (!FormatUntyped(&sink, format, args)) {
    failure = EINVAL;
  } else if (sink.error()) {
    failure = sink.error();
  } else if (sink.count() > std::numeric_limits<int>::max()) {
    failure = EFBIG;
  }

  if (failure != 0) {
    errno = failure;
    return -1;
  }
  return static_cast<int>(sink.count());
}
// Formats `args` into the buffer `output` of `size` bytes.
//
// One byte of the buffer is reserved for the terminating NUL, which is
// written whenever `size` is non-zero. The terminator is placed after the
// last character that fit; the index is capped at `size - 1` because the
// sink's total can exceed what the buffer could hold.
//
// Returns the sink's total character count, or -1 with `errno` set:
//   EINVAL    - the format could not be applied to the arguments;
//   EOVERFLOW - the total does not fit in an `int` (previously the value was
//               silently truncated by the cast, which could even produce a
//               negative "length").
int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl format,
             absl::Span<const FormatArgImpl> args) {
  BufferRawSink sink(output, size ? size - 1 : 0);
  if (!FormatUntyped(&sink, format, args)) {
    errno = EINVAL;
    return -1;
  }
  const size_t total = sink.total_written();
  // Terminate the buffer even when the call goes on to report an overflow.
  if (size) output[std::min(total, size - 1)] = 0;
  if (total > static_cast<size_t>(std::numeric_limits<int>::max())) {
    errno = EOVERFLOW;
    return -1;
  }
  return static_cast<int>(total);
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,202 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
#include <array>
#include <cstdio>
#include <sstream>
#include <string>
#include "absl/base/port.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/parser.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class UntypedFormatSpec;
namespace str_format_internal {
// A conversion spec together with a pointer to the argument it applies to.
// The argument is referenced, not owned.
class BoundConversion : public FormatConversionSpecImpl {
public:
const FormatArgImpl* arg() const { return arg_; }
void set_arg(const FormatArgImpl* a) { arg_ = a; }
private:
// Has no initializer: the value is indeterminate until set_arg() is called.
const FormatArgImpl* arg_;
};
// This is the type-erased class that the implementation uses.
// It refers to one of two things without owning either:
//  - a character range (`data_` points at the characters, `size_` is their
//    count), or
//  - a ParsedFormatBase (`data_` points at it, `size_` is the all-ones
//    sentinel `~size_t{}`).
class UntypedFormatSpecImpl {
public:
UntypedFormatSpecImpl() = delete;
// Refers to the characters of `s`; they are not copied.
explicit UntypedFormatSpecImpl(string_view s)
: data_(s.data()), size_(s.size()) {}
// Refers to a pre-parsed format; marked by the sentinel size.
explicit UntypedFormatSpecImpl(
const str_format_internal::ParsedFormatBase* pc)
: data_(pc), size_(~size_t{}) {}
// True if this object was built from a ParsedFormatBase.
bool has_parsed_conversion() const { return size_ == ~size_t{}; }
// The format string. Only valid when !has_parsed_conversion().
string_view str() const {
assert(!has_parsed_conversion());
return string_view(static_cast<const char*>(data_), size_);
}
// The pre-parsed format. Only valid when has_parsed_conversion().
const str_format_internal::ParsedFormatBase* parsed_conversion() const {
assert(has_parsed_conversion());
return static_cast<const str_format_internal::ParsedFormatBase*>(data_);
}
// Returns the `spec_` member of `s`.
template <typename T>
static const UntypedFormatSpecImpl& Extract(const T& s) {
return s.spec_;
}
private:
const void* data_;
size_t size_;
};
// Identity metafunction: `type` is always `T`. The extra (unused) pack makes
// the result a dependent type inside a template, so that `T` only has to be
// complete when the enclosing template is instantiated.
template <typename T, FormatConversionCharSet...>
struct MakeDependent {
using type = T;
};
// Implicitly convertible from `const char*`, `string_view`, and the
// `ExtendedParsedFormat` type. This abstraction allows all format functions to
// operate on any of them without providing too many overloads.
// `Args` lists, per argument, the set of conversion characters that argument
// accepts. The base class is UntypedFormatSpec, named through MakeDependent.
template <FormatConversionCharSet... Args>
class FormatSpecTemplate
: public MakeDependent<UntypedFormatSpec, Args...>::type {
using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type;
public:
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Honeypot overload for when the string is not constexpr.
// We use the 'unavailable' attribute to give a better compiler error than
// just 'method is deleted'.
FormatSpecTemplate(...) // NOLINT
__attribute__((unavailable("Format string is not constexpr.")));
// Honeypot overload for when the format is constexpr and invalid.
// We use the 'unavailable' attribute to give a better compiler error than
// just 'method is deleted'.
// To avoid checking the format twice, we just check that the format is
// constexpr. If it is valid, then the overload below will kick in.
// We add the template here to make this overload have lower priority.
template <typename = void>
FormatSpecTemplate(const char* s) // NOLINT
__attribute__((
enable_if(str_format_internal::EnsureConstexpr(s), "constexpr trap"),
unavailable(
"Format specified does not match the arguments passed.")));
// Same trap for string_view; here the diagnostic comes from a static_assert
// that fires when the body is instantiated.
template <typename T = void>
FormatSpecTemplate(string_view s) // NOLINT
__attribute__((enable_if(str_format_internal::EnsureConstexpr(s),
"constexpr trap"))) {
static_assert(sizeof(T*) == 0,
"Format specified does not match the arguments passed.");
}
// Good format overload.
// Enabled only when ValidFormatImpl<Args...>(s) holds.
FormatSpecTemplate(const char* s) // NOLINT
__attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap")))
: Base(s) {}
FormatSpecTemplate(string_view s) // NOLINT
__attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap")))
: Base(s) {}
#else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Without the checker the format string is accepted as-is.
FormatSpecTemplate(const char* s) : Base(s) {} // NOLINT
FormatSpecTemplate(string_view s) : Base(s) {} // NOLINT
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Accepts a pre-parsed format whose conversion sets `C...` have the same
// count as `Args...` and for which Contains(Args, C) holds pairwise.
template <FormatConversionCharSet... C,
typename = typename std::enable_if<
AllOf(sizeof...(C) == sizeof...(Args), Contains(Args,
C)...)>::type>
FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT
: Base(&pc) {}
};
class Streamable {
public:
Streamable(const UntypedFormatSpecImpl& format,
absl::Span<const FormatArgImpl> args)
: format_(format) {
if (args.size() <= ABSL_ARRAYSIZE(few_args_)) {
for (size_t i = 0; i < args.size(); ++i) {
few_args_[i] = args[i];
}
args_ = absl::MakeSpan(few_args_, args.size());
} else {
many_args_.assign(args.begin(), args.end());
args_ = many_args_;
}
}
std::ostream& Print(std::ostream& os) const;
friend std::ostream& operator<<(std::ostream& os, const Streamable& l) {
return l.Print(os);
}
private:
const UntypedFormatSpecImpl& format_;
absl::Span<const FormatArgImpl> args_;
// if args_.size() is 4 or less:
FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0),
FormatArgImpl(0), FormatArgImpl(0)};
// if args_.size() is more than 4:
std::vector<FormatArgImpl> many_args_;
};
// for testing
std::string Summarize(UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
bool BindWithPack(const UnboundConversion* props,
absl::Span<const FormatArgImpl> pack, BoundConversion* bound);
bool FormatUntyped(FormatRawSinkImpl raw_sink,
UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
std::string FormatPack(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
int FprintF(std::FILE* output, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Returned by Streamed(v). Converts via '%s' to the std::string created
// by std::ostream << v.
// The wrapped object is held by reference, not copied, so it must outlive the
// wrapper.
template <typename T>
class StreamedWrapper {
public:
explicit StreamedWrapper(const T& v) : v_(v) { }
private:
// The conversion function is a friend so that it can read `v_`.
template <typename S>
friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl(
const StreamedWrapper<S>& v, FormatConversionSpecImpl conv,
FormatSinkImpl* out);
const T& v_;
};
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_

View file

@ -0,0 +1,143 @@
#include "absl/strings/internal/str_format/bind.h"
#include <string.h>
#include <limits>
#include "gtest/gtest.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Fixture for the binding tests.
class FormatBindTest : public ::testing::Test {
public:
// Parses `s` as a single conversion (the text that follows a '%') into
// `props`. Returns true only if ConsumeUnboundConversion consumed the whole
// string, i.e. its return value points at the end of `s`.
bool Extract(const char *s, UnboundConversion *props, int *next) const {
return ConsumeUnboundConversion(s, s + strlen(s), props, next) ==
s + strlen(s);
}
};
// Table-driven test: each row is parsed with Extract() and then bound against
// a fixed pack of four int arguments (10, 20, 30, 40).
TEST_F(FormatBindTest, BindSingle) {
struct Expectation {
int line;  // __LINE__ of the row, reported through SCOPED_TRACE.
const char *fmt;  // Conversion text, without the leading '%'.
// Number of phases expected to succeed: 0 = parsing fails, 1 = parsing
// succeeds but binding fails, 2 = both succeed.
int ok_phases;
const FormatArgImpl *arg;  // Expected bound argument (checked if non-null).
int width;  // Expected bound width.
int precision;  // Expected bound precision.
int next_arg;  // Not inspected by the checking loop below.
};
// Expected width/precision when none is specified.
const int no = -1;
const int ia[] = { 10, 20, 30, 40};
const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]),
FormatArgImpl(ia[2]), FormatArgImpl(ia[3])};
// Rows for failing cases deliberately leave the trailing fields out.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
const Expectation kExpect[] = {
{__LINE__, "d", 2, &args[0], no, no, 2},
{__LINE__, "4d", 2, &args[0], 4, no, 2},
{__LINE__, ".5d", 2, &args[0], no, 5, 2},
{__LINE__, "4.5d", 2, &args[0], 4, 5, 2},
{__LINE__, "*d", 2, &args[1], 10, no, 3},
{__LINE__, ".*d", 2, &args[1], no, 10, 3},
{__LINE__, "*.*d", 2, &args[2], 10, 20, 4},
{__LINE__, "1$d", 2, &args[0], no, no, 0},
{__LINE__, "2$d", 2, &args[1], no, no, 0},
{__LINE__, "3$d", 2, &args[2], no, no, 0},
{__LINE__, "4$d", 2, &args[3], no, no, 0},
{__LINE__, "2$*1$d", 2, &args[1], 10, no, 0},
{__LINE__, "2$*2$d", 2, &args[1], 20, no, 0},
{__LINE__, "2$*3$d", 2, &args[1], 30, no, 0},
{__LINE__, "2$.*1$d", 2, &args[1], no, 10, 0},
{__LINE__, "2$.*2$d", 2, &args[1], no, 20, 0},
{__LINE__, "2$.*3$d", 2, &args[1], no, 30, 0},
{__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0},
{__LINE__, "2$*2$.*2$d", 2, &args[1], 20, 20, 0},
{__LINE__, "2$*1$.*3$d", 2, &args[1], 10, 30, 0},
{__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0},
{__LINE__, "1$*d", 0}, // indexed, then positional
{__LINE__, "*2$d", 0}, // positional, then indexed
{__LINE__, "6$d", 1}, // arg position out of bounds
{__LINE__, "1$6$d", 0}, // width position incorrectly specified
{__LINE__, "1$.6$d", 0}, // precision position incorrectly specified
{__LINE__, "1$*6$d", 1}, // width position out of bounds
{__LINE__, "1$.*6$d", 1}, // precision position out of bounds
};
#pragma GCC diagnostic pop
for (const Expectation &e : kExpect) {
SCOPED_TRACE(e.line);
SCOPED_TRACE(e.fmt);
UnboundConversion props;
BoundConversion bound;
// Count how many of the two phases (parse, bind) succeed for this row.
int ok_phases = 0;
int next = 0;
if (Extract(e.fmt, &props, &next)) {
++ok_phases;
if (BindWithPack(&props, args, &bound)) {
++ok_phases;
}
}
EXPECT_EQ(e.ok_phases, ok_phases);
// `bound` is only meaningful when both phases were expected to succeed.
if (e.ok_phases < 2) continue;
if (e.arg != nullptr) {
EXPECT_EQ(e.arg, bound.arg());
}
EXPECT_EQ(e.width, bound.width());
EXPECT_EQ(e.precision, bound.precision());
}
}
// Regression test: a '*' width taken from an argument equal to INT_MIN must be
// bound as INT_MAX, and the converted value must be the argument after it.
TEST_F(FormatBindTest, WidthUnderflowRegression) {
  const int values[] = {std::numeric_limits<int>::min(), 17};
  const FormatArgImpl args[] = {FormatArgImpl(values[0]),
                                FormatArgImpl(values[1])};
  UnboundConversion props;
  BoundConversion bound;
  int next = 0;
  ASSERT_TRUE(Extract("*d", &props, &next));
  ASSERT_TRUE(BindWithPack(&props, args, &bound));
  EXPECT_EQ(bound.width(), std::numeric_limits<int>::max());
  EXPECT_EQ(bound.arg(), &args[1]);
}
// Checks the textual summary produced for whole format strings, covering
// sequential arguments, '*' width/precision and positional arguments.
TEST_F(FormatBindTest, FormatPack) {
  struct Expectation {
    int line;
    const char *fmt;
    const char *summary;  // Expected result of Summarize().
  };
  const int ia[] = { 10, 20, 30, 40, -10 };
  const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]),
                                FormatArgImpl(ia[2]), FormatArgImpl(ia[3]),
                                FormatArgImpl(ia[4])};
  const Expectation kExpect[] = {
    {__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"},
    {__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"},
    {__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"},
    {__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"},
    {__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"},
    {__LINE__, "a%.*fb", "a{20:.10f}b"},
    {__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"},
    {__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"},
    {__LINE__, "a%04ldb", "a{10:04d}b"},
    {__LINE__, "a%-#04lldb", "a{10:-#04d}b"},
    {__LINE__, "a%1$*5$db", "a{10:-10d}b"},
    {__LINE__, "a%1$.*5$db", "a{10:d}b"},
  };
  for (const Expectation &expected : kExpect) {
    SCOPED_TRACE(expected.line);
    SCOPED_TRACE(expected.fmt);
    const absl::string_view fmt_view = expected.fmt;
    UntypedFormatSpecImpl format(fmt_view);
    EXPECT_EQ(expected.summary,
              str_format_internal::Summarize(format, absl::MakeSpan(args)))
        << "line:" << expected.line;
  }
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,319 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#include "absl/base/attributes.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/extension.h"
// Compile time check support for entry points.
#ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
#define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1
#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Base case: the conjunction of no conditions is true.
constexpr bool AllOf() { return true; }
// Returns true when `b` and every condition in `t...` is true. Written as a
// single return statement so it stays usable as a C++11 constexpr function.
template <typename... T>
constexpr bool AllOf(bool b, T... t) {
  return b ? AllOf(t...) : false;
}
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Returns true when `c` is one of the characters of the NUL-terminated string
// `chars`.
//
// The terminator itself never counts as a match. GetChar() yields '\0' once a
// format string is exhausted, and ConsumeAnyOf() keeps recursing for as long
// as this function returns true; treating '\0' as "contained" therefore made
// ConsumeAnyOf() recurse without bound on an exhausted format (for example a
// format that ends right after '%').
//
// Kept as a single return statement so it remains a valid C++11 constexpr
// function.
constexpr bool ContainsChar(const char* chars, char c) {
  return *chars != '\0' && (*chars == c || ContainsChar(chars + 1, c));
}
// A constexpr compatible list of Convs: a non-owning view over `count`
// FormatConversionCharSet values starting at `array`.
struct ConvList {
  const FormatConversionCharSet* array;
  int count;

  // Returns the conversion set at index `i`, or an empty set when `i` is out
  // of range.
  //
  // We do the bound check here to avoid having to do it on the callers.
  // Returning an empty FormatConversionCharSet has the same effect as
  // short circuiting because it will never match any conversion.
  //
  // Negative indices are rejected as well: ConvParser::VerifyPositional()
  // indexes with `i.value - 1`, which is -1 when the positional number is
  // missing or zero, and that must not read in front of `array`.
  constexpr FormatConversionCharSet operator[](int i) const {
    return (i >= 0 && i < count) ? array[i] : FormatConversionCharSet{};
  }

  // Returns the list without its first element; an empty list is returned
  // unchanged.
  constexpr ConvList without_front() const {
    return count != 0 ? ConvList{array + 1, count - 1} : *this;
  }
};
// Fixed-size storage for `count` conversion sets. ValidFormatImpl() creates
// one of these and hands a pointer to `list` to a ConvList.
template <size_t count>
struct ConvListT {
// Make sure the array has size > 0.
// (A zero-length array is not valid C++, so an empty pack still gets one
// slot; users pass the real element count separately.)
FormatConversionCharSet list[count ? count : 1];
};
// Bounds-checked character access: returns `str[index]`, or '\0' when `index`
// is at or past the end of `str`.
constexpr char GetChar(string_view str, size_t index) {
  return index >= str.size() ? char{} : str[index];
}
// Returns `str` with its first `len` characters removed. When `str` holds
// fewer than `len` characters the result is an empty string_view.
constexpr string_view ConsumeFront(string_view str, size_t len = 1) {
  return len > str.size()
             ? string_view()
             : string_view(str.data() + len, str.size() - len);
}
// Strips from the front of `format` every leading character for which
// ContainsChar(chars, ...) holds and returns the remainder. Implemented
// recursively, one character per step, to stay a C++11 constexpr function.
constexpr string_view ConsumeAnyOf(string_view format, const char* chars) {
  return !ContainsChar(chars, GetChar(format, 0))
             ? format
             : ConsumeAnyOf(ConsumeFront(format), chars);
}
// Returns true for the ASCII decimal digits '0' through '9'.
constexpr bool IsDigit(char c) { return '0' <= c && c <= '9'; }
// Result type of ParseDigits(): the parsed number together with the part of
// the format string that follows it.
struct Integer {
  string_view format;  // Text remaining after the digits.
  int value;           // Parsed number.

  // If the next character is a '$', returns a copy with the '$' consumed.
  // Otherwise returns an invalid positional argument: the format is left
  // untouched and the value is reset to 0.
  constexpr Integer ConsumePositionalDollar() const {
    return GetChar(format, 0) != '$' ? Integer{format, 0}
                                     : Integer{ConsumeFront(format), value};
  }
};
// Parses the run of decimal digits at the front of `format`, accumulating
// them onto `value`. Returns the accumulated number and the text after the
// digits; when `format` does not start with a digit, it is returned unchanged
// together with `value`.
constexpr Integer ParseDigits(string_view format, int value = 0) {
  return !IsDigit(GetChar(format, 0))
             ? Integer{format, value}
             : ParseDigits(ConsumeFront(format),
                           10 * value + GetChar(format, 0) - '0');
}
// Parses the "<digits>$" prefix of a positional argument and consumes both
// the digits and the '$'. Without a '$' after the digits the returned value
// is 0 (see Integer::ConsumePositionalDollar()).
constexpr Integer ParsePositional(string_view format) {
  return ParseDigits(format, 0).ConsumePositionalDollar();
}
// Parses a single conversion specifier.
// See ConvParser::Run() for post conditions.
//
// The parser is immutable: every Set*/Parse*/Verify* member is const and
// returns a new ConvParser carrying the updated state, which keeps the whole
// class usable in C++11 constant expressions.
class ConvParser {
// The members up to `public:` are private helpers.

// Returns a copy with the remaining format text replaced by `format`.
constexpr ConvParser SetFormat(string_view format) const {
return ConvParser(format, args_, error_, arg_position_, is_positional_);
}
// Returns a copy with the argument list replaced by `args`.
constexpr ConvParser SetArgs(ConvList args) const {
return ConvParser(format_, args, error_, arg_position_, is_positional_);
}
// Returns a copy whose error flag is the OR of the current flag and `error`:
// once an error has been recorded it is never cleared.
constexpr ConvParser SetError(bool error) const {
return ConvParser(format_, args_, error_ || error, arg_position_,
is_positional_);
}
// Returns a copy with the stored argument position replaced.
constexpr ConvParser SetArgPosition(int arg_position) const {
return ConvParser(format_, args_, error_, arg_position, is_positional_);
}
// Consumes the next arg and verifies that it matches `conv`.
// `error_` is set if there is no next arg or if it doesn't match `conv`.
// The match is evaluated against the list as it was before the front element
// was dropped.
constexpr ConvParser ConsumeNextArg(char conv) const {
return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv));
}
// Verify that positional argument `i.value` matches `conv`.
// `error_` is set if `i.value` is not a valid argument or if it doesn't
// match.
// `i.value` is 1-based, hence the `- 1`. An invalid positional has value 0
// and therefore produces index -1; ConvList::operator[] has to treat that as
// out of range.
constexpr ConvParser VerifyPositional(Integer i, char conv) const {
return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv));
}
// Parse the position of the arg and store it in `arg_position_`.
constexpr ConvParser ParseArgPosition(Integer arg) const {
return SetFormat(arg.format).SetArgPosition(arg.value);
}
// Consume the flags.
// Any run of the characters "-+ #0" is accepted.
constexpr ConvParser ParseFlags() const {
return SetFormat(ConsumeAnyOf(format_, "-+ #0"));
}
// Consume the width.
// If it is '*', we verify that it matches `args_`. `error_` is set if it
// doesn't match.
// Three cases: a literal number is skipped; a '*' is checked either against
// the positional argument named after it (positional mode) or against the
// next sequential argument; anything else means there is no width.
constexpr ConvParser ParseWidth() const {
return IsDigit(GetChar(format_, 0))
? SetFormat(ParseDigits(format_).format)
: GetChar(format_, 0) == '*'
? is_positional_
? VerifyPositional(
ParsePositional(ConsumeFront(format_)), '*')
: SetFormat(ConsumeFront(format_))
.ConsumeNextArg('*')
: *this;
}
// Consume the precision.
// If it is '*', we verify that it matches `args_`. `error_` is set if it
// doesn't match.
// Without a leading '.' there is no precision. A '.' followed by no digits
// is accepted, because ParseDigits() succeeds on an empty digit run.
constexpr ConvParser ParsePrecision() const {
return GetChar(format_, 0) != '.'
? *this
: GetChar(format_, 1) == '*'
? is_positional_
? VerifyPositional(
ParsePositional(ConsumeFront(format_, 2)), '*')
: SetFormat(ConsumeFront(format_, 2))
.ConsumeNextArg('*')
: SetFormat(ParseDigits(ConsumeFront(format_)).format);
}
// Consume the length characters.
// Any run of the characters "lLhjztq" is skipped; combinations are not
// validated here.
constexpr ConvParser ParseLength() const {
return SetFormat(ConsumeAnyOf(format_, "lLhjztq"));
}
// Consume the conversion character and verify that it matches `args_`.
// `error_` is set if it doesn't match.
// In positional mode the argument checked is the one recorded earlier in
// `arg_position_`; otherwise it is the next sequential argument.
constexpr ConvParser ParseConversion() const {
return is_positional_
? VerifyPositional({ConsumeFront(format_), arg_position_},
GetChar(format_, 0))
: ConsumeNextArg(GetChar(format_, 0))
.SetFormat(ConsumeFront(format_));
}
// Private constructor used by the helpers above to carry every field,
// including the error flag and the argument position, into the copy.
constexpr ConvParser(string_view format, ConvList args, bool error,
int arg_position, bool is_positional)
: format_(format),
args_(args),
error_(error),
arg_position_(arg_position),
is_positional_(is_positional) {}
public:
// Creates a parser positioned at `format` (the text following '%') with no
// error recorded and an argument position of 0.
constexpr ConvParser(string_view format, ConvList args, bool is_positional)
: format_(format),
args_(args),
error_(false),
arg_position_(0),
is_positional_(is_positional) {}
// Consume the whole conversion specifier.
// `format()` will be set to the character after the conversion character.
// `error()` will be set if any of the arguments do not match.
// The stages run in the order: [position] flags, width, precision, length,
// conversion character.
constexpr ConvParser Run() const {
return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this)
.ParseFlags()
.ParseWidth()
.ParsePrecision()
.ParseLength()
.ParseConversion();
}
// Read-only accessors for the parser state.
constexpr string_view format() const { return format_; }
constexpr ConvList args() const { return args_; }
constexpr bool error() const { return error_; }
constexpr bool is_positional() const { return is_positional_; }
private:
// Format text that has not been consumed yet.
string_view format_;
// Current list of arguments. If we are not in positional mode we will consume
// from the front.
ConvList args_;
// True once any stage has detected a mismatch.
bool error_;
// Holds the argument position of the conversion character, if we are in
// positional mode. Otherwise, it is unspecified.
int arg_position_;
// Whether we are in positional mode.
// It changes the behavior of '*' and where to find the converted argument.
bool is_positional_;
};
// Parses a whole format expression.
// See FormatParser::Run().
class FormatParser {
static constexpr bool FoundPercent(string_view format) {
return format.empty() ||
(GetChar(format, 0) == '%' && GetChar(format, 1) != '%');
}
// We use an inner function to increase the recursion limit.
// The inner function consumes up to `limit` characters on every run.
// This increases the limit from 512 to ~512*limit.
static constexpr string_view ConsumeNonPercentInner(string_view format,
int limit = 20) {
return FoundPercent(format) || !limit
? format
: ConsumeNonPercentInner(
ConsumeFront(format, GetChar(format, 0) == '%' &&
GetChar(format, 1) == '%'
? 2
: 1),
limit - 1);
}
// Consume characters until the next conversion spec %.
// It skips %%.
static constexpr string_view ConsumeNonPercent(string_view format) {
return FoundPercent(format)
? format
: ConsumeNonPercent(ConsumeNonPercentInner(format));
}
static constexpr bool IsPositional(string_view format) {
return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format))
: GetChar(format, 0) == '$';
}
constexpr bool RunImpl(bool is_positional) const {
// In non-positional mode we require all arguments to be consumed.
// In positional mode just reaching the end of the format without errors is
// enough.
return (format_.empty() && (is_positional || args_.count == 0)) ||
(!format_.empty() &&
ValidateArg(
ConvParser(ConsumeFront(format_), args_, is_positional).Run()));
}
constexpr bool ValidateArg(ConvParser conv) const {
return !conv.error() && FormatParser(conv.format(), conv.args())
.RunImpl(conv.is_positional());
}
public:
constexpr FormatParser(string_view format, ConvList args)
: format_(ConsumeNonPercent(format)), args_(args) {}
// Runs the parser for `format` and `args`.
// It verifies that the format is valid and that all conversion specifiers
// match the arguments passed.
// In non-positional mode it also verfies that all arguments are consumed.
constexpr bool Run() const {
return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_)));
}
private:
string_view format_;
// Current list of arguments.
// If we are not in positional mode we will consume from the front and will
// have to be empty in the end.
ConvList args_;
};
// Entry point of the compile-time checker: returns true when `format` is a
// valid format string for a list of arguments whose permitted conversions are
// given, in order, by `C...`.
//
// The conversion sets are stored in a temporary ConvListT and viewed through
// a ConvList. The temporary lives until the end of the return expression,
// which covers the complete FormatParser::Run() call that reads it.
template <FormatConversionCharSet... C>
constexpr bool ValidFormatImpl(string_view format) {
return FormatParser(format,
{ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)})
.Run();
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_

View file

@ -0,0 +1,156 @@
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Renders a conversion set as text: one character per conversion contained in
// `conv`, in the order produced by ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_,
// followed by '*' when the set contains kStar.
std::string ConvToString(FormatConversionCharSet conv) {
  std::string names;
#define CONV_SET_CASE(c)                                    \
  if (Contains(conv, FormatConversionCharSetInternal::c)) { \
    names.append(#c);                                       \
  }
  ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, )
#undef CONV_SET_CASE
  if (Contains(conv, FormatConversionCharSetInternal::kStar)) {
    names.push_back('*');
  }
  return names;
}
// Pins the set of conversions that each argument type is allowed to use.
TEST(StrFormatChecker, ArgumentToConv) {
  // Strings only support %s; C strings can also be printed as pointers.
  EXPECT_EQ(ConvToString(ArgumentToConv<std::string>()), "s");
  EXPECT_EQ(ConvToString(ArgumentToConv<const char*>()), "sp");

  // Floating point values take the floating point conversions only.
  EXPECT_EQ(ConvToString(ArgumentToConv<double>()), "fFeEgGaA");

  // Ints take the integral and floating point conversions and may also be
  // used as a '*' width/precision.
  EXPECT_EQ(ConvToString(ArgumentToConv<int>()), "cdiouxXfFeEgGaA*");

  // Any other pointer only supports %p.
  EXPECT_EQ(ConvToString(ArgumentToConv<std::string*>()), "p");
}
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// One checker test case: the verdict computed for a format string together
// with the format string itself (kept for failure messages).
struct Case {
// Value returned by ValidFormatImpl() for `format`.
bool result;
// The format string that was checked.
const char* format;
};
// Runs the compile-time checker on `format` for arguments of types `Args...`
// and pairs the verdict with the format string.
template <typename... Args>
constexpr Case ValidFormat(const char* format) {
  return Case{ValidFormatImpl<ArgumentToConv<Args>()...>(format), format};
}
// Checks the verdict of the format checker for accepted and rejected formats.
TEST(StrFormatChecker, ValidFormat) {
  // We want to make sure these expressions are constexpr and they have the
  // expected value.
  // If they are not constexpr the attribute will just ignore them and not give
  // a compile time error.
  // Storing the results in constexpr arrays forces compile-time evaluation.
  enum e {};
  enum class e2 {};

  // Formats that must be accepted: sequential syntax first, then the
  // positional equivalents.
  constexpr Case trues[] = {
      ValidFormat<>("abc"),  //

      ValidFormat<e>("%d"),                             //
      ValidFormat<e2>("%d"),                            //
      ValidFormat<int>("%% %d"),                        //
      ValidFormat<int>("%ld"),                          //
      ValidFormat<int>("%lld"),                         //
      ValidFormat<std::string>("%s"),                   //
      ValidFormat<std::string>("%10s"),                 //
      ValidFormat<int>("%.10x"),                        //
      ValidFormat<int, int>("%*.3x"),                   //
      ValidFormat<int>("%1.d"),                         //
      ValidFormat<int>("%.d"),                          //
      ValidFormat<int, double>("%d %g"),                //
      ValidFormat<int, std::string>("%*s"),             //
      ValidFormat<int, double>("%.*f"),                 //
      ValidFormat<void (*)(), volatile int*>("%p %p"),  //
      ValidFormat<string_view, const char*, double, void*>(
          "string_view=%s const char*=%s double=%f void*=%p)"),

      ValidFormat<int>("%% %1$d"),               //
      ValidFormat<int>("%1$ld"),                 //
      ValidFormat<int>("%1$lld"),                //
      ValidFormat<std::string>("%1$s"),          //
      ValidFormat<std::string>("%1$10s"),        //
      ValidFormat<int>("%1$.10x"),               //
      ValidFormat<int>("%1$*1$.*1$d"),           //
      ValidFormat<int, int>("%1$*2$.3x"),        //
      ValidFormat<int>("%1$1.d"),                //
      ValidFormat<int>("%1$.d"),                 //
      ValidFormat<double, int>("%2$d %1$g"),     //
      ValidFormat<int, std::string>("%2$*1$s"),  //
      ValidFormat<int, double>("%2$.*1$f"),      //
      ValidFormat<void*, string_view, const char*, double>(
          "string_view=%2$s const char*=%3$s double=%4$f void*=%1$p "
          "repeat=%3$s)")};
  for (const Case &accepted : trues) {
    EXPECT_TRUE(accepted.result) << accepted.format;
  }

  // Formats that must be rejected: leftover or missing arguments, mismatched
  // conversions, malformed specifiers and mixed sequential/positional syntax.
  constexpr Case falses[] = {
      ValidFormat<int>(""),  //

      ValidFormat<e>("%s"),                  //
      ValidFormat<e2>("%s"),                 //
      ValidFormat<>("%s"),                   //
      ValidFormat<>("%r"),                   //
      ValidFormat<int>("%s"),                //
      ValidFormat<int>("%.1.d"),             //
      ValidFormat<int>("%*1d"),              //
      ValidFormat<int>("%1-d"),              //
      ValidFormat<std::string, int>("%*s"),  //
      ValidFormat<int>("%*d"),               //
      ValidFormat<std::string>("%p"),        //
      ValidFormat<int (*)(int)>("%d"),       //

      ValidFormat<>("%3$d"),                     //
      ValidFormat<>("%1$r"),                     //
      ValidFormat<int>("%1$s"),                  //
      ValidFormat<int>("%1$.1.d"),               //
      ValidFormat<int>("%1$*2$1d"),              //
      ValidFormat<int>("%1$1-d"),                //
      ValidFormat<std::string, int>("%2$*1$s"),  //
      ValidFormat<std::string>("%1$p"),

      ValidFormat<int, int>("%d %2$d"),  //
  };
  for (const Case &rejected : falses) {
    EXPECT_FALSE(rejected.result) << rejected.format;
  }
}
// Checks that the checker can process a format of roughly 12000 characters at
// compile time, i.e. that its recursion depth stays within compiler limits.
TEST(StrFormatChecker, LongFormat) {
  // Build 4000 literal characters by repeated string-literal concatenation.
#define CHARS_X_40 "1234567890123456789012345678901234567890"
#define CHARS_X_400                                                            \
  CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 \
      CHARS_X_40 CHARS_X_40 CHARS_X_40
#define CHARS_X_4000                                                      \
  CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 \
      CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400
  constexpr char long_format[] =
      CHARS_X_4000 "%d" CHARS_X_4000 "%s" CHARS_X_4000;
  // `constexpr` forces the check to run during compilation.
  constexpr bool long_format_accepted =
      ValidFormat<int, std::string>(long_format).result;
  EXPECT_TRUE(long_format_accepted);
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,865 @@
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <cctype>
#include <cmath>
#include <limits>
#include <string>
#include <thread> // NOLINT
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/str_format/bind.h"
#include "absl/types/optional.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Returns the number of elements of a built-in array. The array is taken by
// reference so that the extent is deduced; it is never read.
template <typename Element, size_t Count>
size_t ArraySize(Element (&)[Count]) {
  return Count;
}
// LengthModFor(value) returns the printf length modifier that has to precede
// the conversion character in order to print an argument of that type. Only
// the static type of the argument matters; its value is ignored.

// Floating point types.
std::string LengthModFor(float) { return std::string(); }
std::string LengthModFor(double) { return std::string(); }
std::string LengthModFor(long double) { return std::string("L"); }
// Character types.
std::string LengthModFor(char) { return std::string("hh"); }
std::string LengthModFor(signed char) { return std::string("hh"); }
std::string LengthModFor(unsigned char) { return std::string("hh"); }
// Integer types, from the narrowest to the widest.
std::string LengthModFor(short) { return std::string("h"); }  // NOLINT
std::string LengthModFor(unsigned short) { return std::string("h"); }  // NOLINT
std::string LengthModFor(int) { return std::string(); }
std::string LengthModFor(unsigned) { return std::string(); }
std::string LengthModFor(long) { return std::string("l"); }  // NOLINT
std::string LengthModFor(unsigned long) { return std::string("l"); }  // NOLINT
std::string LengthModFor(long long) { return std::string("ll"); }  // NOLINT
std::string LengthModFor(unsigned long long) { return std::string("ll"); }  // NOLINT
// Returns a printable representation of the character value `v`: the
// character itself when std::isprint() accepts it, otherwise a backslash
// followed by the low byte of `v` formatted with "%#.2x".
std::string EscCharImpl(int v) {
  // Go through unsigned char: passing a negative value other than EOF to
  // std::isprint() is undefined.
  if (std::isprint(static_cast<unsigned char>(v))) {
    return std::string(1, static_cast<char>(v));
  }
  char buf[64];
  const int n = snprintf(buf, sizeof(buf), "\\%#.2x",
                         static_cast<unsigned>(v & 0xff));
  // Compare in size_t: `n < sizeof(buf)` mixed a signed and an unsigned
  // operand. `n > 0` is tested first, so the cast never sees a negative value.
  assert(n > 0 && static_cast<size_t>(n) < sizeof(buf));
  return std::string(buf, static_cast<size_t>(n));
}
// Character overloads of Esc(): every character type is widened to int and
// escaped by EscCharImpl(), so unprintable characters show up in test traces
// as escape sequences.
std::string Esc(char v) { return EscCharImpl(static_cast<int>(v)); }
std::string Esc(signed char v) { return EscCharImpl(static_cast<int>(v)); }
std::string Esc(unsigned char v) { return EscCharImpl(static_cast<int>(v)); }
// Generic Esc(): renders `v` exactly as `std::ostream << v` would.
template <typename T>
std::string Esc(const T &v) {
  std::ostringstream stream;
  stream << v;
  return stream.str();
}
// Appends to `*dst` the text that vsnprintf() produces for `format` and `ap`.
// Nothing is appended when vsnprintf() reports an error. `ap` itself is never
// consumed: each formatting pass works on its own va_copy.
void StrAppendV(std::string *dst, const char *format, va_list ap) {
  // First try with a small fixed size buffer
  static const int kSpaceLength = 1024;
  char space[kSpaceLength];

  // It's possible for methods that use a va_list to invalidate
  // the data in it upon use.  The fix is to make a copy
  // of the structure before using it and use that copy instead.
  va_list backup_ap;
  va_copy(backup_ap, ap);
  int result = vsnprintf(space, kSpaceLength, format, backup_ap);
  va_end(backup_ap);

  if (result < 0) {
    // Just an error.
    return;
  }
  if (result < kSpaceLength) {
    // Normal case -- everything fit.
    dst->append(space, static_cast<size_t>(result));
    return;
  }

  // Increase the buffer size to the size requested by vsnprintf,
  // plus one for the closing \0. The size is computed in size_t so that
  // `result + 1` cannot overflow int, and the buffer is a std::string so that
  // it is released even if the append below throws.
  const size_t length = static_cast<size_t>(result) + 1;
  std::string buf(length, '\0');

  // Restore the va_list before we use it again
  va_copy(backup_ap, ap);
  result = vsnprintf(&buf[0], length, format, backup_ap);
  va_end(backup_ap);

  if (result >= 0 && static_cast<size_t>(result) < length) {
    // It fit
    dst->append(buf, 0, static_cast<size_t>(result));
  }
}
// printf-style convenience wrapper: appends the formatted text to `*out` by
// forwarding the variadic arguments to StrAppendV().
void StrAppend(std::string *out, const char *format, ...) {
  va_list varargs;
  va_start(varargs, format);
  StrAppendV(out, format, varargs);
  va_end(varargs);
}
// printf-style convenience wrapper: returns the formatted text as a new
// string. The tests use it to obtain the reference output of the C library.
std::string StrPrint(const char *format, ...) {
  std::string formatted;
  va_list varargs;
  va_start(varargs, format);
  StrAppendV(&formatted, format, varargs);
  va_end(varargs);
  return formatted;
}
// Empty fixture; it only groups the conversion tests under a common name.
class FormatConvertTest : public ::testing::Test { };
// Formats `str` (expected to spell "hello") with a range of %s specifiers and
// checks width, precision and left-justification handling. `T` is the
// string-like type under test.
template <typename T>
void TestStringConvert(const T& str) {
  const FormatArgImpl args[] = {FormatArgImpl(str)};
  struct Expectation {
    const char *out;
    const char *fmt;
  };
  // The padded expectations spell out the full field: a width of N always
  // produces at least N characters, padded with spaces on the left (or on the
  // right with the '-' flag). "%5.2s" therefore yields three spaces plus
  // "he"; the previous expectations had that padding collapsed to one space.
  const Expectation kExpect[] = {
    {"hello",  "%1$s"      },
    {"",       "%1$.s"     },
    {"",       "%1$.0s"    },
    {"h",      "%1$.1s"    },
    {"he",     "%1$.2s"    },
    {"hello",  "%1$.10s"   },
    {" hello", "%1$6s"     },
    {"   he",  "%1$5.2s"   },
    {"he   ",  "%1$-5.2s"  },
    {"hello ", "%1$-6.10s" },
  };
  for (const Expectation &e : kExpect) {
    UntypedFormatSpecImpl format(e.fmt);
    EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args)));
  }
}
// Runs the %s checks for every supported string-like argument type.
TEST_F(FormatConvertTest, BasicString) {
  const char *as_pointer = "hello";
  TestStringConvert("hello");  // As char array.
  TestStringConvert(as_pointer);  // As const char*.
  TestStringConvert(std::string("hello"));
  TestStringConvert(string_view("hello"));
}
// A null const char* formatted with %s must produce an empty string.
TEST_F(FormatConvertTest, NullString) {
  const char* null_text = nullptr;
  UntypedFormatSpecImpl format("%s");
  EXPECT_EQ("", FormatPack(format, {FormatArgImpl(null_text)}));
}
// The number of characters read from a C string is limited by both the
// precision and the NUL terminator.
TEST_F(FormatConvertTest, StringPrecision) {
  // We cap at the precision.
  // The argument points at a single char with no terminator after it, so
  // exactly one character may be read.
  char single = 'a';
  const char* text = &single;
  UntypedFormatSpecImpl one_char_format("%.1s");
  EXPECT_EQ("a", FormatPack(one_char_format, {FormatArgImpl(text)}));

  // We cap at the NUL-terminator.
  text = "ABC";
  UntypedFormatSpecImpl ten_char_format("%.10s");
  EXPECT_EQ("ABC", FormatPack(ten_char_format, {FormatArgImpl(text)}));
}
// Pointer formatting is implementation defined. This checks that the argument
// can be matched to `ptr`.
// A null `ptr` also matches the literal text "(nil)". Any other text is read
// back with sscanf("%p"); text that cannot be parsed aborts the test.
MATCHER_P(MatchesPointerString, ptr, "") {
  const bool is_nil_text = (arg == "(nil)");
  if (is_nil_text && ptr == nullptr) {
    return true;
  }
  void* parsed = nullptr;
  const int fields_read = sscanf(arg.c_str(), "%p", &parsed);
  if (fields_read != 1) {
    ABSL_RAW_LOG(FATAL, "Could not parse %s", arg.c_str());
  }
  return ptr == parsed;
}
// Checks %p for object, character, function and volatile pointers, both null
// and non-null, with various flag/width/precision combinations.
TEST_F(FormatConvertTest, Pointer) {
  static int x = 0;
  const int *xp = &x;
  char c = 'h';
  char *mcp = &c;
  const char *cp = "hi";
  const char *cnil = nullptr;
  const int *inil = nullptr;
  using VoidF = void (*)();
  VoidF fp = [] {}, fnil = nullptr;
  volatile char vc;  // Only its address is used; the value is never read.
  volatile char *vcp = &vc;
  volatile char *vcnil = nullptr;
  // Positions (1-based): 1 xp, 2 cp, 3 inil, 4 cnil, 5 mcp, 6 fp, 7 fnil,
  // 8 vcp, 9 vcnil.
  const FormatArgImpl args_array[] = {
      FormatArgImpl(xp),   FormatArgImpl(cp),  FormatArgImpl(inil),
      FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp),
      FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil),
  };
  auto args = absl::MakeConstSpan(args_array);

  // Right-justified (default) variants.
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.1p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%30.20p"), args),
              MatchesPointerString(&x));

  // Left-justified variants, mirroring the five checks above. The fourth one
  // used to repeat "%.20p", leaving "%-.20p" untested.
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.1p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-30.20p"), args),
              MatchesPointerString(&x));

  // const char*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%2$p"), args),
              MatchesPointerString(cp));
  // null const int*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%3$p"), args),
              MatchesPointerString(nullptr));
  // null const char*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args),
              MatchesPointerString(nullptr));
  // nonconst char*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args),
              MatchesPointerString(mcp));

  // function pointers
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args),
              MatchesPointerString(reinterpret_cast<const void*>(fp)));
  // volatile pointers
  EXPECT_THAT(
      FormatPack(UntypedFormatSpecImpl("%8$p"), args),
      MatchesPointerString(reinterpret_cast<volatile const void *>(vcp)));

  // null function pointers
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args),
              MatchesPointerString(nullptr));
  // null volatile pointers
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args),
              MatchesPointerString(nullptr));
}
// Holder for two unscoped enums used by the Enum test: one with only
// positive enumerators and one with only negative enumerators.
struct Cardinal {
// Enumerators with positive values.
enum Pos { k1 = 1, k2 = 2, k3 = 3 };
// Enumerators with negative values.
enum Neg { kM1 = -1, kM2 = -2, kM3 = -3 };
};
// Enum arguments formatted with %d print their numeric value, including
// negative values.
TEST_F(FormatConvertTest, Enum) {
  const Cardinal::Pos positive = Cardinal::k3;
  const Cardinal::Neg negative = Cardinal::kM3;
  const FormatArgImpl args[] = {FormatArgImpl(positive),
                                FormatArgImpl(negative)};
  UntypedFormatSpecImpl first_arg_format("%1$d");
  UntypedFormatSpecImpl second_arg_format("%2$d");
  EXPECT_EQ("3", FormatPack(first_arg_format, absl::MakeSpan(args)));
  EXPECT_EQ("-3", FormatPack(second_arg_format, absl::MakeSpan(args)));
}
// Type-parameterized fixture: its tests are instantiated once per integer
// type `T` listed at the point of instantiation.
template <typename T>
class TypedFormatConvertTest : public FormatConvertTest { };
// Declares the type-parameterized test suite for the fixture above.
TYPED_TEST_SUITE_P(TypedFormatConvertTest);
// Returns every subset of the printf flags "-#0+ " as a string, 32 entries in
// total. Entry `mask` holds flag number `bit` exactly when bit `bit` of
// `mask` is set, so entry 0 is the empty string and the last entry holds all
// five flags.
std::vector<std::string> AllFlagCombinations() {
  const char kFlags[] = {'-', '#', '0', '+', ' '};
  const size_t flag_count = sizeof(kFlags) / sizeof(kFlags[0]);
  std::vector<std::string> result;
  for (size_t mask = 0; mask < (1ull << flag_count); ++mask) {
    std::string flag_set;
    for (size_t bit = 0; bit < flag_count; ++bit) {
      if (mask & (1ull << bit)) {
        flag_set.push_back(kFlags[bit]);
      }
    }
    result.push_back(flag_set);
  }
  return result;
}
// For one integer type, compares FormatPack() with the C library's printf
// over every combination of value, conversion character, flag set, width and
// precision listed below.
TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) {
  typedef TypeParam T;
  typedef typename std::make_unsigned<T>::type UnsignedT;
  using remove_volatile_t = typename std::remove_volatile<T>::type;
  const T kMin = std::numeric_limits<remove_volatile_t>::min();
  const T kMax = std::numeric_limits<remove_volatile_t>::max();
  // Small values of both signs, zero, and the values at and next to the
  // limits of the type.
  const T kVals[] = {
      remove_volatile_t(1),
      remove_volatile_t(2),
      remove_volatile_t(3),
      remove_volatile_t(123),
      remove_volatile_t(-1),
      remove_volatile_t(-2),
      remove_volatile_t(-3),
      remove_volatile_t(-123),
      remove_volatile_t(0),
      kMax - remove_volatile_t(1),
      kMax,
      kMin + remove_volatile_t(1),
      kMin,
  };
  const char kConvChars[] = {'d', 'i', 'u', 'o', 'x', 'X'};
  const std::string kWid[] = {"", "4", "10"};
  const std::string kPrec[] = {"", ".", ".0", ".4", ".10"};

  const std::vector<std::string> flag_sets = AllFlagCombinations();

  for (const T val : kVals) {
    SCOPED_TRACE(Esc(val));
    const FormatArgImpl args[] = {FormatArgImpl(val)};
    for (const char conv_char : kConvChars) {
      const bool is_signed_conv = (conv_char == 'd' || conv_char == 'i');
      const bool is_unsigned_to_signed =
          !std::is_signed<T>::value && is_signed_conv;
      for (const std::string &flag_set : flag_sets) {
        // Don't consider sign-related flags '+' and ' ' when doing
        // unsigned to signed conversions.
        if (is_unsigned_to_signed &&
            flag_set.find_first_of("+ ") != std::string::npos) {
          continue;
        }
        for (const std::string &wid : kWid) {
          for (const std::string &prec : kPrec) {
            // old and new always agree up to the conversion character.
            const std::string common_prefix = "%" + flag_set + wid + prec;
            const std::string new_fmt = common_prefix + conv_char;
            std::string old_fmt = common_prefix;
            std::string old_result;
            if (is_unsigned_to_signed) {
              // don't expect agreement on unsigned formatted as signed,
              // as printf can't do that conversion properly. For those
              // cases, we do expect agreement with printf with a "%u"
              // and the unsigned equivalent of 'val'.
              UnsignedT uval = val;
              old_fmt += LengthModFor(uval);
              old_fmt += "u";
              old_result = StrPrint(old_fmt.c_str(), uval);
            } else {
              old_fmt += LengthModFor(val);
              old_fmt += conv_char;
              old_result = StrPrint(old_fmt.c_str(), val);
            }

            SCOPED_TRACE(std::string() + " old_fmt: \"" + old_fmt +
                         "\"'"
                         " new_fmt: \"" +
                         new_fmt + "\"");
            UntypedFormatSpecImpl format(new_fmt);
            EXPECT_EQ(old_result, FormatPack(format, absl::MakeSpan(args)));
          }
        }
      }
    }
  }
}
// For one integer type, checks that %c agrees with the C library's printf for
// small values of both signs, zero, and the values at and next to the limits
// of the type.
TYPED_TEST_P(TypedFormatConvertTest, Char) {
  typedef TypeParam T;
  using remove_volatile_t = typename std::remove_volatile<T>::type;
  static const T kMin = std::numeric_limits<remove_volatile_t>::min();
  static const T kMax = std::numeric_limits<remove_volatile_t>::max();
  T kVals[] = {
    remove_volatile_t(1), remove_volatile_t(2), remove_volatile_t(10),
    remove_volatile_t(-1), remove_volatile_t(-2), remove_volatile_t(-10),
    remove_volatile_t(0),
    kMin + remove_volatile_t(1), kMin,
    kMax - remove_volatile_t(1), kMax
  };
  for (const T &value : kVals) {
    const FormatArgImpl args[] = {FormatArgImpl(value)};
    UntypedFormatSpecImpl char_format("%c");
    EXPECT_EQ(StrPrint("%c", value),
              FormatPack(char_format, absl::MakeSpan(args)));
  }
}
// Registers the type-parameterized tests and instantiates them for every
// integer type of interest. The *_SUITE_P macros are used to match the
// TYPED_TEST_SUITE_P declaration of this suite; the *_CASE_P spellings are
// the deprecated names of the same macros.
REGISTER_TYPED_TEST_SUITE_P(TypedFormatConvertTest, AllIntsWithFlags, Char);
// All integer types under test, including one volatile-qualified type.
typedef ::testing::Types<
    int, unsigned, volatile int,
    short, unsigned short,
    long, unsigned long,
    long long, unsigned long long,
    signed char, unsigned char, char>
    AllIntTypes;
INSTANTIATE_TYPED_TEST_SUITE_P(TypedFormatConvertTestWithAllIntTypes,
                               TypedFormatConvertTest, AllIntTypes);
// Elements of std::vector<bool>, taken from a non-const and from a const
// vector, must format with %d like plain bools (1 for true, 0 for false).
TEST_F(FormatConvertTest, VectorBool) {
// Make sure vector<bool>'s values behave as bools.
std::vector<bool> v = {true, false};
const std::vector<bool> cv = {true, false};
// NOTE(review): the FormatArgImpl objects are built from the temporaries
// returned by operator[] inside this single expression. Keep everything in
// one full expression unless FormatArgImpl is confirmed to copy its argument.
EXPECT_EQ("1,0,1,0",
FormatPack(UntypedFormatSpecImpl("%d,%d,%d,%d"),
absl::Span<const FormatArgImpl>(
{FormatArgImpl(v[0]), FormatArgImpl(v[1]),
FormatArgImpl(cv[0]), FormatArgImpl(cv[1])})));
}
// Checks decimal, unsigned and hexadecimal formatting of absl::int128 for a
// positive value, its negation, and the extreme values of the type.
TEST_F(FormatConvertTest, Int128) {
  absl::int128 positive = static_cast<absl::int128>(0x1234567890abcdef) * 1979;
  absl::int128 negative = -positive;
  absl::int128 max = absl::Int128Max(), min = absl::Int128Min();
  const FormatArgImpl args[] = {FormatArgImpl(positive),
                                FormatArgImpl(negative), FormatArgImpl(max),
                                FormatArgImpl(min)};

  struct Case {
    const char* format;
    const char* expected;
  } cases[] = {
      // The width-30 expectations spell out the full field: the positive
      // value has 22 characters (8 spaces of padding) and the negative one
      // has 23 (7 spaces). The previous expectations had that padding
      // collapsed to a single space.
      {"%1$d", "2595989796776606496405"},
      {"%1$30d", "        2595989796776606496405"},
      {"%1$-30d", "2595989796776606496405        "},
      {"%1$u", "2595989796776606496405"},
      {"%1$x", "8cba9876066020f695"},
      {"%2$d", "-2595989796776606496405"},
      {"%2$30d", "       -2595989796776606496405"},
      {"%2$-30d", "-2595989796776606496405       "},
      {"%2$u", "340282366920938460867384810655161715051"},
      {"%2$x", "ffffffffffffff73456789f99fdf096b"},
      {"%3$d", "170141183460469231731687303715884105727"},
      {"%3$u", "170141183460469231731687303715884105727"},
      {"%3$x", "7fffffffffffffffffffffffffffffff"},
      {"%4$d", "-170141183460469231731687303715884105728"},
      {"%4$x", "80000000000000000000000000000000"},
  };

  for (auto c : cases) {
    UntypedFormatSpecImpl format(c.format);
    EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args)));
  }
}
// Checks decimal, unsigned and hexadecimal formatting of absl::uint128 for a
// mid-range value and for the maximum value of the type.
TEST_F(FormatConvertTest, Uint128) {
  absl::uint128 v = static_cast<absl::uint128>(0x1234567890abcdef) * 1979;
  absl::uint128 max = absl::Uint128Max();
  const FormatArgImpl args[] = {FormatArgImpl(v), FormatArgImpl(max)};

  struct Case {
    const char* format;
    const char* expected;
  } cases[] = {
      // The width-30 expectations spell out the full field: the value has 22
      // digits, so 8 spaces of padding are required. The previous
      // expectations had that padding collapsed to a single space.
      {"%1$d", "2595989796776606496405"},
      {"%1$30d", "        2595989796776606496405"},
      {"%1$-30d", "2595989796776606496405        "},
      {"%1$u", "2595989796776606496405"},
      {"%1$x", "8cba9876066020f695"},
      {"%2$d", "340282366920938463463374607431768211455"},
      {"%2$u", "340282366920938463463374607431768211455"},
      {"%2$x", "ffffffffffffffffffffffffffffffff"},
  };

  for (auto c : cases) {
    UntypedFormatSpecImpl format(c.format);
    EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args)));
  }
}
// Cross-checks floating point formatting of `double` values: for every
// combination of format prefix in kFormats, conversion character and value in
// `doubles`, the output of AppendPack must equal the output produced by the
// StrAppend helper for the same format string and arguments.
TEST_F(FormatConvertTest, Float) {
#ifdef _MSC_VER
// MSVC has a different rounding policy than us so we can't test our
// implementation against the native one there.
return;
#endif // _MSC_VER
// Format prefixes; the conversion character is appended in the loop below.
// The last two entries take their width/precision from the second argument.
const char *const kFormats[] = {
"%", "%.3", "%8.5", "%500", "%.5000", "%.60", "%.30", "%03",
"%+", "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"};
std::vector<double> doubles = {0.0,
-0.0,
.99999999999999,
99999999999999.,
std::numeric_limits<double>::max(),
-std::numeric_limits<double>::max(),
std::numeric_limits<double>::min(),
-std::numeric_limits<double>::min(),
std::numeric_limits<double>::lowest(),
-std::numeric_limits<double>::lowest(),
std::numeric_limits<double>::epsilon(),
std::numeric_limits<double>::epsilon() + 1,
std::numeric_limits<double>::infinity(),
-std::numeric_limits<double>::infinity()};
// Some regression tests.
doubles.push_back(0.99999999999999989);
if (std::numeric_limits<double>::has_denorm != std::denorm_absent) {
doubles.push_back(std::numeric_limits<double>::denorm_min());
doubles.push_back(-std::numeric_limits<double>::denorm_min());
}
// Add +/- base * 2^exp for a range of decimal-looking mantissas and binary
// exponents.
for (double base :
{1., 12., 123., 1234., 12345., 123456., 1234567., 12345678., 123456789.,
1234567890., 12345678901., 123456789012., 1234567890123.}) {
for (int exp = -123; exp <= 123; ++exp) {
for (int sign : {1, -1}) {
doubles.push_back(sign * std::ldexp(base, exp));
}
}
}
// Workaround libc bug.
// https://sourceware.org/bugzilla/show_bug.cgi?id=22142
// The bug is detected by checking whether StrPrint("%f") of the largest
// double yields the exact expected decimal expansion.
const bool gcc_bug_22142 =
StrPrint("%f", std::numeric_limits<double>::max()) !=
"1797693134862315708145274237317043567980705675258449965989174768031"
"5726078002853876058955863276687817154045895351438246423432132688946"
"4182768467546703537516986049910576551282076245490090389328944075868"
"5084551339423045832369032229481658085593321233482747978262041447231"
"68738177180919299881250404026184124858368.000000";
if (!gcc_bug_22142) {
// Only exercise the all-ones mantissa values when the reference output can
// be trusted.
for (int exp = -300; exp <= 300; ++exp) {
const double all_ones_mantissa = 0x1fffffffffffff;
doubles.push_back(std::ldexp(all_ones_mantissa, exp));
}
}
if (gcc_bug_22142) {
// Replace the extreme magnitudes (max, min, denorm_min) with zero so they
// are not compared against the affected reference implementation.
for (auto &d : doubles) {
using L = std::numeric_limits<double>;
double d2 = std::abs(d);
if (d2 == L::max() || d2 == L::min() || d2 == L::denorm_min()) {
d = 0;
}
}
}
// Remove duplicates to speed up the logic below.
std::sort(doubles.begin(), doubles.end());
doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end());
// NaN is appended after sorting/deduplication, not before.
#ifndef __APPLE__
// Apple formats NaN differently (+nan) vs. (nan)
doubles.push_back(std::nan(""));
#endif
// Reserve the space to ensure we don't allocate memory in the output itself.
std::string str_format_result;
str_format_result.reserve(1 << 20);
std::string string_printf_result;
string_printf_result.reserve(1 << 20);
for (const char *fmt : kFormats) {
for (char f : {'f', 'F', //
'g', 'G', //
'a', 'A', //
'e', 'E'}) {
std::string fmt_str = std::string(fmt) + f;
if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') {
// This particular test takes way too long with snprintf.
// Disable for the case we are not implementing natively.
continue;
}
for (double d : doubles) {
// `i` is the second argument; it is only consumed by the "%1$*2$" and
// "%1$.*2$" formats.
int i = -10;
FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)};
UntypedFormatSpecImpl format(fmt_str);
// Reference output (StrAppend is a helper defined outside this block;
// presumably snprintf-based -- confirm against its definition).
string_printf_result.clear();
StrAppend(&string_printf_result, fmt_str.c_str(), d, i);
// Output of the implementation under test.
str_format_result.clear();
{
AppendPack(&str_format_result, format, absl::MakeSpan(args));
}
if (string_printf_result != str_format_result) {
// We use ASSERT_EQ here because failures are usually correlated and a
// bug would print way too many failed expectations causing the test
// to time out.
ASSERT_EQ(string_printf_result, str_format_result)
<< fmt_str << " " << StrPrint("%.18g", d) << " "
<< StrPrint("%a", d) << " " << StrPrint("%.1080f", d);
}
}
}
}
}
// Checks the rounding of "%.Nf" output at various precisions for values that
// are exact powers of two (and sums of two such powers), covering round up,
// round down, carries through runs of nines, and round-half-to-even.
TEST_F(FormatConvertTest, FloatRound) {
std::string s;
// Formats `d` with `fmt` into `s` and returns a reference to `s`. On
// non-MSVC builds it additionally expects the result to equal StrPrint's
// output for the same arguments.
const auto format = [&](const char *fmt, double d) -> std::string & {
s.clear();
FormatArgImpl args[1] = {FormatArgImpl(d)};
AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args));
#if !defined(_MSC_VER)
// MSVC has a different rounding policy than us so we can't test our
// implementation against the native one there.
EXPECT_EQ(StrPrint(fmt, d), s);
#endif // _MSC_VER
return s;
};
// All of these values have to be exactly represented.
// Otherwise we might not be testing what we think we are testing.
// These values can fit in a 64bit "fast" representation.
const double exact_value = 0.00000000000005684341886080801486968994140625;
// Note: this check is compiled out when NDEBUG is defined.
assert(exact_value == std::pow(2, -44));
// Round up at a 5xx.
EXPECT_EQ(format("%.13f", exact_value), "0.0000000000001");
// Round up at a >5
EXPECT_EQ(format("%.14f", exact_value), "0.00000000000006");
// Round down at a <5
EXPECT_EQ(format("%.16f", exact_value), "0.0000000000000568");
// Nine handling
EXPECT_EQ(format("%.35f", exact_value),
"0.00000000000005684341886080801486969");
EXPECT_EQ(format("%.36f", exact_value),
"0.000000000000056843418860808014869690");
// Round down the last nine.
EXPECT_EQ(format("%.37f", exact_value),
"0.0000000000000568434188608080148696899");
EXPECT_EQ(format("%.10f", 0.000003814697265625), "0.0000038147");
// Round up the last nine
EXPECT_EQ(format("%.11f", 0.000003814697265625), "0.00000381470");
EXPECT_EQ(format("%.12f", 0.000003814697265625), "0.000003814697");
// Round to even (down)
EXPECT_EQ(format("%.43f", exact_value),
"0.0000000000000568434188608080148696899414062");
// Exact
EXPECT_EQ(format("%.44f", exact_value),
"0.00000000000005684341886080801486968994140625");
// Round to even (up); make the last digits 75 instead of 25.
EXPECT_EQ(format("%.43f", exact_value + std::pow(2, -43)),
"0.0000000000001705302565824240446090698242188");
// Exact, just to check.
EXPECT_EQ(format("%.44f", exact_value + std::pow(2, -43)),
"0.00000000000017053025658242404460906982421875");
// This value has to be small enough that it won't fit in the uint128
// representation for printing.
const double small_exact_value =
0.000000000000000000000000000000000000752316384526264005099991383822237233803945956334136013765601092018187046051025390625; // NOLINT
// Note: this check is compiled out when NDEBUG is defined.
assert(small_exact_value == std::pow(2, -120));
// Round up at a 5xx.
EXPECT_EQ(format("%.37f", small_exact_value),
"0.0000000000000000000000000000000000008");
// Round down at a <5
EXPECT_EQ(format("%.38f", small_exact_value),
"0.00000000000000000000000000000000000075");
// Round up at a >5
EXPECT_EQ(format("%.41f", small_exact_value),
"0.00000000000000000000000000000000000075232");
// Nine handling
EXPECT_EQ(format("%.55f", small_exact_value),
"0.0000000000000000000000000000000000007523163845262640051");
EXPECT_EQ(format("%.56f", small_exact_value),
"0.00000000000000000000000000000000000075231638452626400510");
EXPECT_EQ(format("%.57f", small_exact_value),
"0.000000000000000000000000000000000000752316384526264005100");
EXPECT_EQ(format("%.58f", small_exact_value),
"0.0000000000000000000000000000000000007523163845262640051000");
// Round down the last nine
EXPECT_EQ(format("%.59f", small_exact_value),
"0.00000000000000000000000000000000000075231638452626400509999");
// Round up the last nine
EXPECT_EQ(format("%.79f", small_exact_value),
"0.000000000000000000000000000000000000"
"7523163845262640050999913838222372338039460");
// Round to even (down)
EXPECT_EQ(format("%.119f", small_exact_value),
"0.000000000000000000000000000000000000"
"75231638452626400509999138382223723380"
"394595633413601376560109201818704605102539062");
// Exact
EXPECT_EQ(format("%.120f", small_exact_value),
"0.000000000000000000000000000000000000"
"75231638452626400509999138382223723380"
"3945956334136013765601092018187046051025390625");
// Round to even (up); make the last digits 75 instead of 25.
EXPECT_EQ(format("%.119f", small_exact_value + std::pow(2, -119)),
"0.000000000000000000000000000000000002"
"25694915357879201529997415146671170141"
"183786900240804129680327605456113815307617188");
// Exact, just to check.
EXPECT_EQ(format("%.120f", small_exact_value + std::pow(2, -119)),
"0.000000000000000000000000000000000002"
"25694915357879201529997415146671170141"
"1837869002408041296803276054561138153076171875");
}
// A sink whose flush hook throws the data away. Nothing is stored; the type
// exists only so the formatting machinery can be driven end to end.
struct NullSink {
  friend void AbslFormatFlush(NullSink * /*sink*/, string_view /*str*/) {}
};
// Formats `fmt` with the arguments `a...` into a NullSink (output discarded)
// and returns the result of FormatUntyped.
template <typename... T>
bool FormatWithNullSink(absl::string_view fmt, const T &... a) {
  FormatArgImpl packed_args[] = {FormatArgImpl(a)...};
  NullSink discard;
  return FormatUntyped(&discard, UntypedFormatSpecImpl(fmt),
                       absl::MakeSpan(packed_args));
}
// Formatting with INT_MAX as a '*' width and/or precision must still report
// success for tiny, unit and huge doubles. Output goes to a NullSink.
TEST_F(FormatConvertTest, ExtremeWidthPrecision) {
  constexpr int kIntMax = std::numeric_limits<int>::max();
  for (const char *conv : {"f"}) {
    const std::string suffix(conv);
    for (double value : {1e-100, 1.0, 1e100}) {
      // Precision only.
      EXPECT_TRUE(FormatWithNullSink("%.*" + suffix, kIntMax, value));
      // Fixed width, extreme precision.
      EXPECT_TRUE(FormatWithNullSink("%1.*" + suffix, kIntMax, value));
      // Width only.
      EXPECT_TRUE(FormatWithNullSink("%*" + suffix, kIntMax, value));
      // Both width and precision.
      EXPECT_TRUE(
          FormatWithNullSink("%*.*" + suffix, kIntMax, kIntMax, value));
    }
  }
}
// Cross-checks floating point formatting of `long double` values: for every
// format prefix, conversion character and value, FormatPack's output must
// equal StrPrint's output for the same "L"-qualified format string.
TEST_F(FormatConvertTest, LongDouble) {
#ifdef _MSC_VER
// MSVC has a different rounding policy than us so we can't test our
// implementation against the native one there.
return;
#endif // _MSC_VER
// Format prefixes; 'L' and the conversion character are appended below.
const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", "%.5000",
"%.60", "%+", "% ", "%-10"};
std::vector<long double> doubles = {
0.0,
-0.0,
std::numeric_limits<long double>::max(),
-std::numeric_limits<long double>::max(),
std::numeric_limits<long double>::min(),
-std::numeric_limits<long double>::min(),
std::numeric_limits<long double>::infinity(),
-std::numeric_limits<long double>::infinity()};
for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L,
1234567.L, 12345678.L, 123456789.L, 1234567890.L,
12345678901.L, 123456789012.L, 1234567890123.L,
// This value is not representable in double, but it
// is in long double that uses the extended format.
// This is to verify that we are not truncating the
// value mistakenly through a double.
10000000000000000.25L}) {
for (int exp : {-1000, -500, 0, 500, 1000}) {
for (int sign : {1, -1}) {
// Add both the scaled value and its reciprocal, with each sign.
doubles.push_back(sign * std::ldexp(base, exp));
doubles.push_back(sign / std::ldexp(base, exp));
}
}
}
for (const char *fmt : kFormats) {
for (char f : {'f', 'F', //
'g', 'G', //
'a', 'A', //
'e', 'E'}) {
std::string fmt_str = std::string(fmt) + 'L' + f;
if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') {
// This particular test takes way too long with snprintf.
// Disable for the case we are not implementing natively.
continue;
}
for (auto d : doubles) {
FormatArgImpl arg(d);
UntypedFormatSpecImpl format(fmt_str);
// We use ASSERT_EQ here because failures are usually correlated and a
// bug would print way too many failed expectations causing the test to
// time out.
ASSERT_EQ(StrPrint(fmt_str.c_str(), d), FormatPack(format, {&arg, 1}))
<< fmt_str << " " << StrPrint("%.18Lg", d) << " "
<< StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d);
}
}
}
}
// Checks that an `int` argument formatted with a floating point conversion
// produces the same text as StrPrint does for that value converted to
// `double`.
TEST_F(FormatConvertTest, IntAsFloat) {
const int kMin = std::numeric_limits<int>::min();
const int kMax = std::numeric_limits<int>::max();
const int ia[] = {
1, 2, 3, 123,
-1, -2, -3, -123,
0, kMax - 1, kMax, kMin + 1, kMin };
for (const int fx : ia) {
SCOPED_TRACE(fx);
const FormatArgImpl args[] = {FormatArgImpl(fx)};
// One expected result per format; `line` records where the entry was
// declared so a failure trace can point at it.
struct Expectation {
int line;
std::string out;
const char *fmt;
};
// Reference value: the same integer, converted to double.
const double dx = static_cast<double>(fx);
const Expectation kExpect[] = {
{ __LINE__, StrPrint("%f", dx), "%f" },
{ __LINE__, StrPrint("%12f", dx), "%12f" },
{ __LINE__, StrPrint("%.12f", dx), "%.12f" },
{ __LINE__, StrPrint("%12a", dx), "%12a" },
{ __LINE__, StrPrint("%.12a", dx), "%.12a" },
};
for (const Expectation &e : kExpect) {
SCOPED_TRACE(e.line);
SCOPED_TRACE(e.fmt);
UntypedFormatSpecImpl format(e.fmt);
EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args)));
}
}
}
template <typename T>
bool FormatFails(const char* test_format, T value) {
std::string format_string = std::string("<<") + test_format + ">>";
UntypedFormatSpecImpl format(format_string);
int one = 1;
const FormatArgImpl args[] = {FormatArgImpl(value), FormatArgImpl(one)};
EXPECT_EQ(FormatPack(format, absl::MakeSpan(args)), "")
<< "format=" << test_format << " value=" << value;
return FormatPack(format, absl::MakeSpan(args)).empty();
}
// Each (argument type, conversion) pair below is expected to be rejected by
// the formatter, i.e. FormatFails must return true for it.
TEST_F(FormatConvertTest, ExpectedFailures) {
  // Int input
  for (const char *fmt : {"%p", "%s", "%n"}) {
    EXPECT_TRUE(FormatFails(fmt, 1));
  }

  // Double input
  for (const char *fmt : {"%p", "%s", "%n", "%c", "%d", "%x", "%*d"}) {
    EXPECT_TRUE(FormatFails(fmt, 1.));
  }

  // String input
  for (const char *fmt : {"%n", "%c", "%d", "%x", "%f", "%*d"}) {
    EXPECT_TRUE(FormatFails(fmt, ""));
  }
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,52 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/extension.h"
#include <errno.h>
#include <algorithm>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Renders the set flags as their printf flag characters, always in the order
// '-', '+', ' ', '#', '0'. The `basic` bit has no textual form.
std::string Flags::ToString() const {
  std::string out;
  if (left) out += '-';
  if (show_pos) out += '+';
  if (sign_col) out += ' ';
  if (alt) out += '#';
  if (zero) out += '0';
  return out;
}
// Appends `value` to the sink, truncated to `precision` characters when
// `precision` is non-negative and space-padded up to `width` columns when
// `width` is non-negative. Padding goes after the text if `left` is set and
// before it otherwise. Always returns true.
bool FormatSinkImpl::PutPaddedString(string_view value, int width,
                                     int precision, bool left) {
  // A negative precision means "show everything".
  size_t shown_len = value.size();
  if (precision >= 0) {
    shown_len = std::min(shown_len, static_cast<size_t>(precision));
  }
  const string_view visible(value.data(), shown_len);

  // A negative width means "no minimum width".
  const size_t field_width = width >= 0 ? static_cast<size_t>(width) : 0;
  const size_t padding = Excess(visible.size(), field_width);

  if (left) {
    Append(visible);
    Append(padding, ' ');
  } else {
    Append(padding, ' ');
    Append(visible);
  }
  return true;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,429 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
#include <limits.h>
#include <cstddef>
#include <cstring>
#include <ostream>
#include "absl/base/config.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/internal/str_format/output.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
enum class FormatConversionChar : uint8_t;
enum class FormatConversionCharSet : uint64_t;
// Type-erased handle to an output sink. It stores the sink as a `void*`
// together with a function pointer that forwards written data to
// `str_format_internal::InvokeFlush` for the sink's original type.
// The handle does not own the sink it points to.
class FormatRawSinkImpl {
public:
// Implicitly convert from a pointer to any type `T` for which
// `str_format_internal::InvokeFlush(T*, string_view)` is a valid expression.
template <typename T, decltype(str_format_internal::InvokeFlush(
std::declval<T*>(), string_view()))* = nullptr>
FormatRawSinkImpl(T* raw) // NOLINT
: sink_(raw), write_(&FormatRawSinkImpl::Flush<T>) {}
// Forwards `s` to the underlying sink.
void Write(string_view s) { write_(sink_, s); }
// Returns the `sink_` member of `s`, converted to FormatRawSinkImpl.
template <typename T>
static FormatRawSinkImpl Extract(T s) {
return s.sink_;
}
private:
// Trampoline: restores the erased type `T` and invokes its flush hook.
template <typename T>
static void Flush(void* r, string_view s) {
str_format_internal::InvokeFlush(static_cast<T*>(r), s);
}
void* sink_;
void (*write_)(void*, string_view);
};
// An abstraction to which conversions write their string data.
// Output is staged in a fixed 1024-byte internal buffer and handed to the raw
// sink on Flush(), on destruction, or when an append does not fit.
// NOTE(review): `pos_` points into this object's own `buf_`, so an implicit
// copy would leave the copy's `pos_` referring to the source object -- confirm
// that instances are never copied.
class FormatSinkImpl {
public:
explicit FormatSinkImpl(FormatRawSinkImpl raw) : raw_(raw) {}
// Any data still buffered is written out on destruction.
~FormatSinkImpl() { Flush(); }
// Writes the buffered bytes to the raw sink and resets the buffer to empty.
void Flush() {
raw_.Write(string_view(buf_, pos_ - buf_));
pos_ = buf_;
}
// Appends `n` copies of `c`.
void Append(size_t n, char c) {
if (n == 0) return;
size_ += n;
auto raw_append = [&](size_t count) {
memset(pos_, c, count);
pos_ += count;
};
// While the run does not fit, fill whatever space is left, flush, and
// continue with the remainder.
while (n > Avail()) {
n -= Avail();
if (Avail() > 0) {
raw_append(Avail());
}
Flush();
}
raw_append(n);
}
// Appends the bytes of `v`.
void Append(string_view v) {
size_t n = v.size();
if (n == 0) return;
size_ += n;
if (n >= Avail()) {
// Does not fit: flush what is buffered (to preserve ordering) and pass `v`
// straight to the raw sink without copying it into the buffer.
Flush();
raw_.Write(v);
return;
}
memcpy(pos_, v.data(), n);
pos_ += n;
}
// Total number of bytes appended so far (buffered or already flushed).
size_t size() const { return size_; }
// Put 'v' to 'sink' with specified width, precision, and left flag.
bool PutPaddedString(string_view v, int width, int precision, bool left);
// Constructs a `T` from a pointer to this sink.
template <typename T>
T Wrap() {
return T(this);
}
// Returns the `sink_` member of `*s`.
template <typename T>
static FormatSinkImpl* Extract(T* s) {
return s->sink_;
}
private:
// Number of free bytes remaining in `buf_`.
size_t Avail() const { return buf_ + sizeof(buf_) - pos_; }
FormatRawSinkImpl raw_;
size_t size_ = 0;
// Next write position inside `buf_`.
char* pos_ = buf_;
char buf_[1024];
};
// The flags of a conversion specification, stored as one-bit bit-fields.
struct Flags {
bool basic : 1; // fastest conversion: no flags, width, or precision
bool left : 1; // "-"
bool show_pos : 1; // "+"
bool sign_col : 1; // " "
bool alt : 1; // "#"
bool zero : 1; // "0"
// Returns the flag characters of the set flags (defined out of line).
std::string ToString() const;
// Streams the same text that ToString() returns.
friend std::ostream& operator<<(std::ostream& os, const Flags& v) {
return os << v.ToString();
}
};
// clang-format off
// Expands to X_VAL(id) for every supported conversion character `id`, with
// X_SEP placed between consecutive expansions. Callers define X_VAL (and
// optionally X_SEP) to generate per-character code from this single list.
#define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \
/* text */ \
X_VAL(c) X_SEP X_VAL(s) X_SEP \
/* ints */ \
X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \
X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \
/* floats */ \
X_VAL(f) X_SEP X_VAL(F) X_SEP X_VAL(e) X_SEP X_VAL(E) X_SEP \
X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \
/* misc */ \
X_VAL(n) X_SEP X_VAL(p)
// clang-format on
// This type should not be referenced, it exists only to provide labels
// internally that match the values declared in FormatConversionChar in
// str_format.h. This is meant to allow internal libraries to use the same
// declared interface type as the public interface
// (absl::StrFormatConversionChar) while keeping the definition in a public
// header.
// Internal libraries should use the form
// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for
// comparisons. Use in switch statements is not recommended due to a bug in how
// gcc 4.9 -Wswitch handles declared but undefined enums.
struct FormatConversionCharInternal {
// Not instantiable: the struct only serves as a scope for the constants.
FormatConversionCharInternal() = delete;
private:
// Private enumeration that fixes the numeric value of each label; the order
// here must match the order used by the public enum (see comment above).
// clang-format off
enum class Enum : uint8_t {
c, s, // text
d, i, o, u, x, X, // int
f, F, e, E, g, G, a, A, // float
n, p, // misc
kNone
};
// clang-format on
public:
// Generates one `static constexpr FormatConversionChar <id>` member per
// conversion character, each holding the corresponding Enum value.
#define ABSL_INTERNAL_X_VAL(id) \
static constexpr FormatConversionChar id = \
static_cast<FormatConversionChar>(Enum::id);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, )
#undef ABSL_INTERNAL_X_VAL
// Label for "no conversion character".
static constexpr FormatConversionChar kNone =
static_cast<FormatConversionChar>(Enum::kNone);
};
// clang-format on
// Maps a conversion character (e.g. 'd') to its FormatConversionChar label.
// Returns FormatConversionCharInternal::kNone for any other character.
inline FormatConversionChar FormatConversionCharFromChar(char c) {
switch (c) {
// Generates one `case '<id>': return FormatConversionCharInternal::<id>;` per
// conversion character (#id[0] is the first character of the stringized id).
#define ABSL_INTERNAL_X_VAL(id) \
case #id[0]: \
return FormatConversionCharInternal::id;
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, )
#undef ABSL_INTERNAL_X_VAL
}
return FormatConversionCharInternal::kNone;
}
// True for the conversion characters that are upper-case variants:
// X, F, E, G and A.
inline bool FormatConversionCharIsUpper(FormatConversionChar c) {
  return c == FormatConversionCharInternal::X ||
         c == FormatConversionCharInternal::F ||
         c == FormatConversionCharInternal::E ||
         c == FormatConversionCharInternal::G ||
         c == FormatConversionCharInternal::A;
}
// True for the floating point conversion characters, in either case:
// a, e, f, g and A, E, F, G.
inline bool FormatConversionCharIsFloat(FormatConversionChar c) {
  return c == FormatConversionCharInternal::a ||
         c == FormatConversionCharInternal::e ||
         c == FormatConversionCharInternal::f ||
         c == FormatConversionCharInternal::g ||
         c == FormatConversionCharInternal::A ||
         c == FormatConversionCharInternal::E ||
         c == FormatConversionCharInternal::F ||
         c == FormatConversionCharInternal::G;
}
// Maps a FormatConversionChar label back to its character (e.g. 'd').
// Returns '\0' for kNone and for any value that matches no known label.
inline char FormatConversionCharToChar(FormatConversionChar c) {
if (c == FormatConversionCharInternal::kNone) {
return '\0';
// Each expansion closes the previous branch and opens a new
// `else if (c == FormatConversionCharInternal::<e>) { return '<e>';` branch,
// so the macro list below forms one long if / else-if chain. The separator
// macro is intentionally empty.
#define ABSL_INTERNAL_X_VAL(e) \
} else if (c == FormatConversionCharInternal::e) { \
return #e[0];
#define ABSL_INTERNAL_X_SEP
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL,
ABSL_INTERNAL_X_SEP)
} else {
return '\0';
}
#undef ABSL_INTERNAL_X_VAL
#undef ABSL_INTERNAL_X_SEP
}
// Streams the character associated with `v`, or '?' when `v` has no
// associated character.
inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) {
  const char printable = FormatConversionCharToChar(v);
  return os << (printable != '\0' ? printable : '?');
}
struct FormatConversionSpecImplFriend;
// Describes one parsed conversion specification: its conversion character,
// flags, width and precision. The fields are written through
// FormatConversionSpecImplFriend.
// NOTE(review): `flags_`, `width_` and `precision_` have no default member
// initializers here; presumably they are always set before being read --
// confirm against the code that constructs these objects.
class FormatConversionSpecImpl {
public:
// Width and precision are not specified, no flags are set.
bool is_basic() const { return flags_.basic; }
bool has_left_flag() const { return flags_.left; }
bool has_show_pos_flag() const { return flags_.show_pos; }
bool has_sign_col_flag() const { return flags_.sign_col; }
bool has_alt_flag() const { return flags_.alt; }
bool has_zero_flag() const { return flags_.zero; }
FormatConversionChar conversion_char() const {
// Keep this field first in the struct. It generates better code when
// accessing it when ConversionSpec is passed by value in registers.
static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, "");
return conv_;
}
// Returns the specified width. If width is unspecified, it returns a negative
// value.
int width() const { return width_; }
// Returns the specified precision. If precision is unspecified, it returns a
// negative value.
int precision() const { return precision_; }
// Constructs a `T` from this specification.
template <typename T>
T Wrap() {
return T(*this);
}
private:
friend struct str_format_internal::FormatConversionSpecImplFriend;
FormatConversionChar conv_ = FormatConversionCharInternal::kNone;
Flags flags_;
int width_;
int precision_;
};
// Friend of FormatConversionSpecImpl that exposes write access to its private
// fields through static functions, so the spec class itself can stay
// read-only for other users.
struct FormatConversionSpecImplFriend final {
// Replaces the flags of `*conv`.
static void SetFlags(Flags f, FormatConversionSpecImpl* conv) {
conv->flags_ = f;
}
// Replaces the conversion character of `*conv`.
static void SetConversionChar(FormatConversionChar c,
FormatConversionSpecImpl* conv) {
conv->conv_ = c;
}
// Replaces the width of `*conv`.
static void SetWidth(int w, FormatConversionSpecImpl* conv) {
conv->width_ = w;
}
// Replaces the precision of `*conv`.
static void SetPrecision(int p, FormatConversionSpecImpl* conv) {
conv->precision_ = p;
}
// Returns the textual form of the flags stored in `spec`.
static std::string FlagsToString(const FormatConversionSpecImpl& spec) {
return spec.flags_.ToString();
}
};
// Type safe OR operator.
// We need this for two reasons:
// 1. operator| on enums makes them decay to integers and the result is an
// integer. We need the result to stay as an enum.
// 2. We use "enum class" which would not work even if we accepted the decay.
//
// Base case of the recursion: the union of a single set is the set itself.
constexpr FormatConversionCharSet FormatConversionCharSetUnion(
FormatConversionCharSet a) {
return a;
}
// Recursive case: ORs the bits of `a` with the union of the remaining sets.
template <typename... CharSet>
constexpr FormatConversionCharSet FormatConversionCharSetUnion(
FormatConversionCharSet a, CharSet... rest) {
return static_cast<FormatConversionCharSet>(
static_cast<uint64_t>(a) |
static_cast<uint64_t>(FormatConversionCharSetUnion(rest...)));
}
// Returns the single-bit mask for conversion character `c`: bit
// (1 + numeric value of `c`). Bit 0 is left free; the `char` overload uses it
// for '*'.
constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) {
return uint64_t{1} << (1 + static_cast<uint8_t>(c));
}
// Returns the single-bit mask for the conversion character `conv`: the mask of
// the matching FormatConversionChar for a known character, 1 (bit 0) for '*',
// and 0 for anything else.
constexpr uint64_t FormatConversionCharToConvInt(char conv) {
return
// Each expansion contributes one `conv == '<c>' ? <mask of c> :` link, so the
// list below forms a single chained conditional expression.
#define ABSL_INTERNAL_CHAR_SET_CASE(c) \
conv == #c[0] \
? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \
:
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, )
#undef ABSL_INTERNAL_CHAR_SET_CASE
conv == '*'
? 1
: 0;
}
// Same as FormatConversionCharToConvInt(char), with the result typed as a
// FormatConversionCharSet.
constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) {
return static_cast<FormatConversionCharSet>(
FormatConversionCharToConvInt(conv));
}
// Named FormatConversionCharSet constants: one single-character set per
// conversion character, plus a few predefined unions.
struct FormatConversionCharSetInternal {
// Generates one `static constexpr FormatConversionCharSet <c>` member per
// conversion character.
#define ABSL_INTERNAL_CHAR_SET_CASE(c) \
static constexpr FormatConversionCharSet c = \
FormatConversionCharToConvValue(#c[0]);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, )
#undef ABSL_INTERNAL_CHAR_SET_CASE
// Used for width/precision '*' specification.
static constexpr FormatConversionCharSet kStar =
FormatConversionCharToConvValue('*');
// Some predefined values (TODO(matthewbr), delete any that are unused).
static constexpr FormatConversionCharSet kIntegral =
FormatConversionCharSetUnion(d, i, u, o, x, X);
static constexpr FormatConversionCharSet kFloating =
FormatConversionCharSetUnion(a, e, f, g, A, E, F, G);
static constexpr FormatConversionCharSet kNumeric =
FormatConversionCharSetUnion(kIntegral, kFloating);
static constexpr FormatConversionCharSet kString = s;
static constexpr FormatConversionCharSet kPointer = p;
};
// Type safe OR operator.
// We need this for two reasons:
// 1. operator| on enums makes them decay to integers and the result is an
// integer. We need the result to stay as an enum.
// 2. We use "enum class" which would not work even if we accepted the decay.
//
// Implemented in terms of FormatConversionCharSetUnion.
constexpr FormatConversionCharSet operator|(FormatConversionCharSet a,
FormatConversionCharSet b) {
return FormatConversionCharSetUnion(a, b);
}
// Overloaded conversion functions to support absl::ParsedFormat.

// From a single conversion character: yields the set containing just it.
constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) {
  return FormatConversionCharToConvValue(c);
}

// From an existing set: returned unchanged.
constexpr FormatConversionCharSet ToFormatConversionCharSet(
    FormatConversionCharSet c) {
  return c;
}

// Every other argument type is rejected at compile time.
template <typename T>
void ToFormatConversionCharSet(T) = delete;
// Checks whether `c` exists in `set`.
// `c` is a raw conversion character; a character with no mask (see
// FormatConversionCharToConvValue) is never contained.
constexpr bool Contains(FormatConversionCharSet set, char c) {
return (static_cast<uint64_t>(set) &
static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0;
}
// Checks whether all the characters in `c` are contained in `set`
constexpr bool Contains(FormatConversionCharSet set,
FormatConversionCharSet c) {
return (static_cast<uint64_t>(set) & static_cast<uint64_t>(c)) ==
static_cast<uint64_t>(c);
}
// Checks whether the single conversion character `c` exists in `set`.
constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) {
return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0;
}
// Returns how much of `capacity` is left once `used` has been taken out of
// it, or 0 when `used` already meets or exceeds `capacity`.
inline size_t Excess(size_t used, size_t capacity) {
  if (capacity > used) {
    return capacity - used;
  }
  return 0;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_

View file

@ -0,0 +1,83 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "absl/strings/internal/str_format/extension.h"
#include <random>
#include <string>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
namespace my_namespace {
// Example of a user-defined sink type: it accumulates everything it is given
// in a std::string and provides the AbslFormatFlush hook as a friend function.
class UserDefinedType {
public:
UserDefinedType() = default;
// Appends the bytes of `str` to the accumulated value.
void Append(absl::string_view str) { value_.append(str.data(), str.size()); }
// Returns everything appended so far.
const std::string& Value() const { return value_; }
// Flush hook: forwards the flushed data to Append().
friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) {
x->Append(str);
}
private:
std::string value_;
};
} // namespace my_namespace
namespace {
// Builds a string of `len` characters, each drawn uniformly from 'a'..'z'.
// A fresh generator seeded from std::random_device is used on every call.
std::string MakeRandomString(size_t len) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_int_distribution<> letter('a', 'z');

  std::string result;
  result.reserve(len);
  for (size_t i = 0; i < len; ++i) {
    result.push_back(static_cast<char>(letter(engine)));
  }
  return result;
}
// Appending ten random strings of a given length (shorter and longer than the
// sink's internal buffer) must deliver exactly their concatenation to the
// underlying std::string.
TEST(FormatExtensionTest, SinkAppendSubstring) {
  using absl::str_format_internal::FormatSinkImpl;
  constexpr int kPiecesPerRun = 10;

  for (const size_t piece_len : {1, 10, 100, 1000, 10000}) {
    std::string sunk;
    std::string reference;
    FormatSinkImpl sink(&sunk);
    for (int i = 0; i < kPiecesPerRun; ++i) {
      const std::string piece = MakeRandomString(piece_len);
      reference.append(piece);
      sink.Append(piece);
    }
    sink.Flush();
    EXPECT_EQ(sunk, reference);
  }
}
// Appending ten runs of a repeated random character (runs shorter and longer
// than the sink's internal buffer) must deliver exactly those runs, in order,
// to the underlying std::string.
TEST(FormatExtensionTest, SinkAppendChars) {
  using absl::str_format_internal::FormatSinkImpl;
  constexpr int kRunsPerCase = 10;

  for (const size_t run_len : {1, 10, 100, 1000, 10000}) {
    std::string sunk;
    std::string reference;
    FormatSinkImpl sink(&sunk);
    for (int i = 0; i < kRunsPerCase; ++i) {
      const char fill = MakeRandomString(1)[0];
      reference.append(run_len, fill);
      sink.Append(run_len, fill);
    }
    sink.Flush();
    EXPECT_EQ(sunk, reference);
  }
}
} // namespace

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Declarations of the floating point conversion entry points, one overload
// per floating point type. Each takes the value `v`, the conversion
// specification `conv`, and the sink to write to; the definitions live
// outside this header.
// NOTE(review): the meaning of the bool result is not visible here;
// presumably it reports whether the conversion succeeded -- confirm against
// the definitions.
bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_

View file

@ -0,0 +1,72 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/output.h"
#include <errno.h>
#include <cstring>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Scoped errno reset. On construction the current errno is remembered and
// errno is set to 0. On destruction the remembered value is put back, unless
// errno was set to a non-zero value in between, in which case that newer
// value is kept.
struct ClearErrnoGuard {
  ClearErrnoGuard() {
    old_value = errno;
    errno = 0;
  }
  ~ClearErrnoGuard() {
    if (errno == 0) {
      errno = old_value;
    }
  }
  int old_value;
};
} // namespace
// Copies as much of `v` as still fits into the destination buffer and
// advances past it. `total_written_` always grows by the full size of `v`,
// so it reports how many bytes would have been written to an unbounded
// buffer.
void BufferRawSink::Write(string_view v) {
  const size_t to_write = std::min(v.size(), size_);
  // Skip the copy entirely when there is nothing to copy: passing a null
  // pointer to memcpy is undefined behaviour even for a zero length, and
  // either the buffer (a null, zero-sized destination) or an empty view's
  // data() may be null.
  if (to_write > 0) {
    std::memcpy(buffer_, v.data(), to_write);
    buffer_ += to_write;
    size_ -= to_write;
  }
  total_written_ += v.size();
}
// Writes all of `v` to the FILE with fwrite, retrying after partial writes.
// Stops at the first error, which is recorded in `error_`; once an error has
// been recorded, later calls write nothing. `count_` accumulates the number
// of bytes fwrite reported as written.
void FILERawSink::Write(string_view v) {
while (!v.empty() && !error_) {
// Reset errno to zero in case the libc implementation doesn't set errno
// when a failure occurs.
ClearErrnoGuard guard;
if (size_t result = std::fwrite(v.data(), 1, v.size(), output_)) {
// Some progress was made.
count_ += result;
v.remove_prefix(result);
} else {
// Nothing was written: decide between retrying and recording an error.
if (errno == EINTR) {
continue;
} else if (errno) {
error_ = errno;
} else if (std::ferror(output_)) {
// Non-POSIX compliant libc implementations may not set errno, so we
// have to check the stream's error indicator.
error_ = EBADF;
} else {
// We're likely on a non-POSIX system that encountered EINTR but had no
// way of reporting it.
continue;
}
}
}
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,96 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Output extension hooks for the Format library.
// `internal::InvokeFlush` calls the appropriate flush function for the
// specified output argument.
// `BufferRawSink` is a simple output sink for a char buffer. Used by SnprintF.
// `FILERawSink` is a std::FILE* based sink. Used by PrintF and FprintF.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_
#include <cstdio>
#include <ostream>
#include <string>
#include "absl/base/port.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// RawSink implementation that writes into a char* buffer.
// It will not overflow the buffer, but will keep the total count of chars
// that would have been written.
class BufferRawSink {
 public:
  // `buffer` is where output is stored; `size` is the number of bytes that
  // may be stored there.
  BufferRawSink(char* buffer, size_t size) : buffer_(buffer), size_(size) {}

  // Consumes `v`. Defined out of line.
  void Write(string_view v);

  // Running total maintained by Write().
  size_t total_written() const { return total_written_; }

 private:
  char* buffer_;
  size_t size_;
  size_t total_written_{0};
};
// RawSink implementation that writes into a FILE*.
// It keeps track of the total number of bytes written and any error encountered
// during the writes.
class FILERawSink {
 public:
  // Wraps `output`; the sink only stores the pointer.
  explicit FILERawSink(std::FILE* output) : output_(output) {}

  // Consumes `v`. Defined out of line.
  void Write(string_view v);

  // Error code recorded so far; 0 until one is recorded.
  int error() const { return error_; }
  // Byte count recorded so far; starts at 0.
  size_t count() const { return count_; }

 private:
  std::FILE* output_;
  int error_{0};
  size_t count_{0};
};
// Provide RawSink integration with common types from the STL.
// Flush hook for std::string: the bytes of `s` are appended to `*out`.
inline void AbslFormatFlush(std::string* out, string_view s) {
  out->append(s.begin(), s.end());
}
// Flush hook for std::ostream: the bytes of `s` are written unformatted.
inline void AbslFormatFlush(std::ostream* out, string_view s) {
  const auto byte_count = static_cast<std::streamsize>(s.size());
  out->write(s.data(), byte_count);
}
// Flush hook for FILERawSink: forwards `v` to the sink's Write().
inline void AbslFormatFlush(FILERawSink* sink, string_view v) {
sink->Write(v);
}
// Flush hook for BufferRawSink: forwards `v` to the sink's Write().
inline void AbslFormatFlush(BufferRawSink* sink, string_view v) {
sink->Write(v);
}
// This is a SFINAE to get a better compiler error message when the type
// is not supported.
// Forwards `s` to the AbslFormatFlush overload selected for `out`.
// The trailing return type is only well-formed when a call
// AbslFormatFlush(out, s) compiles, so sink types without such an overload
// are rejected at the call to InvokeFlush.
template <typename T>
auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) {
AbslFormatFlush(out, s);
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_

View file

@ -0,0 +1,79 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/output.h"
#include <sstream>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/cord.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Flushing into a std::string appends to what is already there.
TEST(InvokeFlush, String) {
  std::string sink("ABC");
  str_format_internal::InvokeFlush(&sink, "DEF");
  EXPECT_EQ(sink, "ABCDEF");
}
// Flushing into a stream writes after the previously inserted text.
TEST(InvokeFlush, Stream) {
  std::stringstream stream;
  stream << "ABC";
  str_format_internal::InvokeFlush(&stream, "DEF");
  EXPECT_EQ(stream.str(), "ABCDEF");
}
// Flushing into an absl::Cord appends to the existing contents.
TEST(InvokeFlush, Cord) {
  absl::Cord cord("ABC");
  str_format_internal::InvokeFlush(&cord, "DEF");
  EXPECT_EQ(cord, "ABCDEF");
}
// A BufferRawSink given sizeof(buf) - 1 bytes must never touch the last byte
// of `buf`, whether the text arrives in one flush or two, and whether or not
// it fits.
TEST(BufferRawSink, Limits) {
  char buf[16];
  // Refills `buf` with 'x', pushes `first` and then (when non-null) `second`
  // through a fresh sink limited to sizeof(buf) - 1 bytes, and returns the
  // whole buffer so untouched bytes remain visible as 'x'.
  const auto flush_into_buf = [&buf](const char* first, const char* second) {
    std::fill(std::begin(buf), std::end(buf), 'x');
    str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1);
    str_format_internal::InvokeFlush(&bufsink, first);
    if (second != nullptr) {
      str_format_internal::InvokeFlush(&bufsink, second);
    }
    return std::string(buf, sizeof(buf));
  };
  // Single flush that fits.
  EXPECT_EQ(flush_into_buf("Hello World237", nullptr), "Hello World237xx");
  // Single flush that is too long: cut off after 15 bytes.
  EXPECT_EQ(flush_into_buf("Hello World237237", nullptr), "Hello World2372x");
  // Two flushes that fit together.
  EXPECT_EQ(flush_into_buf("Hello World", "237"), "Hello World237xx");
  // Two flushes where the second one overruns the limit.
  EXPECT_EQ(flush_into_buf("Hello World", "237237"), "Hello World2372x");
}
} // namespace
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,336 @@
#include "absl/strings/internal/str_format/parser.h"
#include <assert.h>
#include <string.h>
#include <wchar.h>
#include <cctype>
#include <cstdint>
#include <algorithm>
#include <initializer_list>
#include <limits>
#include <ostream>
#include <string>
#include <unordered_set>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
using CC = FormatConversionCharInternal;
using LM = LengthMod;
// Lookup table with one entry per byte value. An entry is a conversion
// character (CC::...), a length modifier (LM::...), or a default-constructed
// ConvTag (`{}`) for every byte that is neither. The trailing comment on each
// row names the eight byte values that row covers.
ABSL_CONST_INIT const ConvTag kTags[256] = {
{}, {}, {}, {}, {}, {}, {}, {}, // 00-07
{}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
{}, {}, {}, {}, {}, {}, {}, {}, // 10-17
{}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
{}, {}, {}, {}, {}, {}, {}, {}, // 20-27
{}, {}, {}, {}, {}, {}, {}, {}, // 28-2f
{}, {}, {}, {}, {}, {}, {}, {}, // 30-37
{}, {}, {}, {}, {}, {}, {}, {}, // 38-3f
{}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
{}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
{}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
{}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}~ and DEL (0x7f)
{}, {}, {}, {}, {}, {}, {}, {}, // 80-87
{}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
{}, {}, {}, {}, {}, {}, {}, {}, // 90-97
{}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
{}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
{}, {}, {}, {}, {}, {}, {}, {}, // a8-af
{}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
{}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
{}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
{}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
{}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
{}, {}, {}, {}, {}, {}, {}, {}, // d8-df
{}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
{}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
{}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
{}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
};
namespace {
// Debug helper: verifies that `conv.flags.basic` is set exactly when no other
// flag is set and neither width nor precision has been given (both still
// report -1). On a mismatch the individual values are dumped to stderr.
// Returns true when the `basic` flag is consistent.
bool CheckFastPathSetting(const UnboundConversion& conv) {
  const bool any_flag_set = conv.flags.left ||      //
                            conv.flags.show_pos ||  //
                            conv.flags.sign_col ||  //
                            conv.flags.alt ||       //
                            conv.flags.zero;
  const bool expect_basic = !any_flag_set &&                //
                            conv.width.value() == -1 &&     //
                            conv.precision.value() == -1;
  const bool consistent = (expect_basic == conv.flags.basic);
  if (!consistent) {
    fprintf(stderr,
            "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
            "width=%d precision=%d\n",
            conv.flags.basic, conv.flags.left, conv.flags.show_pos,
            conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
            conv.width.value(), conv.precision.value());
  }
  return consistent;
}
// Parses a single conversion specification (the text following '%') from
// [pos, end) and stores the result in `*conv`.
//
// `is_positional` selects the grammar: when true the spec must start with an
// explicit "N$" argument index and every '*' must be followed by "N$"; when
// false arguments are taken in order through `*next_arg`.
//
// `*next_arg` is incremented for every argument consumed in non-positional
// mode. If a non-positional parse runs into "N$" while `*next_arg` is still
// 0, `*next_arg` is set to -1 and the spec is re-parsed positionally.
//
// Returns a pointer just past the conversion character on success, or nullptr
// if the spec is malformed or the input ends early.
template <bool is_positional>
const char *ConsumeConversion(const char *pos, const char *const end,
                              UnboundConversion *conv, int *next_arg) {
  const char* const original_pos = pos;
  char c;
  // Read the next char into `c` and update `pos`. Makes the enclosing function
  // return nullptr if there are no more chars to read.
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
  do {                                                  \
    if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
    c = *pos++;                                         \
  } while (0)

  // Parses a decimal number whose first digit is already in `c`. On return
  // `c` holds the last character read.
  const auto parse_digits = [&] {
    int digits = c - '0';
    // We do not want to overflow `digits` so we consume at most digits10
    // digits. If there are more digits the parsing will fail later on when the
    // digit doesn't match the expected characters.
    int num_digits = std::numeric_limits<int>::digits10;
    for (;;) {
      if (ABSL_PREDICT_FALSE(pos == end)) break;
      c = *pos++;
      // <cctype> classifiers require a value representable as unsigned char;
      // handing them a negative plain char is undefined behavior.
      if (!std::isdigit(static_cast<unsigned char>(c))) break;
      --num_digits;
      if (ABSL_PREDICT_FALSE(!num_digits)) break;
      digits = 10 * digits + c - '0';
    }
    return digits;
  };

  if (is_positional) {
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
    conv->arg_position = parse_digits();
    assert(conv->arg_position > 0);
    if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
  }

  ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();

  // We should start with the basic flag on.
  assert(conv->flags.basic);

  // Any non alpha character makes this conversion not basic.
  // This includes flags (-+ #0), width (1-9, *) or precision (.).
  // All conversion characters and length modifiers are alpha characters.
  if (c < 'A') {
    conv->flags.basic = false;

    for (; c <= '0';) {
      // FIXME: We might be able to speed this up reusing the lookup table from
      // above. It might require changing Flags to be a plain integer where we
      // can |= a value.
      switch (c) {
        case '-':
          conv->flags.left = true;
          break;
        case '+':
          conv->flags.show_pos = true;
          break;
        case ' ':
          conv->flags.sign_col = true;
          break;
        case '#':
          conv->flags.alt = true;
          break;
        case '0':
          conv->flags.zero = true;
          break;
        default:
          goto flags_done;
      }
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    }
flags_done:

    if (c <= '9') {
      if (c >= '0') {
        int maybe_width = parse_digits();
        if (!is_positional && c == '$') {
          // The digits were an argument index, not a width. Switching to
          // positional mode is only allowed before any argument was consumed.
          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
          // Positional conversion.
          *next_arg = -1;
          conv->flags = Flags();
          conv->flags.basic = true;
          return ConsumeConversion<true>(original_pos, end, conv, next_arg);
        }
        conv->width.set_value(maybe_width);
      } else if (c == '*') {
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->width.set_from_arg(parse_digits());
          if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->width.set_from_arg(++*next_arg);
        }
      }
    }

    if (c == '.') {
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
      // Same unsigned char requirement as in parse_digits above.
      if (std::isdigit(static_cast<unsigned char>(c))) {
        conv->precision.set_value(parse_digits());
      } else if (c == '*') {
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->precision.set_from_arg(parse_digits());
          if (c != '$') return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->precision.set_from_arg(++*next_arg);
        }
      } else {
        // A '.' with nothing after it means precision 0.
        conv->precision.set_value(0);
      }
    }
  }

  auto tag = GetTagForChar(c);

  if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
    if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;

    // It is a length modifier.
    using str_format_internal::LengthMod;
    LengthMod length_mod = tag.as_length();
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    if (c == 'h' && length_mod == LengthMod::h) {
      conv->length_mod = LengthMod::hh;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else if (c == 'l' && length_mod == LengthMod::l) {
      conv->length_mod = LengthMod::ll;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else {
      conv->length_mod = length_mod;
    }
    tag = GetTagForChar(c);
    if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
  }

  assert(CheckFastPathSetting(*conv));
  // Reference the helper so it still counts as used when assert() compiles to
  // nothing.
  (void)(&CheckFastPathSetting);

  conv->conv = tag.as_conv();
  if (!is_positional) conv->arg_position = ++*next_arg;
  return pos;
}
} // namespace
// Returns the spelling of `v` as it appears in a format string. Both
// LengthMod::none and any value outside the enumerators yield "".
std::string LengthModToString(LengthMod v) {
  const char* spelling = "";
  switch (v) {
    case LengthMod::h:
      spelling = "h";
      break;
    case LengthMod::hh:
      spelling = "hh";
      break;
    case LengthMod::l:
      spelling = "l";
      break;
    case LengthMod::ll:
      spelling = "ll";
      break;
    case LengthMod::L:
      spelling = "L";
      break;
    case LengthMod::j:
      spelling = "j";
      break;
    case LengthMod::z:
      spelling = "z";
      break;
    case LengthMod::t:
      spelling = "t";
      break;
    case LengthMod::q:
      spelling = "q";
      break;
    case LengthMod::none:
      break;
  }
  return spelling;
}
// Dispatches to the positional or the in-order parser: a negative `*next_arg`
// selects the positional one.
const char *ConsumeUnboundConversion(const char *p, const char *end,
                                     UnboundConversion *conv, int *next_arg) {
  const bool positional = *next_arg < 0;
  return positional ? ConsumeConversion<true>(p, end, conv, next_arg)
                    : ConsumeConversion<false>(p, end, conv, next_arg);
}
// Consumer passed to ParseFormatString by the ParsedFormatBase constructor.
// It copies every piece it is handed into `parsed->data_` and records in
// `parsed->items_` where each piece ends and whether it is a conversion.
struct ParsedFormatBase::ParsedFormatConsumer {
  explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
      : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}

  // Records a run of literal text. Consecutive text runs are merged into a
  // single item; empty runs are ignored. Always returns true.
  bool Append(string_view s) {
    if (s.empty()) return true;

    const size_t text_end = AppendText(s);
    auto &items = parsed->items_;
    const bool last_is_text = !items.empty() && !items.back().is_conversion;
    if (last_is_text) {
      // Grow the text run that is already there.
      items.back().text_end = text_end;
    } else {
      // Start a new text run.
      items.push_back({false, text_end, {}});
    }
    return true;
  }

  // Records one conversion together with its spec text `s`. Always returns
  // true.
  bool ConvertOne(const UnboundConversion &conv, string_view s) {
    const size_t text_end = AppendText(s);
    parsed->items_.push_back({true, text_end, conv});
    return true;
  }

  // Copies `s` to the current write position and returns the new end offset
  // measured from the start of `parsed->data_`.
  size_t AppendText(string_view s) {
    const size_t length = s.size();
    memcpy(data_pos, s.data(), length);
    data_pos += length;
    return static_cast<size_t>(data_pos - parsed->data_.get());
  }

  ParsedFormatBase *parsed;
  char* data_pos;
};
// Parses `format` into items and checks them against `convs`. Storage of
// `format.size()` chars is allocated up front (none for an empty format).
// `has_error_` ends up true when parsing fails or the conversions do not
// match; the match check is skipped when parsing already failed.
ParsedFormatBase::ParsedFormatBase(
    string_view format, bool allow_ignored,
    std::initializer_list<FormatConversionCharSet> convs)
    : data_(format.empty() ? nullptr : new char[format.size()]) {
  const bool parsed_ok = ParseFormatString(format, ParsedFormatConsumer(this));
  const bool valid = parsed_ok && MatchesConversions(allow_ignored, convs);
  has_error_ = !valid;
}
// Checks every argument reference made by the parsed items against `convs`:
// the 1-based position has to exist in `convs` and the set at that position
// has to contain the character used ('*' for width/precision taken from an
// argument). Unless `allow_ignored` is set, every entry of `convs` must also
// be referenced at least once.
bool ParsedFormatBase::MatchesConversions(
    bool allow_ignored,
    std::initializer_list<FormatConversionCharSet> convs) const {
  std::unordered_set<int> used;
  // Validates one reference and, if it is valid, remembers the position.
  const auto record_use = [&](int pos, char c) -> bool {
    const bool in_range = static_cast<size_t>(pos) <= convs.size();
    if (!in_range || !Contains(convs.begin()[pos - 1], c)) {
      return false;
    }
    used.insert(pos);
    return true;
  };

  for (const ConversionItem &item : items_) {
    if (!item.is_conversion) continue;
    const UnboundConversion &conv = item.conv;
    if (conv.precision.is_from_arg()) {
      if (!record_use(conv.precision.get_from_arg(), '*')) return false;
    }
    if (conv.width.is_from_arg()) {
      if (!record_use(conv.width.get_from_arg(), '*')) return false;
    }
    if (!record_use(conv.arg_position, FormatConversionCharToChar(conv.conv))) {
      return false;
    }
  }
  return allow_ignored || used.size() == convs.size();
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,335 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Length modifiers that may precede a conversion character. `none` stands
// for "no modifier given".
enum class LengthMod : std::uint8_t {
  h,
  hh,
  l,
  ll,
  L,
  j,
  z,
  t,
  q,
  none,
};
// Returns the spelling of `v`; defined out of line.
std::string LengthModToString(LengthMod v);
// The analyzed properties of a single specified conversion.
// Holds everything parsed from one conversion specification: the argument
// position, width, precision, flags, length modifier and conversion char.
struct UnboundConversion {
// Starts with all flags cleared except `basic`, which is switched on.
UnboundConversion()
: flags() /* This is required to zero all the fields of flags. */ {
flags.basic = true;
}
// A width or precision. A single int encodes three states:
//   -1    : not set (the initial state),
//   >= 0  : a literal value given in the format string,
//   < -1  : "from arg" ('*'); the argument position n is stored as -n - 1.
class InputValue {
public:
// Stores a literal value. Requires `value >= 0`.
void set_value(int value) {
assert(value >= 0);
value_ = value;
}
// Returns the raw stored int (see the encoding described on the class).
int value() const { return value_; }
// Marks the value as "from arg". aka the '*' format.
// Requires `value >= 1`.
// When set, is_from_arg() return true and get_from_arg() returns the
// original value.
// `value()`'s return value is unspecified in this state.
void set_from_arg(int value) {
assert(value > 0);
value_ = -value - 1;
}
// True only in the "from arg" state; -1 (not set) does not count.
bool is_from_arg() const { return value_ < -1; }
// Decodes the argument position stored by set_from_arg().
// Requires is_from_arg().
int get_from_arg() const {
assert(is_from_arg());
return -value_ - 1;
}
private:
int value_ = -1;
};
// No need to initialize. It will always be set in the parser.
int arg_position;
InputValue width;
InputValue precision;
Flags flags;
LengthMod length_mod = LengthMod::none;
FormatConversionChar conv = FormatConversionCharInternal::kNone;
};
// Consume conversion spec prefix (not including '%') of [p, end) if valid.
// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
// If valid, it returns the first character following the conversion spec,
// and the spec part is broken down and returned in 'conv'.
// If invalid, returns nullptr.
const char* ConsumeUnboundConversion(const char* p, const char* end,
UnboundConversion* conv, int* next_arg);
// Helper tag class for the table below.
// It allows fast `char -> ConversionChar/LengthMod` checking and
// conversions.
class ConvTag {
 public:
  // Tags a conversion character; the tag is the enum value itself and is
  // expected to be non-negative.
  constexpr ConvTag(FormatConversionChar conversion_char)  // NOLINT
      : tag_(static_cast<std::int8_t>(conversion_char)) {}
  // We invert the length modifiers to make them negative so that we can easily
  // test for them.
  constexpr ConvTag(LengthMod length_mod)  // NOLINT
      : tag_(~static_cast<std::int8_t>(length_mod)) {}
  // Everything else is -128, which is negative to make is_conv() simpler.
  constexpr ConvTag() : tag_(-128) {}

  // True when the tag holds a conversion character.
  bool is_conv() const { return tag_ >= 0; }
  // True when the tag holds a length modifier (negative, but not the -128
  // "nothing" marker).
  bool is_length() const { return tag_ < 0 && tag_ != -128; }
  // Returns the conversion character. Requires is_conv().
  FormatConversionChar as_conv() const {
    assert(is_conv());
    return static_cast<FormatConversionChar>(tag_);
  }
  // Returns the length modifier by undoing the inversion applied in the
  // constructor. Requires is_length().
  LengthMod as_length() const {
    assert(is_length());
    return static_cast<LengthMod>(~tag_);
  }

 private:
  std::int8_t tag_;
};
extern const ConvTag kTags[256];
// Keep a single table for all the conversion chars and length modifiers.
// Returns the kTags entry for `c`.
inline ConvTag GetTagForChar(char c) {
// Index through unsigned char so that a negative `char` cannot produce a
// negative index into the 256-entry table.
return kTags[static_cast<unsigned char>(c)];
}
// Parse the format string provided in 'src' and pass the identified items into
// 'consumer'.
// Text runs will be passed by calling
// Consumer::Append(string_view);
// ConversionItems will be passed by calling
// Consumer::ConvertOne(UnboundConversion, string_view);
// In the case of ConvertOne, the string_view that is passed is the
// portion of the format string corresponding to the conversion, not including
// the leading %. On success, it returns true. On failure, it stops and returns
// false.
template <typename Consumer>
bool ParseFormatString(string_view src, Consumer consumer) {
  int next_arg = 0;
  const char* cursor = src.data();
  const char* const end = cursor + src.size();
  while (cursor != end) {
    const char* const percent =
        static_cast<const char*>(memchr(cursor, '%', end - cursor));
    if (percent == nullptr) {
      // No further '%': whatever remains is one final text run.
      return consumer.Append(string_view(cursor, end - cursor));
    }
    // Hand over the text in front of the '%' before looking at the spec.
    if (ABSL_PREDICT_FALSE(
            !consumer.Append(string_view(cursor, percent - cursor)))) {
      return false;
    }
    // A '%' as the very last character introduces nothing.
    if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;

    const char* const spec = percent + 1;
    const auto tag = GetTagForChar(*spec);
    if (tag.is_conv()) {
      // A bare conversion character such as "%d".
      if (ABSL_PREDICT_FALSE(next_arg < 0)) {
        // `next_arg < 0` can only happen when an earlier positional argument
        // set it to -1; following that with a non-positional one is a format
        // string error.
        return false;
      }
      cursor = percent + 2;
      // This case stays separate from the general one further down:
      // ConvertOne is more efficient when the compiler can see that the
      // `basic` flag is set.
      UnboundConversion conv;
      conv.conv = tag.as_conv();
      conv.arg_position = ++next_arg;
      if (ABSL_PREDICT_FALSE(
              !consumer.ConvertOne(conv, string_view(spec, 1)))) {
        return false;
      }
      continue;
    }
    if (*spec == '%') {
      // "%%" is passed on as a single literal percent sign.
      if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
      cursor = percent + 2;
      continue;
    }
    // General case: flags, width, precision, length modifier or positional
    // arguments are involved.
    UnboundConversion conv;
    cursor = ConsumeUnboundConversion(spec, end, &conv, &next_arg);
    if (ABSL_PREDICT_FALSE(cursor == nullptr)) return false;
    if (ABSL_PREDICT_FALSE(
            !consumer.ConvertOne(conv, string_view(spec, cursor - spec)))) {
      return false;
    }
  }
  return true;
}
// Always returns true, or fails to compile in a constexpr context if s does not
// point to a constexpr char array.
constexpr bool EnsureConstexpr(string_view s) {
// The self-comparison is deliberate: its value is always true, but it makes
// the expression read the first character of `s`.
return s.empty() || s[0] == s[0];
}
// Storage for a parsed format string. All pieces of the format (text runs and
// conversion specs) are kept back to back in `data_`; `items_` records where
// each piece ends and, for conversions, the parsed UnboundConversion.
class ParsedFormatBase {
 public:
  // Parses `format` and checks it against `convs`; the outcome is available
  // through has_error(). Defined out of line.
  explicit ParsedFormatBase(
      string_view format, bool allow_ignored,
      std::initializer_list<FormatConversionCharSet> convs);

  ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }

  ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }

  // Deep copy: `data_` is re-allocated and the used part of the text copied.
  ParsedFormatBase& operator=(const ParsedFormatBase& other) {
    if (this == &other) return *this;
    has_error_ = other.has_error_;
    items_ = other.items_;
    // The last item's end offset is the number of chars in use.
    size_t text_size = items_.empty() ? 0 : items_.back().text_end;
    data_.reset(new char[text_size]);
    // `other.data_` is null when it was built from an empty format; memcpy
    // must not be given a null pointer, not even for a zero-length copy.
    if (text_size != 0) {
      memcpy(data_.get(), other.data_.get(), text_size);
    }
    return *this;
  }

  ParsedFormatBase& operator=(ParsedFormatBase&& other) {
    if (this == &other) return *this;
    has_error_ = other.has_error_;
    data_ = std::move(other.data_);
    items_ = std::move(other.items_);
    // Reset the vector to make sure the invariants hold.
    other.items_.clear();
    return *this;
  }

  // Replays the parsed items into `consumer`: Append(text) for text runs and
  // ConvertOne(conv, text) for conversions. Stops and returns false as soon
  // as the consumer returns false; otherwise returns !has_error().
  template <typename Consumer>
  bool ProcessFormat(Consumer consumer) const {
    const char* const base = data_.get();
    string_view text(base, 0);
    for (const auto& item : items_) {
      // Each piece starts where the previous one ended.
      const char* const end = text.data() + text.size();
      text = string_view(end, (base + item.text_end) - end);
      if (item.is_conversion) {
        if (!consumer.ConvertOne(item.conv, text)) return false;
      } else {
        if (!consumer.Append(text)) return false;
      }
    }
    return !has_error_;
  }

  bool has_error() const { return has_error_; }

 private:
  // Returns whether the conversions match and if !allow_ignored it verifies
  // that all conversions are used by the format.
  bool MatchesConversions(
      bool allow_ignored,
      std::initializer_list<FormatConversionCharSet> convs) const;

  struct ParsedFormatConsumer;

  struct ConversionItem {
    bool is_conversion;
    // Points to the past-the-end location of this element in the data_ array.
    size_t text_end;
    UnboundConversion conv;
  };

  bool has_error_;
  std::unique_ptr<char[]> data_;
  std::vector<ConversionItem> items_;
};
// A value type representing a preparsed format. These can be created, copied
// around, and reused to speed up formatting loops.
// The user must specify through the template arguments the conversion
// characters used in the format. This will be checked at compile time.
//
// This class uses Conv enum values to specify each argument.
// This allows for more flexibility as you can specify multiple possible
// conversion characters for each argument.
// ParsedFormat<char...> is a simplified alias for when the user only
// needs to specify a single conversion character for each argument.
//
// Example:
// // Extended format supports multiple characters per argument:
// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
// MyFormat GetFormat(bool use_hex) {
// if (use_hex) return MyFormat("foo %x bar");
// return MyFormat("foo %d bar");
// }
// // 'format' can be used with any value that supports 'd' and 'x',
// // like `int`.
// auto format = GetFormat(use_hex);
// value = StringF(format, i);
//
// This class also supports runtime format checking with the ::New() and
// ::NewAllowIgnored() factory functions.
// This is the only API that allows the user to pass a runtime specified format
// string. These factory functions will return NULL if the format does not match
// the conversions requested by the user.
template <FormatConversionCharSet... C>
class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
public:
// Builds the parsed format from `format`. When
// ABSL_INTERNAL_ENABLE_FORMAT_CHECKER is defined, the enable_if attributes
// below reject, at compile time, a `format` that is not constexpr or that
// does not match the template arguments `C...`. Delegates to the private
// constructor with allow_ignored = false.
explicit ExtendedParsedFormat(string_view format)
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
__attribute__((
enable_if(str_format_internal::EnsureConstexpr(format),
"Format string is not constexpr."),
enable_if(str_format_internal::ValidFormatImpl<C...>(format),
"Format specified does not match the template arguments.")))
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
: ExtendedParsedFormat(format, false) {
}
// ExtendedParsedFormat factory function.
// The user still has to specify the conversion characters, but they will not
// be checked at compile time. Instead, it will be checked at runtime.
// This delays the checking to runtime, but allows the user to pass
// dynamically sourced formats.
// It returns NULL if the format does not match the conversion characters.
// The user is responsible for checking the return value before using it.
//
// The 'New' variant will check that all the specified arguments are being
// consumed by the format and return NULL if any argument is being ignored.
// The 'NewAllowIgnored' variant will not verify this and will allow formats
// that ignore arguments.
static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
return New(format, false);
}
static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
string_view format) {
return New(format, true);
}
private:
// Shared implementation of the two public factories: constructs the object
// and returns nullptr instead when it reports has_error().
static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
bool allow_ignored) {
std::unique_ptr<ExtendedParsedFormat> conv(
new ExtendedParsedFormat(format, allow_ignored));
if (conv->has_error()) return nullptr;
return conv;
}
// Passes the template arguments `C...` to the base class as the list of
// accepted conversion sets.
ExtendedParsedFormat(string_view s, bool allow_ignored)
: ParsedFormatBase(s, allow_ignored, {C...}) {}
};
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_

View file

@ -0,0 +1,413 @@
#include "absl/strings/internal/str_format/parser.h"
#include <string.h>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/macros.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
using testing::Pair;
// Every LengthMod enumerator must map to its spelling in a format string.
TEST(LengthModTest, Names) {
  struct Expectation {
    int line;
    LengthMod mod;
    const char *name;
  };
  const Expectation kExpect[] = {
      {__LINE__, LengthMod::none, ""  },
      {__LINE__, LengthMod::h,    "h" },
      {__LINE__, LengthMod::hh,   "hh"},
      {__LINE__, LengthMod::l,    "l" },
      {__LINE__, LengthMod::ll,   "ll"},
      {__LINE__, LengthMod::L,    "L" },
      {__LINE__, LengthMod::j,    "j" },
      {__LINE__, LengthMod::z,    "z" },
      {__LINE__, LengthMod::t,    "t" },
      {__LINE__, LengthMod::q,    "q" },
  };
  // Guards against an enumerator being left out of the table above.
  EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), 10);
  for (const Expectation expected : kExpect) {
    SCOPED_TRACE(expected.line);
    EXPECT_EQ(expected.name, LengthModToString(expected.mod));
  }
}
// Every conversion character id must convert back to its character; kNone
// converts to '\0'.
TEST(ConversionCharTest, Names) {
  struct Expectation {
    FormatConversionChar id;
    char name;
  };
  // clang-format off
  const Expectation kExpect[] = {
#define X(c) {FormatConversionCharInternal::c, #c[0]}
    X(c), X(s),                                      // text
    X(d), X(i), X(o), X(u), X(x), X(X),              // int
    X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A),  // float
    X(n), X(p),                                      // misc
#undef X
    {FormatConversionCharInternal::kNone, '\0'},
  };
  // clang-format on
  for (const Expectation expected : kExpect) {
    SCOPED_TRACE(expected.name);
    const FormatConversionChar id = expected.id;
    EXPECT_EQ(expected.name, FormatConversionCharToChar(id));
  }
}
// Fixture around ConsumeUnboundConversion. The most recent parse result is
// kept in `o` so tests can inspect it after calling Consume() or Run().
class ConsumeUnboundConversionTest : public ::testing::Test {
 public:
  // Parses a spec at the start of `src`. Returns {consumed part, remainder},
  // or {empty, src} when parsing fails.
  std::pair<string_view, string_view> Consume(string_view src) {
    int next = 0;
    o = UnboundConversion();  // refresh
    const char* const begin = src.data();
    const char* const end = begin + src.size();
    const char* const after = ConsumeUnboundConversion(begin, end, &o, &next);
    if (after == nullptr) return {{}, src};
    return {string_view(begin, after - begin), string_view(after, end - after)};
  }

  // Returns true only when parsing succeeds and consumes all of `fmt`.
  // `force_positional` starts the parse in positional mode.
  bool Run(const char *fmt, bool force_positional = false) {
    int next = force_positional ? -1 : 0;
    o = UnboundConversion();  // refresh
    const char* const fmt_end = fmt + strlen(fmt);
    return ConsumeUnboundConversion(fmt, fmt_end, &o, &next) == fmt_end;
  }

  UnboundConversion o;
};
// Table-driven check of how much input a spec consumes and what is left over.
TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) {
  struct Expectation {
    int line;
    string_view src;
    string_view out;
    string_view src_post;
  };
  const Expectation kExpect[] = {
    {__LINE__, "",     "",  ""  },
    {__LINE__, "b",    "",  "b" },  // 'b' is invalid
    {__LINE__, "ba",   "",  "ba"},  // 'b' is invalid
    {__LINE__, "l",    "",  "l" },  // just length mod isn't okay
    {__LINE__, "d",    "d", ""  },  // basic
    {__LINE__, "d ",   "d", " " },  // leave suffix
    {__LINE__, "dd",   "d", "d" },  // don't be greedy
    {__LINE__, "d9",   "d", "9" },  // leave non-space suffix
    {__LINE__, "dzz",  "d", "zz"},  // length mod as suffix
    {__LINE__, "1$*2$d", "1$*2$d", "" },  // arg indexing and * allowed.
    {__LINE__, "0-14.3hhd", "0-14.3hhd", ""},  // precision, width
    {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""},  // flags
  };
  for (const Expectation& expected : kExpect) {
    SCOPED_TRACE(expected.line);
    EXPECT_THAT(Consume(expected.src),
                Pair(expected.out, expected.src_post));
  }
}
// Checks the simplest spec ("d"): empty input, a lone length modifier and
// trailing characters are rejected, and a successful parse leaves width and
// precision unset and selects argument 1.
TEST_F(ConsumeUnboundConversionTest, BasicConversion) {
EXPECT_FALSE(Run(""));
EXPECT_FALSE(Run("z"));
EXPECT_FALSE(Run("dd")); // no excess allowed
EXPECT_TRUE(Run("d"));
// The checks below inspect `o` as left behind by the Run("d") above.
EXPECT_EQ('d', FormatConversionCharToChar(o.conv));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
EXPECT_EQ(1, o.arg_position);
}
// Checks parsing of explicit "N$" argument positions, both when the parser
// starts in non-positional mode and when positional mode is forced. Each
// EXPECT_EQ inspects `o` as left by the Run() call directly above it.
TEST_F(ConsumeUnboundConversionTest, ArgPosition) {
EXPECT_TRUE(Run("d"));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("3$d"));
EXPECT_EQ(3, o.arg_position);
EXPECT_TRUE(Run("1$d"));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("1$d", true));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("123$d"));
EXPECT_EQ(123, o.arg_position);
EXPECT_TRUE(Run("123$d", true));
EXPECT_EQ(123, o.arg_position);
EXPECT_TRUE(Run("10$d"));
EXPECT_EQ(10, o.arg_position);
EXPECT_TRUE(Run("10$d", true));
EXPECT_EQ(10, o.arg_position);
// Position can't be zero.
EXPECT_FALSE(Run("0$d"));
EXPECT_FALSE(Run("0$d", true));
EXPECT_FALSE(Run("1$*0$d"));
EXPECT_FALSE(Run("1$.*0$d"));
// Position can't start with a zero digit at all. That is not a 'decimal'.
EXPECT_FALSE(Run("01$p"));
EXPECT_FALSE(Run("01$p", true));
EXPECT_FALSE(Run("1$*01$p"));
EXPECT_FALSE(Run("1$.*01$p"));
}
// Checks literal and '*' (from-argument) width and precision, in both
// non-positional and positional form, plus the limit on the number of digits.
// Each group of expectations inspects `o` as left by the Run() call that
// opens the group.
TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) {
// Literal width only.
EXPECT_TRUE(Run("14d"));
EXPECT_EQ('d', FormatConversionCharToChar(o.conv));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
// A '.' without digits gives precision 0.
EXPECT_TRUE(Run("14.d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_EQ(0, o.precision.value());
EXPECT_TRUE(Run(".d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(0, o.precision.value());
// Literal precision only.
EXPECT_TRUE(Run(".5d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(5, o.precision.value());
EXPECT_TRUE(Run(".0d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(0, o.precision.value());
// Literal width and precision together.
EXPECT_TRUE(Run("14.5d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_EQ(5, o.precision.value());
// '*' takes arguments in order: width, then precision, then the value.
EXPECT_TRUE(Run("*.*d"));
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(1, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(2, o.precision.get_from_arg());
EXPECT_EQ(3, o.arg_position);
EXPECT_TRUE(Run("*d"));
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(1, o.width.get_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
EXPECT_EQ(2, o.arg_position);
EXPECT_TRUE(Run(".*d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(1, o.precision.get_from_arg());
EXPECT_EQ(2, o.arg_position);
// mixed implicit and explicit: didn't specify arg position.
EXPECT_FALSE(Run("*23$.*34$d"));
// Fully positional forms.
EXPECT_TRUE(Run("12$*23$.*34$d"));
EXPECT_EQ(12, o.arg_position);
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(23, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(34, o.precision.get_from_arg());
EXPECT_TRUE(Run("2$*5$.*9$d"));
EXPECT_EQ(2, o.arg_position);
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(5, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(9, o.precision.get_from_arg());
EXPECT_FALSE(Run(".*0$d")) << "no arg 0";
// Large values
EXPECT_TRUE(Run("999999999.999999999d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_EQ(999999999, o.width.value());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(999999999, o.precision.value());
// Ten or more digits are rejected.
EXPECT_FALSE(Run("1000000000.999999999d"));
EXPECT_FALSE(Run("999999999.1000000000d"));
EXPECT_FALSE(Run("9999999999d"));
EXPECT_FALSE(Run(".9999999999d"));
}
// Tries every subset of the five flag characters, in forward and in reversed
// order, and checks that exactly the flags present in the spec are reported.
TEST_F(ConsumeUnboundConversionTest, Flags) {
  static const char kAllFlags[] = "-+ #0";
  static const int kNumFlags = ABSL_ARRAYSIZE(kAllFlags) - 1;
  const int kNumSubsets = 1 << kNumFlags;
  for (const bool reversed : {false, true}) {
    for (int mask = 0; mask < kNumSubsets; ++mask) {
      // Bit k of `mask` decides whether flag k is part of the spec.
      std::string fmt;
      for (int bit = 0; bit < kNumFlags; ++bit) {
        if (((mask >> bit) & 1) != 0) fmt += kAllFlags[bit];
      }
      // flag order shouldn't matter
      if (reversed) {
        std::reverse(fmt.begin(), fmt.end());
      }
      fmt += 'd';
      SCOPED_TRACE(fmt);
      EXPECT_TRUE(Run(fmt.c_str()));
      const auto lacks = [&fmt](char flag) {
        return fmt.find(flag) == std::string::npos;
      };
      EXPECT_EQ(lacks('-'), !o.flags.left);
      EXPECT_EQ(lacks('+'), !o.flags.show_pos);
      EXPECT_EQ(lacks(' '), !o.flags.sign_col);
      EXPECT_EQ(lacks('#'), !o.flags.alt);
      EXPECT_EQ(lacks('0'), !o.flags.zero);
    }
  }
}
// Checks the `basic` flag: it is expected to be set for the plain conversions
// listed first and cleared for the formats that add a width, a precision dot
// or another flag.
TEST_F(ConsumeUnboundConversionTest, BasicFlag) {
  struct Case {
    const char* fmt;
    bool expect_basic;
  };
  const Case kCases[] = {
      // Flag is on
      {"d", true},
      {"llx", true},
      {"G", true},
      {"1$X", true},
      // Flag is off
      {"3d", false},
      {".llx", false},
      {"-G", false},
      {"1$#X", false},
  };
  for (const Case& c : kCases) {
    SCOPED_TRACE(c.fmt);
    EXPECT_TRUE(Run(c.fmt));
    const bool is_basic = o.flags.basic;
    EXPECT_EQ(c.expect_basic, is_basic);
  }
}
// Checks that each length-modifier spelling parses and is reported through
// `o.length_mod` as the matching LengthMod value.
TEST_F(ConsumeUnboundConversionTest, LengthMod) {
  struct Case {
    const char* fmt;
    LengthMod expected;
  };
  const Case kCases[] = {
      {"d", LengthMod::none},
      {"hd", LengthMod::h},
      {"hhd", LengthMod::hh},
      {"ld", LengthMod::l},
      {"lld", LengthMod::ll},
      {"Lf", LengthMod::L},
      {"qf", LengthMod::q},
      {"jd", LengthMod::j},
      {"zd", LengthMod::z},
      {"td", LengthMod::t},
  };
  for (const Case& c : kCases) {
    SCOPED_TRACE(c.fmt);
    EXPECT_TRUE(Run(c.fmt));
    EXPECT_EQ(c.expected, o.length_mod);
  }
}
// Format consumer that appends a compact textual summary of everything it is
// handed to `*out`:
//   - literal text is recorded as "[text]";
//   - a conversion is recorded as "{spec:N$[W$*][.P$*]c}", where `spec` is the
//     text passed alongside the conversion, N is its argument position, W/P
//     are the argument positions supplying width/precision (present only when
//     those come from arguments), and c is the conversion character.
struct SummarizeConsumer {
  std::string* out;

  explicit SummarizeConsumer(std::string* out) : out(out) {}

  // Records a run of literal text. Always reports success.
  bool Append(string_view s) {
    out->append("[");
    out->append(std::string(s));
    out->append("]");
    return true;
  }

  // Records a single parsed conversion. Always reports success.
  bool ConvertOne(const UnboundConversion& conv, string_view s) {
    std::string entry = "{";
    entry += std::string(s);
    entry += ":";
    entry += std::to_string(conv.arg_position);
    entry += "$";
    if (conv.width.is_from_arg()) {
      entry += std::to_string(conv.width.get_from_arg());
      entry += "$*";
    }
    if (conv.precision.is_from_arg()) {
      entry += ".";
      entry += std::to_string(conv.precision.get_from_arg());
      entry += "$*";
    }
    entry += FormatConversionCharToChar(conv.conv);
    entry += "}";
    *out += entry;
    return true;
  }
};
// Runs `pc` through a SummarizeConsumer and returns the resulting summary.
// A trailing "!" is appended when ProcessFormat() reports failure.
std::string SummarizeParsedFormat(const ParsedFormatBase& pc) {
  std::string summary;
  const bool ok = pc.ProcessFormat(SummarizeConsumer(&summary));
  if (!ok) {
    summary += "!";
  }
  return summary;
}
// Fixture with no state of its own; it only groups the ParsedFormat test cases
// under one suite name.
class ParsedFormatTest : public testing::Test {};
// Exercises copy construction, move assignment, swap and copy assignment of
// ParsedFormatBase, comparing objects through their summaries.
TEST_F(ParsedFormatTest, ValueSemantics) {
  const std::string empty_summary = "";
  const std::string hello_summary = "[hello]{s:1$s}";

  ParsedFormatBase p1({}, true, {});  // empty format
  EXPECT_EQ(empty_summary, SummarizeParsedFormat(p1));

  ParsedFormatBase p2 = p1;  // copy construct (empty)
  EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2));

  p1 = ParsedFormatBase("hello%s", true,
                        {FormatConversionCharSetInternal::s});  // move assign
  EXPECT_EQ(hello_summary, SummarizeParsedFormat(p1));

  ParsedFormatBase p3 = p1;  // copy construct (nonempty)
  EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p3));

  using std::swap;
  swap(p1, p2);
  EXPECT_EQ(empty_summary, SummarizeParsedFormat(p1));
  EXPECT_EQ(hello_summary, SummarizeParsedFormat(p2));
  swap(p1, p2);  // undo

  p2 = p1;  // copy assign
  EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2));
}
// One table row for the table-driven ParsedFormat parsing tests.
struct ExpectParse {
// Format string handed to the ParsedFormatBase constructor.
const char* in;
// Conversion set passed as ParsedFormatBase's third constructor argument.
std::initializer_list<FormatConversionCharSet> conv_set;
// Expected SummarizeParsedFormat() output for the parsed format.
const char* out;
};
// Table-driven check of the summaries produced for a handful of formats,
// including one that stops at an invalid conversion.
TEST_F(ParsedFormatTest, Parsing) {
  // Parse should be equivalent to that obtained by ConversionParseIterator.
  // No need to retest the parsing edge cases here.
  const ExpectParse kExpect[] = {
      {"", {}, ""},
      {"ab", {}, "[ab]"},
      {"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"},
      {"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"},
      {"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"},
      {"a%b %d", {}, "[a]!"},  // stop after error
  };
  for (const ExpectParse& expectation : kExpect) {
    SCOPED_TRACE(expectation.in);
    const ParsedFormatBase parsed(expectation.in, false, expectation.conv_set);
    EXPECT_EQ(expectation.out, SummarizeParsedFormat(parsed));
  }
}
// Checks that the same flags written in different orders (and with a repeated
// flag) all parse, and that the summary echoes the flags as written.
TEST_F(ParsedFormatTest, ParsingFlagOrder) {
  const ExpectParse kExpect[] = {
      {"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"},
      {"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"},
      {"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"},
      {"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"},
      {"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"},
      {"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"},
      {"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"},
  };
  for (const ExpectParse& expectation : kExpect) {
    SCOPED_TRACE(expectation.in);
    const ParsedFormatBase parsed(expectation.in, false, expectation.conv_set);
    EXPECT_EQ(expectation.out, SummarizeParsedFormat(parsed));
  }
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,314 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Join API that are inlined/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in this file are:
//
// - A handful of default Formatters
// - JoinAlgorithm() overloads
// - JoinRange() overloads
// - JoinTuple()
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_join.h
//
// IWYU pragma: private, include "absl/strings/str_join.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#include <cstring>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include "absl/strings/internal/ostringstream.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/str_cat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
//
// Formatter objects
//
// The following are implementation classes for standard Formatter objects. The
// factory functions that users will call to create and use these formatters are
// defined and documented in absl/strings/str_join.h.
//
// The default formatter. Converts alpha-numeric types to strings by appending
// the AlphaNum representation of the value to `*out`.
struct AlphaNumFormatterImpl {
// This template is needed in order to support passing in a dereferenced
// vector<bool>::iterator
// The value is wrapped in an AlphaNum explicitly before being appended.
template <typename T>
void operator()(std::string* out, const T& t) const {
StrAppend(out, AlphaNum(t));
}
// Overload for arguments that already are (or convert implicitly to) AlphaNum.
void operator()(std::string* out, const AlphaNum& t) const {
StrAppend(out, t);
}
};
// A type that's used to overload the JoinAlgorithm() function (defined below)
// for ranges that do not require additional formatting (e.g., a range of
// strings). It adds nothing to AlphaNumFormatterImpl; only its distinct type
// matters, because that is what selects the JoinAlgorithm() overload.
struct NoFormatter : public AlphaNumFormatterImpl {};
// Formatter that writes each value to the output string with operator<<,
// going through a strings_internal::OStringStream.
class StreamFormatterImpl {
 public:
  // Deliberately non-const: the call mutates the cached stream. Making it
  // const would render StreamFormatterImpl thread-hostile.
  template <typename T>
  void operator()(std::string* out, const T& t) {
    if (!strm_) {
      // The stream is created lazily to avoid paying the relatively high cost
      // of its construction when joining an empty range.
      strm_.reset(new strings_internal::OStringStream(out));
    } else {
      // Reuse the existing stream: clear the bad, fail and eof bits in case
      // they were set, then hand it the current output string.
      strm_->clear();
      strm_->str(out);
    }
    *strm_ << t;
  }

 private:
  std::unique_ptr<strings_internal::OStringStream> strm_;
};
// Formatter for pair-like values (anything with `.first` and `.second`).
// `first` is written with f1_, then the separator sep_, then `second` with
// f2_. The separator is copied into an owned std::string at construction.
template <typename F1, typename F2>
class PairFormatterImpl {
 public:
  PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2)
      : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}

  // Non-const call: the wrapped formatters are invoked as non-const objects.
  template <typename T>
  void operator()(std::string* out, const T& p) {
    FormatPair(f1_, f2_, sep_, out, p);
  }

  // Const call: the wrapped formatters are invoked as const objects.
  template <typename T>
  void operator()(std::string* out, const T& p) const {
    FormatPair(f1_, f2_, sep_, out, p);
  }

 private:
  // Shared body of both call operators. The formatter types are deduced so
  // that constness of the members is carried through to the calls.
  template <typename FirstFormatter, typename SecondFormatter, typename T>
  static void FormatPair(FirstFormatter& first_formatter,
                         SecondFormatter& second_formatter,
                         const std::string& separator, std::string* out,
                         const T& p) {
    first_formatter(out, p.first);
    out->append(separator);
    second_formatter(out, p.second);
  }

  F1 f1_;
  std::string sep_;
  F2 f2_;
};
// Wraps another formatter and dereferences the argument to operator() then
// passes the dereferenced argument to the wrapped formatter. This can be
// useful, for example, to join a std::vector<int*>.
template <typename Formatter>
class DereferenceFormatterImpl {
public:
// Value-initializes the wrapped formatter.
DereferenceFormatterImpl() : f_() {}
// Takes the wrapped formatter from `f`. Formatter is the class template
// parameter here, not a deduced function template parameter.
explicit DereferenceFormatterImpl(Formatter&& f)
: f_(std::forward<Formatter>(f)) {}
// Non-const call: applies unary `*` to `t` and formats the result with the
// wrapped formatter invoked as a non-const object.
template <typename T>
void operator()(std::string* out, const T& t) {
f_(out, *t);
}
// Const call: same as above, with the wrapped formatter invoked as a const
// object.
template <typename T>
void operator()(std::string* out, const T& t) const {
f_(out, *t);
}
private:
Formatter f_;
};
// DefaultFormatter<T> is a traits class that selects a default Formatter to use
// for the given type T. The ::Type member names the Formatter to use. This is
// consulted by the join entry points that do NOT take a Formatter argument, in
// which case a default Formatter must be chosen.
//
// AlphaNumFormatterImpl is the default in the base template, followed by
// specializations for other types.
template <typename ValueType>
struct DefaultFormatter {
  using Type = AlphaNumFormatterImpl;
};

// C strings are formatted as text; these full specializations take precedence
// over the generic pointer specialization below.
template <>
struct DefaultFormatter<const char*> {
  using Type = AlphaNumFormatterImpl;
};

template <>
struct DefaultFormatter<char*> {
  using Type = AlphaNumFormatterImpl;
};

// String-like values need no formatting; NoFormatter selects the matching
// JoinAlgorithm() overload.
template <>
struct DefaultFormatter<std::string> {
  using Type = NoFormatter;
};

template <>
struct DefaultFormatter<absl::string_view> {
  using Type = NoFormatter;
};

// Pointers are dereferenced and the pointee is formatted with the pointee
// type's own default formatter.
template <typename ValueType>
struct DefaultFormatter<ValueType*> {
  using Type =
      DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>;
};

// std::unique_ptr<T> is treated exactly like T*.
template <typename ValueType>
struct DefaultFormatter<std::unique_ptr<ValueType>>
    : public DefaultFormatter<ValueType*> {};
//
// JoinAlgorithm() functions
//
// The main joining algorithm. Walks [start, end) once, writing each element
// into the result with the Formatter `f` and inserting the separator `s`
// before every element except the first. An empty range gives an empty string.
template <typename Iterator, typename Formatter>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          Formatter&& f) {
  std::string result;
  bool is_first = true;
  for (Iterator it = start; it != end; ++it) {
    if (is_first) {
      is_first = false;
    } else {
      result.append(s.data(), s.size());
    }
    f(&result, *it);
  }
  return result;
}
// A joining algorithm that's optimized for a forward iterator range of
// string-like objects that do not need any additional formatting. This is to
// optimize the common case of joining, say, a std::vector<string> or a
// std::vector<absl::string_view>.
//
// This is an overload of the previous JoinAlgorithm() function, selected when
// the Formatter argument is of type NoFormatter. DefaultFormatter picks
// NoFormatter for std::string and absl::string_view elements, so this overload
// is reached when such a range is joined without an explicit Formatter.
//
// The optimization is that the output string is sized once up front, so it
// never has to grow while appending. To do this, the iterator range is
// traversed twice: once to compute the total size, and once more to copy the
// elements and separators into place.
template <typename Iterator,
          typename = typename std::enable_if<std::is_convertible<
              typename std::iterator_traits<Iterator>::iterator_category,
              std::forward_iterator_tag>::value>::type>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          NoFormatter) {
  std::string result;
  if (start == end) {
    return result;
  }

  // Pass 1: sum the sizes of all elements and the separators between them.
  size_t total_size = start->size();
  for (Iterator it = std::next(start); it != end; ++it) {
    total_size += s.size();
    total_size += it->size();
  }
  if (total_size == 0) {
    return result;
  }

  // Pass 2: size the string once and copy every piece into position.
  STLStringResizeUninitialized(&result, total_size);
  char* cursor = &*result.begin();
  const auto copy_piece = [&cursor](const char* src, size_t len) {
    memcpy(cursor, src, len);
    cursor += len;
  };
  copy_piece(start->data(), start->size());
  for (Iterator it = std::next(start); it != end; ++it) {
    copy_piece(s.data(), s.size());
    copy_piece(it->data(), it->size());
  }
  return result;
}
// JoinTupleLoop implements a loop over the elements of a std::tuple, which
// are heterogeneous. The primary template matches the tuple interior case. It
// continues the iteration after appending a separator (for nonzero indices)
// and formatting an element of the tuple. The specialization for the I=N case
// matches the end-of-tuple, and terminates the iteration.
//
// I is the index of the element handled by this step; N is the tuple size.
template <size_t I, size_t N>
struct JoinTupleLoop {
template <typename Tup, typename Formatter>
void operator()(std::string* out, const Tup& tup, absl::string_view sep,
Formatter&& fmt) {
// No separator before the very first element.
if (I > 0) out->append(sep.data(), sep.size());
fmt(out, std::get<I>(tup));
// Continue with the next index; ends at the <N, N> specialization.
JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt);
}
};
// End of iteration: nothing is left to format.
template <size_t N>
struct JoinTupleLoop<N, N> {
template <typename Tup, typename Formatter>
void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {}
};
template <typename... T, typename Formatter>
std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep,
Formatter&& fmt) {
std::string result;
JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
return result;
}
// Joins the iterator range [first, last) with `separator`, formatting the
// elements with the default formatter for the iterator's value type.
template <typename Iterator>
std::string JoinRange(Iterator first, Iterator last,
                      absl::string_view separator) {
  // No formatter was explicitly given, so a default must be chosen.
  using ValueType = typename std::iterator_traits<Iterator>::value_type;
  using Formatter = typename DefaultFormatter<ValueType>::Type;
  return JoinAlgorithm(first, last, separator, Formatter());
}
// Joins all elements of `range` with `separator`, formatting each element with
// the caller-supplied formatter `fmt`. begin()/end() are called unqualified
// after the using-declarations so that both the std versions and versions
// found by argument-dependent lookup are considered.
template <typename Range, typename Formatter>
std::string JoinRange(const Range& range, absl::string_view separator,
                      Formatter&& fmt) {
  using std::begin;
  using std::end;
  auto first = begin(range);
  auto last = end(range);
  return JoinAlgorithm(first, last, separator, fmt);
}
// Joins all elements of `range` with `separator` using the default formatter,
// by forwarding to the iterator-pair overload of JoinRange().
template <typename Range>
std::string JoinRange(const Range& range, absl::string_view separator) {
  using std::begin;
  using std::end;
  auto first = begin(range);
  auto last = end(range);
  return JoinRange(first, last, separator);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_

View file

@ -0,0 +1,455 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Split API that are inline/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in here are
//
// - ConvertibleToStringView
// - SplitIterator<>
// - Splitter<>
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#include <array>
#include <initializer_list>
#include <iterator>
#include <map>
#include <type_traits>
#include <utility>
#include <vector>
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
#ifdef _GLIBCXX_DEBUG
#include "absl/strings/internal/stl_type_traits.h"
#endif // _GLIBCXX_DEBUG
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// This class is implicitly constructible from everything that absl::string_view
// is implicitly constructible from. If it's constructed from a temporary
// string, the data is moved into a data member so its lifetime matches that of
// the ConvertibleToStringView instance.
//
// value_ therefore either views characters owned by the caller or views this
// object's own copy_ member. The copy/move operations below re-point value_ at
// the destination's copy_ whenever the source was viewing its own copy_.
class ConvertibleToStringView {
public:
ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
: value_(s) {}
// Matches rvalue strings and moves their data to a member.
ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit)
: copy_(std::move(s)), value_(copy_) {}
// Copies the owned string and, if `other` was viewing its own copy_, views
// the new copy instead of other's storage.
ConvertibleToStringView(const ConvertibleToStringView& other)
: copy_(other.copy_),
value_(other.IsSelfReferential() ? copy_ : other.value_) {}
// Both members are default-initialized first, then taken from `other`.
ConvertibleToStringView(ConvertibleToStringView&& other) {
StealMembers(std::move(other));
}
// Takes its argument by value, so this one operator serves both copy and move
// assignment.
ConvertibleToStringView& operator=(ConvertibleToStringView other) {
StealMembers(std::move(other));
return *this;
}
// Returns the viewed characters.
absl::string_view value() const { return value_; }
private:
// Returns true if this object's value_ refers to its internal copy_ member.
bool IsSelfReferential() const { return value_.data() == copy_.data(); }
// Takes over other's state. When `other` views its own copy_, the string is
// moved into this object and both objects are left viewing their own copy_.
// Otherwise only the view is copied and this object's copy_ is left untouched.
void StealMembers(ConvertibleToStringView&& other) {
if (other.IsSelfReferential()) {
copy_ = std::move(other.copy_);
value_ = copy_;
other.value_ = other.copy_;
} else {
value_ = other.value_;
}
}
// Holds the data moved from temporary std::string arguments. Declared first
// so that 'value_' can refer to 'copy_'.
std::string copy_;
absl::string_view value_;
};
// An iterator that enumerates the parts of a string from a Splitter. The text
// to be split, the Delimiter, and the Predicate are all taken from the given
// Splitter object. Iterators may only be compared if they refer to the same
// Splitter instance.
//
// The iterator keeps a pointer to the Splitter and copies of its delimiter and
// predicate; the text itself is re-read from the Splitter on every step.
//
// This class is NOT part of the public splitting API.
template <typename Splitter>
class SplitIterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = absl::string_view;
using difference_type = ptrdiff_t;
using pointer = const value_type*;
using reference = const value_type&;
// kInitState: more pieces may follow the current one.
// kLastState: the current piece is the final one; the next increment ends.
// kEndState:  past-the-end.
enum State { kInitState, kLastState, kEndState };
// Builds a begin iterator (kInitState), which immediately advances to the
// first accepted piece, or an end iterator (kEndState).
SplitIterator(State state, const Splitter* splitter)
: pos_(0),
state_(state),
splitter_(splitter),
delimiter_(splitter->delimiter()),
predicate_(splitter->predicate()) {
// Hack to maintain backward compatibility. This one block makes it so an
// empty absl::string_view whose .data() happens to be nullptr behaves
// *differently* from an otherwise empty absl::string_view whose .data() is
// not nullptr. This is an undesirable difference in general, but this
// behavior is maintained to avoid breaking existing code that happens to
// depend on this old behavior/bug. Perhaps it will be fixed one day. The
// difference in behavior is as follows:
// Split(absl::string_view(""), '-'); // {""}
// Split(absl::string_view(), '-'); // {}
if (splitter_->text().data() == nullptr) {
state_ = kEndState;
pos_ = splitter_->text().size();
return;
}
if (state_ == kEndState) {
// End iterators sit at the end of the text so that they compare equal to an
// iterator that has been advanced to the end.
pos_ = splitter_->text().size();
} else {
++(*this);
}
}
// True once the iterator has moved past the last piece.
bool at_end() const { return state_ == kEndState; }
reference operator*() const { return curr_; }
pointer operator->() const { return &curr_; }
// Advances to the next piece accepted by the predicate, or to the end state.
SplitIterator& operator++() {
do {
if (state_ == kLastState) {
state_ = kEndState;
return *this;
}
const absl::string_view text = splitter_->text();
const absl::string_view d = delimiter_.Find(text, pos_);
// A delimiter result positioned at the very end of the text marks the piece
// being produced now as the last one.
if (d.data() == text.data() + text.size()) state_ = kLastState;
// The piece runs from pos_ up to the start of the delimiter match.
curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
// Skip past both the piece and the delimiter match.
pos_ += curr_.size() + d.size();
} while (!predicate_(curr_));
return *this;
}
SplitIterator operator++(int) {
SplitIterator old(*this);
++(*this);
return old;
}
// Equality looks only at state and position, not at which Splitter is
// referenced.
friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
return a.state_ == b.state_ && a.pos_ == b.pos_;
}
friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
return !(a == b);
}
private:
// Offset into the text at which the next search for a delimiter starts.
size_t pos_;
State state_;
// The piece currently referred to by operator* and operator->.
absl::string_view curr_;
const Splitter* splitter_;
typename Splitter::DelimiterType delimiter_;
typename Splitter::PredicateType predicate_;
};
// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
// The partial specialization is only viable when `typename T::mapped_type` is
// well-formed; otherwise the primary template (false) is used.
template <typename T, typename = void>
struct HasMappedType : std::false_type {};
template <typename T>
struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
: std::true_type {};
// HasValueType<T>::value is true iff there exists a type T::value_type.
// The partial specialization is only viable when `typename T::value_type` is
// well-formed; otherwise the primary template (false) is used.
template <typename T, typename = void>
struct HasValueType : std::false_type {};
template <typename T>
struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
};
// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
// The partial specialization is only viable when `typename T::const_iterator`
// is well-formed; otherwise the primary template (false) is used.
template <typename T, typename = void>
struct HasConstIterator : std::false_type {};
template <typename T>
struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
: std::true_type {};
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
// details below in Splitter<> where this is used.
//
// The two dispatch functions are only declared, never defined: they are used
// solely inside decltype. A T* argument picks the template overload when T is
// some std::initializer_list<U>, and the variadic fallback otherwise.
std::false_type IsInitializerListDispatch(...); // default: No
template <typename T>
std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
template <typename T>
struct IsInitializerList
: decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
// SplitterIsConvertibleTo<C>::value is true iff a Splitter may be implicitly
// converted to the type 'C'.
//
// Restricts conversion to container-like types (by testing for the presence of
// a const_iterator member type) and also to disable conversion to an
// std::initializer_list (which also has a const_iterator). Otherwise, code
// compiled in C++11 will get an error due to ambiguous conversion paths (in
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
// or an std::initializer_list<T>).
//
// The Impl template is selected by two flags: whether C passed the
// container-like checks, and whether C has a mapped_type. Any combination not
// specialized below yields false.
template <typename C, bool has_value_type, bool has_mapped_type>
struct SplitterIsConvertibleToImpl : std::false_type {};
// Container-like without mapped_type: the element type must be constructible
// from an absl::string_view.
template <typename C>
struct SplitterIsConvertibleToImpl<C, true, false>
: std::is_constructible<typename C::value_type, absl::string_view> {};
// Container-like with mapped_type: both the key type and the mapped type must
// be constructible from an absl::string_view.
template <typename C>
struct SplitterIsConvertibleToImpl<C, true, true>
: absl::conjunction<
std::is_constructible<typename C::key_type, absl::string_view>,
std::is_constructible<typename C::mapped_type, absl::string_view>> {};
template <typename C>
struct SplitterIsConvertibleTo
: SplitterIsConvertibleToImpl<
C,
#ifdef _GLIBCXX_DEBUG
!IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
#endif // _GLIBCXX_DEBUG
!IsInitializerList<
typename std::remove_reference<C>::type>::value &&
HasValueType<C>::value && HasConstIterator<C>::value,
HasMappedType<C>::value> {
};
// This class implements the range that is returned by absl::StrSplit(). This
// class has templated conversion operators that allow it to be implicitly
// converted to a variety of types that the caller may have specified on the
// left-hand side of an assignment.
//
// The main interface for interacting with this class is through its implicit
// conversion operators. However, this class may also be used like a container
// in that it has .begin() and .end() member functions. It may also be used
// within a range-for loop.
//
// Output containers can be collections of any type that is constructible from
// an absl::string_view.
//
// A Predicate functor may be supplied. This predicate will be used to filter
// the split strings: only strings for which the predicate returns true will be
// kept. A Predicate object is any unary functor that takes an absl::string_view
// and returns bool.
template <typename Delimiter, typename Predicate>
class Splitter {
public:
using DelimiterType = Delimiter;
using PredicateType = Predicate;
using const_iterator = strings_internal::SplitIterator<Splitter>;
using value_type = typename std::iterator_traits<const_iterator>::value_type;
// Stores the input text (as a ConvertibleToStringView), the delimiter and the
// predicate by value.
Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
: text_(std::move(input_text)),
delimiter_(std::move(d)),
predicate_(std::move(p)) {}
absl::string_view text() const { return text_.value(); }
const Delimiter& delimiter() const { return delimiter_; }
const Predicate& predicate() const { return predicate_; }
// Range functions that iterate the split substrings as absl::string_view
// objects. These methods enable a Splitter to be used in a range-based for
// loop.
const_iterator begin() const { return {const_iterator::kInitState, this}; }
const_iterator end() const { return {const_iterator::kEndState, this}; }
// An implicit conversion operator that is restricted to only those containers
// that the splitter is convertible to.
template <typename Container,
typename = typename std::enable_if<
SplitterIsConvertibleTo<Container>::value>::type>
operator Container() const { // NOLINT(runtime/explicit)
return ConvertToContainer<Container, typename Container::value_type,
HasMappedType<Container>::value>()(*this);
}
// Returns a pair with its .first and .second members set to the first two
// strings returned by the begin() iterator. Either/both of .first and .second
// will be constructed with empty strings if the iterator doesn't have a
// corresponding value.
template <typename First, typename Second>
operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
absl::string_view first, second;
auto it = begin();
if (it != end()) {
first = *it;
if (++it != end()) {
second = *it;
}
}
return {First(first), Second(second)};
}
private:
// ConvertToContainer is a functor converting a Splitter to the requested
// Container of ValueType. It is specialized below to optimize splitting to
// certain combinations of Container and ValueType.
//
// This base template handles the generic case of storing the split results in
// the requested non-map-like container and converting the split substrings to
// the requested type.
template <typename Container, typename ValueType, bool is_map = false>
struct ConvertToContainer {
Container operator()(const Splitter& splitter) const {
Container c;
auto it = std::inserter(c, c.end());
for (const auto sp : splitter) {
*it++ = ValueType(sp);
}
return c;
}
};
// Partial specialization for a std::vector<absl::string_view>.
//
// Optimized for the common case of splitting to a
// std::vector<absl::string_view>. In this case we first split the results to
// a small array of absl::string_view on the stack, to reduce reallocations.
template <typename A>
struct ConvertToContainer<std::vector<absl::string_view, A>,
absl::string_view, false> {
std::vector<absl::string_view, A> operator()(
const Splitter& splitter) const {
// Plain pointer/length pair used for the staging array; it converts to
// absl::string_view when the batch is inserted into the vector.
struct raw_view {
const char* data;
size_t size;
operator absl::string_view() const { // NOLINT(runtime/explicit)
return {data, size};
}
};
std::vector<absl::string_view, A> v;
std::array<raw_view, 16> ar;
// Outer loop: one batch per iteration. Inner loop: fill the staging array
// until it is full or the input is exhausted, then insert the batch at once.
for (auto it = splitter.begin(); !it.at_end();) {
size_t index = 0;
do {
ar[index].data = it->data();
ar[index].size = it->size();
++it;
} while (++index != ar.size() && !it.at_end());
v.insert(v.end(), ar.begin(), ar.begin() + index);
}
return v;
}
};
// Partial specialization for a std::vector<std::string>.
//
// Optimized for the common case of splitting to a std::vector<std::string>.
// In this case we first split the results to a std::vector<absl::string_view>
// so the returned std::vector<std::string> can have space reserved to avoid
// std::string moves.
template <typename A>
struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
std::vector<std::string, A> operator()(const Splitter& splitter) const {
// Uses the conversion to std::vector<absl::string_view> defined above.
const std::vector<absl::string_view> v = splitter;
return std::vector<std::string, A>(v.begin(), v.end());
}
};
// Partial specialization for containers of pairs (e.g., maps).
//
// The algorithm is to insert a new pair into the map for each even-numbered
// item, with the even-numbered item as the key with a default-constructed
// value. Each odd-numbered item will then be assigned to the last pair's
// value.
template <typename Container, typename First, typename Second>
struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
Container operator()(const Splitter& splitter) const {
Container m;
typename Container::iterator it;
// Alternates between "insert a key" and "assign the value of the last key".
bool insert = true;
for (const auto sp : splitter) {
if (insert) {
it = Inserter<Container>::Insert(&m, First(sp), Second());
} else {
it->second = Second(sp);
}
insert = !insert;
}
return m;
}
// Inserts the key and value into the given map, returning an iterator to
// the inserted item. Specialized for std::map and std::multimap to use
// emplace() and adapt emplace()'s return value.
template <typename Map>
struct Inserter {
using M = Map;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
}
};
template <typename... Ts>
struct Inserter<std::map<Ts...>> {
using M = std::map<Ts...>;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
}
};
// std::multimap::emplace() returns the iterator directly rather than a pair.
template <typename... Ts>
struct Inserter<std::multimap<Ts...>> {
using M = std::multimap<Ts...>;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
return m->emplace(std::make_pair(std::forward<Args>(args)...));
}
};
};
ConvertibleToStringView text_;
Delimiter delimiter_;
Predicate predicate_;
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_

View file

@ -0,0 +1,53 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// UTF8 utilities, implemented to reduce dependencies.
#include "absl/strings/internal/utf8.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Writes the UTF-8 encoding of the code point `utf8_char` into `buffer` and
// returns the number of bytes written (1 to 4). `buffer` must have room for
// that many bytes; nothing else is written (in particular no terminating NUL).
//
// The encoded length is chosen purely from the magnitude of `utf8_char`:
//   <= 0x7F   -> 1 byte
//   <= 0x7FF  -> 2 bytes
//   <= 0xFFFF -> 3 bytes
//   otherwise -> 4 bytes
// No validation is performed, so values above 0x10FFFF still produce four
// bytes, which do not form a valid encoding.
//
// Every stored byte is narrowed from char32_t with an explicit static_cast so
// the truncation to `char` is intentional and visible rather than implicit.
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) {
  if (utf8_char <= 0x7F) {
    *buffer = static_cast<char>(utf8_char);
    return 1;
  } else if (utf8_char <= 0x7FF) {
    // Continuation bytes are filled from the last one backwards, consuming
    // six payload bits each; the lead byte gets whatever remains.
    buffer[1] = static_cast<char>(0x80 | (utf8_char & 0x3F));
    utf8_char >>= 6;
    buffer[0] = static_cast<char>(0xC0 | utf8_char);
    return 2;
  } else if (utf8_char <= 0xFFFF) {
    buffer[2] = static_cast<char>(0x80 | (utf8_char & 0x3F));
    utf8_char >>= 6;
    buffer[1] = static_cast<char>(0x80 | (utf8_char & 0x3F));
    utf8_char >>= 6;
    buffer[0] = static_cast<char>(0xE0 | utf8_char);
    return 3;
  } else {
    buffer[3] = static_cast<char>(0x80 | (utf8_char & 0x3F));
    utf8_char >>= 6;
    buffer[2] = static_cast<char>(0x80 | (utf8_char & 0x3F));
    utf8_char >>= 6;
    buffer[1] = static_cast<char>(0x80 | (utf8_char & 0x3F));
    utf8_char >>= 6;
    buffer[0] = static_cast<char>(0xF0 | utf8_char);
    return 4;
  }
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,50 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// UTF8 utilities, implemented to reduce dependencies.
#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_
#define ABSL_STRINGS_INTERNAL_UTF8_H_
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes
// out the UTF-8 encoding into buffer, and returns the number of chars
// it wrote.
//
// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings
// are:
//      00 -     7F : 0xxxxxxx
//      80 -    7FF : 110xxxxx 10xxxxxx
//     800 -   FFFF : 1110xxxx 10xxxxxx 10xxxxxx
//   10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
//
// Values greater than 0x10FFFF are not supported: for such inputs the function
// may or may not write characters into buffer. Regardless of the value of
// utf8_char, it never writes more than kMaxEncodedUTF8Size bytes, so a buffer
// of that size is always large enough.
enum { kMaxEncodedUTF8Size = 4 };
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_UTF8_H_

View file

@ -0,0 +1,66 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/utf8.h"
#include <cstdint>
#include <utility>
#include "gtest/gtest.h"
#include "absl/base/port.h"
namespace {
#if !defined(__cpp_char8_t)
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wc++2a-compat"
#endif
// Compares EncodeUTF8Char with the compiler's own u8 literals for code points
// covering every encoded length (1 to 4 bytes), then checks that inputs outside
// the supported range never report more than kMaxEncodedUTF8Size bytes.
TEST(EncodeUTF8Char, BasicFunction) {
std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"},
{0x00A3, u8"\u00A3"},
{0x00010000, u8"\U00010000"},
{0x0000FFFF, u8"\U0000FFFF"},
{0x0010FFFD, u8"\U0010FFFD"}};
for (auto &test : tests) {
// Encode into two buffers pre-filled with different bytes (0x00 and 0xFF).
// A trailing byte that still holds the fill value in BOTH buffers was not
// written; a written byte cannot match both fills at once.
char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'};
char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'};
// Pointers one past the last byte the function claims to have written.
char *buf0_written =
&buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)];
char *buf1_written =
&buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)];
// Walk back from the end of the buffers to find how many bytes were really
// modified, independent of the returned length.
int apparent_length = 7;
while (buf0[apparent_length - 1] == '\x00' &&
buf1[apparent_length - 1] == '\xFF') {
if (--apparent_length == 0) break;
}
// The observed length must match the returned length and the reference
// literal, and the bytes themselves must match the literal.
EXPECT_EQ(apparent_length, buf0_written - buf0);
EXPECT_EQ(apparent_length, buf1_written - buf1);
EXPECT_EQ(apparent_length, test.second.length());
EXPECT_EQ(std::string(buf0, apparent_length), test.second);
EXPECT_EQ(std::string(buf1, apparent_length), test.second);
}
// One past the largest supported code point.
char buf[32] = "Don't Tread On Me";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000),
absl::strings_internal::kMaxEncodedUTF8Size);
// -1 converts to the largest char32_t value.
char buf2[32] = "Negative is invalid but sane";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1),
absl::strings_internal::kMaxEncodedUTF8Size);
}
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
#endif // !defined(__cpp_char8_t)
} // namespace

View file

@ -0,0 +1,40 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/match.h"
#include "absl/strings/internal/memutil.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Returns true when `piece1` and `piece2` have the same length and
// absl::strings_internal::memcasecmp reports no difference between them.
// memcasecmp uses absl::ascii_tolower().
bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2) {
  if (piece1.size() != piece2.size()) {
    return false;
  }
  const int difference = absl::strings_internal::memcasecmp(
      piece1.data(), piece2.data(), piece1.size());
  return difference == 0;
}
// Returns true when the first `prefix.size()` characters of `text` equal
// `prefix` according to EqualsIgnoreCase. A `text` shorter than `prefix`
// never matches.
bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix) {
  if (text.size() < prefix.size()) {
    return false;
  }
  const absl::string_view head = text.substr(0, prefix.size());
  return EqualsIgnoreCase(head, prefix);
}
// Returns true when the last `suffix.size()` characters of `text` equal
// `suffix` according to EqualsIgnoreCase. A `text` shorter than `suffix`
// never matches.
bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) {
  if (text.size() < suffix.size()) {
    return false;
  }
  const absl::string_view tail = text.substr(text.size() - suffix.size());
  return EqualsIgnoreCase(tail, suffix);
}
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,90 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: match.h
// -----------------------------------------------------------------------------
//
// This file contains simple utilities for performing string matching checks.
// All of these function parameters are specified as `absl::string_view`,
// meaning that these functions can accept `std::string`, `absl::string_view` or
// NUL-terminated C-style strings.
//
// Examples:
// std::string s = "foo";
// absl::string_view sv = "f";
// assert(absl::StrContains(s, sv));
//
// Note: The order of parameters in these functions is designed to mimic the
// order an equivalent member function would exhibit;
// e.g. `s.Contains(x)` ==> `absl::StrContains(s, x)`.
#ifndef ABSL_STRINGS_MATCH_H_
#define ABSL_STRINGS_MATCH_H_
#include <cstring>
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// StrContains()
//
// Reports whether `needle` occurs anywhere inside `haystack`, searching from
// the first character.
inline bool StrContains(absl::string_view haystack, absl::string_view needle) {
  const auto position = haystack.find(needle, 0);
  return position != absl::string_view::npos;
}
// StartsWith()
//
// Reports whether `text` begins with `prefix`. An empty `prefix` always
// matches and is answered before any memory comparison takes place.
inline bool StartsWith(absl::string_view text, absl::string_view prefix) {
  if (prefix.empty()) {
    return true;
  }
  if (text.size() < prefix.size()) {
    return false;
  }
  return memcmp(text.data(), prefix.data(), prefix.size()) == 0;
}
// EndsWith()
//
// Reports whether `text` ends with `suffix`. An empty `suffix` always matches
// and is answered before any memory comparison takes place.
inline bool EndsWith(absl::string_view text, absl::string_view suffix) {
  if (suffix.empty()) {
    return true;
  }
  if (text.size() < suffix.size()) {
    return false;
  }
  const char* const tail = text.data() + (text.size() - suffix.size());
  return memcmp(tail, suffix.data(), suffix.size()) == 0;
}
// EqualsIgnoreCase()
//
// Returns whether given ASCII strings `piece1` and `piece2` are equal, ignoring
// case in the comparison.
bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2);
// StartsWithIgnoreCase()
//
// Returns whether a given ASCII string `text` starts with `prefix`,
// ignoring case in the comparison.
bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix);
// EndsWithIgnoreCase()
//
// Returns whether a given ASCII string `text` ends with `suffix`, ignoring
// case in the comparison.
bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix);
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_MATCH_H_

View file

@ -0,0 +1,110 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/match.h"
#include "gtest/gtest.h"
namespace {
// Exercises absl::StartsWith with an ordinary word, a string containing an
// embedded NUL, and a default-constructed (empty) view.
TEST(MatchTest, StartsWith) {
  const std::string nul_string("123\0abc", 7);
  const absl::string_view word("foobar");
  const absl::string_view nul_view(nul_string);
  const absl::string_view empty;
  EXPECT_TRUE(absl::StartsWith(word, word));
  EXPECT_TRUE(absl::StartsWith(word, "foo"));
  EXPECT_TRUE(absl::StartsWith(word, empty));
  EXPECT_TRUE(absl::StartsWith(nul_view, nul_string));
  EXPECT_TRUE(absl::StartsWith(nul_view, nul_view));
  EXPECT_TRUE(absl::StartsWith(nul_view, empty));
  EXPECT_TRUE(absl::StartsWith(empty, ""));
  EXPECT_FALSE(absl::StartsWith(word, nul_view));
  EXPECT_FALSE(absl::StartsWith(nul_view, word));
  EXPECT_FALSE(absl::StartsWith(empty, word));
}
// Exercises absl::EndsWith with an ordinary word, a string containing an
// embedded NUL, and a default-constructed (empty) view.
TEST(MatchTest, EndsWith) {
  const std::string nul_string("123\0abc", 7);
  const absl::string_view word("foobar");
  const absl::string_view nul_view(nul_string);
  const absl::string_view empty;
  EXPECT_TRUE(absl::EndsWith(word, word));
  EXPECT_TRUE(absl::EndsWith(word, "bar"));
  EXPECT_TRUE(absl::EndsWith(word, empty));
  EXPECT_TRUE(absl::EndsWith(nul_view, nul_string));
  EXPECT_TRUE(absl::EndsWith(nul_view, nul_view));
  EXPECT_TRUE(absl::EndsWith(nul_view, empty));
  EXPECT_TRUE(absl::EndsWith(empty, ""));
  EXPECT_FALSE(absl::EndsWith(word, nul_view));
  EXPECT_FALSE(absl::EndsWith(nul_view, word));
  EXPECT_FALSE(absl::EndsWith(empty, word));
}
// Exercises absl::StrContains with a leading substring, a trailing substring,
// a needle that only partially overlaps the end, and empty operands.
TEST(MatchTest, Contains) {
  absl::string_view whole("abcdefg");
  absl::string_view leading("abcd");
  absl::string_view trailing("efg");
  absl::string_view overhang("gh");
  EXPECT_TRUE(absl::StrContains(whole, whole));
  EXPECT_TRUE(absl::StrContains(whole, leading));
  EXPECT_TRUE(absl::StrContains(whole, trailing));
  EXPECT_FALSE(absl::StrContains(whole, overhang));
  EXPECT_TRUE(absl::StrContains("", ""));
  EXPECT_TRUE(absl::StrContains("abc", ""));
  EXPECT_FALSE(absl::StrContains("", "a"));
}
// Checks that the matchers accept std::string, C strings and string_view
// interchangeably, and that a view whose length includes a trailing NUL is
// treated as different from the plain text.
TEST(MatchTest, ContainsNull) {
  const std::string as_string = "foo";
  const char* as_cstr = "foo";
  const absl::string_view as_view("foo");
  const absl::string_view view_with_nul("foo\0bar", 4);
  EXPECT_EQ(as_string, "foo");
  EXPECT_EQ(as_view, "foo");
  EXPECT_NE(view_with_nul, "foo");
  EXPECT_TRUE(absl::EndsWith(as_string, as_view));
  EXPECT_TRUE(absl::StartsWith(as_cstr, as_view));
  EXPECT_TRUE(absl::StrContains(as_cstr, as_view));
  EXPECT_FALSE(absl::StrContains(as_cstr, view_with_nul));
}
// Checks absl::EqualsIgnoreCase against different casings of the same word,
// an unrelated word, and a word that merely extends the original.
TEST(MatchTest, EqualsIgnoreCase) {
  std::string lowercase_word = "the";
  absl::string_view subject(lowercase_word);
  EXPECT_TRUE(absl::EqualsIgnoreCase(subject, "The"));
  EXPECT_TRUE(absl::EqualsIgnoreCase(subject, "THE"));
  EXPECT_TRUE(absl::EqualsIgnoreCase(subject, "the"));
  EXPECT_FALSE(absl::EqualsIgnoreCase(subject, "Quick"));
  EXPECT_FALSE(absl::EqualsIgnoreCase(subject, "then"));
}
// Checks absl::StartsWithIgnoreCase with an exact match, a differently cased
// prefix, an empty prefix, a prefix longer than the text, and an empty text.
TEST(MatchTest, StartsWithIgnoreCase) {
EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "foo"));
EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "Fo"));
EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", ""));
EXPECT_FALSE(absl::StartsWithIgnoreCase("foo", "fooo"));
EXPECT_FALSE(absl::StartsWithIgnoreCase("", "fo"));
}
// Checks absl::EndsWithIgnoreCase with an exact match, a differently cased
// suffix, an empty suffix, a suffix longer than the text, and an empty text.
TEST(MatchTest, EndsWithIgnoreCase) {
EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "foo"));
EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "Oo"));
EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", ""));
EXPECT_FALSE(absl::EndsWithIgnoreCase("foo", "fooo"));
EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo"));
}
} // namespace

View file

@ -0,0 +1,965 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file contains string processing functions related to
// numeric values.
#include "absl/strings/numbers.h"
#include <algorithm>
#include <cassert>
#include <cfloat> // for DBL_DIG and FLT_DIG
#include <cmath> // for HUGE_VAL
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include "absl/base/attributes.h"
#include "absl/base/internal/bits.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/ascii.h"
#include "absl/strings/charconv.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/memutil.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Parses `str` as a float into `*out`.
//
// `*out` is zeroed first. Surrounding ASCII whitespace is stripped and a single
// leading '+' is skipped before the text is handed to absl::from_chars().
// Returns false if from_chars() reports invalid_argument or leaves any
// character unconsumed; returns true otherwise, including on overflow.
bool SimpleAtof(absl::string_view str, float* out) {
  *out = 0.0;
  str = StripAsciiWhitespace(str);
  if (!str.empty() && str[0] == '+') {
    str.remove_prefix(1);
  }

  const char* const first = str.data();
  const char* const last = first + str.size();
  const auto parsed = absl::from_chars(first, last, *out);

  // Reject unparsable input as well as input with trailing characters that
  // are not part of the number.
  if (parsed.ec == std::errc::invalid_argument || parsed.ptr != last) {
    return false;
  }

  // from_chars() with DR 3081's current wording will return max() on
  // overflow.  SimpleAtof returns infinity instead.
  if (parsed.ec == std::errc::result_out_of_range) {
    if (*out > 1.0) {
      *out = std::numeric_limits<float>::infinity();
    } else if (*out < -1.0) {
      *out = -std::numeric_limits<float>::infinity();
    }
  }
  return true;
}
// Parses `str` as a double into `*out`.
//
// `*out` is zeroed first. Surrounding ASCII whitespace is stripped and a single
// leading '+' is skipped before the text is handed to absl::from_chars().
// Returns false if from_chars() reports invalid_argument or leaves any
// character unconsumed; returns true otherwise, including on overflow.
bool SimpleAtod(absl::string_view str, double* out) {
  *out = 0.0;
  str = StripAsciiWhitespace(str);
  if (!str.empty() && str[0] == '+') {
    str.remove_prefix(1);
  }

  const char* const first = str.data();
  const char* const last = first + str.size();
  const auto parsed = absl::from_chars(first, last, *out);

  // Reject unparsable input as well as input with trailing characters that
  // are not part of the number.
  if (parsed.ec == std::errc::invalid_argument || parsed.ptr != last) {
    return false;
  }

  // from_chars() with DR 3081's current wording will return max() on
  // overflow.  SimpleAtod returns infinity instead.
  if (parsed.ec == std::errc::result_out_of_range) {
    if (*out > 1.0) {
      *out = std::numeric_limits<double>::infinity();
    } else if (*out < -1.0) {
      *out = -std::numeric_limits<double>::infinity();
    }
  }
  return true;
}
// Converts a textual boolean to `*out`, comparing case-insensitively.
//
// "true", "t", "yes", "y" and "1" store true; "false", "f", "no", "n" and "0"
// store false. Returns whether `str` was one of those spellings; `*out` is
// left untouched when it was not. `out` must not be null.
bool SimpleAtob(absl::string_view str, bool* out) {
  ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
  for (const char* spelling : {"true", "t", "yes", "y", "1"}) {
    if (EqualsIgnoreCase(str, spelling)) {
      *out = true;
      return true;
    }
  }
  for (const char* spelling : {"false", "f", "no", "n", "0"}) {
    if (EqualsIgnoreCase(str, spelling)) {
      *out = false;
      return true;
    }
  }
  return false;
}
// ----------------------------------------------------------------------
// FastIntToBuffer() overloads
//
// Like the Fast*ToBuffer() functions above, these are intended for speed.
// Unlike the Fast*ToBuffer() functions, however, these functions write
// their output to the beginning of the buffer. The caller is responsible
// for ensuring that the buffer has enough space to hold the output.
//
// Returns a pointer to the end of the string (i.e. the null character
// terminating the string).
// ----------------------------------------------------------------------
namespace {
// Used to optimize printing a decimal number's final digit.
// Entry d holds the ASCII character for digit d followed by a NUL byte, so a
// single two-byte memcpy from this table emits both the last digit and the
// string terminator.
const char one_ASCII_final_digits[10][2] {
{'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
{'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
};
} // namespace
// Writes the decimal representation of `i` to the start of `buffer`, followed
// by a NUL terminator, and returns a pointer to that terminator.
//
// Structure: the first `if` body is a straight-line chain that emits two
// digits per step (via PutTwoDigits, after which `buffer` is advanced by 2).
// The labels inside it are entry points for shorter numbers. Each later `if`
// handles one pair of digit counts: a number with an even digit count jumps
// straight to the matching label, while a number with an odd digit count first
// emits its single leading digit and then jumps one label further down.
char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
uint32_t digits;
// The idea of this implementation is to trim the number of divides to as few
// as possible, and also reducing memory stores and branches, by going in
// steps of two digits at a time rather than one whenever possible.
// The huge-number case is first, in the hopes that the compiler will output
// that case in one branch-free block of code, and only output conditional
// branches into it from below.
if (i >= 1000000000) { // >= 1,000,000,000
// Ten-digit number: peel off the two most significant digits.
digits = i / 100000000; // 100,000,000
i -= digits * 100000000;
PutTwoDigits(digits, buffer);
buffer += 2;
lt100_000_000:
// Entry point with exactly eight digits left to print.
digits = i / 1000000; // 1,000,000
i -= digits * 1000000;
PutTwoDigits(digits, buffer);
buffer += 2;
lt1_000_000:
// Entry point with exactly six digits left to print.
digits = i / 10000; // 10,000
i -= digits * 10000;
PutTwoDigits(digits, buffer);
buffer += 2;
lt10_000:
// Entry point with exactly four digits left to print.
digits = i / 100;
i -= digits * 100;
PutTwoDigits(digits, buffer);
buffer += 2;
lt100:
// Entry point with exactly two digits left to print.
digits = i;
PutTwoDigits(digits, buffer);
buffer += 2;
*buffer = 0;
return buffer;
}
if (i < 100) {
digits = i;
if (i >= 10) goto lt100;
// Single digit: copy the digit and its NUL terminator in one go.
memcpy(buffer, one_ASCII_final_digits[i], 2);
return buffer + 1;
}
if (i < 10000) { // 10,000
if (i >= 1000) goto lt10_000;
// Three digits: emit the leading one, then the remaining pair.
digits = i / 100;
i -= digits * 100;
*buffer++ = '0' + digits;
goto lt100;
}
if (i < 1000000) { // 1,000,000
if (i >= 100000) goto lt1_000_000;
// Five digits: emit the leading one, then the remaining four.
digits = i / 10000; // 10,000
i -= digits * 10000;
*buffer++ = '0' + digits;
goto lt10_000;
}
if (i < 100000000) { // 100,000,000
if (i >= 10000000) goto lt100_000_000;
// Seven digits: emit the leading one, then the remaining six.
digits = i / 1000000; // 1,000,000
i -= digits * 1000000;
*buffer++ = '0' + digits;
goto lt1_000_000;
}
// we already know that i < 1,000,000,000
// Nine digits: emit the leading one, then the remaining eight.
digits = i / 100000000; // 100,000,000
i -= digits * 100000000;
*buffer++ = '0' + digits;
goto lt100_000_000;
}
// Signed 32-bit overload: writes a leading '-' for negative values and then
// delegates the magnitude to the uint32_t overload. Returns whatever that
// overload returns.
char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
  const bool is_negative = i < 0;
  if (is_negative) {
    *buffer++ = '-';
  }
  // We need to do the negation in modular (i.e., "unsigned")
  // arithmetic; MSVC++ apparently warns for plain "-u", so
  // we write the equivalent expression "0 - u" instead.
  const uint32_t magnitude = is_negative ? 0 - static_cast<uint32_t>(i)
                                         : static_cast<uint32_t>(i);
  return numbers_internal::FastIntToBuffer(magnitude, buffer);
}
// Unsigned 64-bit overload. Values that fit in 32 bits are forwarded to the
// uint32_t overload. Larger values are split into a high part (everything
// above the low nine decimal digits) and the low nine digits; the high part is
// printed first, then the low nine digits are printed with leading zeros kept.
// Ends with a NUL terminator and returns a pointer to it.
char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
uint32_t u32 = static_cast<uint32_t>(i);
if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
// Here we know i has at least 10 decimal digits.
// top_1to11: the leading 1 to 11 digits; u32: the trailing nine digits.
uint64_t top_1to11 = i / 1000000000;
u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
if (top_1to11_32 == top_1to11) {
buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
} else {
// top_1to11 has more than 32 bits too; print it in two steps.
uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
// `top_8to9 * 100` is computed in uint32_t and may wrap, but the difference
// is truncated to uint32_t as well, so the result is exact modulo 2^32 and
// the true remainder (< 100) is recovered.
uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
PutTwoDigits(mid_2, buffer);
buffer += 2;
}
// We have only 9 digits now, again the maximum uint32_t can handle fully.
// Emit them as four two-digit pairs followed by one final digit.
uint32_t digits = u32 / 10000000; // 10,000,000
u32 -= digits * 10000000;
PutTwoDigits(digits, buffer);
buffer += 2;
digits = u32 / 100000; // 100,000
u32 -= digits * 100000;
PutTwoDigits(digits, buffer);
buffer += 2;
digits = u32 / 1000; // 1,000
u32 -= digits * 1000;
PutTwoDigits(digits, buffer);
buffer += 2;
digits = u32 / 10;
u32 -= digits * 10;
PutTwoDigits(digits, buffer);
buffer += 2;
// Last digit plus the NUL terminator in a single two-byte copy.
memcpy(buffer, one_ASCII_final_digits[u32], 2);
return buffer + 1;
}
// Signed 64-bit overload: writes a leading '-' for negative values and then
// delegates the magnitude to the uint64_t overload. Returns whatever that
// overload returns.
char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
  const bool is_negative = i < 0;
  if (is_negative) {
    *buffer++ = '-';
  }
  // Negate in unsigned arithmetic so the most negative value is handled
  // without signed overflow.
  const uint64_t magnitude = is_negative ? 0 - static_cast<uint64_t>(i)
                                         : static_cast<uint64_t>(i);
  return numbers_internal::FastIntToBuffer(magnitude, buffer);
}
// Given a 128-bit number expressed as a pair of uint64_t, high half first,
// return that number multiplied by the given 32-bit value. If the result is
// too large to fit in a 128-bit number, divide it by 2 until it fits.
//
// The returned pair is again {high 64 bits, low 64 bits}. When the product
// overflows 128 bits, the bits shifted out at the bottom are discarded.
static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
uint32_t mul) {
// Split the input into four 32-bit limbs, each held in a 64-bit variable.
uint64_t bits0_31 = num.second & 0xFFFFFFFF;
uint64_t bits32_63 = num.second >> 32;
uint64_t bits64_95 = num.first & 0xFFFFFFFF;
uint64_t bits96_127 = num.first >> 32;
// The picture so far: each of these 64-bit values has only the lower 32 bits
// filled in.
// bits96_127: [ 00000000 xxxxxxxx ]
// bits64_95: [ 00000000 xxxxxxxx ]
// bits32_63: [ 00000000 xxxxxxxx ]
// bits0_31: [ 00000000 xxxxxxxx ]
bits0_31 *= mul;
bits32_63 *= mul;
bits64_95 *= mul;
bits96_127 *= mul;
// Now the top halves may also have value, though all 64 of their bits will
// never be set at the same time, since they are a result of a 32x32 bit
// multiply. This makes the carry calculation slightly easier.
// bits96_127: [ mmmmmmmm | mmmmmmmm ]
// bits64_95: [ | mmmmmmmm mmmmmmmm | ]
// bits32_63: | [ mmmmmmmm | mmmmmmmm ]
// bits0_31: | [ | mmmmmmmm mmmmmmmm ]
// eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
// Recombine the partial products. Each `(sum < addend)` comparison yields 1
// when the preceding unsigned addition wrapped, i.e. it is the carry out.
uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
(bits0_63 < bits0_31);
uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
// No overflow beyond 128 bits: the result is exact.
if (bits128_up == 0) return {bits64_127, bits0_63};
// Otherwise shift the 192-bit value right by just enough bits that the
// overflow part becomes zero, keeping the most significant 128 bits.
int shift = 64 - base_internal::CountLeadingZeros64(bits128_up);
uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
return {hi, lo};
}
// Computes num * 5^expfive and returns the leading 128 bits of the product as
// {high, low}, shifted left so that the top bit is always set. For example
// PowFive(1, 0) starts 0b100000, PowFive(1, 1) starts 0b101000 and
// PowFive(1, 2) starts 0b110010.
//
// The multiplication is carried out with Mul32, thirteen powers of five at a
// time while possible, followed by one final multiply for the remainder.
static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
  // 5^0 through 5^12.
  constexpr int powers_of_five[13] = {
      1,       5,       25,       125,       625,
      3125,    15625,   78125,    390625,    1953125,
      9765625, 48828125, 244140625};
  // 5^13 is the highest power of five that will fit in a 32-bit integer.
  constexpr int kFiveToThe13th = 1220703125;

  std::pair<uint64_t, uint64_t> product = {num, 0};
  for (; expfive >= 13; expfive -= 13) {
    product = Mul32(product, kFiveToThe13th);
  }
  product = Mul32(product, powers_of_five[expfive & 15]);

  // Normalize: shift left until the most significant bit of the high word is
  // set, pulling the vacated low bits in from the low word.
  const int leading_zeros = base_internal::CountLeadingZeros64(product.first);
  if (leading_zeros == 0) {
    return product;
  }
  product.first = (product.first << leading_zeros) +
                  (product.second >> (64 - leading_zeros));
  product.second = (product.second << leading_zeros);
  return product;
}
// Result of SplitToSix: a number broken into a base-10 exponent and six
// significant decimal digits.
struct ExpDigits {
// Base-10 exponent of the first digit.
int32_t exponent;
// Six ASCII digits, most significant first; not NUL-terminated.
char digits[6];
};
// SplitToSix converts value, a positive double-precision floating-point number,
// into a base-10 exponent and 6 ASCII digits, where the first digit is never
// zero. For example, SplitToSix(1) returns an exponent of zero and a digits
// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
// two possible representations, e.g. value = 100000.5, then "round to even" is
// performed.
//
// The work happens in two phases: a fast floating-point scaling that is right
// almost always, and an exact 128-bit integer comparison that is used only when
// the scaled value lands close enough to a rounding boundary that the
// floating-point error could change the answer.
static ExpDigits SplitToSix(const double value) {
ExpDigits exp_dig;
int exp = 5;
double d = value;
// First step: calculate a close approximation of the output, where the
// value d will be between 100,000 and 999,999, representing the digits
// in the output ASCII array, and exp is the base-10 exponent. It would be
// faster to use a table here, and to look up the base-2 exponent of value,
// however value is an IEEE-754 64-bit number, so the table would have 2,000
// entries, which is not cache-friendly.
// The scaling proceeds like a binary search on the exponent: each line
// adjusts exp by a power of two and multiplies d by the matching power of ten.
if (d >= 999999.5) {
if (d >= 1e+261) exp += 256, d *= 1e-256;
if (d >= 1e+133) exp += 128, d *= 1e-128;
if (d >= 1e+69) exp += 64, d *= 1e-64;
if (d >= 1e+37) exp += 32, d *= 1e-32;
if (d >= 1e+21) exp += 16, d *= 1e-16;
if (d >= 1e+13) exp += 8, d *= 1e-8;
if (d >= 1e+9) exp += 4, d *= 1e-4;
if (d >= 1e+7) exp += 2, d *= 1e-2;
if (d >= 1e+6) exp += 1, d *= 1e-1;
} else {
if (d < 1e-250) exp -= 256, d *= 1e256;
if (d < 1e-122) exp -= 128, d *= 1e128;
if (d < 1e-58) exp -= 64, d *= 1e64;
if (d < 1e-26) exp -= 32, d *= 1e32;
if (d < 1e-10) exp -= 16, d *= 1e16;
if (d < 1e-2) exp -= 8, d *= 1e8;
if (d < 1e+2) exp -= 4, d *= 1e4;
if (d < 1e+4) exp -= 2, d *= 1e2;
if (d < 1e+5) exp -= 1, d *= 1e1;
}
// At this point, d is in the range [99999.5..999999.5) and exp is in the
// range [-324..308]. Since we need to round d up, we want to add a half
// and truncate.
// However, the technique above may have lost some precision, due to its
// repeated multiplication by constants that each may be off by half a bit
// of precision. This only matters if we're close to the edge though.
// Since we'd like to know if the fractional part of d is close to a half,
// we multiply it by 65536 and see if the fractional part is close to 32768.
// (The number doesn't have to be a power of two, but powers of two are faster)
uint64_t d64k = d * 65536;
int dddddd; // A 6-digit decimal integer.
if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
// OK, it's fairly likely that precision was lost above, which is
// not a surprise given only 52 mantissa bits are available. Therefore
// redo the calculation using 128-bit numbers. (64 bits are not enough).
// Start out with digits rounded down; maybe add one below.
dddddd = static_cast<int>(d64k / 65536);
// mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
// value we're representing, of course, is M.mmm... * 2^exp2.
int exp2;
double m = std::frexp(value, &exp2);
uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
// std::frexp returns an m value in the range [0.5, 1.0), however we
// can't multiply it by 2^64 and convert to an integer because some FPUs
// throw an exception when converting a number higher than 2^63 into an
// integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
// since m only has 52 significant bits anyway.
mantissa <<= 1;
exp2 -= 64; // not needed, but nice for debugging
// OK, we are here to compare:
// (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
// so we can round up dddddd if appropriate. Those values span the full
// range of 600 orders of magnitude of IEEE 64-bit floating-point.
// Fortunately, we already know they are very close, so we don't need to
// track the base-2 exponent of both sides. This greatly simplifies the
// math since the 2^exp2 calculation is unnecessary and the power-of-10
// calculation can become a power-of-5 instead.
std::pair<uint64_t, uint64_t> edge, val;
if (exp >= 6) {
// Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
// Since we're tossing powers of two, 2 * dddddd + 1 is the
// same as dddddd + 0.5
edge = PowFive(2 * dddddd + 1, exp - 5);
val.first = mantissa;
val.second = 0;
} else {
// We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
// above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
// mantissa * 5 ^ (5 - exp)
edge = PowFive(2 * dddddd + 1, 0);
val = PowFive(mantissa, 5 - exp);
}
// printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
// val.second, edge.first, edge.second);
// Above the halfway point: round up. Exactly on it: round to even, i.e.
// bump dddddd only when it is currently odd.
if (val > edge) {
dddddd++;
} else if (val == edge) {
dddddd += (dddddd & 1);
}
} else {
// Here, we are not close to the edge.
dddddd = static_cast<int>((d64k + 32768) / 65536);
}
// Rounding up may have produced a seventh digit; renormalize to six.
if (dddddd == 1000000) {
dddddd = 100000;
exp += 1;
}
exp_dig.exponent = exp;
// Emit the six digits two at a time, most significant pair first.
int two_digits = dddddd / 10000;
dddddd -= two_digits * 10000;
numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
two_digits = dddddd / 100;
dddddd -= two_digits * 100;
numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
return exp_dig;
}
// Helper function for fast formatting of floating-point.
// The result is the same as "%g", a.k.a. "%.6g".
//
// Writes a NUL-terminated string for `d` at `buffer` and returns its length,
// not counting the terminator. NaN is written as "nan", infinities as "inf" or
// "-inf", and zero as "0" or "-0". Other values use fixed notation when the
// decimal exponent is in [-4, 5] and scientific notation otherwise, with
// trailing zeros after the decimal point removed.
size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
// NOTE(review): the parameter is a double but the check below is on float;
// confirm whether numeric_limits<double> was intended.
static_assert(std::numeric_limits<float>::is_iec559,
"IEEE-754/IEC-559 support only");
// `out` is the write cursor; `buffer` keeps pointing at the start so that the
// length can be computed as `out - buffer`.
char* out = buffer;
if (std::isnan(d)) {
strcpy(out, "nan"); // NOLINT(runtime/printf)
return 3;
}
if (d == 0) { // +0 and -0 are handled here
if (std::signbit(d)) *out++ = '-';
*out++ = '0';
*out = 0;
return out - buffer;
}
if (d < 0) {
*out++ = '-';
d = -d;
}
if (std::isinf(d)) {
strcpy(out, "inf"); // NOLINT(runtime/printf)
return out + 3 - buffer;
}
auto exp_dig = SplitToSix(d);
int exp = exp_dig.exponent;
const char* digits = exp_dig.digits;
// Pre-store "0."; the negative-exponent cases and the scientific-notation
// path below rely on out[1] already being '.'. The other cases overwrite it.
out[0] = '0';
out[1] = '.';
switch (exp) {
case 5:
// All six digits are integral: "dddddd".
memcpy(out, &digits[0], 6), out += 6;
*out = 0;
return out - buffer;
case 4:
// "ddddd" with an optional single fractional digit.
memcpy(out, &digits[0], 5), out += 5;
if (digits[5] != '0') {
*out++ = '.';
*out++ = digits[5];
}
*out = 0;
return out - buffer;
case 3:
// "dddd" with up to two fractional digits. Both characters are ASCII
// digits, so their bitwise OR equals '0' only when both are '0'.
memcpy(out, &digits[0], 4), out += 4;
if ((digits[5] | digits[4]) != '0') {
*out++ = '.';
*out++ = digits[4];
if (digits[5] != '0') *out++ = digits[5];
}
*out = 0;
return out - buffer;
case 2:
// "ddd.ddd", then strip trailing zeros and a dangling '.'.
memcpy(out, &digits[0], 3), out += 3;
*out++ = '.';
memcpy(out, &digits[3], 3);
out += 3;
while (out[-1] == '0') --out;
if (out[-1] == '.') --out;
*out = 0;
return out - buffer;
case 1:
// "dd.dddd", then strip trailing zeros and a dangling '.'.
memcpy(out, &digits[0], 2), out += 2;
*out++ = '.';
memcpy(out, &digits[2], 4);
out += 4;
while (out[-1] == '0') --out;
if (out[-1] == '.') --out;
*out = 0;
return out - buffer;
case 0:
// "d.ddddd", then strip trailing zeros and a dangling '.'.
memcpy(out, &digits[0], 1), out += 1;
*out++ = '.';
memcpy(out, &digits[1], 5);
out += 5;
while (out[-1] == '0') --out;
if (out[-1] == '.') --out;
*out = 0;
return out - buffer;
// Negative exponents: each case appends one '0' after the "0." already in
// place and falls through, so exp == -4 yields three zeros, -3 two, -2 one
// and -1 none, before the six digits are copied.
case -4:
out[2] = '0';
++out;
ABSL_FALLTHROUGH_INTENDED;
case -3:
out[2] = '0';
++out;
ABSL_FALLTHROUGH_INTENDED;
case -2:
out[2] = '0';
++out;
ABSL_FALLTHROUGH_INTENDED;
case -1:
out += 2;
memcpy(out, &digits[0], 6);
out += 6;
while (out[-1] == '0') --out;
*out = 0;
return out - buffer;
}
// Scientific notation: "d.ddddde+XX" or "d.ddddde-XX", with a third exponent
// digit when the exponent exceeds 99.
assert(exp < -4 || exp >= 6);
out[0] = digits[0];
assert(out[1] == '.');
out += 2;
memcpy(out, &digits[1], 5), out += 5;
while (out[-1] == '0') --out;
if (out[-1] == '.') --out;
*out++ = 'e';
if (exp > 0) {
*out++ = '+';
} else {
*out++ = '-';
exp = -exp;
}
if (exp > 99) {
int dig1 = exp / 100;
exp -= dig1 * 100;
*out++ = '0' + dig1;
}
PutTwoDigits(exp, out);
out += 2;
*out = 0;
return out - buffer;
}
namespace {
// Represents integer values of digits.
// Uses 36 to indicate an invalid character since we support
// bases up to 36.
// Indexed by unsigned character value: '0'-'9' map to 0-9, and both 'A'-'Z'
// and 'a'-'z' map to 10-35. Every other entry is 36.
static const int8_t kAsciiToInt[256] = {
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
// Strips surrounding whitespace, an optional sign and an optional base prefix
// from `*text`, and resolves the numeric base.
//
//   text         in: raw input; out (on success): the remaining characters.
//   base_ptr     in: requested base (0, or 2..36); out (on success): the
//                resolved base. Base 0 means "detect": "0x"/"0X" selects 16,
//                a leading "0" selects 8, anything else selects 10. Base 16
//                also accepts an optional "0x"/"0X" prefix.
//   negative_ptr out: whether the text carried a '-' sign. It is written as
//                soon as the sign has been examined, even if parsing fails
//                afterwards.
//
// Returns false for a null data pointer, input that is empty after trimming,
// a sign or hex prefix with nothing after it, or an unsupported base.
inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/,
                                     int* base_ptr /*inout*/,
                                     bool* negative_ptr /*output*/) {
  const char* first = text->data();
  if (first == nullptr) {
    return false;
  }
  const char* last = first + text->size();
  int base = *base_ptr;

  // Trim ASCII whitespace from both ends.
  for (; first < last && absl::ascii_isspace(first[0]); ++first) {
  }
  for (; first < last && absl::ascii_isspace(last[-1]); --last) {
  }
  if (first >= last) {
    return false;
  }

  // Optional sign; something must follow it.
  const bool is_negative = (first[0] == '-');
  *negative_ptr = is_negative;
  if (is_negative || first[0] == '+') {
    ++first;
    if (first >= last) {
      return false;
    }
  }

  // True when the remaining text begins with "0x" or "0X".
  const auto has_hex_prefix = [&first, &last]() {
    return last - first >= 2 && first[0] == '0' &&
           (first[1] == 'x' || first[1] == 'X');
  };

  switch (base) {
    case 0:
      if (has_hex_prefix()) {
        base = 16;
        first += 2;
        if (first >= last) {
          // "0x" with no digits after is invalid.
          return false;
        }
      } else if (last - first >= 1 && first[0] == '0') {
        base = 8;
        first += 1;
      } else {
        base = 10;
      }
      break;
    case 16:
      if (has_hex_prefix()) {
        first += 2;
        if (first >= last) {
          // "0x" with no digits after is invalid.
          return false;
        }
      }
      break;
    default:
      // Any other base must lie in [2, 36].
      if (base < 2 || base > 36) {
        return false;
      }
      break;
  }

  *text = absl::string_view(first, last - first);
  *base_ptr = base;
  return true;
}
// Consume digits.
//
// The classic loop:
//
// for each digit
// value = value * base + digit
// value *= sign
//
// The classic loop needs overflow checking. It also fails on the most
// negative integer, -2147483648 in 32-bit two's complement representation.
//
// My improved loop:
//
// if (!negative)
// for each digit
// value = value * base
// value = value + digit
// else
// for each digit
// value = value * base
// value = value - digit
//
// Overflow checking becomes simple.
// Lookup tables per IntType:
// vmax/base and vmin/base are precomputed because division costs at least 8ns.
// TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
// struct of arrays) would probably be better in terms of d-cache for the most
// commonly used bases.
// Holder for the per-IntType overflow thresholds used by the digit loops.
// Both arrays are indexed directly by the numeric base; their contents are
// defined out of line below.
template <typename IntType>
struct LookupTables {
// kVmaxOverBase[b] is numeric_limits<IntType>::max() / b.
ABSL_CONST_INIT static const IntType kVmaxOverBase[];
// kVminOverBase[b] is numeric_limits<IntType>::min() / b.
ABSL_CONST_INIT static const IntType kVminOverBase[];
};
// An array initializer macro for X/base where base in [0, 36].
// However, note that lookups for base in [0, 1] should never happen because
// base has been validated to be in [2, 36] by safe_parse_sign_and_base().
// Expands to a 37-element brace initializer in which the element at index b
// is `X / b` for b in [2, 36]. Indices 0 and 1 are filled with 0 so the
// resulting array can be indexed by the base without an offset.
#define X_OVER_BASE_INITIALIZER(X) \
{ \
0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \
X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \
X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \
X / 35, X / 36, \
}
// uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
// array to avoid a static initializer.
// Explicit specialization for uint128: one entry per base, index == base.
// Each entry is written as MakeUint128(high 64 bits, low 64 bits). Entries 0
// and 1 are placeholders, matching the layout of X_OVER_BASE_INITIALIZER.
template <>
const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
0,
0,
MakeUint128(9223372036854775807u, 18446744073709551615u),
MakeUint128(6148914691236517205u, 6148914691236517205u),
MakeUint128(4611686018427387903u, 18446744073709551615u),
MakeUint128(3689348814741910323u, 3689348814741910323u),
MakeUint128(3074457345618258602u, 12297829382473034410u),
MakeUint128(2635249153387078802u, 5270498306774157604u),
MakeUint128(2305843009213693951u, 18446744073709551615u),
MakeUint128(2049638230412172401u, 14347467612885206812u),
MakeUint128(1844674407370955161u, 11068046444225730969u),
MakeUint128(1676976733973595601u, 8384883669867978007u),
MakeUint128(1537228672809129301u, 6148914691236517205u),
MakeUint128(1418980313362273201u, 4256940940086819603u),
MakeUint128(1317624576693539401u, 2635249153387078802u),
MakeUint128(1229782938247303441u, 1229782938247303441u),
MakeUint128(1152921504606846975u, 18446744073709551615u),
MakeUint128(1085102592571150095u, 1085102592571150095u),
MakeUint128(1024819115206086200u, 16397105843297379214u),
MakeUint128(970881267037344821u, 16504981539634861972u),
MakeUint128(922337203685477580u, 14757395258967641292u),
MakeUint128(878416384462359600u, 14054662151397753612u),
MakeUint128(838488366986797800u, 13415813871788764811u),
MakeUint128(802032351030850070u, 4812194106185100421u),
MakeUint128(768614336404564650u, 12297829382473034410u),
MakeUint128(737869762948382064u, 11805916207174113034u),
MakeUint128(709490156681136600u, 11351842506898185609u),
MakeUint128(683212743470724133u, 17080318586768103348u),
MakeUint128(658812288346769700u, 10540996613548315209u),
MakeUint128(636094623231363848u, 15266270957552732371u),
MakeUint128(614891469123651720u, 9838263505978427528u),
MakeUint128(595056260442243600u, 9520900167075897608u),
MakeUint128(576460752303423487u, 18446744073709551615u),
MakeUint128(558992244657865200u, 8943875914525843207u),
MakeUint128(542551296285575047u, 9765923333140350855u),
MakeUint128(527049830677415760u, 8432797290838652167u),
MakeUint128(512409557603043100u, 8198552921648689607u),
};
// Generic table definitions, used for every IntType that has no explicit
// specialization. Entry b is max()/b (respectively min()/b), computed by the
// compiler from the initializer macro.
template <typename IntType>
const IntType LookupTables<IntType>::kVmaxOverBase[] =
X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
template <typename IntType>
const IntType LookupTables<IntType>::kVminOverBase[] =
X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
// The initializer macro is an implementation detail of the two tables above.
#undef X_OVER_BASE_INITIALIZER
// Parses the digits in `text` (sign and prefix already removed) as a
// non-negative number in `base` and stores the result in `*value_p`.
//
// Returns true when every character is a valid digit for `base` and the value
// fits in IntType. Otherwise returns false and stores:
//   * the value accumulated so far, if an invalid digit was hit;
//   * numeric_limits<IntType>::max(), if the value would overflow.
// Empty `text` yields 0 and true.
template <typename IntType>
inline bool safe_parse_positive_int(absl::string_view text, int base,
                                    IntType* value_p) {
  const IntType vmax = std::numeric_limits<IntType>::max();
  assert(vmax > 0);
  assert(base >= 0);
  assert(vmax >= static_cast<IntType>(base));
  const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
  assert(base < 2 ||
         std::numeric_limits<IntType>::max() / base == vmax_over_base);

  // Every failure path stores a result and reports false.
  const auto fail_with = [value_p](IntType result) -> bool {
    *value_p = result;
    return false;
  };

  IntType value = 0;
  const char* const end = text.data() + text.size();
  for (const char* p = text.data(); p < end; ++p) {
    const int digit = kAsciiToInt[static_cast<unsigned char>(*p)];
    if (digit >= base) {
      // Not a digit in this base: report what was parsed so far.
      return fail_with(value);
    }
    // Check before multiplying so that `value * base` cannot overflow.
    if (value > vmax_over_base) {
      return fail_with(vmax);
    }
    value *= base;
    // Check before adding so that `value + digit` cannot overflow.
    if (value > vmax - digit) {
      return fail_with(vmax);
    }
    value += digit;
  }
  *value_p = value;
  return true;
}
// Parses the digits in `text` (sign and prefix already removed) as the
// magnitude of a negative number in `base`, accumulating downwards from zero
// so that numeric_limits<IntType>::min() is representable.
//
// Returns true when every character is a valid digit for `base` and the value
// fits in IntType. Otherwise returns false and stores:
//   * the value accumulated so far, if an invalid digit was hit;
//   * numeric_limits<IntType>::min(), if the value would underflow.
template <typename IntType>
inline bool safe_parse_negative_int(absl::string_view text, int base,
                                    IntType* value_p) {
  const IntType vmin = std::numeric_limits<IntType>::min();
  assert(vmin < 0);
  assert(vmin <= 0 - base);
  IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
  assert(base < 2 ||
         std::numeric_limits<IntType>::min() / base == vmin_over_base);
  // 2003 c++ standard [expr.mul]
  // "... the sign of the remainder is implementation-defined."
  // Although (vmin/base)*base + vmin%base is always vmin.
  // 2011 c++ standard tightens the spec but we cannot rely on it.
  // TODO(junyer): Handle this in the lookup table generation.
  if (vmin % base > 0) {
    vmin_over_base += 1;
  }

  // Every failure path stores a result and reports false.
  const auto fail_with = [value_p](IntType result) -> bool {
    *value_p = result;
    return false;
  };

  IntType value = 0;
  const char* const end = text.data() + text.size();
  for (const char* p = text.data(); p < end; ++p) {
    const int digit = kAsciiToInt[static_cast<unsigned char>(*p)];
    if (digit >= base) {
      // Not a digit in this base: report what was parsed so far.
      return fail_with(value);
    }
    // Check before multiplying so that `value * base` cannot underflow.
    if (value < vmin_over_base) {
      return fail_with(vmin);
    }
    value *= base;
    // Check before subtracting so that `value - digit` cannot underflow.
    if (value < vmin + digit) {
      return fail_with(vmin);
    }
    value -= digit;
  }
  *value_p = value;
  return true;
}
// Input format based on POSIX.1-2008 strtol
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
// Parses `text` as a signed integer of type IntType in the given `base`
// (0 means auto-detect). `*value_p` is zeroed first, so it holds 0 when the
// sign/base stage rejects the input. Returns true only on a complete,
// in-range parse.
template <typename IntType>
inline bool safe_int_internal(absl::string_view text, IntType* value_p,
                              int base) {
  *value_p = 0;
  bool negative;
  if (!safe_parse_sign_and_base(&text, &base, &negative)) {
    return false;
  }
  // Dispatch on the sign; each helper handles its own overflow direction.
  return negative ? safe_parse_negative_int(text, base, value_p)
                  : safe_parse_positive_int(text, base, value_p);
}
// Parses `text` as an unsigned integer of type IntType in the given `base`
// (0 means auto-detect). `*value_p` is zeroed first. Any input carrying a '-'
// sign is rejected, leaving `*value_p` at 0. Returns true only on a complete,
// in-range parse.
template <typename IntType>
inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
                               int base) {
  *value_p = 0;
  bool negative;
  if (!safe_parse_sign_and_base(&text, &base, &negative)) {
    return false;
  }
  if (negative) {
    return false;
  }
  return safe_parse_positive_int(text, base, value_p);
}
} // anonymous namespace
namespace numbers_internal {
// Digit conversion.
// kHexChar[n] is the lowercase hex digit for nibble value n (0..15). The
// string literal adds a terminating NUL, for 17 bytes in total.
ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
"0123456789abcdef";
// kHexTable[2 * b] and kHexTable[2 * b + 1] are the two lowercase hex
// characters of byte value b (0..255): 512 characters plus the literal's
// terminating NUL. Each source row below covers 16 consecutive byte values.
ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
"000102030405060708090a0b0c0d0e0f"
"101112131415161718191a1b1c1d1e1f"
"202122232425262728292a2b2c2d2e2f"
"303132333435363738393a3b3c3d3e3f"
"404142434445464748494a4b4c4d4e4f"
"505152535455565758595a5b5c5d5e5f"
"606162636465666768696a6b6c6d6e6f"
"707172737475767778797a7b7c7d7e7f"
"808182838485868788898a8b8c8d8e8f"
"909192939495969798999a9b9c9d9e9f"
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
// two_ASCII_digits[i] holds the two decimal characters of i for 0 <= i < 100,
// tens digit first (zero-padded). Entries are raw character pairs with no
// terminating NUL.
ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = {
{'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
{'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
{'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
{'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
{'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
{'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
{'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
{'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
{'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
{'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
{'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
{'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
{'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
{'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
{'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
{'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
{'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
// Parses `text` as an int32_t in `base`; see safe_int_internal() for the
// contract. IntType is deduced as int32_t from `value`.
bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
  return safe_int_internal(text, value, base);
}
// Parses `text` as an int64_t in `base`; see safe_int_internal() for the
// contract. IntType is deduced as int64_t from `value`.
bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
  return safe_int_internal(text, value, base);
}
// Parses `text` as a uint32_t in `base`; see safe_uint_internal() for the
// contract. IntType is deduced as uint32_t from `value`.
bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
  return safe_uint_internal(text, value, base);
}
// Parses `text` as a uint64_t in `base`; see safe_uint_internal() for the
// contract. IntType is deduced as uint64_t from `value`.
bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
  return safe_uint_internal(text, value, base);
}
// Parses `text` as a uint128 in `base`; see safe_uint_internal() for the
// contract. IntType is deduced as uint128 from `value`.
bool safe_strtou128_base(absl::string_view text, uint128* value, int base) {
  return safe_uint_internal(text, value, base);
}
} // namespace numbers_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,266 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: numbers.h
// -----------------------------------------------------------------------------
//
// This package contains functions for converting strings to numbers. For
// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h,
// which automatically detect and convert most number values appropriately.
#ifndef ABSL_STRINGS_NUMBERS_H_
#define ABSL_STRINGS_NUMBERS_H_
#ifdef __SSE4_2__
#include <x86intrin.h>
#endif
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <limits>
#include <string>
#include <type_traits>
#include "absl/base/config.h"
#include "absl/base/internal/bits.h"
#ifdef __SSE4_2__
// TODO(jorg): Remove this when we figure out the right way
// to swap bytes on SSE 4.2 that works with the compilers
// we claim to support. Also, add tests for the compiler
// that doesn't support the Intel _bswap64 intrinsic but
// does support all the SSE 4.2 intrinsics
#include "absl/base/internal/endian.h"
#endif
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// SimpleAtoi()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into an integer value, returning `true` if successful. The string
// must reflect a base-10 integer whose value falls within the range of the
// integer type (optionally preceded by a `+` or `-`). If any errors are
// encountered, this function returns `false`, leaving `out` in an unspecified
// state.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out);
// SimpleAtof()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a float, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
// allowed formats for `str`, except SimpleAtof() is locale-independent and will
// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);
// SimpleAtod()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a double, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
// allowed formats for `str`, except SimpleAtod is locale-independent and will
// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out);
// SimpleAtob()
//
// Converts the given string into a boolean, returning `true` if successful.
// The following case-insensitive strings are interpreted as boolean `true`:
// "true", "t", "yes", "y", "1". The following case-insensitive strings
// are interpreted as boolean `false`: "false", "f", "no", "n", "0". If any
// errors are encountered, this function returns `false`, leaving `out` in an
// unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out);
ABSL_NAMESPACE_END
} // namespace absl
// End of public API. Implementation details follow.
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace numbers_internal {
// Digit conversion.
ABSL_DLL extern const char kHexChar[17]; // 0123456789abcdef
ABSL_DLL extern const char
kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011...
ABSL_DLL extern const char
two_ASCII_digits[100][2]; // 00, 01, 02, 03...
// Stores the zero-padded, two-character decimal form of `i` at `buf[0]` and
// `buf[1]`. No terminating NUL is written. Requires 0 <= i < 100 (checked by
// assert) and room for two characters in `buf`. Example:
//   char buf[2];
//   PutTwoDigits(42, buf);
//   // buf[0] == '4'
//   // buf[1] == '2'
inline void PutTwoDigits(size_t i, char* buf) {
  assert(i < 100);
  // Each table row is exactly the two characters to emit.
  std::memcpy(buf, two_ASCII_digits[i], sizeof(two_ASCII_digits[i]));
}
// safe_strto?() functions for implementing SimpleAtoi()
bool safe_strto32_base(absl::string_view text, int32_t* value, int base);
bool safe_strto64_base(absl::string_view text, int64_t* value, int base);
bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base);
bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base);
bool safe_strtou128_base(absl::string_view text, absl::uint128* value,
int base);
static const int kFastToBufferSize = 32;
static const int kSixDigitsToBufferSize = 16;
// Helper function for fast formatting of floating-point values.
// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
// significant digits are returned, trailing zeros are removed, and numbers
// outside the range 0.0001-999999 are output using scientific notation
// (1.23456e+06). This routine is heavily optimized.
// Required buffer size is `kSixDigitsToBufferSize`.
size_t SixDigitsToBuffer(double d, char* buffer);
// These functions are intended for speed. All functions take an output buffer
// as an argument and return a pointer to the last byte they wrote, which is the
// terminating '\0'. At most `kFastToBufferSize` bytes are written.
char* FastIntToBuffer(int32_t, char*);
char* FastIntToBuffer(uint32_t, char*);
char* FastIntToBuffer(int64_t, char*);
char* FastIntToBuffer(uint64_t, char*);
// For enums and integer types that are not an exact match for the types above,
// use templates to call the appropriate one of the four overloads above.
// Catch-all overload for enums and integer types that do not exactly match
// one of the four concrete overloads. Picks the concrete overload from the
// signedness and width of `int_type`, casts `i` to that type, and forwards.
// Returns whatever the chosen overload returns.
template <typename int_type>
char* FastIntToBuffer(int_type i, char* buffer) {
  static_assert(sizeof(i) <= 64 / 8,
                "FastIntToBuffer works only with 64-bit-or-less integers.");
  // TODO(jorg): This signed-ness check is used because it works correctly
  // with enums, and it also serves to check that int_type is not a pointer.
  // If one day something like std::is_signed<enum E> works, switch to it.
  const bool is_signed = static_cast<int_type>(1) - 2 < 0;
  const bool wider_than_32_bits = sizeof(i) > 32 / 8;
  if (is_signed) {
    return wider_than_32_bits
               ? FastIntToBuffer(static_cast<int64_t>(i), buffer)
               : FastIntToBuffer(static_cast<int32_t>(i), buffer);
  }
  return wider_than_32_bits
             ? FastIntToBuffer(static_cast<uint64_t>(i), buffer)
             : FastIntToBuffer(static_cast<uint32_t>(i), buffer);
}
// Implementation of SimpleAtoi, generalized to support arbitrary base (used
// with base different from 10 elsewhere in Abseil implementation).
// Parses `s` in `base` into `*out`, choosing the safe_strto{32,64,u32,u64}
// routine that matches the width and signedness of `int_type`. `*out` is
// always assigned from the chosen routine's result, even when parsing fails.
// Returns that routine's success flag.
template <typename int_type>
ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out,
                                           int base) {
  static_assert(sizeof(*out) == 4 || sizeof(*out) == 8,
                "SimpleAtoi works only with 32-bit or 64-bit integers.");
  static_assert(!std::is_floating_point<int_type>::value,
                "Use SimpleAtof or SimpleAtod instead.");
  // TODO(jorg): This signed-ness check is used because it works correctly
  // with enums, and it also serves to check that int_type is not a pointer.
  // If one day something like std::is_signed<enum E> works, switch to it.
  const bool is_signed = static_cast<int_type>(1) - 2 < 0;
  const bool is_64_bit = sizeof(*out) == 64 / 8;

  bool parsed;
  if (is_signed && is_64_bit) {
    int64_t val;
    parsed = numbers_internal::safe_strto64_base(s, &val, base);
    *out = static_cast<int_type>(val);
  } else if (is_signed) {
    int32_t val;
    parsed = numbers_internal::safe_strto32_base(s, &val, base);
    *out = static_cast<int_type>(val);
  } else if (is_64_bit) {
    uint64_t val;
    parsed = numbers_internal::safe_strtou64_base(s, &val, base);
    *out = static_cast<int_type>(val);
  } else {
    uint32_t val;
    parsed = numbers_internal::safe_strtou32_base(s, &val, base);
    *out = static_cast<int_type>(val);
  }
  return parsed;
}
// FastHexToBufferZeroPad16()
//
// Outputs `val` into `out` as if by
// `snprintf(out, 17, "%016" PRIx64, val)` but without the terminating null
// character. Thus `out` must be of length >= 16.
// Returns the number of non-pad digits of the output (it can never be zero
// since 0 has one digit).
// Writes exactly 16 lowercase hex characters (zero-padded on the left) for
// `val` into out[0..15] and returns the count of significant digits, which
// is always in [1, 16].
inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) {
#ifdef __SSE4_2__
// Reorder the bytes via absl::big_endian::FromHost64 so that the byte that
// must be printed first sits at the lowest address.
uint64_t be = absl::big_endian::FromHost64(val);
const auto kNibbleMask = _mm_set1_epi8(0xf);
// 16-entry lookup table: nibble value -> ASCII hex digit.
const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f');
auto v = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&be)); // load the 8 bytes into the low 64 bits
auto v4 = _mm_srli_epi64(v, 4); // shift 4 right: high nibble of each byte moves into its low nibble
auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes: (high-nibble byte, low-nibble byte) pairs
auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles: keep only the low 4 bits of each byte
auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars: per-byte table lookup
// Unaligned store of all 16 characters.
_mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars);
#else
// Portable path: emit two characters per byte, most significant byte first.
for (int i = 0; i < 8; ++i) {
auto byte = (val >> (56 - 8 * i)) & 0xFF;
auto* hex = &absl::numbers_internal::kHexTable[byte * 2];
std::memcpy(out + 2 * i, hex, 2);
}
#endif
// | 0x1 so that even 0 has 1 digit.
// Every 4 leading zero bits correspond to one leading '0' pad character.
return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4;
}
} // namespace numbers_internal
// SimpleAtoi()
//
// Converts a string to an integer, using `safe_strto?()` functions for actual
// parsing, returning `true` if successful. The `safe_strto?()` functions apply
// strict checking; the string must be a base-10 integer, optionally followed or
// preceded by ASCII whitespace, with a value in the range of the corresponding
// integer type.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out) {
  // SimpleAtoi is the base-10 case of the generic integer parser.
  const int decimal_base = 10;
  return numbers_internal::safe_strtoi_base(str, out, decimal_base);
}
// Overload for absl::uint128, which safe_strtoi_base() rejects because it is
// neither 32 nor 64 bits wide. Parses base-10 input via the 128-bit routine.
ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str,
                                            absl::uint128* out) {
  const int decimal_base = 10;
  return numbers_internal::safe_strtou128_base(str, out, decimal_base);
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_NUMBERS_H_

View file

@ -0,0 +1,286 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <random>
#include <string>
#include <type_traits>
#include <vector>
#include "benchmark/benchmark.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/strings/numbers.h"
namespace {
// Benchmarks FastIntToBuffer() for integer type T. The formatted value starts
// at 0 and advances by state.range(0) on every iteration.
template <typename T>
void BM_FastIntToBuffer(benchmark::State& state) {
  // Increment in the unsigned domain to take advantage of well-defined
  // modular arithmetic.
  using Counter = typename std::make_unsigned<T>::type;
  const int step = state.range(0);
  char scratch[absl::numbers_internal::kFastToBufferSize];
  Counter counter = 0;
  for (auto _ : state) {
    absl::numbers_internal::FastIntToBuffer(static_cast<T>(counter), scratch);
    counter += step;
  }
}
BENCHMARK_TEMPLATE(BM_FastIntToBuffer, int32_t)->Range(0, 1 << 15);
BENCHMARK_TEMPLATE(BM_FastIntToBuffer, int64_t)->Range(0, 1 << 30);
// Creates an integer that would be printed as `num_digits` repeated 7s in the
// given `base`. `base` must be greater than or equal to 8 (enforced with
// ABSL_RAW_CHECK). A non-positive `num_digits` yields 0. The caller must keep
// `num_digits` small enough for the result to fit in int64_t.
int64_t RepeatedSevens(int num_digits, int base) {
  ABSL_RAW_CHECK(base >= 8, "");
  int64_t num = 0;
  // A counted loop terminates for every `num_digits`; the previous
  // `while (--num_digits)` form ran away for values <= 0.
  for (int i = 0; i < num_digits; ++i) {
    num = base * num + 7;
  }
  return num;
}
// Benchmarks safe_strto32_base() on a string of state.range(0) '7' characters
// parsed in base state.range(1), then checks the last parsed value.
void BM_safe_strto32_string(benchmark::State& state) {
  const int digits = state.range(0);
  const int base = state.range(1);
  // '7' is a valid digit in octal, decimal and hex.
  const std::string sevens(digits, '7');
  int32_t value = 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::numbers_internal::safe_strto32_base(sevens, &value, base));
  }
  ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), "");
}
BENCHMARK(BM_safe_strto32_string)
->ArgPair(1, 8)
->ArgPair(1, 10)
->ArgPair(1, 16)
->ArgPair(2, 8)
->ArgPair(2, 10)
->ArgPair(2, 16)
->ArgPair(4, 8)
->ArgPair(4, 10)
->ArgPair(4, 16)
->ArgPair(8, 8)
->ArgPair(8, 10)
->ArgPair(8, 16)
->ArgPair(10, 8)
->ArgPair(9, 10);
// Benchmarks safe_strto64_base() on a string of state.range(0) '7' characters
// parsed in base state.range(1), then checks the last parsed value.
void BM_safe_strto64_string(benchmark::State& state) {
  const int digits = state.range(0);
  const int base = state.range(1);
  // '7' is a valid digit in octal, decimal and hex.
  const std::string sevens(digits, '7');
  int64_t value = 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::numbers_internal::safe_strto64_base(sevens, &value, base));
  }
  ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), "");
}
BENCHMARK(BM_safe_strto64_string)
->ArgPair(1, 8)
->ArgPair(1, 10)
->ArgPair(1, 16)
->ArgPair(2, 8)
->ArgPair(2, 10)
->ArgPair(2, 16)
->ArgPair(4, 8)
->ArgPair(4, 10)
->ArgPair(4, 16)
->ArgPair(8, 8)
->ArgPair(8, 10)
->ArgPair(8, 16)
->ArgPair(16, 8)
->ArgPair(16, 10)
->ArgPair(16, 16);
// Benchmarks safe_strtou32_base() on a string of state.range(0) '7'
// characters parsed in base state.range(1), then checks the last parsed value.
void BM_safe_strtou32_string(benchmark::State& state) {
  const int digits = state.range(0);
  const int base = state.range(1);
  // '7' is a valid digit in octal, decimal and hex.
  const std::string sevens(digits, '7');
  uint32_t value = 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::numbers_internal::safe_strtou32_base(sevens, &value, base));
  }
  ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), "");
}
BENCHMARK(BM_safe_strtou32_string)
->ArgPair(1, 8)
->ArgPair(1, 10)
->ArgPair(1, 16)
->ArgPair(2, 8)
->ArgPair(2, 10)
->ArgPair(2, 16)
->ArgPair(4, 8)
->ArgPair(4, 10)
->ArgPair(4, 16)
->ArgPair(8, 8)
->ArgPair(8, 10)
->ArgPair(8, 16)
->ArgPair(10, 8)
->ArgPair(9, 10);
// Benchmarks safe_strtou64_base() on a string of state.range(0) '7'
// characters parsed in base state.range(1), then checks the last parsed value.
void BM_safe_strtou64_string(benchmark::State& state) {
  const int digits = state.range(0);
  const int base = state.range(1);
  // '7' is a valid digit in octal, decimal and hex.
  const std::string sevens(digits, '7');
  uint64_t value = 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::numbers_internal::safe_strtou64_base(sevens, &value, base));
  }
  ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), "");
}
BENCHMARK(BM_safe_strtou64_string)
->ArgPair(1, 8)
->ArgPair(1, 10)
->ArgPair(1, 16)
->ArgPair(2, 8)
->ArgPair(2, 10)
->ArgPair(2, 16)
->ArgPair(4, 8)
->ArgPair(4, 10)
->ArgPair(4, 16)
->ArgPair(8, 8)
->ArgPair(8, 10)
->ArgPair(8, 16)
->ArgPair(16, 8)
->ArgPair(16, 10)
->ArgPair(16, 16);
// Returns a vector of `num_strings` strings. Each string represents a
// floating point number with `num_digits` digits before the decimal point and
// another `num_digits` digits after.
std::vector<std::string> MakeFloatStrings(int num_strings, int num_digits) {
  // A random number generator is only a convenient source of varied digits;
  // the fixed seed makes the generated data repeatable.
  std::minstd_rand0 rng(1);
  std::uniform_int_distribution<int> random_digit('0', '9');

  // Appends `num_digits` pseudo-random decimal digit characters to `dst`.
  const auto append_digits = [&](std::string& dst) {
    for (int remaining = num_digits; remaining > 0; --remaining) {
      dst.push_back(static_cast<char>(random_digit(rng)));
    }
  };

  std::vector<std::string> float_strings(num_strings);
  for (std::string& s : float_strings) {
    s.reserve(2 * num_digits + 1);
    append_digits(s);  // integer part
    s.push_back('.');
    append_digits(s);  // fractional part
  }
  return float_strings;
}
// Converts `s` to `StringType` by direct initialization. For view-like
// types the result refers to `s`, so `s` must outlive it.
template <typename StringType>
StringType GetStringAs(const std::string& s) {
  StringType converted(s);
  return converted;
}
// `const char*` cannot be direct-initialized from a std::string, so hand out
// the string's own NUL-terminated buffer instead. `s` must outlive the result.
template <>
const char* GetStringAs<const char*>(const std::string& s) {
  return s.data();
}
// Converts every element of `strings` with GetStringAs<StringType>() and
// returns the results in the same order. View-like results refer to the
// elements of `strings`, which must therefore outlive the returned vector.
template <typename StringType>
std::vector<StringType> GetStringsAs(const std::vector<std::string>& strings) {
  std::vector<StringType> converted;
  converted.reserve(strings.size());
  for (auto it = strings.begin(); it != strings.end(); ++it) {
    converted.push_back(GetStringAs<StringType>(*it));
  }
  return converted;
}
// Benchmarks absl::SimpleAtof() on state.range(0) generated strings with
// state.range(1) digits on each side of the decimal point. The strings are
// handed to SimpleAtof as values of type T.
template <typename T>
void BM_SimpleAtof(benchmark::State& state) {
  const int num_strings = state.range(0);
  const int num_digits = state.range(1);
  // `inputs` may hold views into `backing`, so `backing` must stay alive for
  // the whole benchmark.
  const std::vector<std::string> backing =
      MakeFloatStrings(num_strings, num_digits);
  const std::vector<T> inputs = GetStringsAs<T>(backing);
  float value;
  for (auto _ : state) {
    for (auto it = inputs.begin(); it != inputs.end(); ++it) {
      benchmark::DoNotOptimize(absl::SimpleAtof(*it, &value));
    }
  }
}
BENCHMARK_TEMPLATE(BM_SimpleAtof, absl::string_view)
->ArgPair(10, 1)
->ArgPair(10, 2)
->ArgPair(10, 4)
->ArgPair(10, 8);
BENCHMARK_TEMPLATE(BM_SimpleAtof, const char*)
->ArgPair(10, 1)
->ArgPair(10, 2)
->ArgPair(10, 4)
->ArgPair(10, 8);
BENCHMARK_TEMPLATE(BM_SimpleAtof, std::string)
->ArgPair(10, 1)
->ArgPair(10, 2)
->ArgPair(10, 4)
->ArgPair(10, 8);
// Benchmarks absl::SimpleAtod() on state.range(0) generated strings with
// state.range(1) digits on each side of the decimal point. The strings are
// handed to SimpleAtod as values of type T.
template <typename T>
void BM_SimpleAtod(benchmark::State& state) {
  const int num_strings = state.range(0);
  const int num_digits = state.range(1);
  // `inputs` may hold views into `backing`, so `backing` must stay alive for
  // the whole benchmark.
  const std::vector<std::string> backing =
      MakeFloatStrings(num_strings, num_digits);
  const std::vector<T> inputs = GetStringsAs<T>(backing);
  double value;
  for (auto _ : state) {
    for (auto it = inputs.begin(); it != inputs.end(); ++it) {
      benchmark::DoNotOptimize(absl::SimpleAtod(*it, &value));
    }
  }
}
BENCHMARK_TEMPLATE(BM_SimpleAtod, absl::string_view)
->ArgPair(10, 1)
->ArgPair(10, 2)
->ArgPair(10, 4)
->ArgPair(10, 8);
BENCHMARK_TEMPLATE(BM_SimpleAtod, const char*)
->ArgPair(10, 1)
->ArgPair(10, 2)
->ArgPair(10, 4)
->ArgPair(10, 8);
BENCHMARK_TEMPLATE(BM_SimpleAtod, std::string)
->ArgPair(10, 1)
->ArgPair(10, 2)
->ArgPair(10, 4)
->ArgPair(10, 8);
// Benchmarks FastHexToBufferZeroPad16() over 1000 values drawn with
// absl::LogUniform across the full uint64_t range. The values are processed
// in batches of nums.size() per KeepRunningBatch() call.
void BM_FastHexToBufferZeroPad16(benchmark::State& state) {
  absl::BitGen rng;
  const uint64_t lowest = std::numeric_limits<uint64_t>::min();
  const uint64_t highest = std::numeric_limits<uint64_t>::max();
  std::vector<uint64_t> nums(1000);
  for (uint64_t& num : nums) {
    num = absl::LogUniform(rng, lowest, highest);
  }

  char buf[16];
  while (state.KeepRunningBatch(nums.size())) {
    for (const uint64_t num : nums) {
      auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(num, buf);
      benchmark::DoNotOptimize(digits);
      benchmark::DoNotOptimize(buf);
    }
  }
}
BENCHMARK(BM_FastHexToBufferZeroPad16);
} // namespace

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,246 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_cat.h"
#include <assert.h>
#include <algorithm>
#include <cstdint>
#include <cstring>
#include "absl/strings/ascii.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/numbers.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Formats `hex.value` as lowercase hex, right-aligned against the end of
// digits_, and points piece_ at the result. The output is left-padded with
// `hex.fill` up to `hex.width` characters when the number is shorter.
AlphaNum::AlphaNum(Hex hex) {
static_assert(numbers_internal::kFastToBufferSize >= 32,
"This function only works when output buffer >= 32 bytes long");
// All offsets below are measured backwards from `end`.
char* const end = &digits_[numbers_internal::kFastToBufferSize];
// Write 16 zero-padded hex characters into [end - 16, end); real_width is
// the number of significant digits among them.
auto real_width =
absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16);
if (real_width >= hex.width) {
// No padding needed: expose only the significant digits.
piece_ = absl::string_view(end - real_width, real_width);
} else {
// Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and
// max pad width can be up to 20.
std::memset(end - 32, hex.fill, 16);
// Patch up everything else up to the real_width.
// This covers the 16 bytes that end right before the first significant
// digit, replacing the '0' padding written by FastHexToBufferZeroPad16.
std::memset(end - real_width - 16, hex.fill, 16);
piece_ = absl::string_view(end - hex.width, hex.width);
}
}
// Formats `dec.value` (with a leading '-' when `dec.neg` is set) in decimal,
// right-aligned against the end of digits_, and points piece_ at the result.
// The output is padded with `dec.fill` up to `dec.width` characters when the
// number is shorter than that.
AlphaNum::AlphaNum(Dec dec) {
  assert(dec.width <= numbers_internal::kFastToBufferSize);
  char* const end = &digits_[numbers_internal::kFastToBufferSize];
  char* const pad_begin = end - dec.width;
  const bool neg = dec.neg;

  // Emit digits from least to most significant, moving backwards from `end`.
  // The do/while guarantees that zero still produces a single '0'.
  char* cursor = end;
  uint64_t remaining = dec.value;
  do {
    *--cursor = '0' + (remaining % 10);
    remaining /= 10;
  } while (remaining != 0);
  if (neg) *--cursor = '-';

  const ptrdiff_t pad = cursor - pad_begin;
  if (pad > 0) {
    // Tricky: if the fill character is ' ', then it's <fill><+/-><digits>
    // But...: if the fill character is '0', then it's <+/-><fill><digits>
    const bool sign_before_fill = neg && dec.fill == '0';
    if (sign_before_fill) {
      ++cursor;  // Drop the sign just written; the fill overwrites it.
    }
    cursor -= pad;
    std::fill_n(cursor, pad, dec.fill);
    if (sign_before_fill) {
      *--cursor = '-';  // Re-add the sign in front of the zero fill.
    }
  }
  piece_ = absl::string_view(cursor, end - cursor);
}
// ----------------------------------------------------------------------
// StrCat()
// This merges the given strings or integers, with no delimiter. This
// is designed to be the fastest possible way to construct a string out
// of a mix of raw C strings, string_views, strings, and integer values.
// ----------------------------------------------------------------------
// Append is merely a version of memcpy that returns the address of the byte
// after the area just overwritten.
// Copies the bytes of `x` to `out` and returns the address one past the last
// byte written. Nothing is copied when `x` is empty.
static char* Append(char* out, const AlphaNum& x) {
  // Read the size once, before memcpy: memcpy is allowed to overwrite
  // arbitrary memory, so reading it afterwards would force an extra fetch.
  const auto len = x.size();
  char* const after = out + len;
  if (len != 0) {
    memcpy(out, x.data(), len);
  }
  return after;
}
// Returns the concatenation of `a` and `b`. The result is resized once to
// the exact total length and then filled in place.
std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
  const auto total_size = a.size() + b.size();
  std::string result;
  absl::strings_internal::STLStringResizeUninitialized(&result, total_size);
  char* const begin = &result[0];
  char* out = Append(begin, a);
  out = Append(out, b);
  assert(out == begin + result.size());
  return result;
}
// Returns the concatenation of `a`, `b` and `c`. The result is resized once
// to the exact total length and then filled in place.
std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) {
  const auto total_size = a.size() + b.size() + c.size();
  std::string result;
  strings_internal::STLStringResizeUninitialized(&result, total_size);
  char* const begin = &result[0];
  char* out = Append(begin, a);
  out = Append(out, b);
  out = Append(out, c);
  assert(out == begin + result.size());
  return result;
}
// Returns the concatenation of `a`, `b`, `c` and `d`. The result is resized
// once to the exact total length and then filled in place.
std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
                   const AlphaNum& d) {
  const auto total_size = a.size() + b.size() + c.size() + d.size();
  std::string result;
  strings_internal::STLStringResizeUninitialized(&result, total_size);
  char* const begin = &result[0];
  char* out = Append(begin, a);
  out = Append(out, b);
  out = Append(out, c);
  out = Append(out, d);
  assert(out == begin + result.size());
  return result;
}
namespace strings_internal {
// Internal helper (not part of the public API): concatenates all `pieces`
// into a new string using one sizing pass followed by one copying pass.
std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
  // Pass 1: measure, so the result is resized exactly once.
  size_t needed = 0;
  for (const absl::string_view fragment : pieces) {
    needed += fragment.size();
  }
  std::string joined;
  strings_internal::STLStringResizeUninitialized(&joined, needed);

  // Pass 2: copy each non-empty fragment into place.
  char* const start = &joined[0];
  char* cursor = start;
  for (const absl::string_view fragment : pieces) {
    const size_t len = fragment.size();
    if (len == 0) continue;
    memcpy(cursor, fragment.data(), len);
    cursor += len;
  }
  assert(cursor == start + joined.size());
  return joined;
}
// It's possible to call StrAppend with an absl::string_view that is itself a
// fragment of the string we're appending to. However, the result of doing so
// is undefined, so check for it in debug mode.
//
// The check is a single unsigned comparison: if `src` starts below `dest`,
// the difference wraps around to a huge value and passes; if `src` starts
// anywhere in [dest.data(), dest.data() + dest.size()], the difference is at
// most dest.size() and the assertion fires. Exception: appending an empty
// string is always allowed.
//
// Both pointers are converted to uintptr_t *before* subtracting. Subtracting
// two pointers that do not point into the same array is undefined behaviour,
// and here `src` normally lives in an unrelated buffer.
#define ASSERT_NO_OVERLAP(dest, src)                              \
  assert(((src).size() == 0) ||                                   \
         ((reinterpret_cast<uintptr_t>((src).data()) -            \
           reinterpret_cast<uintptr_t>((dest).data())) >          \
          static_cast<uintptr_t>((dest).size())))
void AppendPieces(std::string* dest,
std::initializer_list<absl::string_view> pieces) {
size_t old_size = dest->size();
size_t total_size = old_size;
for (const absl::string_view piece : pieces) {
ASSERT_NO_OVERLAP(*dest, piece);
total_size += piece.size();
}
strings_internal::STLStringResizeUninitialized(dest, total_size);
char* const begin = &(*dest)[0];
char* out = begin + old_size;
for (const absl::string_view piece : pieces) {
const size_t this_size = piece.size();
if (this_size != 0) {
memcpy(out, piece.data(), this_size);
out += this_size;
}
}
assert(out == begin + dest->size());
}
} // namespace strings_internal
void StrAppend(std::string* dest, const AlphaNum& a) {
ASSERT_NO_OVERLAP(*dest, a);
dest->append(a.data(), a.size());
}
// Two-argument StrAppend: grows `*dest` once and copies both pieces in
// behind its existing contents.
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
  ASSERT_NO_OVERLAP(*dest, a);
  ASSERT_NO_OVERLAP(*dest, b);

  const std::string::size_type original_size = dest->size();
  const std::string::size_type grown_size =
      original_size + a.size() + b.size();
  strings_internal::STLStringResizeUninitialized(dest, grown_size);

  char* const start = &(*dest)[0];
  char* cursor = start + original_size;
  cursor = Append(cursor, a);
  cursor = Append(cursor, b);
  assert(cursor == start + dest->size());
}
// Three-argument StrAppend: grows `*dest` once and copies the pieces in
// order behind its existing contents.
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
               const AlphaNum& c) {
  ASSERT_NO_OVERLAP(*dest, a);
  ASSERT_NO_OVERLAP(*dest, b);
  ASSERT_NO_OVERLAP(*dest, c);

  const std::string::size_type original_size = dest->size();
  const std::string::size_type grown_size =
      original_size + a.size() + b.size() + c.size();
  strings_internal::STLStringResizeUninitialized(dest, grown_size);

  char* const start = &(*dest)[0];
  char* cursor = start + original_size;
  cursor = Append(cursor, a);
  cursor = Append(cursor, b);
  cursor = Append(cursor, c);
  assert(cursor == start + dest->size());
}
// Four-argument StrAppend: grows `*dest` once and copies the pieces in order
// behind its existing contents.
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
               const AlphaNum& c, const AlphaNum& d) {
  ASSERT_NO_OVERLAP(*dest, a);
  ASSERT_NO_OVERLAP(*dest, b);
  ASSERT_NO_OVERLAP(*dest, c);
  ASSERT_NO_OVERLAP(*dest, d);

  const std::string::size_type original_size = dest->size();
  const std::string::size_type grown_size =
      original_size + a.size() + b.size() + c.size() + d.size();
  strings_internal::STLStringResizeUninitialized(dest, grown_size);

  char* const start = &(*dest)[0];
  char* cursor = start + original_size;
  cursor = Append(cursor, a);
  cursor = Append(cursor, b);
  cursor = Append(cursor, c);
  cursor = Append(cursor, d);
  assert(cursor == start + dest->size());
}
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,408 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_cat.h
// -----------------------------------------------------------------------------
//
// This package contains functions for efficiently concatenating and appending
// strings: `StrCat()` and `StrAppend()`. Most of the work within these routines
// is actually handled through use of a special AlphaNum type, which was
// designed to be used as a parameter type that efficiently manages conversion
// to strings and avoids copies in the above operations.
//
// Any routine accepting either a string or a number may accept `AlphaNum`.
// The basic idea is that by accepting a `const AlphaNum &` as an argument
// to your function, your callers will automagically convert bools, integers,
// and floating point values to strings for you.
//
// NOTE: Use of `AlphaNum` outside of the //absl/strings package is unsupported
// except for the specific case of function parameters of type `AlphaNum` or
// `const AlphaNum &`. In particular, instantiating `AlphaNum` directly as a
// stack variable is not supported.
//
// Conversion from 8-bit values is not accepted because, if it were, then an
// attempt to pass ':' instead of ":" might result in a 58 ending up in your
// result.
//
// Bools convert to "0" or "1". Pointers to types other than `char *` are not
// valid inputs. No output is generated for null `char *` pointers.
//
// Floating point numbers are formatted with six-digit precision, which is
// the default for "std::cout <<" or printf "%g" (the same as "%.6g").
//
// You can convert to hexadecimal output rather than decimal output using the
// `Hex` type contained here. To do so, pass `Hex(my_int)` as a parameter to
// `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using
// a `PadSpec` enum.
//
// -----------------------------------------------------------------------------
#ifndef ABSL_STRINGS_STR_CAT_H_
#define ABSL_STRINGS_STR_CAT_H_
#include <array>
#include <cstdint>
#include <string>
#include <type_traits>
#include <vector>
#include "absl/base/port.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// AlphaNumBuffer allows a way to pass a string to StrCat without having to do
// memory allocation. It is simply a pair of a fixed-size character array, and
// a size. Please don't use outside of absl, yet.
//
// `max_size` is the capacity of the array; `size` is the number of bytes at
// the front of `data` that are meaningful.
template <size_t max_size>
struct AlphaNumBuffer {
// Inline character storage of fixed capacity.
std::array<char, max_size> data;
// Number of valid bytes at the start of `data`.
size_t size;
};
} // namespace strings_internal
// Enum that specifies the minimum field width and the fill character to use
// in a `Hex` or `Dec` conversion. A `kZeroPad2` value, for example, would
// produce hexadecimal strings such as "0a","0f" and a `kSpacePad5` value
// would produce hexadecimal strings such as "    a","    f".
//
// Value layout: the zero-pad enumerators count up from kNoPad = 1, so
// kZeroPadN has the numeric value N; every kSpacePadN is the corresponding
// kZeroPadN plus 64. `Hex` and `Dec` decode the width and fill character from
// this layout, so the enumerators must stay contiguous and in order.
enum PadSpec : uint8_t {
kNoPad = 1,
kZeroPad2,
kZeroPad3,
kZeroPad4,
kZeroPad5,
kZeroPad6,
kZeroPad7,
kZeroPad8,
kZeroPad9,
kZeroPad10,
kZeroPad11,
kZeroPad12,
kZeroPad13,
kZeroPad14,
kZeroPad15,
kZeroPad16,
kZeroPad17,
kZeroPad18,
kZeroPad19,
kZeroPad20,
// Space-padded variants start 64 above their zero-padded counterparts.
kSpacePad2 = kZeroPad2 + 64,
kSpacePad3,
kSpacePad4,
kSpacePad5,
kSpacePad6,
kSpacePad7,
kSpacePad8,
kSpacePad9,
kSpacePad10,
kSpacePad11,
kSpacePad12,
kSpacePad13,
kSpacePad14,
kSpacePad15,
kSpacePad16,
kSpacePad17,
kSpacePad18,
kSpacePad19,
kSpacePad20,
};
// -----------------------------------------------------------------------------
// Hex
// -----------------------------------------------------------------------------
//
// `Hex` stores a set of hexadecimal string conversion parameters for use
// within `AlphaNum` string conversions.
// Hexadecimal conversion parameters: the value widened to 64 bits, the
// minimum field width, and the fill character.
struct Hex {
uint64_t value;
uint8_t width;
char fill;
// The four integral constructors below are selected by sizeof(Int). Each one
// first casts to the unsigned type of the SAME width, so a negative input
// keeps only its own bits rather than being sign-extended when it is widened
// to uint64_t. Pointers are excluded here and handled by the overload further
// down.
template <typename Int>
explicit Hex(
Int v, PadSpec spec = absl::kNoPad,
typename std::enable_if<sizeof(Int) == 1 &&
!std::is_pointer<Int>::value>::type* = nullptr)
: Hex(spec, static_cast<uint8_t>(v)) {}
template <typename Int>
explicit Hex(
Int v, PadSpec spec = absl::kNoPad,
typename std::enable_if<sizeof(Int) == 2 &&
!std::is_pointer<Int>::value>::type* = nullptr)
: Hex(spec, static_cast<uint16_t>(v)) {}
template <typename Int>
explicit Hex(
Int v, PadSpec spec = absl::kNoPad,
typename std::enable_if<sizeof(Int) == 4 &&
!std::is_pointer<Int>::value>::type* = nullptr)
: Hex(spec, static_cast<uint32_t>(v)) {}
template <typename Int>
explicit Hex(
Int v, PadSpec spec = absl::kNoPad,
typename std::enable_if<sizeof(Int) == 8 &&
!std::is_pointer<Int>::value>::type* = nullptr)
: Hex(spec, static_cast<uint64_t>(v)) {}
// Pointer overload: formats the pointer's address value.
template <typename Pointee>
explicit Hex(Pointee* v, PadSpec spec = absl::kNoPad)
: Hex(spec, reinterpret_cast<uintptr_t>(v)) {}
private:
// Common target of all public constructors. Decodes `spec`:
//   kNoPad      -> width 1
//   kZeroPadN   -> width N, fill '0'
//   kSpacePadN  -> width N, fill ' '
Hex(PadSpec spec, uint64_t v)
: value(v),
width(spec == absl::kNoPad
? 1
: spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
: spec - absl::kZeroPad2 + 2),
fill(spec >= absl::kSpacePad2 ? ' ' : '0') {}
};
// -----------------------------------------------------------------------------
// Dec
// -----------------------------------------------------------------------------
//
// `Dec` stores a set of decimal string conversion parameters for use
// within `AlphaNum` string conversions. Dec is slower than the default
// integer conversion, so use it only if you need padding.
// Decimal conversion parameters: magnitude, sign, minimum field width and
// fill character.
struct Dec {
  uint64_t value;  // Absolute value of the input.
  uint8_t width;   // Minimum field width decoded from the PadSpec.
  char fill;       // '0' for kZeroPadN / kNoPad, ' ' for kSpacePadN.
  bool neg;        // True when the input was negative.

  template <typename Int>
  explicit Dec(Int v, PadSpec spec = absl::kNoPad,
               typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr)
      // Negation is done in unsigned arithmetic so the most negative value
      // of a signed type does not overflow.
      : value(v >= 0 ? static_cast<uint64_t>(v)
                     : uint64_t{0} - static_cast<uint64_t>(v)),
        // kNoPad means width 1; otherwise strip the family's base enumerator
        // and add back the 2 that the "...Pad2" enumerators stand for.
        width(spec == absl::kNoPad
                  ? 1
                  : (spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2
                                              : spec - absl::kZeroPad2) +
                        2),
        fill(spec < absl::kSpacePad2 ? '0' : ' '),
        neg(v < 0) {}
};
// -----------------------------------------------------------------------------
// AlphaNum
// -----------------------------------------------------------------------------
//
// The `AlphaNum` class acts as the main parameter type for `StrCat()` and
// `StrAppend()`, providing efficient conversion of numeric, boolean, and
// hexadecimal values (through the `Hex` type) into strings.
// Parameter type that presents its constructor argument as a string_view
// (`piece_`). Numeric arguments are formatted into the embedded `digits_`
// buffer; string-like arguments are viewed in place without copying.
class AlphaNum {
public:
// No bool ctor -- bools convert to an integral type.
// A bool ctor would also convert incoming pointers (bletch).

// Integer constructors: format `x` into digits_. The pointer returned by
// FastIntToBuffer minus the start of the buffer is used as the length.
AlphaNum(int x) // NOLINT(runtime/explicit)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(unsigned int x) // NOLINT(runtime/explicit)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(unsigned long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(long long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
AlphaNum(unsigned long long x) // NOLINT(*)
: piece_(digits_,
numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
// Floating-point constructors: SixDigitsToBuffer's return value is used
// directly as the length of the formatted text in digits_.
AlphaNum(float f) // NOLINT(runtime/explicit)
: piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
AlphaNum(double f) // NOLINT(runtime/explicit)
: piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
// Hex/Dec constructors are defined out of line.
AlphaNum(Hex hex); // NOLINT(runtime/explicit)
AlphaNum(Dec dec); // NOLINT(runtime/explicit)
// Views the first `buf.size` bytes of a caller-owned AlphaNumBuffer.
template <size_t size>
AlphaNum( // NOLINT(runtime/explicit)
const strings_internal::AlphaNumBuffer<size>& buf)
: piece_(&buf.data[0], buf.size) {}
// String-like constructors: piece_ refers to the caller's data, which must
// therefore stay alive for as long as this AlphaNum is used.
AlphaNum(const char* c_str) : piece_(c_str) {} // NOLINT(runtime/explicit)
AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit)
template <typename Allocator>
AlphaNum( // NOLINT(runtime/explicit)
const std::basic_string<char, std::char_traits<char>, Allocator>& str)
: piece_(str) {}
// Use string literals ":" instead of character literals ':'.
AlphaNum(char c) = delete; // NOLINT(runtime/explicit)
// Not copyable. NOTE(review): presumably because piece_ may point into this
// object's own digits_, which a member-wise copy would leave pointing at the
// source object -- confirm.
AlphaNum(const AlphaNum&) = delete;
AlphaNum& operator=(const AlphaNum&) = delete;
// Accessors over the viewed text.
absl::string_view::size_type size() const { return piece_.size(); }
const char* data() const { return piece_.data(); }
absl::string_view Piece() const { return piece_; }
// Normal enums are already handled by the integer formatters.
// This overload matches only scoped enums.
// It delegates to the constructor for the enum's underlying type.
template <typename T,
typename = typename std::enable_if<
std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>
AlphaNum(T e) // NOLINT(runtime/explicit)
: AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {}
// vector<bool>::reference and const_reference require special help to
// convert to `AlphaNum` because it requires two user defined conversions.
// The is_class test keeps this overload from matching when const_reference
// is a plain `bool`.
template <
typename T,
typename std::enable_if<
std::is_class<T>::value &&
(std::is_same<T, std::vector<bool>::reference>::value ||
std::is_same<T, std::vector<bool>::const_reference>::value)>::type* =
nullptr>
AlphaNum(T e) : AlphaNum(static_cast<bool>(e)) {} // NOLINT(runtime/explicit)
private:
// View of the text to emit: either into digits_ or into caller-owned data.
absl::string_view piece_;
// Scratch space that the numeric constructors format into.
char digits_[numbers_internal::kFastToBufferSize];
};
// -----------------------------------------------------------------------------
// StrCat()
// -----------------------------------------------------------------------------
//
// Merges given strings or numbers, using no delimiter(s), returning the merged
// result as a string.
//
// `StrCat()` is designed to be the fastest possible way to construct a string
// out of a mix of raw C strings, string_views, strings, bool values,
// and numeric values.
//
// Don't use `StrCat()` for user-visible strings. The localization process
// works poorly on strings built up out of fragments.
//
// For clarity and performance, don't use `StrCat()` when appending to a
// string. Use `StrAppend()` instead. In particular, avoid using any of these
// (anti-)patterns:
//
// str.append(StrCat(...))
// str += StrCat(...)
// str = StrCat(str, ...)
//
// The last case is the worst, with a potential to change a loop
// from a linear time operation with O(1) dynamic allocations into a
// quadratic time operation with O(n) dynamic allocations.
//
// See `StrAppend()` below for more information.
namespace strings_internal {
// Do not call directly - this is not part of the public API.
// CatPieces backs the variadic StrCat() overload and AppendPieces backs the
// variadic StrAppend() overload; both are defined out of line.
std::string CatPieces(std::initializer_list<absl::string_view> pieces);
void AppendPieces(std::string* dest,
std::initializer_list<absl::string_view> pieces);
} // namespace strings_internal
// Zero arguments: yields an empty string.
ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
// One argument: copies the argument's text into a new string.
ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a) {
return std::string(a.data(), a.size());
}
// Two to four arguments: dedicated non-template overloads defined out of line.
ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b);
ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c);
ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c, const AlphaNum& d);
// Support 5 or more arguments
//
// Each extra argument is converted to an AlphaNum inside the call expression
// and only its string_view is passed on. Any AlphaNum temporaries created by
// those conversions live until the end of the full expression, i.e. until
// CatPieces() has returned, so the views stay valid for the whole call. Do
// not split this into separate statements.
template <typename... AV>
ABSL_MUST_USE_RESULT inline std::string StrCat(
const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d,
const AlphaNum& e, const AV&... args) {
return strings_internal::CatPieces(
{a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
static_cast<const AlphaNum&>(args).Piece()...});
}
// -----------------------------------------------------------------------------
// StrAppend()
// -----------------------------------------------------------------------------
//
// Appends a string or set of strings to an existing string, in a similar
// fashion to `StrCat()`.
//
// WARNING: `StrAppend(&str, a, b, c, ...)` requires that none of the
// a, b, c, parameters be a reference into str. For speed, `StrAppend()` does
// not try to check each of its input arguments to be sure that they are not
// a subset of the string being appended to. That is, while this will work:
//
// std::string s = "foo";
// s += s;
//
// This output is undefined:
//
// std::string s = "foo";
// StrAppend(&s, s);
//
// This output is undefined as well, since `absl::string_view` does not own its
// data:
//
// std::string s = "foobar";
// absl::string_view p = s;
// StrAppend(&s, p);
// Zero pieces: nothing to append, the destination is left untouched.
inline void StrAppend(std::string*) {}
// One to four pieces: dedicated non-template overloads defined out of line.
void StrAppend(std::string* dest, const AlphaNum& a);
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b);
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c);
void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c, const AlphaNum& d);
// Support 5 or more arguments
//
// Each extra argument is converted to an AlphaNum inside the call expression
// and only its string_view is passed on. Any AlphaNum temporaries created by
// those conversions live until the end of the full expression, i.e. until
// AppendPieces() has returned, so the views stay valid for the whole call.
// Do not split this into separate statements.
template <typename... AV>
inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
const AlphaNum& c, const AlphaNum& d, const AlphaNum& e,
const AV&... args) {
strings_internal::AppendPieces(
dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
static_cast<const AlphaNum&>(args).Piece()...});
}
// Formats `d` with SixDigitsToBuffer (the %.6g-style format intended as
// StrCat's future floating-point default) into a fixed-size buffer that can
// be passed straight to StrCat()/StrAppend(). This is fast.
inline strings_internal::AlphaNumBuffer<
    numbers_internal::kSixDigitsToBufferSize>
SixDigits(double d) {
  using Buffer = strings_internal::AlphaNumBuffer<
      numbers_internal::kSixDigitsToBufferSize>;
  Buffer formatted;
  // The formatter's return value is the length of the text it wrote.
  formatted.size =
      numbers_internal::SixDigitsToBuffer(d, formatted.data.data());
  return formatted;
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_STR_CAT_H_

View file

@ -0,0 +1,140 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_cat.h"
#include <cstdint>
#include <string>
#include "benchmark/benchmark.h"
#include "absl/strings/substitute.h"
namespace {
const char kStringOne[] = "Once Upon A Time, ";
const char kStringTwo[] = "There was a string benchmark";
// Steps through 0, 1, -1, 2, -2, 3, -3, ... so that the benchmarks exercise
// negative numbers as well as positive ones.
inline int IncrementAlternatingSign(int i) {
  if (i > 0) {
    // A positive value is followed by its negation.
    return -i;
  }
  // Zero or a negative value is followed by the next larger magnitude.
  return 1 - i;
}
// Benchmarks a four-argument StrCat (C string, int, C string, unsigned 64-bit
// product). The result is copied into a stack buffer and its bytes are
// summed; the sum is passed to DoNotOptimize.
void BM_Sum_By_StrCat(benchmark::State& state) {
int i = 0;
char foo[100];
for (auto _ : state) {
// NOLINTNEXTLINE(runtime/printf)
strcpy(foo, absl::StrCat(kStringOne, i, kStringTwo, i * 65536ULL).c_str());
int sum = 0;
for (char* f = &foo[0]; *f != 0; ++f) {
sum += *f;
}
benchmark::DoNotOptimize(sum);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_Sum_By_StrCat);
// Baseline: builds "<one> <two>:<i>" with snprintf into a stack buffer.
void BM_StrCat_By_snprintf(benchmark::State& state) {
int i = 0;
char on_stack[1000];
for (auto _ : state) {
snprintf(on_stack, sizeof(on_stack), "%s %s:%d", kStringOne, kStringTwo, i);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_StrCat_By_snprintf);
// Baseline: builds the same text with a chain of std::string operator+,
// using StrCat only to convert the integer.
void BM_StrCat_By_Strings(benchmark::State& state) {
int i = 0;
for (auto _ : state) {
std::string result =
std::string(kStringOne) + " " + kStringTwo + ":" + absl::StrCat(i);
benchmark::DoNotOptimize(result);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_StrCat_By_Strings);
// Baseline: builds the same text with repeated std::string operator+=,
// using StrCat only to convert the integer.
void BM_StrCat_By_StringOpPlus(benchmark::State& state) {
int i = 0;
for (auto _ : state) {
std::string result = kStringOne;
result += " ";
result += kStringTwo;
result += ":";
result += absl::StrCat(i);
benchmark::DoNotOptimize(result);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_StrCat_By_StringOpPlus);
// Builds the same text with a single five-argument StrCat call, which takes
// the variadic overload.
void BM_StrCat_By_StrCat(benchmark::State& state) {
int i = 0;
for (auto _ : state) {
std::string result = absl::StrCat(kStringOne, " ", kStringTwo, ":", i);
benchmark::DoNotOptimize(result);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_StrCat_By_StrCat);
// Hexadecimal formatting through absl::Hex inside StrCat. The value is offset
// by 0x10000000 and computed in 64 bits.
void BM_HexCat_By_StrCat(benchmark::State& state) {
int i = 0;
for (auto _ : state) {
std::string result =
absl::StrCat(kStringOne, " ", absl::Hex(int64_t{i} + 0x10000000));
benchmark::DoNotOptimize(result);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_HexCat_By_StrCat);
// Counterpart of BM_HexCat_By_StrCat using absl::Substitute; the same offset
// value is passed as a void* argument.
void BM_HexCat_By_Substitute(benchmark::State& state) {
int i = 0;
for (auto _ : state) {
std::string result = absl::Substitute(
"$0 $1", kStringOne, reinterpret_cast<void*>(int64_t{i} + 0x10000000));
benchmark::DoNotOptimize(result);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_HexCat_By_Substitute);
// Float formatting through StrCat's built-in float conversion; the float is
// advanced by 1.001 on every iteration so the formatted text keeps changing.
void BM_FloatToString_By_StrCat(benchmark::State& state) {
int i = 0;
float foo = 0.0f;
for (auto _ : state) {
std::string result = absl::StrCat(foo += 1.001f, " != ", int64_t{i});
benchmark::DoNotOptimize(result);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_FloatToString_By_StrCat);
// Double formatting through absl::SixDigits; the double is advanced by 1.001
// on every iteration so the formatted text keeps changing.
void BM_DoubleToString_By_SixDigits(benchmark::State& state) {
int i = 0;
double foo = 0.0;
for (auto _ : state) {
std::string result =
absl::StrCat(absl::SixDigits(foo += 1.001), " != ", int64_t{i});
benchmark::DoNotOptimize(result);
i = IncrementAlternatingSign(i);
}
}
BENCHMARK(BM_DoubleToString_By_SixDigits);
} // namespace

View file

@ -0,0 +1,610 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit tests for all str_cat.h functions
#include "absl/strings/str_cat.h"
#include <cstdint>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "absl/strings/substitute.h"
// ABSL_EXPECT_DEBUG_DEATH(statement, regex) wraps gtest's EXPECT_DEBUG_DEATH.
// On Android the supplied regex is replaced by ".*" so any message matches.
#ifdef __ANDROID__
// Android assert messages only go to system log, so death tests cannot inspect
// the message for matching.
#define ABSL_EXPECT_DEBUG_DEATH(statement, regex) \
EXPECT_DEBUG_DEATH(statement, ".*")
#else
#define ABSL_EXPECT_DEBUG_DEATH(statement, regex) \
EXPECT_DEBUG_DEATH(statement, regex)
#endif
namespace {
// Test absl::StrCat of ints and longs of various sizes and signedness.
TEST(StrCat, Ints) {
  // One signed and one unsigned representative per integer width / typedef.
  const short s = -1;  // NOLINT(runtime/int)
  const uint16_t us = 2;
  const int i = -3;
  const unsigned int ui = 4;
  const long l = -5;                 // NOLINT(runtime/int)
  const unsigned long ul = 6;        // NOLINT(runtime/int)
  const long long ll = -7;           // NOLINT(runtime/int)
  const unsigned long long ull = 8;  // NOLINT(runtime/int)
  const ptrdiff_t ptrdiff = -9;
  const size_t size = 10;
  const intptr_t intptr = -12;
  const uintptr_t uintptr = 13;

  // Each pair is concatenated and compared against its decimal rendering.
  EXPECT_EQ(absl::StrCat(s, us), "-12");
  EXPECT_EQ(absl::StrCat(i, ui), "-34");
  EXPECT_EQ(absl::StrCat(l, ul), "-56");
  EXPECT_EQ(absl::StrCat(ll, ull), "-78");
  EXPECT_EQ(absl::StrCat(ptrdiff, size), "-910");
  EXPECT_EQ(absl::StrCat(ptrdiff, intptr), "-9-12");
  EXPECT_EQ(absl::StrCat(uintptr, 0), "130");
}
// Checks that both unscoped and scoped enums are formatted as the decimal
// value of the enumerator, for signed and unsigned underlying types.
TEST(StrCat, Enums) {
// Unscoped enum, including a value that matches no enumerator.
enum SmallNumbers { One = 1, Ten = 10 } e = Ten;
EXPECT_EQ("10", absl::StrCat(e));
EXPECT_EQ("-5", absl::StrCat(SmallNumbers(-5)));
// Scoped enum with the default underlying type and a negative enumerator.
enum class Option { Boxers = 1, Briefs = -1 };
EXPECT_EQ("-1", absl::StrCat(Option::Briefs));
// Scoped enum with a 64-bit unsigned underlying type.
enum class Airplane : uint64_t {
Airbus = 1,
Boeing = 1000,
Canary = 10000000000 // too big for "int"
};
EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));
// Scoped enum with a 32-bit signed underlying type: the minimum value and -1
// must print as negative numbers.
enum class TwoGig : int32_t {
TwoToTheZero = 1,
TwoToTheSixteenth = 1 << 16,
TwoToTheThirtyFirst = INT32_MIN
};
EXPECT_EQ("65536", absl::StrCat(TwoGig::TwoToTheSixteenth));
EXPECT_EQ("-2147483648", absl::StrCat(TwoGig::TwoToTheThirtyFirst));
EXPECT_EQ("-1", absl::StrCat(static_cast<TwoGig>(-1)));
// Scoped enum with a 32-bit unsigned underlying type: -1 converted to the
// enum must print as the largest 32-bit unsigned value.
enum class FourGig : uint32_t {
TwoToTheZero = 1,
TwoToTheSixteenth = 1 << 16,
TwoToTheThirtyFirst = 1U << 31 // too big for "int"
};
EXPECT_EQ("65536", absl::StrCat(FourGig::TwoToTheSixteenth));
EXPECT_EQ("2147483648", absl::StrCat(FourGig::TwoToTheThirtyFirst));
EXPECT_EQ("4294967295", absl::StrCat(static_cast<FourGig>(-1)));
// Same expectation as the Airplane check above, repeated.
EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));
}
// Exercises StrCat with mixes of std::string, string_view, C strings,
// integers, bools and SixDigits-formatted floating-point values.
TEST(StrCat, Basics) {
std::string result;
// The same three words in each supported string representation.
std::string strs[] = {"Hello", "Cruel", "World"};
std::string stdstrs[] = {
"std::Hello",
"std::Cruel",
"std::World"
};
absl::string_view pieces[] = {"Hello", "Cruel", "World"};
const char* c_strs[] = {
"Hello",
"Cruel",
"World"
};
// Character literals stored as int32_t are printed as numbers (72, 67, 87).
int32_t i32s[] = {'H', 'C', 'W'};
uint64_t ui64s[] = {12345678910LL, 10987654321LL};
EXPECT_EQ(absl::StrCat(), "");
// Bools print as 0 / 1.
result = absl::StrCat(false, true, 2, 3);
EXPECT_EQ(result, "0123");
result = absl::StrCat(-1);
EXPECT_EQ(result, "-1");
result = absl::StrCat(absl::SixDigits(0.5));
EXPECT_EQ(result, "0.5");
result = absl::StrCat(strs[1], pieces[2]);
EXPECT_EQ(result, "CruelWorld");
result = absl::StrCat(stdstrs[1], " ", stdstrs[2]);
EXPECT_EQ(result, "std::Cruel std::World");
result = absl::StrCat(strs[0], ", ", pieces[2]);
EXPECT_EQ(result, "Hello, World");
result = absl::StrCat(strs[0], ", ", strs[1], " ", strs[2], "!");
EXPECT_EQ(result, "Hello, Cruel World!");
result = absl::StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]);
EXPECT_EQ(result, "Hello, Cruel World");
result = absl::StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
EXPECT_EQ(result, "Hello, Cruel World");
result = absl::StrCat("ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
EXPECT_EQ(result, "ASCII 72, 67 87!");
result = absl::StrCat(ui64s[0], ", ", ui64s[1], "!");
EXPECT_EQ(result, "12345678910, 10987654321!");
std::string one =
"1"; // Actually, it's the size of this string that we want; a
// 64-bit build distinguishes between size_t and uint64_t,
// even though they're both unsigned 64-bit values.
// `&result[2] - &result[0]` supplies a ptrdiff_t argument with value 2.
result = absl::StrCat("And a ", one.size(), " and a ",
&result[2] - &result[0], " and a ", one, " 2 3 4", "!");
EXPECT_EQ(result, "And a 1 and a 2 and a 1 2 3 4!");
// result = absl::StrCat("Single chars won't compile", '!');
// result = absl::StrCat("Neither will nullptrs", nullptr);
result =
absl::StrCat("To output a char by ASCII/numeric value, use +: ", '!' + 0);
EXPECT_EQ(result, "To output a char by ASCII/numeric value, use +: 33");
// SixDigits keeps six significant digits: 100000.5 prints as 100000 and
// 100001.5 prints as 100002.
float f = 100000.5;
result = absl::StrCat("A hundred K and a half is ", absl::SixDigits(f));
EXPECT_EQ(result, "A hundred K and a half is 100000");
f = 100001.5;
result =
absl::StrCat("A hundred K and one and a half is ", absl::SixDigits(f));
EXPECT_EQ(result, "A hundred K and one and a half is 100002");
// A large magnitude is printed in exponent notation.
double d = 100000.5;
d *= d;
result =
absl::StrCat("A hundred K and a half squared is ", absl::SixDigits(d));
EXPECT_EQ(result, "A hundred K and a half squared is 1.00001e+10");
// Nine integer arguments of increasing width.
result = absl::StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888,
999999999);
EXPECT_EQ(result, "12333444455555666666777777788888888999999999");
}
// Concatenating only empty strings must give an empty string for every
// overload arity from one to five arguments.
TEST(StrCat, CornerCases) {
  EXPECT_EQ(absl::StrCat(""), "");  // NOLINT
  EXPECT_EQ(absl::StrCat("", ""), "");
  EXPECT_EQ(absl::StrCat("", "", ""), "");
  EXPECT_EQ(absl::StrCat("", "", "", ""), "");
  EXPECT_EQ(absl::StrCat("", "", "", "", ""), "");
}
// A minimal allocator that uses malloc().
//
// It carries no state, so any two instances (of any value type) compare
// equal and memory obtained from one may be released through another.
template <typename T>
struct Mallocator {
  using value_type = T;
  using size_type = size_t;
  using difference_type = ptrdiff_t;
  using pointer = T*;
  using const_pointer = const T*;
  using reference = T&;
  using const_reference = const T&;

  // Maps this allocator onto a different value type.
  template <typename U>
  struct rebind {
    using other = Mallocator<U>;
  };

  Mallocator() = default;
  // Converting constructor used when rebinding; there is nothing to copy.
  template <class U>
  Mallocator(const Mallocator<U>&) {}  // NOLINT(runtime/explicit)

  // Largest element count whose byte size still fits in size_type.
  size_type max_size() const {
    const size_t largest = std::numeric_limits<size_type>::max();
    return largest / sizeof(value_type);
  }

  // Obtains raw storage for `n` objects of type T straight from malloc().
  T* allocate(size_t n) {
    void* const raw = std::malloc(n * sizeof(T));
    return static_cast<T*>(raw);
  }

  // Returns storage to free(); the element count is not needed.
  void deallocate(T* p, size_t) { std::free(p); }
};

// All Mallocators are interchangeable.
template <typename T, typename U>
bool operator==(const Mallocator<T>&, const Mallocator<U>&) {
  return true;
}

template <typename T, typename U>
bool operator!=(const Mallocator<T>&, const Mallocator<U>&) {
  return false;
}
// StrCat must accept std::basic_string instantiations that use a non-default
// allocator; the result is still a plain std::string.
TEST(StrCat, CustomAllocator) {
using mstring =
std::basic_string<char, std::char_traits<char>, Mallocator<char>>;
const mstring str1("PARACHUTE OFF A BLIMP INTO MOSCONE!!");
const mstring str2("Read this book about coffee tables");
std::string result = absl::StrCat(str1, str2);
EXPECT_EQ(result,
"PARACHUTE OFF A BLIMP INTO MOSCONE!!"
"Read this book about coffee tables");
}
// Calls StrCat with every argument count from 10 to 26, then with a much
// longer list, checking the concatenated result each time. All of these take
// the variadic (5 or more arguments) overload.
TEST(StrCat, MaxArgs) {
std::string result;
// Test 10 up to 26 arguments, the old maximum
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a");
EXPECT_EQ(result, "123456789a");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b");
EXPECT_EQ(result, "123456789ab");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c");
EXPECT_EQ(result, "123456789abc");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d");
EXPECT_EQ(result, "123456789abcd");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e");
EXPECT_EQ(result, "123456789abcde");
result =
absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f");
EXPECT_EQ(result, "123456789abcdef");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g");
EXPECT_EQ(result, "123456789abcdefg");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h");
EXPECT_EQ(result, "123456789abcdefgh");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i");
EXPECT_EQ(result, "123456789abcdefghi");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j");
EXPECT_EQ(result, "123456789abcdefghij");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k");
EXPECT_EQ(result, "123456789abcdefghijk");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l");
EXPECT_EQ(result, "123456789abcdefghijkl");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m");
EXPECT_EQ(result, "123456789abcdefghijklm");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n");
EXPECT_EQ(result, "123456789abcdefghijklmn");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n", "o");
EXPECT_EQ(result, "123456789abcdefghijklmno");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n", "o", "p");
EXPECT_EQ(result, "123456789abcdefghijklmnop");
result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
"g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q");
EXPECT_EQ(result, "123456789abcdefghijklmnopq");
// No limit thanks to C++11's variadic templates
// (ten integers followed by all 52 ASCII letters).
result = absl::StrCat(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
"x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L",
"M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z");
EXPECT_EQ(result,
"12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
}
// Walks absl::StrAppend() through the argument kinds it accepts: std::string,
// absl::string_view, C strings, 32- and 64-bit integers, size_t / ptrdiff_t
// values, and long variadic lists. Before every call the current length of the
// destination is remembered so that only the newly appended tail is compared.
TEST(StrAppend, Basics) {
  std::string out = "existing text";

  std::string words[] = {"Hello", "Cruel", "World"};
  std::string std_words[] = {"std::Hello", "std::Cruel", "std::World"};
  absl::string_view views[] = {"Hello", "Cruel", "World"};
  const char* c_words[] = {"Hello", "Cruel", "World"};
  int32_t letter_codes[] = {'H', 'C', 'W'};
  uint64_t wide_values[] = {12345678910LL, 10987654321LL};

  // No pieces at all: the destination must keep its length.
  std::string::size_type mark = out.size();
  absl::StrAppend(&out);
  EXPECT_EQ(out.size(), mark);

  // A single std::string.
  mark = out.size();
  absl::StrAppend(&out, words[0]);
  EXPECT_EQ(out.substr(mark), "Hello");

  // std::string followed by string_view.
  mark = out.size();
  absl::StrAppend(&out, words[1], views[2]);
  EXPECT_EQ(out.substr(mark), "CruelWorld");

  // std::string, literal, string_view.
  mark = out.size();
  absl::StrAppend(&out, std_words[0], ", ", views[2]);
  EXPECT_EQ(out.substr(mark), "std::Hello, World");

  // Six pieces mixing strings and literals.
  mark = out.size();
  absl::StrAppend(&out, words[0], ", ", std_words[1], " ", words[2], "!");
  EXPECT_EQ(out.substr(mark), "Hello, std::Cruel World!");

  // string_view pieces.
  mark = out.size();
  absl::StrAppend(&out, views[0], ", ", views[1], " ", views[2]);
  EXPECT_EQ(out.substr(mark), "Hello, Cruel World");

  // const char* pieces.
  mark = out.size();
  absl::StrAppend(&out, c_words[0], ", ", c_words[1], " ", c_words[2]);
  EXPECT_EQ(out.substr(mark), "Hello, Cruel World");

  // int32_t values are rendered as decimal numbers, not as characters.
  mark = out.size();
  absl::StrAppend(&out, "ASCII ", letter_codes[0], ", ", letter_codes[1], " ",
                  letter_codes[2], "!");
  EXPECT_EQ(out.substr(mark), "ASCII 72, 67 87!");

  // uint64_t values.
  mark = out.size();
  absl::StrAppend(&out, wide_values[0], ", ", wide_values[1], "!");
  EXPECT_EQ(out.substr(mark), "12345678910, 10987654321!");

  // What matters below is one.size(): a 64-bit build treats size_t and
  // uint64_t as distinct types even though both are unsigned 64-bit values.
  // The pointer difference contributes a ptrdiff_t argument as well.
  std::string one = "1";
  mark = out.size();
  absl::StrAppend(&out, "And a ", one.size(), " and a ", &out[2] - &out[0],
                  " and a ", one, " 2 3 4", "!");
  EXPECT_EQ(out.substr(mark), "And a 1 and a 2 and a 1 2 3 4!");

  // Intentionally not compiled:
  // out = absl::StrCat("Single chars won't compile", '!');
  // out = absl::StrCat("Neither will nullptrs", nullptr);

  // Promoting the char to int ('!' + 0) appends its numeric value.
  mark = out.size();
  absl::StrAppend(&out,
                  "To output a char by ASCII/numeric value, use +: ", '!' + 0);
  EXPECT_EQ(out.substr(mark),
            "To output a char by ASCII/numeric value, use +: 33");

  // Nine arguments, which used to be the maximum.
  mark = out.size();
  absl::StrAppend(&out, 1, 22, 333, 4444, 55555, 666666, 7777777, 88888888, 9);
  EXPECT_EQ(out.substr(mark), "1223334444555556666667777777888888889");

  // Variadic templates remove any limit on the argument count.
  mark = out.size();
  absl::StrAppend(
      &out, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,                              //
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",  //
      "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",  //
      "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",  //
      "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",  //
      "No limit thanks to C++11's variadic templates");
  EXPECT_EQ(out.substr(mark),
            "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "No limit thanks to C++11's variadic templates");
}
// std::vector<bool> hands out proxy objects instead of real bool references.
// StrCat must treat both vector<bool>::reference and
// vector<bool>::const_reference as if they were plain bool values.
TEST(StrCat, VectorBoolReferenceTypes) {
  std::vector<bool> bits;
  bits.push_back(true);
  bits.push_back(false);
  std::vector<bool> const& const_bits = bits;

  std::string joined =
      absl::StrCat(bits[0], bits[1], const_bits[0], const_bits[1]);  // NOLINT
  EXPECT_EQ(joined, "1010");
}
// Handing a null pointer to memcpy is undefined behavior. This test covers the
// code paths that receive empty strings backed by a null data pointer.
TEST(StrCat, AvoidsMemcpyWithNullptr) {
  // Two-argument StrCat.
  EXPECT_EQ(absl::StrCat(42, absl::string_view{}), "42");

  // Enough arguments to go through the CatPieces code.
  EXPECT_EQ(absl::StrCat(1, 2, 3, 4, 5, absl::string_view{}), "12345");

  // Enough arguments to go through AppendPieces.
  std::string appended;
  absl::StrAppend(&appended, 1, 2, 3, 4, 5, absl::string_view{});
  EXPECT_EQ(appended, "12345");
}
#ifdef GTEST_HAS_DEATH_TEST
// Appending a string to itself, or appending a pointer into its own buffer,
// is expected to die with an assertion failure in debug builds.
TEST(StrAppend, Death) {
  std::string self_ref = "self";
  // The message starts with "assertion" on Linux and "Assertion" on macOS,
  // and ChromiumOS prints "Assertion ... failed"; the pattern skips the first
  // letter so that all of them match.
  ABSL_EXPECT_DEBUG_DEATH(absl::StrAppend(&self_ref, self_ref.c_str() + 1),
                          "ssertion.*failed");
  ABSL_EXPECT_DEBUG_DEATH(absl::StrAppend(&self_ref, self_ref),
                          "ssertion.*failed");
}
#endif  // GTEST_HAS_DEATH_TEST
// Appending one to five empty pieces to an empty string must leave it empty.
// Each call uses a different argument count.
TEST(StrAppend, CornerCases) {
  std::string out;

  absl::StrAppend(&out, "");
  EXPECT_EQ(out, "");

  absl::StrAppend(&out, "", "");
  EXPECT_EQ(out, "");

  absl::StrAppend(&out, "", "", "");
  EXPECT_EQ(out, "");

  absl::StrAppend(&out, "", "", "", "");
  EXPECT_EQ(out, "");

  absl::StrAppend(&out, "", "", "", "", "");
  EXPECT_EQ(out, "");
}
// Appending one to five empty pieces to a non-empty string must not change
// it. Runs once with a short string and once with a longer one.
TEST(StrAppend, CornerCasesNonEmptyAppend) {
  for (std::string text : {"hello", "a string too long to fit in the SSO"}) {
    const std::string untouched = text;

    absl::StrAppend(&text, "");
    EXPECT_EQ(text, untouched);

    absl::StrAppend(&text, "", "");
    EXPECT_EQ(text, untouched);

    absl::StrAppend(&text, "", "", "");
    EXPECT_EQ(text, untouched);

    absl::StrAppend(&text, "", "", "", "");
    EXPECT_EQ(text, untouched);

    absl::StrAppend(&text, "", "", "", "", "");
    EXPECT_EQ(text, untouched);
  }
}
// Compares absl::Hex(v, ...) rendered through StrCat against snprintf().
//
//   nopad_format    - printf format taking only the value.
//   zeropad_format  - printf format taking a `*` width, then the value.
//   spacepad_format - printf format taking a `*` width, then the value.
//
// Every PadSpec from kZeroPad2 to kZeroPad20 and from kSpacePad2 to
// kSpacePad20 is tried; the printf width is derived from the spec's offset
// within its range, starting at 2.
template <typename IntType>
void CheckHex(IntType v, const char* nopad_format, const char* zeropad_format,
              const char* spacepad_format) {
  char expected[256];

  // No padding.
  std::string unpadded = absl::StrCat(absl::Hex(v, absl::kNoPad));
  snprintf(expected, sizeof(expected), nopad_format, v);
  EXPECT_EQ(expected, unpadded) << " decimal value " << v;

  // Zero padding.
  for (int spec = absl::kZeroPad2; spec <= absl::kZeroPad20; ++spec) {
    const int width = spec - absl::kZeroPad2 + 2;
    std::string zero_padded =
        absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec)));
    snprintf(expected, sizeof(expected), zeropad_format, width, v);
    EXPECT_EQ(expected, zero_padded) << " decimal value " << v;
  }

  // Space padding.
  for (int spec = absl::kSpacePad2; spec <= absl::kSpacePad20; ++spec) {
    const int width = spec - absl::kSpacePad2 + 2;
    std::string space_padded =
        absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec)));
    snprintf(expected, sizeof(expected), spacepad_format, width, v);
    EXPECT_EQ(expected, space_padded) << " decimal value " << v;
  }
}
// Compares absl::Dec(v, ...) rendered through StrCat against snprintf().
//
//   nopad_format    - printf format taking only the value.
//   zeropad_format  - printf format taking a `*` width, then the value.
//   spacepad_format - printf format taking a `*` width, then the value.
//
// Every PadSpec from kZeroPad2 to kZeroPad20 and from kSpacePad2 to
// kSpacePad20 is tried. On a mismatch the failure message also reports the
// format string and the requested number of digits.
template <typename IntType>
void CheckDec(IntType v, const char* nopad_format, const char* zeropad_format,
              const char* spacepad_format) {
  char expected[256];

  // No padding.
  std::string unpadded = absl::StrCat(absl::Dec(v, absl::kNoPad));
  snprintf(expected, sizeof(expected), nopad_format, v);
  EXPECT_EQ(expected, unpadded) << " decimal value " << v;

  // Zero padding.
  for (int spec = absl::kZeroPad2; spec <= absl::kZeroPad20; ++spec) {
    const int digits = spec - absl::kZeroPad2 + 2;
    std::string zero_padded =
        absl::StrCat(absl::Dec(v, static_cast<absl::PadSpec>(spec)));
    snprintf(expected, sizeof(expected), zeropad_format, digits, v);
    EXPECT_EQ(expected, zero_padded)
        << " decimal value " << v << " format '" << zeropad_format
        << "' digits " << digits;
  }

  // Space padding.
  for (int spec = absl::kSpacePad2; spec <= absl::kSpacePad20; ++spec) {
    const int digits = spec - absl::kSpacePad2 + 2;
    std::string space_padded =
        absl::StrCat(absl::Dec(v, static_cast<absl::PadSpec>(spec)));
    snprintf(expected, sizeof(expected), spacepad_format, digits, v);
    EXPECT_EQ(expected, space_padded)
        << " decimal value " << v << " format '" << spacepad_format
        << "' digits " << digits;
  }
}
// Runs the hex and decimal checks on `v` viewed as unsigned long long, as
// long long, and, when pointers are as wide as `v`, as a void*.
void CheckHexDec64(uint64_t v) {
  unsigned long long as_unsigned = v;  // NOLINT(runtime/int)
  CheckHex(as_unsigned, "%llx", "%0*llx", "%*llx");
  CheckDec(as_unsigned, "%llu", "%0*llu", "%*llu");

  long long as_signed = static_cast<long long>(as_unsigned);  // NOLINT(runtime/int)
  CheckDec(as_signed, "%lld", "%0*lld", "%*lld");

  // The pointer variant only applies when a pointer has the same size as v.
  if (sizeof(v) != sizeof(&v)) {
    return;
  }
  auto address = static_cast<uintptr_t>(v);
  void* as_pointer = reinterpret_cast<void*>(address);
  CheckHex(as_pointer, "%llx", "%0*llx", "%*llx");
}
// Runs the hex and decimal checks on a 32-bit value viewed as unsigned, as
// int32_t, and, when pointers are as wide as int32_t, as a void*.
void CheckHexDec32(uint32_t uv) {
  CheckHex(uv, "%x", "%0*x", "%*x");
  CheckDec(uv, "%u", "%0*u", "%*u");

  int32_t v = static_cast<int32_t>(uv);
  CheckDec(v, "%d", "%0*d", "%*d");

  // The pointer variant only applies when a pointer has the same size as v.
  if (sizeof(v) != sizeof(&v)) {
    return;
  }
  auto address = static_cast<uintptr_t>(v);
  void* as_pointer = reinterpret_cast<void*>(address);
  CheckHex(as_pointer, "%x", "%0*x", "%*x");
}
// Checks `v` as a 64-bit value, then checks its low 32 bits as a 32-bit value.
void CheckAll(uint64_t v) {
  CheckHexDec64(v);
  const uint32_t low_bits = static_cast<uint32_t>(v);
  CheckHexDec32(low_bits);
}
// Feeds CheckAll() with every small integer and a list of boundary values,
// then spot-checks absl::Hex() on negative 8- and 16-bit integers.
void TestFastPrints() {
  // Test all small ints; there aren't many and they're common.
  for (int small = 0; small < 10000; ++small) {
    CheckAll(small);
  }

  // Boundary and digit-coverage values, visited in this order. Signed minima
  // are converted to uint64_t explicitly, which is the same conversion the
  // uint64_t parameter of CheckAll() would otherwise apply.
  const uint64_t kInteresting[] = {
      std::numeric_limits<uint64_t>::max(),
      std::numeric_limits<uint64_t>::max() - 1,
      static_cast<uint64_t>(std::numeric_limits<int64_t>::min()),
      static_cast<uint64_t>(std::numeric_limits<int64_t>::min() + 1),
      std::numeric_limits<uint32_t>::max(),
      std::numeric_limits<uint32_t>::max() - 1,
      static_cast<uint64_t>(std::numeric_limits<int32_t>::min()),
      static_cast<uint64_t>(std::numeric_limits<int32_t>::min() + 1),
      999999999,               // fits in 32 bits
      1000000000,              // fits in 32 bits
      9999999999,              // doesn't fit in 32 bits
      10000000000,             // doesn't fit in 32 bits
      999999999999999999,      // fits in signed 64-bit
      9999999999999999999u,    // fits in unsigned 64-bit, but not signed.
      1000000000000000000,     // fits in signed 64-bit
      10000000000000000000u,   // fits in unsigned 64-bit, but not signed.
      999999999876543210,      // check all decimal digits, signed
      9999999999876543210u,    // check all decimal digits, unsigned.
      0x123456789abcdef0,      // check all hex digits
      0x12345678,
  };
  for (const uint64_t value : kInteresting) {
    CheckAll(value);
  }

  // Hex of a negative narrow integer prints only that type's own width.
  int8_t minus_one_8bit = -1;
  EXPECT_EQ("ff", absl::StrCat(absl::Hex(minus_one_8bit)));

  int16_t minus_one_16bit = -1;
  EXPECT_EQ("ffff", absl::StrCat(absl::Hex(minus_one_16bit)));
}
// gtest entry point that runs the TestFastPrints() checks.
TEST(Numbers, TestFunctionsMovedOverFromNumbersMain) {
  TestFastPrints();
}
} // namespace

View file

@ -0,0 +1,543 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_format.h
// -----------------------------------------------------------------------------
//
// The `str_format` library is a typesafe replacement for the family of
// `printf()` string formatting routines within the `<cstdio>` standard library
// header. Like the `printf` family, the `str_format` uses a "format string" to
// perform argument substitutions based on types. See the `FormatSpec` section
// below for format string documentation.
//
// Example:
//
// std::string s = absl::StrFormat(
// "%s %s You have $%d!", "Hello", name, dollars);
//
// The library consists of the following basic utilities:
//
// * `absl::StrFormat()`, a type-safe replacement for `std::sprintf()`, to
// write a format string to a `string` value.
// * `absl::StrAppendFormat()` to append a format string to a `string`
// * `absl::StreamFormat()` to more efficiently write a format string to a
// stream, such as `std::cout`.
// * `absl::PrintF()`, `absl::FPrintF()` and `absl::SNPrintF()` as
// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`.
//
// Note: a version of `std::sprintf()` is not supported as it is
// generally unsafe due to buffer overflows.
//
// Additionally, you can provide a format string (and its associated arguments)
// using one of the following abstractions:
//
// * A `FormatSpec` class template fully encapsulates a format string and its
// type arguments and is usually provided to `str_format` functions as a
// variadic argument of type `FormatSpec<Args...>`. The `FormatSpec<Args...>`
// template is evaluated at compile-time, providing type safety.
// * A `ParsedFormat` instance, which encapsulates a specific, pre-compiled
// format string for a specific set of type(s), and which can be passed
// between API boundaries. (The `FormatSpec` type should not be used
// directly except as an argument type for wrapper functions.)
//
// The `str_format` library provides the ability to output its format strings to
// arbitrary sink types:
//
// * A generic `Format()` function to write outputs to arbitrary sink types,
// which must implement a `FormatRawSink` interface.
//
// * A `FormatUntyped()` function that is similar to `Format()` except it is
// loosely typed. `FormatUntyped()` is not a template and does not perform
// any compile-time checking of the format string; instead, it returns a
// boolean from a runtime check.
#ifndef ABSL_STRINGS_STR_FORMAT_H_
#define ABSL_STRINGS_STR_FORMAT_H_
#include <cstdio>
#include <string>
#include "absl/strings/internal/str_format/arg.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/bind.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/checker.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/extension.h" // IWYU pragma: export
#include "absl/strings/internal/str_format/parser.h" // IWYU pragma: export
namespace absl {
ABSL_NAMESPACE_BEGIN
// UntypedFormatSpec
//
// A type-erased class that can be used directly within untyped API entry
// points. An `UntypedFormatSpec` is specifically used as an argument to
// `FormatUntyped()`.
//
// Example:
//
// absl::UntypedFormatSpec format("%d");
// std::string out;
// CHECK(absl::FormatUntyped(&out, format, {absl::FormatArg(1)}));
class UntypedFormatSpec {
 public:
  // Wraps the given format text.
  explicit UntypedFormatSpec(string_view s) : spec_(s) {}

  // Neither default-constructible nor copyable.
  UntypedFormatSpec() = delete;
  UntypedFormatSpec(const UntypedFormatSpec&) = delete;
  UntypedFormatSpec& operator=(const UntypedFormatSpec&) = delete;

 protected:
  // Lets derived classes wrap an already parsed format.
  explicit UntypedFormatSpec(const str_format_internal::ParsedFormatBase* pc)
      : spec_(pc) {}

 private:
  // The implementation type is a friend so it can reach spec_.
  friend str_format_internal::UntypedFormatSpecImpl;

  str_format_internal::UntypedFormatSpecImpl spec_;
};
// FormatStreamed()
//
// Takes a streamable argument and returns an object that can print it
// with '%s'. Allows printing of types that have an `operator<<` but no
// intrinsic type support within `StrFormat()` itself.
//
// Example:
//
// absl::StrFormat("%s", absl::FormatStreamed(obj));
template <typename T>
str_format_internal::StreamedWrapper<T> FormatStreamed(const T& v) {
  // The wrapper is built from a reference to `v`.
  using Wrapper = str_format_internal::StreamedWrapper<T>;
  return Wrapper(v);
}
// FormatCountCapture
//
// This class provides a way to safely wrap `StrFormat()` captures of `%n`
// conversions, which denote the number of characters written by a formatting
// operation to this point, into an integer value.
//
// This wrapper is designed to allow safe usage of `%n` within `StrFormat()`; in
// the `printf()` family of functions, `%n` is not safe to use, as the `int *`
// buffer can be used to capture arbitrary data.
//
// Example:
//
// int n = 0;
// std::string s = absl::StrFormat("%s%d%n", "hello", 123,
// absl::FormatCountCapture(&n));
// EXPECT_EQ(8, n);
class FormatCountCapture {
 public:
  // Stores `p`, the destination for the captured character count.
  explicit FormatCountCapture(int* p) : p_(p) {}

 private:
  // FormatConvertImpl() for this class is defined through
  // FormatCountCaptureHelper, which therefore needs access to p_.
  friend struct str_format_internal::FormatCountCaptureHelper;

  // Exists only to silence a false positive from -Wunused-private-field:
  // p_ is actually used by the templated function of the friend
  // FormatCountCaptureHelper class.
  int* Unused() { return p_; }

  int* p_;
};
// FormatSpec
//
// The `FormatSpec` type defines the makeup of a format string within the
// `str_format` library. It is a variadic class template that is evaluated at
// compile-time, according to the format string and arguments that are passed to
// it.
//
// You should not need to manipulate this type directly. You should only name it
// if you are writing wrapper functions which accept format arguments that will
// be provided unmodified to functions in this library. Such a wrapper function
// might be a class method that provides format arguments and/or internally uses
// the result of formatting.
//
// For a `FormatSpec` to be valid at compile-time, it must be provided as
// either:
//
// * A `constexpr` literal or `absl::string_view`, which is how it is most
// often used.
// * A `ParsedFormat` instantiation, which ensures the format string is
// valid before use. (See below.)
//
// Example:
//
// // Provided as a string literal.
// absl::StrFormat("Welcome to %s, Number %d!", "The Village", 6);
//
// // Provided as a constexpr absl::string_view.
// constexpr absl::string_view formatString = "Welcome to %s, Number %d!";
// absl::StrFormat(formatString, "The Village", 6);
//
// // Provided as a pre-compiled ParsedFormat object.
// // Note that this example is useful only for illustration purposes.
// absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!");
// absl::StrFormat(formatString, "TheVillage", 6);
//
// A format string generally follows the POSIX syntax as used within the POSIX
// `printf` specification.
//
// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html.)
//
// In specific, the `FormatSpec` supports the following type specifiers:
// * `c` for characters
// * `s` for strings
// * `d` or `i` for integers
// * `o` for unsigned integer conversions into octal
// * `x` or `X` for unsigned integer conversions into hex
// * `u` for unsigned integers
// * `f` or `F` for floating point values into decimal notation
// * `e` or `E` for floating point values into exponential notation
// * `a` or `A` for floating point values into hex exponential notation
// * `g` or `G` for floating point values into decimal or exponential
// notation based on their precision
// * `p` for pointer address values
// * `n` for the special case of writing out the number of characters
// written to this point. The resulting value must be captured within an
// `absl::FormatCountCapture` type.
//
// Implementation-defined behavior:
// * A null pointer provided to "%s" or "%p" is output as "(nil)".
// * A non-null pointer provided to "%p" is output in hex as if by %#x or
// %#lx.
//
// NOTE: `o`, `x`/`X`, and `u` will convert signed values to their unsigned
// counterpart before formatting.
//
// Examples:
// "%c", 'a' -> "a"
// "%c", 32 -> " "
// "%s", "C" -> "C"
// "%s", std::string("C++") -> "C++"
// "%d", -10 -> "-10"
// "%o", 10 -> "12"
// "%x", 16 -> "10"
// "%f", 123456789 -> "123456789.000000"
// "%e", .01 -> "1.000000e-02"
// "%a", -3.0 -> "-0x1.8p+1"
// "%g", .01 -> "0.01"
// "%p", (void*)&value -> "0x7ffdeb6ad2a4"
//
// int n = 0;
// std::string s = absl::StrFormat(
// "%s%d%n", "hello", 123, absl::FormatCountCapture(&n));
// EXPECT_EQ(8, n);
//
// The `FormatSpec` intrinsically supports all of these fundamental C++ types:
//
// * Characters: `char`, `signed char`, `unsigned char`
// * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`,
// `unsigned long`, `long long`, `unsigned long long`
// * Floating-point: `float`, `double`, `long double`
//
// However, in the `str_format` library, a format conversion specifies a broader
// C++ conceptual category instead of an exact type. For example, `%s` binds to
// any string-like argument, so `std::string`, `absl::string_view`, and
// `const char*` are all accepted. Likewise, `%d` accepts any integer-like
// argument, etc.
// Implementation note: each type in `Args` is passed through
// `str_format_internal::ArgumentToConv<>()`, and the resulting values are the
// template arguments of `str_format_internal::FormatSpecTemplate`.
template <typename... Args>
using FormatSpec = str_format_internal::FormatSpecTemplate<
str_format_internal::ArgumentToConv<Args>()...>;
// ParsedFormat
//
// A `ParsedFormat` is a class template representing a preparsed `FormatSpec`,
// with template arguments specifying the conversion characters used within the
// format string. Such characters must be valid format type specifiers, and
// these type specifiers are checked at compile-time.
//
// Instances of `ParsedFormat` can be created, copied, and reused to speed up
// formatting loops. A `ParsedFormat` may either be constructed statically, or
// dynamically through its `New()` factory function, which only constructs a
// runtime object if the format is valid at that time.
//
// Example:
//
// // Verified at compile time.
// absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!");
// absl::StrFormat(formatString, "TheVillage", 6);
//
// // Verified at runtime.
// auto format_runtime = absl::ParsedFormat<'d'>::New(format_string);
// if (format_runtime) {
// value = absl::StrFormat(*format_runtime, i);
// } else {
// ... error case ...
// }
// Implementation note: each conversion character in `Conv` is passed through
// `str_format_internal::ToFormatConversionCharSet()`, and the resulting values
// are the template arguments of `str_format_internal::ExtendedParsedFormat`.
template <char... Conv>
using ParsedFormat = str_format_internal::ExtendedParsedFormat<
absl::str_format_internal::ToFormatConversionCharSet(Conv)...>;
// StrFormat()
//
// Returns a `string` given a `printf()`-style format string and zero or more
// additional arguments. Use it as you would `sprintf()`. `StrFormat()` is the
// primary formatting function within the `str_format` library, and should be
// used in most cases where you need type-safe conversion of types into
// formatted strings.
//
// The format string generally consists of ordinary character data along with
// one or more format conversion specifiers (denoted by the `%` character).
// Ordinary character data is returned unchanged into the result string, while
// each conversion specification performs a type substitution from
// `StrFormat()`'s other arguments. See the comments for `FormatSpec` for full
// information on the makeup of this format string.
//
// Example:
//
// std::string s = absl::StrFormat(
// "Welcome to %s, Number %d!", "The Village", 6);
// EXPECT_EQ("Welcome to The Village, Number 6!", s);
//
// Returns an empty string in case of error.
template <typename... Args>
ABSL_MUST_USE_RESULT std::string StrFormat(const FormatSpec<Args...>& format,
                                           const Args&... args) {
  using SpecImpl = str_format_internal::UntypedFormatSpecImpl;
  using ArgImpl = str_format_internal::FormatArgImpl;
  // Unwrap the spec, type-erase every argument, and format into a new string.
  return str_format_internal::FormatPack(SpecImpl::Extract(format),
                                         {ArgImpl(args)...});
}
// StrAppendFormat()
//
// Appends to a `dst` string given a format string, and zero or more additional
// arguments, returning `*dst` as a convenience for chaining purposes. Appends
// nothing in case of error (but possibly alters its capacity).
//
// Example:
//
// std::string orig("For example PI is approximately ");
// std::cout << StrAppendFormat(&orig, "%12.6f", 3.14);
template <typename... Args>
std::string& StrAppendFormat(std::string* dst,
const FormatSpec<Args...>& format,
const Args&... args) {
return str_format_internal::AppendPack(
dst, str_format_internal::UntypedFormatSpecImpl::Extract(format),
{str_format_internal::FormatArgImpl(args)...});
}
// StreamFormat()
//
// Writes to an output stream given a format string and zero or more arguments,
// generally in a manner that is more efficient than streaming the result of
// `absl::StrFormat()`. The returned object must be streamed before the full
// expression ends.
//
// Example:
//
// std::cout << StreamFormat("%12.6f", 3.14);
template <typename... Args>
ABSL_MUST_USE_RESULT str_format_internal::Streamable StreamFormat(
    const FormatSpec<Args...>& format, const Args&... args) {
  using SpecImpl = str_format_internal::UntypedFormatSpecImpl;
  using ArgImpl = str_format_internal::FormatArgImpl;
  // Bundle the unwrapped spec and the type-erased arguments into a Streamable.
  return str_format_internal::Streamable(SpecImpl::Extract(format),
                                         {ArgImpl(args)...});
}
// PrintF()
//
// Writes to stdout given a format string and zero or more arguments. This
// function is functionally equivalent to `std::printf()` (and type-safe);
// prefer `absl::PrintF()` over `std::printf()`.
//
// Example:
//
// std::string_view s = "Ulaanbaatar";
// absl::PrintF("The capital of Mongolia is %s", s);
//
// Outputs: "The capital of Mongolia is Ulaanbaatar"
//
template <typename... Args>
int PrintF(const FormatSpec<Args...>& format, const Args&... args) {
return str_format_internal::FprintF(
stdout, str_format_internal::UntypedFormatSpecImpl::Extract(format),
{str_format_internal::FormatArgImpl(args)...});
}
// FPrintF()
//
// Writes to a file given a format string and zero or more arguments. This
// function is functionally equivalent to `std::fprintf()` (and type-safe);
// prefer `absl::FPrintF()` over `std::fprintf()`.
//
// Example:
//
// std::string_view s = "Ulaanbaatar";
// absl::FPrintF(stdout, "The capital of Mongolia is %s", s);
//
// Outputs: "The capital of Mongolia is Ulaanbaatar"
//
template <typename... Args>
int FPrintF(std::FILE* output, const FormatSpec<Args...>& format,
const Args&... args) {
return str_format_internal::FprintF(
output, str_format_internal::UntypedFormatSpecImpl::Extract(format),
{str_format_internal::FormatArgImpl(args)...});
}
// SNPrintF()
//
// Writes to a sized buffer given a format string and zero or more arguments.
// This function is functionally equivalent to `std::snprintf()` (and
// type-safe); prefer `absl::SNPrintF()` over `std::snprintf()`.
//
// In particular, a successful call to `absl::SNPrintF()` writes at most `size`
// bytes of the formatted output to `output`, including a NUL-terminator, and
// returns the number of bytes that would have been written if truncation did
// not occur. In the event of an error, a negative value is returned and `errno`
// is set.
//
// Example:
//
// std::string_view s = "Ulaanbaatar";
// char output[128];
// absl::SNPrintF(output, sizeof(output),
// "The capital of Mongolia is %s", s);
//
// Post-condition: output == "The capital of Mongolia is Ulaanbaatar"
//
template <typename... Args>
int SNPrintF(char* output, std::size_t size, const FormatSpec<Args...>& format,
const Args&... args) {
return str_format_internal::SnprintF(
output, size, str_format_internal::UntypedFormatSpecImpl::Extract(format),
{str_format_internal::FormatArgImpl(args)...});
}
// -----------------------------------------------------------------------------
// Custom Output Formatting Functions
// -----------------------------------------------------------------------------
// FormatRawSink
//
// FormatRawSink is a type erased wrapper around arbitrary sink objects
// specifically used as an argument to `Format()`.
//
// All the object has to do is define an overload of `AbslFormatFlush()` for the
// sink, usually by adding an ADL-based free function in the same namespace as
// the sink:
//
// void AbslFormatFlush(MySink* dest, absl::string_view part);
//
// where `dest` is the pointer passed to `absl::Format()`. The function should
// append `part` to `dest`.
//
// FormatRawSink does not own the passed sink object. The passed object must
// outlive the FormatRawSink.
class FormatRawSink {
 public:
  // Converting constructor, deliberately implicit (hence the NOLINT). It
  // participates in overload resolution only when
  // `str_format_internal::FormatRawSinkImpl` is constructible from `T*`, that
  // is, for types that provide the hook function described above.
  template <typename T,
            typename = typename std::enable_if<std::is_constructible<
                str_format_internal::FormatRawSinkImpl, T*>::value>::type>
  FormatRawSink(T* raw)  // NOLINT
      : sink_(raw) {}

 private:
  // The implementation type is a friend so it can reach sink_.
  friend str_format_internal::FormatRawSinkImpl;

  str_format_internal::FormatRawSinkImpl sink_;
};
// Format()
//
// Writes a formatted string to an arbitrary sink object (implementing the
// `absl::FormatRawSink` interface), using a format string and zero or more
// additional arguments.
//
// By default, `std::string`, `std::ostream`, and `absl::Cord` are supported as
// destination objects. If a `std::string` is used the formatted string is
// appended to it.
//
// `absl::Format()` is a generic version of `absl::StrAppendFormat()`, for
// custom sinks. The format string, like format strings for `StrFormat()`, is
// checked at compile-time.
//
// On failure, this function returns `false` and the state of the sink is
// unspecified.
template <typename... Args>
bool Format(FormatRawSink raw_sink, const FormatSpec<Args...>& format,
            const Args&... args) {
  using SinkImpl = str_format_internal::FormatRawSinkImpl;
  using SpecImpl = str_format_internal::UntypedFormatSpecImpl;
  using ArgImpl = str_format_internal::FormatArgImpl;
  // Unwrap the sink and the spec, type-erase every argument, and hand them to
  // the internal untyped formatter.
  return str_format_internal::FormatUntyped(SinkImpl::Extract(raw_sink),
                                            SpecImpl::Extract(format),
                                            {ArgImpl(args)...});
}
// FormatArg
//
// A type-erased handle to a format argument specifically used as an argument to
// `FormatUntyped()`. You may construct `FormatArg` by passing
// reference-to-const of any printable type. `FormatArg` is both copyable and
// assignable. The source data must outlive the `FormatArg` instance. See
// example below.
//
// Public name for the internal argument wrapper type.
using FormatArg = str_format_internal::FormatArgImpl;
// FormatUntyped()
//
// Writes a formatted string to an arbitrary sink object (implementing the
// `absl::FormatRawSink` interface), using an `UntypedFormatSpec` and zero or
// more additional arguments.
//
// This function acts as the most generic formatting function in the
// `str_format` library. The caller provides a raw sink, an unchecked format
// string, and (usually) a runtime specified list of arguments; no compile-time
// checking of formatting is performed within this function. As a result, a
// caller should check the return value to verify that no error occurred.
// On failure, this function returns `false` and the state of the sink is
// unspecified.
//
// The arguments are provided in an `absl::Span<const absl::FormatArg>`.
// Each `absl::FormatArg` object binds to a single argument and keeps a
// reference to it. The values used to create the `FormatArg` objects must
// outlive this function call. (See `str_format_arg.h` for information on
// the `FormatArg` class.)
//
// Example:
//
// std::optional<std::string> FormatDynamic(
// const std::string& in_format,
// const vector<std::string>& in_args) {
// std::string out;
// std::vector<absl::FormatArg> args;
// for (const auto& v : in_args) {
// // It is important that 'v' is a reference to the objects in in_args.
// // The values we pass to FormatArg must outlive the call to
// // FormatUntyped.
// args.emplace_back(v);
// }
// absl::UntypedFormatSpec format(in_format);
// if (!absl::FormatUntyped(&out, format, args)) {
// return std::nullopt;
// }
// return std::move(out);
// }
//
ABSL_MUST_USE_RESULT inline bool FormatUntyped(
    FormatRawSink raw_sink, const UntypedFormatSpec& format,
    absl::Span<const FormatArg> args) {
  using SinkImpl = str_format_internal::FormatRawSinkImpl;
  using SpecImpl = str_format_internal::UntypedFormatSpecImpl;
  // Unwrap the sink and the spec, then forward the argument span unchanged.
  return str_format_internal::FormatUntyped(SinkImpl::Extract(raw_sink),
                                            SpecImpl::Extract(format), args);
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_STR_FORMAT_H_

View file

@ -0,0 +1,685 @@
#include "absl/strings/str_format.h"
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Internal names pulled into this test's anonymous namespace for brevity.
using str_format_internal::FormatArgImpl;
using str_format_internal::FormatConversionCharSetInternal;
// The entry-point tests use the plain gtest base fixture under this name.
using FormatEntryPointTest = ::testing::Test;
// Format() into a std::string sink, first with a literal format string and
// then with a ParsedFormat.
TEST_F(FormatEntryPointTest, Format) {
  std::string out;
  EXPECT_TRUE(Format(&out, "A format %d", 123));
  EXPECT_EQ("A format 123", out);

  // Start from an empty sink again for the preparsed variant.
  out.clear();
  ParsedFormat<'d'> parsed("A format %d");
  EXPECT_TRUE(Format(&out, parsed, 123));
  EXPECT_EQ("A format 123", out);
}
// FormatUntyped() and the internal FormatPack() must both produce what
// snprintf() produces for a set of format strings applied to the integer 123.
TEST_F(FormatEntryPointTest, UntypedFormat) {
  constexpr const char* kFormats[] = {
      "",
      "a",
      "%80d",
#if !defined(_MSC_VER) && !defined(__ANDROID__) && !defined(__native_client__)
      // MSVC, NaCL and Android don't support positional syntax, so this
      // entry is only compiled elsewhere.
      "complicated multipart %% %1$d format %1$0999d",
#endif  // !_MSC_VER && !__ANDROID__ && !__native_client__
  };

  for (const char* fmt : kFormats) {
    std::string via_sink;
    int value = 123;
    FormatArgImpl bound_value(value);
    absl::Span<const FormatArgImpl> arg_span(&bound_value, 1);
    UntypedFormatSpec spec(fmt);

    EXPECT_TRUE(FormatUntyped(&via_sink, spec, arg_span));

    // Reference output from the C library.
    char reference[4096]{};
    snprintf(reference, sizeof(reference), fmt, 123);

    EXPECT_EQ(
        str_format_internal::FormatPack(
            str_format_internal::UntypedFormatSpecImpl::Extract(spec),
            arg_span),
        reference);
    EXPECT_EQ(via_sink, reference);
  }

  // The internal version works with a preparsed format.
  ParsedFormat<'d'> parsed("A format %d");
  int other_value = 345;
  FormatArg other_arg(other_value);
  std::string preparsed_out;
  EXPECT_TRUE(str_format_internal::FormatUntyped(
      &preparsed_out, str_format_internal::UntypedFormatSpecImpl(&parsed),
      {&other_arg, 1}));
  EXPECT_EQ("A format 345", preparsed_out);
}
// StrFormat() accepts the format as a literal or as a constexpr string_view.
TEST_F(FormatEntryPointTest, StringFormat) {
  const std::string from_literal = StrFormat("%d", 123);
  EXPECT_EQ("123", from_literal);

  constexpr absl::string_view kView("=%d=", 4);
  const std::string from_view = StrFormat(kView, 123);
  EXPECT_EQ("=123=", from_view);
}
// StrAppendFormat() returns a reference to the string it was asked to append
// to.
TEST_F(FormatEntryPointTest, AppendFormat) {
  std::string target;
  std::string& returned = StrAppendFormat(&target, "%d", 123);
  EXPECT_EQ(&target, &returned);  // must alias the destination object
  EXPECT_EQ("123", returned);
}
// When the argument does not match the conversion, AppendPack() is expected
// to hand back the destination with its original contents.
TEST_F(FormatEntryPointTest, AppendFormatFail) {
  std::string dest = "orig";
  UntypedFormatSpec spec(" more %d");
  FormatArgImpl mismatched_arg("not an int");  // a string offered to %d
  EXPECT_EQ(
      "orig",
      str_format_internal::AppendPack(
          &dest, str_format_internal::UntypedFormatSpecImpl::Extract(spec),
          {&mismatched_arg, 1}));
}
// Positional conversions can select the last argument of long argument lists.
TEST_F(FormatEntryPointTest, ManyArgs) {
  const std::string last_of_24 =
      StrFormat("%24$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                17, 18, 19, 20, 21, 22, 23, 24);
  EXPECT_EQ("24", last_of_24);

  const std::string last_of_60 =
      StrFormat("%60$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60);
  EXPECT_EQ("60", last_of_60);
}
// StrFormat() accepts a ParsedFormat as a named object, as a temporary, and
// when built from a string_view.
TEST_F(FormatEntryPointTest, Preparsed) {
  ParsedFormat<'d'> named_format("%d");
  const std::string from_lvalue = StrFormat(named_format, 123);
  EXPECT_EQ("123", from_lvalue);

  // rvalue ok?
  const std::string from_rvalue = StrFormat(ParsedFormat<'d'>("%d"), 123);
  EXPECT_EQ("123", from_rvalue);

  constexpr absl::string_view kView("=%d=", 4);
  const std::string from_view = StrFormat(ParsedFormat<'d'>(kView), 123);
  EXPECT_EQ("=123=", from_view);
}
// %n with FormatCountCapture stores the number of characters produced so far.
TEST_F(FormatEntryPointTest, FormatCountCapture) {
  int captured = 0;

  const std::string capture_only =
      StrFormat("%n", FormatCountCapture(&captured));
  EXPECT_EQ("", capture_only);
  EXPECT_EQ(0, captured);

  const std::string digits_then_capture =
      StrFormat("%d%n", 123, FormatCountCapture(&captured));
  EXPECT_EQ("123", digits_then_capture);
  EXPECT_EQ(3, captured);
}
// A raw int* offered to %n (instead of a FormatCountCapture) must be rejected:
// the packed format is expected to yield an empty string.
TEST_F(FormatEntryPointTest, FormatCountCaptureWrongType) {
  int counter = 0;
  int value = 123;
  int* raw_counter = &counter;
  UntypedFormatSpec spec("%d%n");
  FormatArgImpl packed_args[2] = {FormatArgImpl(value),
                                  FormatArgImpl(raw_counter)};
  EXPECT_EQ("",
            str_format_internal::FormatPack(
                str_format_internal::UntypedFormatSpecImpl::Extract(spec),
                absl::MakeSpan(packed_args)));
}
// Several %n captures in one format each record the running output length.
TEST_F(FormatEntryPointTest, FormatCountCaptureMultiple) {
  int n1 = 0;
  int n2 = 0;
  // "%5d" pads 1 to five columns and "%10d" pads 2 to ten columns, so the
  // output is 15 characters long; this matches the counts asserted below.
  EXPECT_EQ("    1         2",
            StrFormat("%5d%n%10d%n", 1, FormatCountCapture(&n1), 2,
                      FormatCountCapture(&n2)));
  EXPECT_EQ(5, n1);
  EXPECT_EQ(15, n2);
}
// Worked example: capture the width of a row prefix with %n and reuse it as
// the "%*s" width to indent the following row to the same column.
TEST_F(FormatEntryPointTest, FormatCountCaptureExample) {
  // Initialized so that a failed first format cannot leave an indeterminate
  // value to be read as the width below.
  int n = 0;
  std::string s;
  StrAppendFormat(&s, "%s: %n%s\n", "(1,1)", FormatCountCapture(&n), "(1,2)");
  StrAppendFormat(&s, "%*s%s\n", n, "", "(2,2)");
  EXPECT_EQ(7, n);
  // The second row is indented by n == 7 spaces (the length of "(1,1): ").
  EXPECT_EQ(
      "(1,1): (1,2)\n"
      "       (2,2)\n",
      s);
}
// StreamFormat() written to an ostream must produce the same text as
// snprintf() for a range of formats and a fixed argument list.
TEST_F(FormatEntryPointTest, Stream) {
  const std::string formats[] = {
      "",
      "a",
      "%80d",
      "%d %u %c %s %f %g",
#if !defined(_MSC_VER) && !defined(__ANDROID__) && !defined(__native_client__)
      // MSVC, NaCL and Android don't support positional syntax.
      "complicated multipart %% %1$d format %1$080d",
#endif  // _MSC_VER
  };
  std::string buf(4096, '\0');
  for (const auto& fmt : formats) {
    const auto parsed =
        ParsedFormat<'d', 'u', 'c', 's', 'f', 'g'>::NewAllowIgnored(fmt);
    // Stop here on a parse failure instead of dereferencing a null pointer.
    ASSERT_TRUE(parsed) << fmt;
    std::ostringstream oss;
    oss << StreamFormat(*parsed, 123, 3, 49, "multistreaming!!!", 1.01, 1.01);
    int fmt_result = snprintf(&*buf.begin(), buf.size(), fmt.c_str(),  //
                              123, 3, 49, "multistreaming!!!", 1.01, 1.01);
    ASSERT_TRUE(oss) << fmt;
    // The reference output must have fit into `buf` without truncation.
    ASSERT_TRUE(fmt_result >= 0 && static_cast<size_t>(fmt_result) < buf.size())
        << fmt_result;
    EXPECT_EQ(buf.c_str(), oss.str());
  }
}
// A successful StreamFormat() leaves the stream in the good state.
TEST_F(FormatEntryPointTest, StreamOk) {
  std::ostringstream stream;
  stream << StreamFormat("hello %d", 123);
  const std::string written = stream.str();
  EXPECT_EQ("hello 123", written);
  EXPECT_TRUE(stream.good());
}
// Streaming a format whose argument does not match the conversion writes the
// text before the conversion and sets the stream's fail state.
TEST_F(FormatEntryPointTest, StreamFail) {
  std::ostringstream stream;
  UntypedFormatSpec spec("hello %d");
  FormatArgImpl mismatched_arg("non-numeric");
  stream << str_format_internal::Streamable(
      str_format_internal::UntypedFormatSpecImpl::Extract(spec),
      {&mismatched_arg, 1});
  const std::string written = stream.str();
  EXPECT_EQ("hello ", written);  // partial write
  EXPECT_TRUE(stream.fail());
}
// Formats `fmt` and the variadic arguments with the C library's vsnprintf()
// and returns the result as a std::string.
//
// The output is expected to fit into a 128-byte scratch buffer (including the
// terminating NUL); a gtest failure is recorded otherwise. On a vsnprintf()
// error an empty string is returned, and on truncation only the characters
// actually written are returned.
std::string WithSnprintf(const char* fmt, ...) {
  std::string buf;
  buf.resize(128);
  va_list va;
  va_start(va, fmt);
  const int r = vsnprintf(&*buf.begin(), buf.size(), fmt, va);
  va_end(va);
  EXPECT_GE(r, 0);
  if (r < 0) {
    // A negative value must not reach resize(): converted to size_t it would
    // request an enormous allocation.
    return std::string();
  }
  const size_t needed = static_cast<size_t>(r);
  EXPECT_LT(needed, buf.size());
  // When truncated, vsnprintf() wrote at most buf.size() - 1 characters, so
  // never resize past the data that is actually in the buffer.
  buf.resize(needed < buf.size() ? needed : buf.size() - 1);
  return buf;
}
TEST_F(FormatEntryPointTest, FloatPrecisionArg) {
  // Test that positional parameters for width and precision
  // are indexed to precede the value.
  // Also sanity check the same formats against snprintf.
  EXPECT_EQ("0.1", StrFormat("%.1f", 0.1));
  EXPECT_EQ("0.1", WithSnprintf("%.1f", 0.1));
  // A width of 5 right-aligns the three-character "0.1" behind two spaces.
  EXPECT_EQ("  0.1", StrFormat("%*.1f", 5, 0.1));
  EXPECT_EQ("  0.1", WithSnprintf("%*.1f", 5, 0.1));
  EXPECT_EQ("0.1", StrFormat("%.*f", 1, 0.1));
  EXPECT_EQ("0.1", WithSnprintf("%.*f", 1, 0.1));
  EXPECT_EQ("  0.1", StrFormat("%*.*f", 5, 1, 0.1));
  EXPECT_EQ("  0.1", WithSnprintf("%*.*f", 5, 1, 0.1));
}
namespace streamed_test {
// Empty type whose only formatting support is the stream insertion operator
// below.
struct X {};

// Writes the single character "X" to `os` and returns the stream.
std::ostream& operator<<(std::ostream& os, const X&) {
  os << "X";
  return os;
}
}  // namespace streamed_test
// FormatStreamed() lets %s format any value through its operator<<, and the
// width and '-' flag apply to the streamed text.
TEST_F(FormatEntryPointTest, FormatStreamed) {
  EXPECT_EQ("123", StrFormat("%s", FormatStreamed(123)));
  // Width 5: right-aligned by default, left-aligned with '-'.
  EXPECT_EQ("  123", StrFormat("%5s", FormatStreamed(123)));
  EXPECT_EQ("123  ", StrFormat("%-5s", FormatStreamed(123)));
  EXPECT_EQ("X", StrFormat("%s", FormatStreamed(streamed_test::X())));
  EXPECT_EQ("123", StrFormat("%s", FormatStreamed(StreamFormat("%d", 123))));
}
// Helper class that creates a temporary file and exposes a FILE* to it.
// It will close the file on destruction.
class TempFile {
public:
TempFile() : file_(std::tmpfile()) {}
~TempFile() { std::fclose(file_); }
std::FILE* file() const { return file_; }
// Read the file into a string.
std::string ReadFile() {
std::fseek(file_, 0, SEEK_END);
int size = std::ftell(file_);
EXPECT_GT(size, 0);
std::rewind(file_);
std::string str(2 * size, ' ');
int read_bytes = std::fread(&str[0], 1, str.size(), file_);
EXPECT_EQ(read_bytes, size);
str.resize(read_bytes);
EXPECT_TRUE(std::feof(file_));
return str;
}
private:
std::FILE* file_;
};
// FPrintF() returns the number of characters written and the text reaches the
// file.
TEST_F(FormatEntryPointTest, FPrintF) {
  TempFile tmp;
  const int written = FPrintF(tmp.file(), "STRING: %s NUMBER: %010d",
                              std::string("ABC"), -19);
  EXPECT_EQ(written, 30);
  const std::string contents = tmp.ReadFile();
  EXPECT_EQ(contents, "STRING: ABC NUMBER: -000000019");
}
// Writing to stdin is expected to fail with a negative result and EBADF.
TEST_F(FormatEntryPointTest, FPrintFError) {
  errno = 0;  // start from a clean errno so the check below is meaningful
  const int rc = FPrintF(stdin, "ABC");
  EXPECT_LT(rc, 0);
  EXPECT_EQ(errno, EBADF);
}
#ifdef __GLIBC__
// Two fields of width 2,000,000,000 are expected to make FPrintF() fail with
// EFBIG.
TEST_F(FormatEntryPointTest, FprintfTooLarge) {
  std::FILE* f = std::fopen("/dev/null", "w");
  // Without a stream, both the write and the fclose() below would operate on
  // a null pointer.
  ASSERT_TRUE(f != nullptr) << "could not open /dev/null for writing";
  int width = 2000000000;
  errno = 0;
  int result = FPrintF(f, "%*d %*d", width, 0, width, 0);
  EXPECT_LT(result, 0);
  EXPECT_EQ(errno, EFBIG);
  std::fclose(f);
}
// PrintF() writes to stdout: stdout is temporarily redirected into a
// temporary file, then restored, and the captured text is checked.
TEST_F(FormatEntryPointTest, PrintF) {
  const int stdout_tmp = dup(STDOUT_FILENO);
  // Without a saved descriptor stdout could not be restored afterwards.
  ASSERT_NE(stdout_tmp, -1) << "dup(STDOUT_FILENO) failed";
  TempFile tmp;
  std::fflush(stdout);
  const int redirect_rc = dup2(fileno(tmp.file()), STDOUT_FILENO);
  int result = PrintF("STRING: %s NUMBER: %010d", std::string("ABC"), -19);
  std::fflush(stdout);
  // Always restore stdout before any assertion can leave the test.
  const int restore_rc = dup2(stdout_tmp, STDOUT_FILENO);
  close(stdout_tmp);
  ASSERT_NE(redirect_rc, -1) << "could not redirect stdout";
  ASSERT_NE(restore_rc, -1) << "could not restore stdout";
  EXPECT_EQ(result, 30);
  EXPECT_EQ(tmp.ReadFile(), "STRING: ABC NUMBER: -000000019");
}
#endif // __GLIBC__
// SNPrintF() returns the length the full output would have, while the buffer
// only receives what fits (15 characters in a 16-byte buffer).
TEST_F(FormatEntryPointTest, SNPrintF) {
  char out[16];
  constexpr size_t kCapacity = sizeof(out);

  int needed = SNPrintF(out, kCapacity, "STRING: %s", std::string("ABC"));
  EXPECT_EQ(needed, 11);
  EXPECT_EQ(std::string(out), "STRING: ABC");

  needed = SNPrintF(out, kCapacity, "NUMBER: %d", 123456);
  EXPECT_EQ(needed, 14);
  EXPECT_EQ(std::string(out), "NUMBER: 123456");

  needed = SNPrintF(out, kCapacity, "NUMBER: %d", 1234567);
  EXPECT_EQ(needed, 15);
  EXPECT_EQ(std::string(out), "NUMBER: 1234567");

  // From here on the output no longer fits and the stored text is cut off.
  needed = SNPrintF(out, kCapacity, "NUMBER: %d", 12345678);
  EXPECT_EQ(needed, 16);
  EXPECT_EQ(std::string(out), "NUMBER: 1234567");

  needed = SNPrintF(out, kCapacity, "NUMBER: %d", 123456789);
  EXPECT_EQ(needed, 17);
  EXPECT_EQ(std::string(out), "NUMBER: 1234567");

  // A null buffer of size zero only measures the output.
  needed = SNPrintF(nullptr, 0, "Just checking the %s of the output.", "size");
  EXPECT_EQ(needed, 37);
}
// Walks through the conversions, flags and length modifiers described in the
// StrFormat documentation and checks one example of each.
TEST(StrFormat, BehavesAsDocumented) {
  std::string s = absl::StrFormat("%s, %d!", "Hello", 123);
  EXPECT_EQ("Hello, 123!", s);
  // The format of a replacement is
  // '%'[position][flags][width['.'precision]][length_modifier][format]
  EXPECT_EQ(absl::StrFormat("%1$+3.2Lf", 1.1), "+1.10");
  // Text conversion:
  //     "c" - Character.              Eg: 'a' -> "a", 0x20 -> " "
  EXPECT_EQ(StrFormat("%c", 'a'), "a");
  EXPECT_EQ(StrFormat("%c", 0x20), " ");
  //           Formats char and integral types: int, long, uint64_t, etc.
  EXPECT_EQ(StrFormat("%c", int{'a'}), "a");
  EXPECT_EQ(StrFormat("%c", long{'a'}), "a");  // NOLINT
  EXPECT_EQ(StrFormat("%c", uint64_t{'a'}), "a");
  //     "s" - string       Eg: "C" -> "C", std::string("C++") -> "C++"
  //           Formats std::string, char*, string_view, and Cord.
  EXPECT_EQ(StrFormat("%s", "C"), "C");
  EXPECT_EQ(StrFormat("%s", std::string("C++")), "C++");
  EXPECT_EQ(StrFormat("%s", string_view("view")), "view");
  // Integral Conversion
  //     These format integral types: char, int, long, uint64_t, etc.
  EXPECT_EQ(StrFormat("%d", char{10}), "10");
  EXPECT_EQ(StrFormat("%d", int{10}), "10");
  EXPECT_EQ(StrFormat("%d", long{10}), "10");  // NOLINT
  EXPECT_EQ(StrFormat("%d", uint64_t{10}), "10");
  //     d,i - signed decimal          Eg: -10 -> "-10"
  EXPECT_EQ(StrFormat("%d", -10), "-10");
  EXPECT_EQ(StrFormat("%i", -10), "-10");
  //      o  - octal                   Eg:  10 -> "12"
  EXPECT_EQ(StrFormat("%o", 10), "12");
  //      u  - unsigned decimal        Eg:  10 -> "10"
  EXPECT_EQ(StrFormat("%u", 10), "10");
  //     x/X - lower,upper case hex    Eg:  10 -> "a"/"A"
  EXPECT_EQ(StrFormat("%x", 10), "a");
  EXPECT_EQ(StrFormat("%X", 10), "A");
  // Floating-point, with upper/lower-case output.
  //     These format floating points types: float, double, long double, etc.
  EXPECT_EQ(StrFormat("%.1f", float{1}), "1.0");
  EXPECT_EQ(StrFormat("%.1f", double{1}), "1.0");
  const long double long_double = 1.0;
  EXPECT_EQ(StrFormat("%.1f", long_double), "1.0");
  //     These also format integral types: char, int, long, uint64_t, etc.:
  EXPECT_EQ(StrFormat("%.1f", char{1}), "1.0");
  EXPECT_EQ(StrFormat("%.1f", int{1}), "1.0");
  EXPECT_EQ(StrFormat("%.1f", long{1}), "1.0");  // NOLINT
  EXPECT_EQ(StrFormat("%.1f", uint64_t{1}), "1.0");
  //     f/F - decimal.                Eg: 123456789 -> "123456789.000000"
  EXPECT_EQ(StrFormat("%f", 123456789), "123456789.000000");
  EXPECT_EQ(StrFormat("%F", 123456789), "123456789.000000");
  //     e/E - exponentiated           Eg: .01 -> "1.000000e-02"/"1.000000E-02"
  EXPECT_EQ(StrFormat("%e", .01), "1.000000e-02");
  EXPECT_EQ(StrFormat("%E", .01), "1.000000E-02");
  //     g/G - exponentiate to fit     Eg: .01 -> "0.01", 1e10 ->"1e+10"/"1E+10"
  EXPECT_EQ(StrFormat("%g", .01), "0.01");
  EXPECT_EQ(StrFormat("%g", 1e10), "1e+10");
  EXPECT_EQ(StrFormat("%G", 1e10), "1E+10");
  //     a/A - lower,upper case hex    Eg: -3.0 -> "-0x1.8p+1"/"-0X1.8P+1"
  // On Android platform <=21, there is a regression in hexfloat formatting.
#if !defined(__ANDROID_API__) || __ANDROID_API__ > 21
  EXPECT_EQ(StrFormat("%.1a", -3.0), "-0x1.8p+1");  // .1 to fix MSVC output
  EXPECT_EQ(StrFormat("%.1A", -3.0), "-0X1.8P+1");  // .1 to fix MSVC output
#endif
  // Other conversion
  int64_t value = 0x7ffdeb4;
  auto ptr_value = static_cast<uintptr_t>(value);
  // Only the pointer value is formatted. The fake address is kept as a plain
  // pointer and never dereferenced, so no reference is bound to a
  // non-existent object.
  const int* const something = reinterpret_cast<const int*>(ptr_value);
  EXPECT_EQ(StrFormat("%p", something), StrFormat("0x%x", ptr_value));
  // Output widths are supported, with optional flags.
  EXPECT_EQ(StrFormat("%3d", 1), "  1");
  EXPECT_EQ(StrFormat("%3d", 123456), "123456");
  EXPECT_EQ(StrFormat("%06.2f", 1.234), "001.23");
  EXPECT_EQ(StrFormat("%+d", 1), "+1");
  EXPECT_EQ(StrFormat("% d", 1), " 1");
  EXPECT_EQ(StrFormat("%-4d", -1), "-1  ");
  EXPECT_EQ(StrFormat("%#o", 10), "012");
  EXPECT_EQ(StrFormat("%#x", 15), "0xf");
  EXPECT_EQ(StrFormat("%04d", 8), "0008");
  // Posix positional substitution.
  EXPECT_EQ(absl::StrFormat("%2$s, %3$s, %1$s!", "vici", "veni", "vidi"),
            "veni, vidi, vici!");
  // Length modifiers are ignored.
  EXPECT_EQ(StrFormat("%hhd", int{1}), "1");
  EXPECT_EQ(StrFormat("%hd", int{1}), "1");
  EXPECT_EQ(StrFormat("%ld", int{1}), "1");
  EXPECT_EQ(StrFormat("%lld", int{1}), "1");
  EXPECT_EQ(StrFormat("%Ld", int{1}), "1");
  EXPECT_EQ(StrFormat("%jd", int{1}), "1");
  EXPECT_EQ(StrFormat("%zd", int{1}), "1");
  EXPECT_EQ(StrFormat("%td", int{1}), "1");
  EXPECT_EQ(StrFormat("%qd", int{1}), "1");
}
using str_format_internal::ExtendedParsedFormat;
using str_format_internal::ParsedFormatBase;
// Consumer passed to ParsedFormatBase::ProcessFormat() that renders a parsed
// format as text: literal pieces become "[text]" and conversions become
// "{text:position$[width$*][.precision$*]char}".
struct SummarizeConsumer {
  std::string* out;
  explicit SummarizeConsumer(std::string* out) : out(out) {}

  // Appends the literal piece `s` wrapped in square brackets.
  bool Append(string_view s) {
    out->append("[");
    out->append(std::string(s));
    out->append("]");
    return true;
  }

  // Appends a description of one conversion: its source text `s`, the
  // argument position, any width/precision taken from arguments, and the
  // conversion character.
  bool ConvertOne(const str_format_internal::UnboundConversion& conv,
                  string_view s) {
    std::string piece = "{";
    piece += std::string(s);
    piece += ":";
    piece += std::to_string(conv.arg_position);
    piece += "$";
    if (conv.width.is_from_arg()) {
      piece += std::to_string(conv.width.get_from_arg());
      piece += "$*";
    }
    if (conv.precision.is_from_arg()) {
      piece += ".";
      piece += std::to_string(conv.precision.get_from_arg());
      piece += "$*";
    }
    piece += str_format_internal::FormatConversionCharToChar(conv.conv);
    piece += "}";
    *out += piece;
    return true;
  }
};
// Returns the SummarizeConsumer rendering of `pc`, with "!" appended when
// ProcessFormat() reports failure.
std::string SummarizeParsedFormat(const ParsedFormatBase& pc) {
  std::string summary;
  const bool processed = pc.ProcessFormat(SummarizeConsumer(&summary));
  if (!processed) {
    summary += "!";
  }
  return summary;
}
// Fixture name for the ParsedFormat test cases; a plain alias of
// ::testing::Test with no members of its own.
using ParsedFormatTest = ::testing::Test;
// ParsedFormat objects constructed directly from literals summarize as
// expected.
TEST_F(ParsedFormatTest, SimpleChecked) {
  const std::string one_conversion =
      SummarizeParsedFormat(ParsedFormat<'d'>("ABC%dDEF"));
  EXPECT_EQ("[ABC]{d:1$d}[DEF]", one_conversion);

  const std::string three_conversions =
      SummarizeParsedFormat(ParsedFormat<'s', 'd', 'f'>("%sFFF%dZZZ%f"));
  EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", three_conversions);

  // Precision taken from an argument ('*').
  const std::string star_precision =
      SummarizeParsedFormat(ParsedFormat<'s', '*', 'd'>("%s %.*d"));
  EXPECT_EQ("{s:1$s}[ ]{.*d:3$.2$*d}", star_precision);
}
// ParsedFormat<...>::New() accepts formats that match the declared
// conversions, including '*' widths and positional arguments.
TEST_F(ParsedFormatTest, SimpleUncheckedCorrect) {
  auto single = ParsedFormat<'d'>::New("ABC%dDEF");
  ASSERT_TRUE(single);
  EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*single));

  // Format supplied through a std::string.
  using StringIntFloat = ParsedFormat<'s', 'd', 'f'>;
  std::string runtime_format = "%sFFF%dZZZ%f";
  auto triple = StringIntFloat::New(runtime_format);
  ASSERT_TRUE(triple);
  EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*triple));

  triple = StringIntFloat::New("%s %d %f");
  ASSERT_TRUE(triple);
  EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*triple));

  auto star_width = ParsedFormat<'*', 'd'>::New("%*d");
  ASSERT_TRUE(star_width);
  EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star_width));

  using IntString = ParsedFormat<'d', 's'>;
  auto positional = IntString::New("%2$s %1$d");
  ASSERT_TRUE(positional);
  EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*positional));

  // with reuse
  positional = IntString::New("%2$s %1$d %1$d");
  ASSERT_TRUE(positional);
  EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}",
            SummarizeParsedFormat(*positional));
}
// New() rejects formats that leave declared arguments unused, while
// NewAllowIgnored() accepts them.
TEST_F(ParsedFormatTest, SimpleUncheckedIgnoredArgs) {
  using IntString = ParsedFormat<'d', 's'>;

  EXPECT_FALSE(IntString::New("ABC"));
  EXPECT_FALSE(IntString::New("%dABC"));
  EXPECT_FALSE(IntString::New("ABC%2$s"));

  auto lenient = IntString::NewAllowIgnored("ABC");
  ASSERT_TRUE(lenient);
  EXPECT_EQ("[ABC]", SummarizeParsedFormat(*lenient));

  lenient = IntString::NewAllowIgnored("%dABC");
  ASSERT_TRUE(lenient);
  EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*lenient));

  lenient = IntString::NewAllowIgnored("ABC%2$s");
  ASSERT_TRUE(lenient);
  EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*lenient));
}
// One argument used with both %d and %x is rejected when only one of the two
// conversions is declared for it.
TEST_F(ParsedFormatTest, SimpleUncheckedUnsupported) {
  const auto declared_as_d = ParsedFormat<'d'>::New("%1$d %1$x");
  EXPECT_FALSE(declared_as_d);
  const auto declared_as_x = ParsedFormat<'x'>::New("%1$d %1$x");
  EXPECT_FALSE(declared_as_x);
}
// New() rejects formats with too few or too many conversions, or with a
// conversion that differs from the declared one.
TEST_F(ParsedFormatTest, SimpleUncheckedIncorrect) {
  using OneInt = ParsedFormat<'d'>;
  EXPECT_FALSE(OneInt::New(""));
  EXPECT_FALSE(OneInt::New("ABC%dDEF%d"));

  // The format uses %f where 'g' is declared.
  using StringIntG = ParsedFormat<'s', 'd', 'g'>;
  std::string runtime_format = "%sFFF%dZZZ%f";
  EXPECT_FALSE(StringIntG::New(runtime_format));
}
using absl::str_format_internal::FormatConversionCharSet;
// Same scenarios as SimpleUncheckedCorrect, expressed through
// ExtendedParsedFormat and FormatConversionCharSetInternal values.
TEST_F(ParsedFormatTest, UncheckedCorrect) {
  using Conv = FormatConversionCharSetInternal;

  auto single = ExtendedParsedFormat<Conv::d>::New("ABC%dDEF");
  ASSERT_TRUE(single);
  EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*single));

  using StringIntFloat =
      ExtendedParsedFormat<Conv::kString, Conv::d, Conv::kFloating>;
  std::string runtime_format = "%sFFF%dZZZ%f";
  auto triple = StringIntFloat::New(runtime_format);
  ASSERT_TRUE(triple);
  EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*triple));

  triple = StringIntFloat::New("%s %d %f");
  ASSERT_TRUE(triple);
  EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*triple));

  auto star_width = ExtendedParsedFormat<Conv::kStar, Conv::d>::New("%*d");
  ASSERT_TRUE(star_width);
  EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star_width));

  using IntString = ExtendedParsedFormat<Conv::d, Conv::s>;
  auto positional = IntString::New("%2$s %1$d");
  ASSERT_TRUE(positional);
  EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*positional));

  // with reuse
  positional = IntString::New("%2$s %1$d %1$d");
  ASSERT_TRUE(positional);
  EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}",
            SummarizeParsedFormat(*positional));
}
// Same scenarios as SimpleUncheckedIgnoredArgs, expressed through
// ExtendedParsedFormat.
TEST_F(ParsedFormatTest, UncheckedIgnoredArgs) {
  using Conv = FormatConversionCharSetInternal;
  using IntString = ExtendedParsedFormat<Conv::d, Conv::s>;

  EXPECT_FALSE(IntString::New("ABC"));
  EXPECT_FALSE(IntString::New("%dABC"));
  EXPECT_FALSE(IntString::New("ABC%2$s"));

  auto lenient = IntString::NewAllowIgnored("ABC");
  ASSERT_TRUE(lenient);
  EXPECT_EQ("[ABC]", SummarizeParsedFormat(*lenient));

  lenient = IntString::NewAllowIgnored("%dABC");
  ASSERT_TRUE(lenient);
  EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*lenient));

  lenient = IntString::NewAllowIgnored("ABC%2$s");
  ASSERT_TRUE(lenient);
  EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*lenient));
}
// An argument declared with a combined conversion set (d | x) may be used
// with either conversion, or with both.
TEST_F(ParsedFormatTest, UncheckedMultipleTypes) {
  auto dx = ExtendedParsedFormat<
      FormatConversionCharSetInternal::d |
      FormatConversionCharSetInternal::x>::New("%1$d %1$x");
  // ASSERT rather than EXPECT: `dx` is dereferenced on the next line, so the
  // test must stop here if parsing failed.
  ASSERT_TRUE(dx);
  EXPECT_EQ("{1$d:1$d}[ ]{1$x:1$x}", SummarizeParsedFormat(*dx));
  dx = ExtendedParsedFormat<FormatConversionCharSetInternal::d |
                            FormatConversionCharSetInternal::x>::New("%1$d");
  ASSERT_TRUE(dx);
  EXPECT_EQ("{1$d:1$d}", SummarizeParsedFormat(*dx));
}
// Same scenarios as SimpleUncheckedIncorrect, expressed through
// ExtendedParsedFormat.
TEST_F(ParsedFormatTest, UncheckedIncorrect) {
  using Conv = FormatConversionCharSetInternal;

  using OneInt = ExtendedParsedFormat<Conv::d>;
  EXPECT_FALSE(OneInt::New(""));
  EXPECT_FALSE(OneInt::New("ABC%dDEF%d"));

  // The format uses %f where only 'g' is declared.
  using StringIntG = ExtendedParsedFormat<Conv::s, Conv::d, Conv::g>;
  std::string runtime_format = "%sFFF%dZZZ%f";
  EXPECT_FALSE(StringIntG::New(runtime_format));
}
// Regression test: a format mixing positional ("%1$d") and non-positional
// ("%o") conversions must be rejected.
TEST_F(ParsedFormatTest, RegressionMixPositional) {
  using Conv = FormatConversionCharSetInternal;
  using IntOctal = ExtendedParsedFormat<Conv::d, Conv::o>;
  EXPECT_FALSE(IntOctal::New("%1$d %o"));
}
// Fixture name for the tests of user-written StrFormat wrappers; a plain
// alias of ::testing::Test with no members of its own.
using FormatWrapperTest = ::testing::Test;
// Thin user-style wrapper: takes a FormatSpec for `Args...` plus the
// arguments and returns whatever StrFormat() produces for them.
template <typename... Args>
std::string WrappedFormat(const absl::FormatSpec<Args...>& format,
                          const Args&... args) {
  std::string formatted = StrFormat(format, args...);
  return formatted;
}
// The wrapper accepts a string literal as its format.
TEST_F(FormatWrapperTest, ConstexprStringFormat) {
  const std::string greeting = WrappedFormat("%s there", "hello");
  EXPECT_EQ(greeting, "hello there");
}
// The wrapper accepts a preparsed format as well.
TEST_F(FormatWrapperTest, ParsedFormat) {
  ParsedFormat<'s'> preparsed("%s there");
  const std::string greeting = WrappedFormat(preparsed, "hello");
  EXPECT_EQ(greeting, "hello there");
}
} // namespace
ABSL_NAMESPACE_END
} // namespace absl
// Some codegen thunks that we can use to easily dump the generated assembly for
// different StrFormat calls.

// Thunk: StrFormat() with a single "%d" conversion of an int.
std::string CodegenAbslStrFormatInt(int i) { // NOLINT
return absl::StrFormat("%d", i);
}
// Codegen thunk: StrFormat() with three conversions ("%d %s %d") applied to an
// int, a std::string and an int64_t.
std::string CodegenAbslStrFormatIntStringInt64(int i, const std::string& s,
int64_t i64) { // NOLINT
return absl::StrFormat("%d %s %d", i, s, i64);
}
// Codegen thunk: StrAppendFormat() into `out` with a single "%d" conversion of
// an int.
void CodegenAbslStrAppendFormatInt(std::string* out, int i) { // NOLINT
absl::StrAppendFormat(out, "%d", i);
}
// Codegen thunk: StrAppendFormat() into `out` with three conversions
// ("%d %s %d") applied to an int, a std::string and an int64_t.
void CodegenAbslStrAppendFormatIntStringInt64(std::string* out, int i,
const std::string& s,
int64_t i64) { // NOLINT
absl::StrAppendFormat(out, "%d %s %d", i, s, i64);
}

View file

@ -0,0 +1,293 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_join.h
// -----------------------------------------------------------------------------
//
// This header file contains functions for joining a range of elements and
// returning the result as a std::string. StrJoin operations are specified by
// passing a range, a separator string to use between the elements joined, and
// an optional Formatter responsible for converting each argument in the range
// to a string. If omitted, a default `AlphaNumFormatter()` is called on the
// elements to be joined, using the same formatting that `absl::StrCat()` uses.
// This package defines a number of default formatters, and you can define your
// own implementations.
//
// Ranges are specified by passing a container with `std::begin()` and
// `std::end()` iterators, container-specific `begin()` and `end()` iterators, a
// brace-initialized `std::initializer_list`, or a `std::tuple` of heterogeneous
// objects. The separator string is specified as an `absl::string_view`.
//
// Because the default formatter uses the `absl::AlphaNum` class,
// `absl::StrJoin()`, like `absl::StrCat()`, will work out-of-the-box on
// collections of strings, ints, floats, doubles, etc.
//
// Example:
//
// std::vector<std::string> v = {"foo", "bar", "baz"};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("foo-bar-baz", s);
//
// See comments on the `absl::StrJoin()` function for more examples.
#ifndef ABSL_STRINGS_STR_JOIN_H_
#define ABSL_STRINGS_STR_JOIN_H_
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <iterator>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include "absl/base/macros.h"
#include "absl/strings/internal/str_join_internal.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// -----------------------------------------------------------------------------
// Concept: Formatter
// -----------------------------------------------------------------------------
//
// A Formatter is a function object that is responsible for formatting its
// argument as a string and appending it to a given output std::string.
// Formatters may be implemented as function objects, lambdas, or normal
// functions. You may provide your own Formatter to enable `absl::StrJoin()` to
// work with arbitrary types.
//
// The following is an example of a custom Formatter that simply uses
// `std::to_string()` to format an integer as a std::string.
//
// struct MyFormatter {
// void operator()(std::string* out, int i) const {
// out->append(std::to_string(i));
// }
// };
//
// You would use the above formatter by passing an instance of it as the final
// argument to `absl::StrJoin()`:
//
// std::vector<int> v = {1, 2, 3, 4};
// std::string s = absl::StrJoin(v, "-", MyFormatter());
// EXPECT_EQ("1-2-3-4", s);
//
// The following standard formatters are provided within this file:
//
// - `AlphaNumFormatter()` (the default)
// - `StreamFormatter()`
// - `PairFormatter()`
// - `DereferenceFormatter()`
// AlphaNumFormatter()
//
// Returns the formatter that is used when no formatter is given to
// `absl::StrJoin()`. It converts numeric arguments to strings by way of
// `absl::AlphaNum`.
inline strings_internal::AlphaNumFormatterImpl AlphaNumFormatter() {
  using Impl = strings_internal::AlphaNumFormatterImpl;
  return Impl();
}
// StreamFormatter()
//
// Returns a formatter that renders each argument with its `operator<<`.
inline strings_internal::StreamFormatterImpl StreamFormatter() {
  using Impl = strings_internal::StreamFormatterImpl;
  return Impl();
}
// Function Template: PairFormatter(Formatter, absl::string_view, Formatter)
//
// Returns a formatter for `std::pair` values: the `.first` member is rendered
// with `f1`, the `.second` member with `f2`, and `sep` is placed between the
// two.
template <typename FirstFormatter, typename SecondFormatter>
inline strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>
PairFormatter(FirstFormatter f1, absl::string_view sep, SecondFormatter f2) {
  using Impl =
      strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>;
  // Both formatters were received by value; move them into the result.
  return Impl(std::move(f1), sep, std::move(f2));
}
// Overload of PairFormatter() taking only the separator: both members of the
// pair are rendered with the default `AlphaNumFormatter()`.
inline strings_internal::PairFormatterImpl<
    strings_internal::AlphaNumFormatterImpl,
    strings_internal::AlphaNumFormatterImpl>
PairFormatter(absl::string_view sep) {
  using Default = strings_internal::AlphaNumFormatterImpl;
  return PairFormatter<Default, Default>(AlphaNumFormatter(), sep,
                                         AlphaNumFormatter());
}
// Function Template: DereferenceFormatter(Formatter)
//
// Returns a formatter that dereferences its argument and applies `f` to the
// result. Useful for joining containers of pointer-to-T, a pattern that often
// shows up with repeated fields in protocol buffers.
template <typename Formatter>
strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter(
    Formatter&& f) {
  using Impl = strings_internal::DereferenceFormatterImpl<Formatter>;
  return Impl(std::forward<Formatter>(f));
}
// Overload of `DereferenceFormatter()` that applies the default
// `AlphaNumFormatter()` to the dereferenced argument.
inline strings_internal::DereferenceFormatterImpl<
    strings_internal::AlphaNumFormatterImpl>
DereferenceFormatter() {
  using Impl = strings_internal::DereferenceFormatterImpl<
      strings_internal::AlphaNumFormatterImpl>;
  return Impl(AlphaNumFormatter());
}
// -----------------------------------------------------------------------------
// StrJoin()
// -----------------------------------------------------------------------------
//
// Joins a range of elements and returns the result as a std::string.
// `absl::StrJoin()` takes a range, a separator string to use between the
// elements joined, and an optional Formatter responsible for converting each
// argument in the range to a string.
//
// If omitted, the default `AlphaNumFormatter()` is called on the elements to be
// joined.
//
// Example 1:
// // Joins a collection of strings. This pattern also works with a collection
// // of `absl::string_view` or even `const char*`.
// std::vector<std::string> v = {"foo", "bar", "baz"};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("foo-bar-baz", s);
//
// Example 2:
// // Joins the values in the given `std::initializer_list<>` specified using
// // brace initialization. This pattern also works with an initializer_list
// // of ints or `absl::string_view` -- any `AlphaNum`-compatible type.
// std::string s = absl::StrJoin({"foo", "bar", "baz"}, "-");
// EXPECT_EQ("foo-bar-baz", s);
//
// Example 3:
// // Joins a collection of ints. This pattern also works with floats,
// // doubles, int64s -- any `StrCat()`-compatible type.
// std::vector<int> v = {1, 2, 3, -4};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("1-2-3--4", s);
//
// Example 4:
// // Joins a collection of pointer-to-int. By default, pointers are
// // dereferenced and the pointee is formatted using the default format for
// // that type; such dereferencing occurs for all levels of indirection, so
// // this pattern works just as well for `std::vector<int**>` as for
// // `std::vector<int*>`.
// int x = 1, y = 2, z = 3;
// std::vector<int*> v = {&x, &y, &z};
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("1-2-3", s);
//
// Example 5:
// // Dereferencing of `std::unique_ptr<>` is also supported:
// std::vector<std::unique_ptr<int>> v;
// v.emplace_back(new int(1));
// v.emplace_back(new int(2));
// v.emplace_back(new int(3));
// std::string s = absl::StrJoin(v, "-");
// EXPECT_EQ("1-2-3", s);
//
// Example 6:
// // Joins a `std::map`, with each key-value pair separated by an equals
// // sign. This pattern would also work with, say, a
// // `std::vector<std::pair<>>`.
// std::map<std::string, int> m = {
// std::make_pair("a", 1),
// std::make_pair("b", 2),
// std::make_pair("c", 3)};
// std::string s = absl::StrJoin(m, ",", absl::PairFormatter("="));
// EXPECT_EQ("a=1,b=2,c=3", s);
//
// Example 7:
// // These examples show how `absl::StrJoin()` handles a few common edge
// // cases:
// std::vector<std::string> v_empty;
// EXPECT_EQ("", absl::StrJoin(v_empty, "-"));
//
// std::vector<std::string> v_one_item = {"foo"};
// EXPECT_EQ("foo", absl::StrJoin(v_one_item, "-"));
//
// std::vector<std::string> v_empty_string = {""};
// EXPECT_EQ("", absl::StrJoin(v_empty_string, "-"));
//
// std::vector<std::string> v_one_item_empty_string = {"a", ""};
// EXPECT_EQ("a-", absl::StrJoin(v_one_item_empty_string, "-"));
//
// std::vector<std::string> v_two_empty_string = {"", ""};
// EXPECT_EQ("-", absl::StrJoin(v_two_empty_string, "-"));
//
// Example 8:
// // Joins a `std::tuple<T...>` of heterogeneous types, converting each to
// // a std::string using the `absl::AlphaNum` class.
// std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-");
// EXPECT_EQ("123-abc-0.456", s);
// StrJoin() overloads taking an explicit Formatter. In each of them `fmt` is
// passed on to the internal helper as an lvalue.

// Joins the elements of the iterator range [start, end), separated by `sep`
// and formatted with `fmt`, via strings_internal::JoinAlgorithm().
template <typename Iterator, typename Formatter>
std::string StrJoin(Iterator start, Iterator end, absl::string_view sep,
Formatter&& fmt) {
return strings_internal::JoinAlgorithm(start, end, sep, fmt);
}
// Joins the elements of `range`, separated by `separator` and formatted with
// `fmt`, via strings_internal::JoinRange().
template <typename Range, typename Formatter>
std::string StrJoin(const Range& range, absl::string_view separator,
Formatter&& fmt) {
return strings_internal::JoinRange(range, separator, fmt);
}
// Joins the elements of the initializer list `il`, separated by `separator`
// and formatted with `fmt`, via strings_internal::JoinRange().
template <typename T, typename Formatter>
std::string StrJoin(std::initializer_list<T> il, absl::string_view separator,
Formatter&& fmt) {
return strings_internal::JoinRange(il, separator, fmt);
}
// Joins the members of the tuple `value`, separated by `separator` and
// formatted with `fmt`, via strings_internal::JoinAlgorithm().
template <typename... T, typename Formatter>
std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator,
Formatter&& fmt) {
return strings_internal::JoinAlgorithm(value, separator, fmt);
}
// StrJoin() overloads without an explicit Formatter.

// Joins the elements of the iterator range [start, end), separated by
// `separator`, via the formatter-less strings_internal::JoinRange().
template <typename Iterator>
std::string StrJoin(Iterator start, Iterator end, absl::string_view separator) {
return strings_internal::JoinRange(start, end, separator);
}
// Joins the elements of `range`, separated by `separator`, via the
// formatter-less strings_internal::JoinRange().
template <typename Range>
std::string StrJoin(const Range& range, absl::string_view separator) {
return strings_internal::JoinRange(range, separator);
}
// Joins the elements of the initializer list `il`, separated by `separator`,
// via the formatter-less strings_internal::JoinRange().
template <typename T>
std::string StrJoin(std::initializer_list<T> il,
absl::string_view separator) {
return strings_internal::JoinRange(il, separator);
}
// Joins the members of the tuple `value`, separated by `separator`, formatting
// each one with AlphaNumFormatter().
template <typename... T>
std::string StrJoin(const std::tuple<T...>& value,
absl::string_view separator) {
return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter());
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_STR_JOIN_H_

View file

@ -0,0 +1,97 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_join.h"
#include <string>
#include <vector>
#include <utility>
#include "benchmark/benchmark.h"
namespace {
// Benchmarks absl::StrJoin() over a vector of identical strings.
// state.range(0) is the length of each string and state.range(1) the number
// of strings joined.
void BM_Join2_Strings(benchmark::State& state) {
  const int string_len = state.range(0);
  const int num_strings = state.range(1);
  const std::string s(string_len, 'x');
  const std::vector<std::string> v(num_strings, s);
  for (auto _ : state) {
    // Named differently from the element string `s` above so that the loop
    // variable does not shadow it.
    std::string joined = absl::StrJoin(v, "-");
    benchmark::DoNotOptimize(joined);
  }
}
BENCHMARK(BM_Join2_Strings)
    ->ArgPair(1 << 0, 1 << 3)
    ->ArgPair(1 << 10, 1 << 3)
    ->ArgPair(1 << 13, 1 << 3)
    ->ArgPair(1 << 0, 1 << 10)
    ->ArgPair(1 << 10, 1 << 10)
    ->ArgPair(1 << 13, 1 << 10)
    ->ArgPair(1 << 0, 1 << 13)
    ->ArgPair(1 << 10, 1 << 13)
    ->ArgPair(1 << 13, 1 << 13);
// Benchmarks absl::StrJoin() over a std::vector<int> holding state.range(0)
// copies of 42, joined with "-".
void BM_Join2_Ints(benchmark::State& state) {
  const int count = state.range(0);
  const std::vector<int> values(count, 42);
  for (auto _ : state) {
    std::string joined = absl::StrJoin(values, "-");
    benchmark::DoNotOptimize(joined);
  }
}
BENCHMARK(BM_Join2_Ints)->Range(0, 1 << 13);
// Benchmarks absl::StrJoin() over a vector of (string, int) pairs, joining
// pairs with "," and formatting each pair with absl::PairFormatter("=").
//   state.range(0): length of the string key in every pair.
//   state.range(1): number of pairs in the vector.
void BM_Join2_KeysAndValues(benchmark::State& state) {
  const int string_len = state.range(0);
  const int num_pairs = state.range(1);
  const std::string key(string_len, 'x');
  const std::vector<std::pair<std::string, int>> v(num_pairs,
                                                   std::make_pair(key, 42));
  for (auto _ : state) {
    // Deliberately a different name from `key`, so the loop-local result does
    // not shadow the fixture string.
    std::string joined = absl::StrJoin(v, ",", absl::PairFormatter("="));
    benchmark::DoNotOptimize(joined);
  }
}
// Argument pairs are (key length, number of pairs).
BENCHMARK(BM_Join2_KeysAndValues)
    ->ArgPair(1 << 0, 1 << 3)
    ->ArgPair(1 << 10, 1 << 3)
    ->ArgPair(1 << 13, 1 << 3)
    ->ArgPair(1 << 0, 1 << 10)
    ->ArgPair(1 << 10, 1 << 10)
    ->ArgPair(1 << 13, 1 << 10)
    ->ArgPair(1 << 0, 1 << 13)
    ->ArgPair(1 << 10, 1 << 13)
    ->ArgPair(1 << 13, 1 << 13);
// Benchmarks absl::StrJoin() with absl::StreamFormatter() and an empty
// separator over a vector of identical strings.
//   state.range(0): length of each string.
//   state.range(1): number of strings.
void BM_JoinStreamable(benchmark::State& state) {
  const int string_len = state.range(0);
  const int num_strings = state.range(1);
  const std::string element(string_len, 'x');
  const std::vector<std::string> inputs(num_strings, element);
  for (auto _ : state) {
    std::string joined = absl::StrJoin(inputs, "", absl::StreamFormatter());
    benchmark::DoNotOptimize(joined);
  }
}
// Argument pairs are (string length, number of strings).
BENCHMARK(BM_JoinStreamable)
    ->ArgPair(0, 0)
    ->ArgPair(16, 1)
    ->ArgPair(256, 1)
    ->ArgPair(16, 16)
    ->ArgPair(256, 16)
    ->ArgPair(16, 256)
    ->ArgPair(256, 256);
} // namespace

View file

@ -0,0 +1,474 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit tests for all str_join.h functions
#include "absl/strings/str_join.h"
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <initializer_list>
#include <map>
#include <memory>
#include <ostream>
#include <tuple>
#include <type_traits>
#include <vector>
#include "gtest/gtest.h"
#include "absl/base/macros.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
namespace {
// Walks through the documented uses of absl::StrJoin(): containers of
// strings, string_views, C strings and numbers, initializer lists, tuples,
// raw and smart pointers (which the expectations show are dereferenced before
// formatting), maps with PairFormatter, StrSplit() results, and a handful of
// empty/single-element edge cases.
TEST(StrJoin, APIExamples) {
{
// Collection of strings
std::vector<std::string> v = {"foo", "bar", "baz"};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of absl::string_view
std::vector<absl::string_view> v = {"foo", "bar", "baz"};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of const char*
std::vector<const char*> v = {"foo", "bar", "baz"};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of non-const char*
std::string a = "foo", b = "bar", c = "baz";
std::vector<char*> v = {&a[0], &b[0], &c[0]};
EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
}
{
// Collection of ints
std::vector<int> v = {1, 2, 3, -4};
EXPECT_EQ("1-2-3--4", absl::StrJoin(v, "-"));
}
{
// Literals passed as a std::initializer_list
std::string s = absl::StrJoin({"a", "b", "c"}, "-");
EXPECT_EQ("a-b-c", s);
}
{
// Join a std::tuple<T...>.
std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-");
EXPECT_EQ("123-abc-0.456", s);
}
{
// Collection of unique_ptrs
std::vector<std::unique_ptr<int>> v;
v.emplace_back(new int(1));
v.emplace_back(new int(2));
v.emplace_back(new int(3));
EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
}
{
// Array of ints
const int a[] = {1, 2, 3, -4};
EXPECT_EQ("1-2-3--4", absl::StrJoin(a, a + ABSL_ARRAYSIZE(a), "-"));
}
{
// Collection of pointers
int x = 1, y = 2, z = 3;
std::vector<int*> v = {&x, &y, &z};
EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
}
{
// Collection of pointers to pointers
int x = 1, y = 2, z = 3;
int *px = &x, *py = &y, *pz = &z;
std::vector<int**> v = {&px, &py, &pz};
EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
}
{
// Collection of pointers to std::string
std::string a("a"), b("b");
std::vector<std::string*> v = {&a, &b};
EXPECT_EQ("a-b", absl::StrJoin(v, "-"));
}
{
// A std::map, which is a collection of std::pair<>s.
std::map<std::string, int> m = {{"a", 1}, {"b", 2}, {"c", 3}};
EXPECT_EQ("a=1,b=2,c=3", absl::StrJoin(m, ",", absl::PairFormatter("=")));
}
{
// Shows absl::StrSplit and absl::StrJoin working together. This example is
// equivalent to s/=/-/g.
const std::string s = "a=b=c=d";
EXPECT_EQ("a-b-c-d", absl::StrJoin(absl::StrSplit(s, "="), "-"));
}
//
// A few examples of edge cases
//
{
// Empty range yields an empty string.
std::vector<std::string> v;
EXPECT_EQ("", absl::StrJoin(v, "-"));
}
{
// A range of 1 element gives a string with that element but no
// separator.
std::vector<std::string> v = {"foo"};
EXPECT_EQ("foo", absl::StrJoin(v, "-"));
}
{
// A range with a single empty string element
std::vector<std::string> v = {""};
EXPECT_EQ("", absl::StrJoin(v, "-"));
}
{
// A range with 2 elements, one of which is an empty string
std::vector<std::string> v = {"a", ""};
EXPECT_EQ("a-", absl::StrJoin(v, "-"));
}
{
// A range with 2 empty elements.
std::vector<std::string> v = {"", ""};
EXPECT_EQ("-", absl::StrJoin(v, "-"));
}
{
// A std::vector of bool.
std::vector<bool> v = {true, false, true};
EXPECT_EQ("1-0-1", absl::StrJoin(v, "-"));
}
}
// Verifies that StrJoin() accepts user-supplied formatters of several kinds:
// a lambda, a functor whose copy constructor is deleted, and a functor with
// both const and non-const operator() overloads.
TEST(StrJoin, CustomFormatter) {
std::vector<std::string> v{"One", "Two", "Three"};
{
// Formatter given as a lambda.
std::string joined =
absl::StrJoin(v, "", [](std::string* out, const std::string& in) {
absl::StrAppend(out, "(", in, ")");
});
EXPECT_EQ("(One)(Two)(Three)", joined);
}
{
// Formatter that cannot be copied: StrJoin() must work with it without
// needing the (deleted) copy constructor.
class ImmovableFormatter {
public:
void operator()(std::string* out, const std::string& in) {
absl::StrAppend(out, "(", in, ")");
}
ImmovableFormatter() {}
ImmovableFormatter(const ImmovableFormatter&) = delete;
};
EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", ImmovableFormatter()));
}
{
// The two overloads emit different brackets so the expectations reveal
// which one StrJoin() invoked: the non-const overload for a temporary
// formatter, the const overload for a const formatter object.
class OverloadedFormatter {
public:
void operator()(std::string* out, const std::string& in) {
absl::StrAppend(out, "(", in, ")");
}
void operator()(std::string* out, const std::string& in) const {
absl::StrAppend(out, "[", in, "]");
}
};
EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", OverloadedFormatter()));
const OverloadedFormatter fmt = {};
EXPECT_EQ("[One][Two][Three]", absl::StrJoin(v, "", fmt));
}
}
//
// Tests the Formatters
//
// Calls absl::AlphaNumFormatter() directly with string and numeric arguments
// of several types and checks that each call appends to the same output
// string.
TEST(AlphaNumFormatter, FormatterAPI) {
// Not an exhaustive test. See absl/strings/str_cat_test.cc for the exhaustive
// test of what AlphaNum can convert.
auto f = absl::AlphaNumFormatter();
std::string s;
f(&s, "Testing: ");
f(&s, static_cast<int>(1));
f(&s, static_cast<int16_t>(2));
f(&s, static_cast<int64_t>(3));
f(&s, static_cast<float>(4));
f(&s, static_cast<double>(5));
f(&s, static_cast<unsigned>(6));
f(&s, static_cast<size_t>(7));
f(&s, absl::string_view(" OK"));
EXPECT_EQ("Testing: 1234567 OK", s);
}
// Make sure people who are mistakenly using std::vector<bool> even though
// they're not memory-constrained can use absl::AlphaNumFormatter().
// The formatter is fed elements obtained three different ways (through a
// const iterator, a non-const iterator, and operator[]); each is appended as
// "1" or "0".
TEST(AlphaNumFormatter, VectorOfBool) {
auto f = absl::AlphaNumFormatter();
std::string s;
std::vector<bool> v = {true, false, true};
f(&s, *v.cbegin());
f(&s, *v.begin());
f(&s, v[1]);
EXPECT_EQ("110", s);
}
// Verifies that absl::AlphaNumFormatter() also accepts an already-constructed
// absl::AlphaNum argument.
TEST(AlphaNumFormatter, AlphaNum) {
auto f = absl::AlphaNumFormatter();
std::string s;
f(&s, absl::AlphaNum("hello"));
EXPECT_EQ("hello", s);
}
// Minimal user-defined type whose only formatting support is operator<<;
// used to exercise absl::StreamFormatter().
struct StreamableType {
  std::string contents;
};

// Writes "Streamable:" followed by the wrapped string and returns the stream
// so that insertions can be chained.
inline std::ostream& operator<<(std::ostream& os, const StreamableType& t) {
  return os << "Streamable:" << t.contents;
}
// Calls absl::StreamFormatter() directly with strings, numbers of several
// types, and a user-defined type that only provides operator<<, and checks
// that every value is appended to the same output string.
TEST(StreamFormatter, FormatterAPI) {
auto f = absl::StreamFormatter();
std::string s;
f(&s, "Testing: ");
f(&s, static_cast<int>(1));
f(&s, static_cast<int16_t>(2));
f(&s, static_cast<int64_t>(3));
f(&s, static_cast<float>(4));
f(&s, static_cast<double>(5));
f(&s, static_cast<unsigned>(6));
f(&s, static_cast<size_t>(7));
f(&s, absl::string_view(" OK "));
// Formatted through the operator<< overload for StreamableType.
StreamableType streamable = {"object"};
f(&s, streamable);
EXPECT_EQ("Testing: 1234567 OK Streamable:object", s);
}
// A dummy formatter that wraps each element in parens. Used in some tests
// below.
// operator() is a template, so it accepts any element type that
// absl::StrAppend() can take as an argument.
struct TestingParenFormatter {
template <typename T>
void operator()(std::string* s, const T& t) {
absl::StrAppend(s, "(", t, ")");
}
};
// Exercises both forms of absl::PairFormatter(): the separator-only form and
// the form taking explicit formatters for `first` and `second`. Note that the
// first case invokes the formatter through a const object.
TEST(PairFormatter, FormatterAPI) {
{
// Tests default PairFormatter(sep) that uses AlphaNumFormatter for the
// 'first' and 'second' members.
const auto f = absl::PairFormatter("=");
std::string s;
f(&s, std::make_pair("a", "b"));
f(&s, std::make_pair(1, 2));
EXPECT_EQ("a=b1=2", s);
}
{
// Tests using a custom formatter for the 'first' and 'second' members.
auto f = absl::PairFormatter(TestingParenFormatter(), "=",
TestingParenFormatter());
std::string s;
f(&s, std::make_pair("a", "b"));
f(&s, std::make_pair(1, 2));
EXPECT_EQ("(a)=(b)(1)=(2)", s);
}
}
// Verifies that the dereferencing formatter applies the wrapped formatter to
// the pointee rather than to the pointer itself, for raw pointers and for
// std::unique_ptr, with both built-in and custom wrapped formatters.
TEST(DereferenceFormatter, FormatterAPI) {
{
// Tests wrapping the default AlphaNumFormatter.
const absl::strings_internal::DereferenceFormatterImpl<
absl::strings_internal::AlphaNumFormatterImpl>
f;
int x = 1, y = 2, z = 3;
std::string s;
f(&s, &x);
f(&s, &y);
f(&s, &z);
EXPECT_EQ("123", s);
}
{
// Tests wrapping std::string's default formatter.
absl::strings_internal::DereferenceFormatterImpl<
absl::strings_internal::DefaultFormatter<std::string>::Type>
f;
std::string x = "x";
std::string y = "y";
std::string z = "z";
std::string s;
f(&s, &x);
f(&s, &y);
f(&s, &z);
EXPECT_EQ(s, "xyz");
}
{
// Tests wrapping a custom formatter.
auto f = absl::DereferenceFormatter(TestingParenFormatter());
int x = 1, y = 2, z = 3;
std::string s;
f(&s, &x);
f(&s, &y);
f(&s, &z);
EXPECT_EQ("(1)(2)(3)", s);
}
{
// Tests that smart pointers (std::unique_ptr) are dereferenced as well.
absl::strings_internal::DereferenceFormatterImpl<
absl::strings_internal::AlphaNumFormatterImpl>
f;
auto x = std::unique_ptr<int>(new int(1));
auto y = std::unique_ptr<int>(new int(2));
auto z = std::unique_ptr<int>(new int(3));
std::string s;
f(&s, x);
f(&s, y);
f(&s, z);
EXPECT_EQ("123", s);
}
}
//
// Tests the interfaces for the 4 public Join function overloads. The semantics
// of the algorithm is covered in the above APIExamples test.
//
// Calls each iterator/range overload of StrJoin(), with and without an
// explicit formatter, on the same input and expects identical output.
TEST(StrJoin, PublicAPIOverloads) {
std::vector<std::string> v = {"a", "b", "c"};
// Iterators + formatter
EXPECT_EQ("a-b-c",
absl::StrJoin(v.begin(), v.end(), "-", absl::AlphaNumFormatter()));
// Range + formatter
EXPECT_EQ("a-b-c", absl::StrJoin(v, "-", absl::AlphaNumFormatter()));
// Iterators, no formatter
EXPECT_EQ("a-b-c", absl::StrJoin(v.begin(), v.end(), "-"));
// Range, no formatter
EXPECT_EQ("a-b-c", absl::StrJoin(v, "-"));
}
// Verifies that a built-in array can be passed directly as the range argument.
TEST(StrJoin, Array) {
const absl::string_view a[] = {"a", "b", "c"};
EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
}
// Covers the std::initializer_list overloads: braced lists written inline,
// lists deduced via `auto`, explicitly typed lists of several string-like
// element types and of ints, each with and without a custom formatter.
TEST(StrJoin, InitializerList) {
{ EXPECT_EQ("a-b-c", absl::StrJoin({"a", "b", "c"}, "-")); }
{
// `auto` deduces std::initializer_list<const char*> here.
auto a = {"a", "b", "c"};
EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
}
{
std::initializer_list<const char*> a = {"a", "b", "c"};
EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
}
{
std::initializer_list<std::string> a = {"a", "b", "c"};
EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
}
{
std::initializer_list<absl::string_view> a = {"a", "b", "c"};
EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
}
{
// Tests initializer_list with a non-default formatter
auto a = {"a", "b", "c"};
TestingParenFormatter f;
EXPECT_EQ("(a)-(b)-(c)", absl::StrJoin(a, "-", f));
}
{
// initializer_list of ints
EXPECT_EQ("1-2-3", absl::StrJoin({1, 2, 3}, "-"));
}
{
// Tests initializer_list of ints with a non-default formatter
auto a = {1, 2, 3};
TestingParenFormatter f;
EXPECT_EQ("(1)-(2)-(3)", absl::StrJoin(a, "-", f));
}
}
// Covers the std::tuple overloads: empty and single-element tuples, tuples of
// mixed element types, tuples holding std::cref wrappers, and tuples of raw
// and smart pointers combined with absl::DereferenceFormatter().
TEST(StrJoin, Tuple) {
EXPECT_EQ("", absl::StrJoin(std::make_tuple(), "-"));
EXPECT_EQ("hello", absl::StrJoin(std::make_tuple("hello"), "-"));
int x(10);
std::string y("hello");
double z(3.14);
EXPECT_EQ("10-hello-3.14", absl::StrJoin(std::make_tuple(x, y, z), "-"));
// Faster! Faster!!
// (std::cref stores a reference to `y` in the tuple instead of a copy.)
EXPECT_EQ("10-hello-3.14",
absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-"));
// Formatter with one overload per tuple element type. Each overload uses a
// distinctive printf format (zero-padded hex for int, no fractional digits
// for double, at most four characters for strings) so the expectations below
// show which overload handled each element.
struct TestFormatter {
char buffer[128];
void operator()(std::string* out, int v) {
snprintf(buffer, sizeof(buffer), "%#.8x", v);
out->append(buffer);
}
void operator()(std::string* out, double v) {
snprintf(buffer, sizeof(buffer), "%#.0f", v);
out->append(buffer);
}
void operator()(std::string* out, const std::string& v) {
snprintf(buffer, sizeof(buffer), "%.4s", v.c_str());
out->append(buffer);
}
};
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(x, y, z), "-", TestFormatter()));
EXPECT_EQ(
"0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-", TestFormatter()));
// Tuples of raw pointers, of unique_ptrs, and of a mix of both.
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(&x, &y, &z), "-",
absl::DereferenceFormatter(TestFormatter())));
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(absl::make_unique<int>(x),
absl::make_unique<std::string>(y),
absl::make_unique<double>(z)),
"-", absl::DereferenceFormatter(TestFormatter())));
EXPECT_EQ("0x0000000a-hell-3.",
absl::StrJoin(std::make_tuple(absl::make_unique<int>(x), &y, &z),
"-", absl::DereferenceFormatter(TestFormatter())));
}
} // namespace

View file

@ -0,0 +1,82 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_replace.h"
#include "absl/strings/str_cat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Shorthand for the initializer-list-of-pairs parameter type taken by the
// non-template StrReplaceAll() overloads defined in this file.
using FixedMapping =
std::initializer_list<std::pair<absl::string_view, absl::string_view>>;
// Builds the substituted text for `s` by appending to *result_ptr, consuming
// the ViableSubstitutions in *subs_ptr along the way. Returns how many
// substitutions were performed.
//
// *subs_ptr is expected to be ordered so that the substitution to consider
// next sits at the back; that ordering is re-established after every step.
int ApplySubstitutions(
    absl::string_view s,
    std::vector<strings_internal::ViableSubstitution>* subs_ptr,
    std::string* result_ptr) {
  std::vector<strings_internal::ViableSubstitution>& pending = *subs_ptr;
  int applied = 0;
  size_t cursor = 0;  // Start of the part of `s` not yet copied to the result.
  while (!pending.empty()) {
    strings_internal::ViableSubstitution& next = pending.back();
    // A match that begins before `cursor` overlaps text that has already been
    // replaced, so it is not applied; it is only re-searched below.
    if (next.offset >= cursor) {
      if (cursor <= s.size()) {
        StrAppend(result_ptr, s.substr(cursor, next.offset - cursor),
                  next.replacement);
      }
      cursor = next.offset + next.old.size();
      ++applied;
    }
    // Look for the following occurrence of the same pattern.
    next.offset = s.find(next.old, cursor);
    if (next.offset == s.npos) {
      // No further matches: this pattern is finished.
      pending.pop_back();
      continue;
    }
    // Insertion sort: walk the updated entry towards the front until the
    // vector is ordered again, so the back is once more the next one to apply.
    for (size_t i = pending.size() - 1;
         i > 0 && pending[i - 1].OccursBefore(pending[i]); --i) {
      std::swap(pending[i], pending[i - 1]);
    }
  }
  // Copy whatever follows the last applied substitution.
  result_ptr->append(s.data() + cursor, s.size() - cursor);
  return applied;
}
} // namespace strings_internal
// We can implement this in terms of the generic StrReplaceAll, but
// we must specify the template overload because C++ cannot deduce the type
// of an initializer_list parameter to a function, and also if we don't specify
// the type, we just call ourselves.
//
// Note that we implement them here, rather than in the header, so that they
// aren't inlined.
// Non-template overload for brace-initialized replacement lists. Returns the
// substituted copy of `s` produced by the generic template, which is selected
// by spelling out the mapping type explicitly.
std::string StrReplaceAll(absl::string_view s,
strings_internal::FixedMapping replacements) {
return StrReplaceAll<strings_internal::FixedMapping>(s, replacements);
}
// Non-template in-place overload for brace-initialized replacement lists.
// Modifies *target through the generic template (selected by spelling out the
// mapping type explicitly) and returns that template's substitution count.
int StrReplaceAll(strings_internal::FixedMapping replacements,
std::string* target) {
return StrReplaceAll<strings_internal::FixedMapping>(replacements, target);
}
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,219 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_replace.h
// -----------------------------------------------------------------------------
//
// This file defines `absl::StrReplaceAll()`, a general-purpose string
// replacement function designed for large, arbitrary text substitutions,
// especially on strings which you are receiving from some other system for
// further processing (e.g. processing regular expressions, escaping HTML
// entities, etc.). `StrReplaceAll` is designed to be efficient even when only
// one substitution is being performed, or when substitution is rare.
//
// If the string being modified is known at compile-time, and the substitutions
// vary, `absl::Substitute()` may be a better choice.
//
// Example:
//
// std::string html_escaped = absl::StrReplaceAll(user_input, {
// {"&", "&amp;"},
// {"<", "&lt;"},
// {">", "&gt;"},
// {"\"", "&quot;"},
// {"'", "&#39;"}});
#ifndef ABSL_STRINGS_STR_REPLACE_H_
#define ABSL_STRINGS_STR_REPLACE_H_
#include <string>
#include <utility>
#include <vector>
#include "absl/base/attributes.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// StrReplaceAll()
//
// Replaces character sequences within a given string with replacements provided
// within an initializer list of key/value pairs. Candidate replacements are
// considered in order as they occur within the string, with earlier matches
// taking precedence, and longer matches taking precedence for candidates
// starting at the same position in the string. Once a substitution is made, the
// replaced text is not considered for any further substitutions.
//
// Example:
//
// std::string s = absl::StrReplaceAll(
// "$who bought $count #Noun. Thanks $who!",
// {{"$count", absl::StrCat(5)},
// {"$who", "Bob"},
// {"#Noun", "Apples"}});
// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
ABSL_MUST_USE_RESULT std::string StrReplaceAll(
absl::string_view s,
std::initializer_list<std::pair<absl::string_view, absl::string_view>>
replacements);
// Overload of `StrReplaceAll()` to accept a container of key/value replacement
// pairs (typically either an associative map or a `std::vector` of `std::pair`
// elements). A vector of pairs is generally more efficient.
//
// Examples:
//
// std::map<absl::string_view, absl::string_view> replacements;
// replacements["$who"] = "Bob";
// replacements["$count"] = "5";
// replacements["#Noun"] = "Apples";
// std::string s = absl::StrReplaceAll(
// "$who bought $count #Noun. Thanks $who!",
// replacements);
// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
//
// // A std::vector of std::pair elements can be more efficient.
// std::vector<std::pair<const absl::string_view, std::string>> replacements;
// replacements.push_back({"&", "&amp;"});
// replacements.push_back({"<", "&lt;"});
// replacements.push_back({">", "&gt;"});
// std::string s = absl::StrReplaceAll("if (ptr < &foo)",
// replacements);
// EXPECT_EQ("if (ptr &lt; &amp;foo)", s);
template <typename StrToStrMapping>
std::string StrReplaceAll(absl::string_view s,
const StrToStrMapping& replacements);
// Overload of `StrReplaceAll()` to replace character sequences within a given
// output string *in place* with replacements provided within an initializer
// list of key/value pairs, returning the number of substitutions that occurred.
//
// Example:
//
// std::string s = std::string("$who bought $count #Noun. Thanks $who!");
// int count;
// count = absl::StrReplaceAll({{"$count", absl::StrCat(5)},
// {"$who", "Bob"},
// {"#Noun", "Apples"}}, &s);
// EXPECT_EQ(count, 4);
// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
int StrReplaceAll(
std::initializer_list<std::pair<absl::string_view, absl::string_view>>
replacements,
std::string* target);
// Overload of `StrReplaceAll()` to replace patterns within a given output
// string *in place* with replacements provided within a container of key/value
// pairs.
//
// Example:
//
// std::string s = std::string("if (ptr < &foo)");
// int count = absl::StrReplaceAll({{"&", "&amp;"},
// {"<", "&lt;"},
// {">", "&gt;"}}, &s);
// EXPECT_EQ(count, 2);
// EXPECT_EQ("if (ptr &lt; &amp;foo)", s);
template <typename StrToStrMapping>
int StrReplaceAll(const StrToStrMapping& replacements, std::string* target);
// Implementation details only, past this point.
namespace strings_internal {
// One candidate replacement: the text to search for (`old`), the text to put
// in its place (`replacement`), and the offset of its match in the string
// being processed (`offset`).
struct ViableSubstitution {
  absl::string_view old;
  absl::string_view replacement;
  size_t offset;

  ViableSubstitution(absl::string_view old_str,
                     absl::string_view replacement_str, size_t offset_val)
      : old(old_str), replacement(replacement_str), offset(offset_val) {}

  // Priority order between two substitutions: the one with the lower offset
  // goes first; when the offsets are equal, the one with the longer `old`
  // text goes first.
  bool OccursBefore(const ViableSubstitution& y) const {
    const bool same_offset = (offset == y.offset);
    return same_offset ? old.size() > y.old.size() : offset < y.offset;
  }
};
// Collects, for every entry of `replacements` whose key is non-empty and
// occurs in `s`, a ViableSubstitution positioned at the key's first match.
// The returned vector is kept ordered so that the substitution that takes
// priority is at the back.
//
// A priority_queue could be used instead of the sorted vector. However, most
// callers pass so few substitutions that the overhead of such a queue isn't
// worth it.
template <typename StrToStrMapping>
std::vector<ViableSubstitution> FindSubstitutions(
    absl::string_view s, const StrToStrMapping& replacements) {
  std::vector<ViableSubstitution> viable;
  viable.reserve(replacements.size());
  for (const auto& entry : replacements) {
    using std::get;
    absl::string_view needle(get<0>(entry));
    const size_t first_match = s.find(needle);
    // "Not found" is tested first because it is the common case; an empty
    // needle (which is ignored) is rare, so that check comes second.
    if (first_match == s.npos || needle.empty()) continue;
    viable.emplace_back(needle, get<1>(entry), first_match);
    // Insertion sort: move the new entry towards the front until the vector
    // is ordered again.
    for (size_t i = viable.size() - 1;
         i > 0 && viable[i - 1].OccursBefore(viable[i]); --i) {
      std::swap(viable[i], viable[i - 1]);
    }
  }
  return viable;
}
int ApplySubstitutions(absl::string_view s,
std::vector<ViableSubstitution>* subs_ptr,
std::string* result_ptr);
} // namespace strings_internal
// Generic StrReplaceAll(): returns a new string built from `s` with the
// substitutions described by `replacements` applied. `s` is left untouched.
template <typename StrToStrMapping>
std::string StrReplaceAll(absl::string_view s,
                          const StrToStrMapping& replacements) {
  auto viable = strings_internal::FindSubstitutions(s, replacements);
  std::string output;
  // Start with room for a string as long as the input.
  output.reserve(s.size());
  strings_internal::ApplySubstitutions(s, &viable, &output);
  return output;
}
// Generic in-place StrReplaceAll(): applies `replacements` to *target and
// returns the number of substitutions performed. When nothing in
// `replacements` matches, *target is left untouched and 0 is returned.
template <typename StrToStrMapping>
int StrReplaceAll(const StrToStrMapping& replacements, std::string* target) {
  auto viable = strings_internal::FindSubstitutions(*target, replacements);
  if (viable.empty()) {
    return 0;
  }
  // Build the result in a separate string, then swap it into place.
  std::string rewritten;
  rewritten.reserve(target->size());
  const int count =
      strings_internal::ApplySubstitutions(*target, &viable, &rewritten);
  target->swap(rewritten);
  return count;
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_STR_REPLACE_H_

View file

@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_replace.h"
#include <cstring>
#include <string>
#include "benchmark/benchmark.h"
#include "absl/base/internal/raw_logging.h"
namespace {
// Benchmark fixtures. The pointers start out null and are filled in with
// heap-allocated strings by SetUpStrings() the first time it runs; nothing in
// this file deletes them.
// Input text that the benchmarks run StrReplaceAll() on.
std::string* big_string;
// Expected output after replacing only "the" with "box".
std::string* after_replacing_the;
// Expected output after applying every entry of `replacements`.
std::string* after_replacing_many;
// Needle/replacement pairs that SetUpStrings() applies to build
// after_replacing_many.
struct Replacement {
const char* needle;
const char* replacement;
} replacements[] = {
{"the", "box"}, //
{"brown", "quick"}, //
{"jumped", "liquored"}, //
{"dozen", "brown"}, //
{"lazy", "pack"}, //
{"liquor", "shakes"}, //
};
// Here, we set up a string for use in global-replace benchmarks.
// We start with a million blanks, and then deterministically insert
// 10,000 copies each of two pangrams. The result is a string that is
// 40% blank space and 60% these words. 'the' occurs 18,247 times and
// all the substitutions together occur 49,004 times.
//
// We then create "after_replacing_the" to be a string that is the result of
// replacing "the" with "box" in big_string.
//
// And then we create "after_replacing_many" to be a string that is the result
// of performing several substitutions.
// Builds the benchmark fixtures (big_string, after_replacing_the and
// after_replacing_many) on the first call; later calls return immediately.
// The expected strings are computed with plain std::string operations rather
// than with absl::StrReplaceAll().
void SetUpStrings() {
  if (big_string != nullptr) return;  // Fixtures already built.

  size_t seed = 0;
  big_string = new std::string(1000 * 1000, ' ');
  for (std::string phrase : {"the quick brown fox jumped over the lazy dogs",
                             "pack my box with the five dozen liquor jugs"}) {
    for (int i = 0; i < 10 * 1000; ++i) {
      seed = seed * 237 + 41;  // not very random.
      // The modulus keeps the whole phrase inside big_string.
      memcpy(&(*big_string)[seed % (big_string->size() - phrase.size())],
             phrase.data(), phrase.size());
    }
  }
  // big_string->resize(50);
  // OK, we've set up the string, now let's set up expectations - first by
  // just replacing "the" with "box"
  after_replacing_the = new std::string(*big_string);
  // `pos` is not advanced explicitly: once a match has been overwritten with
  // "box", searching again from the same position finds the next match.
  for (size_t pos = 0;
       (pos = after_replacing_the->find("the", pos)) != std::string::npos;) {
    memcpy(&(*after_replacing_the)[pos], "box", 3);
  }
  // And then with all the replacements.
  after_replacing_many = new std::string(*big_string);
  for (size_t pos = 0;;) {
    // Find whichever needle matches earliest at or after `pos`.
    size_t next_pos = static_cast<size_t>(-1);
    const char* needle_string = nullptr;
    const char* replacement_string = nullptr;
    // Named `candidate` so it cannot be confused with (or shadow) `seed`.
    for (const auto& candidate : replacements) {
      auto needlepos = after_replacing_many->find(candidate.needle, pos);
      if (needlepos != std::string::npos && needlepos < next_pos) {
        next_pos = needlepos;
        needle_string = candidate.needle;
        replacement_string = candidate.replacement;
      }
    }
    // next_pos still holds its sentinel value when no needle matched.
    if (next_pos > after_replacing_many->size()) break;
    after_replacing_many->replace(next_pos, strlen(needle_string),
                                  replacement_string);
    // Resume after the inserted text so replacements are never re-scanned.
    next_pos += strlen(replacement_string);
    pos = next_pos;
  }
}
// Benchmarks absl::StrReplaceAll() with a single replacement ("the" -> "box")
// on the large fixture string, checking on every iteration that the output
// equals the precomputed expectation.
void BM_StrReplaceAllOneReplacement(benchmark::State& state) {
  SetUpStrings();
  std::string input = *big_string;
  for (auto _ : state) {
    std::string replaced = absl::StrReplaceAll(input, {{"the", "box"}});
    ABSL_RAW_CHECK(replaced == *after_replacing_the,
                   "not benchmarking intended behavior");
  }
}
BENCHMARK(BM_StrReplaceAllOneReplacement);
// Benchmarks absl::StrReplaceAll() with six simultaneous replacements on the
// large fixture string, checking on every iteration that the output equals
// the precomputed expectation.
void BM_StrReplaceAll(benchmark::State& state) {
  SetUpStrings();
  std::string input = *big_string;
  for (auto _ : state) {
    std::string replaced = absl::StrReplaceAll(input, {{"the", "box"},
                                                       {"brown", "quick"},
                                                       {"jumped", "liquored"},
                                                       {"dozen", "brown"},
                                                       {"lazy", "pack"},
                                                       {"liquor", "shakes"}});
    ABSL_RAW_CHECK(replaced == *after_replacing_many,
                   "not benchmarking intended behavior");
  }
}
BENCHMARK(BM_StrReplaceAll);
} // namespace

View file

@ -0,0 +1,341 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_replace.h"
#include <list>
#include <map>
#include <tuple>
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
// Exercises the value-returning StrReplaceAll() with exactly one replacement
// pair: empty inputs, empty patterns (which are ignored), matches at the
// start/middle/end, length-changing replacements, and overlapping matches.
TEST(StrReplaceAll, OneReplacement) {
std::string s;
// Empty string.
s = absl::StrReplaceAll(s, {{"", ""}});
EXPECT_EQ(s, "");
s = absl::StrReplaceAll(s, {{"x", ""}});
EXPECT_EQ(s, "");
s = absl::StrReplaceAll(s, {{"", "y"}});
EXPECT_EQ(s, "");
s = absl::StrReplaceAll(s, {{"x", "y"}});
EXPECT_EQ(s, "");
// Empty substring.
s = absl::StrReplaceAll("abc", {{"", ""}});
EXPECT_EQ(s, "abc");
s = absl::StrReplaceAll("abc", {{"", "y"}});
EXPECT_EQ(s, "abc");
s = absl::StrReplaceAll("abc", {{"x", ""}});
EXPECT_EQ(s, "abc");
// Substring not found.
s = absl::StrReplaceAll("abc", {{"xyz", "123"}});
EXPECT_EQ(s, "abc");
// Replace entire string.
s = absl::StrReplaceAll("abc", {{"abc", "xyz"}});
EXPECT_EQ(s, "xyz");
// Replace once at the start.
s = absl::StrReplaceAll("abc", {{"a", "x"}});
EXPECT_EQ(s, "xbc");
// Replace once in the middle.
s = absl::StrReplaceAll("abc", {{"b", "x"}});
EXPECT_EQ(s, "axc");
// Replace once at the end.
s = absl::StrReplaceAll("abc", {{"c", "x"}});
EXPECT_EQ(s, "abx");
// Replace multiple times with varying lengths of original/replacement.
s = absl::StrReplaceAll("ababa", {{"a", "xxx"}});
EXPECT_EQ(s, "xxxbxxxbxxx");
s = absl::StrReplaceAll("ababa", {{"b", "xxx"}});
EXPECT_EQ(s, "axxxaxxxa");
s = absl::StrReplaceAll("aaabaaabaaa", {{"aaa", "x"}});
EXPECT_EQ(s, "xbxbx");
s = absl::StrReplaceAll("abbbabbba", {{"bbb", "x"}});
EXPECT_EQ(s, "axaxa");
// Overlapping matches are replaced greedily.
s = absl::StrReplaceAll("aaa", {{"aa", "x"}});
EXPECT_EQ(s, "xa");
// The replacements are not recursive.
s = absl::StrReplaceAll("aaa", {{"aa", "a"}});
EXPECT_EQ(s, "aa");
}
// Exercises the value-returning StrReplaceAll() with several replacement
// pairs at once, including patterns that share a starting position (the
// longer pattern wins regardless of the order in which the pairs are listed).
TEST(StrReplaceAll, ManyReplacements) {
std::string s;
// Empty string.
s = absl::StrReplaceAll("", {{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}});
EXPECT_EQ(s, "");
// Empty substring.
s = absl::StrReplaceAll("abc", {{"", ""}, {"", "y"}, {"x", ""}});
EXPECT_EQ(s, "abc");
// Replace entire string, one char at a time
s = absl::StrReplaceAll("abc", {{"a", "x"}, {"b", "y"}, {"c", "z"}});
EXPECT_EQ(s, "xyz");
s = absl::StrReplaceAll("zxy", {{"z", "x"}, {"x", "y"}, {"y", "z"}});
EXPECT_EQ(s, "xyz");
// Replace once at the start (longer matches take precedence)
s = absl::StrReplaceAll("abc", {{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}});
EXPECT_EQ(s, "xyz");
// Replace once in the middle.
s = absl::StrReplaceAll(
"Abc!", {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}});
EXPECT_EQ(s, "Ayz!");
// Replace once at the end.
s = absl::StrReplaceAll(
"Abc!",
{{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}});
EXPECT_EQ(s, "Ayz?");
// Replace multiple times with varying lengths of original/replacement.
s = absl::StrReplaceAll("ababa", {{"a", "xxx"}, {"b", "XXXX"}});
EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx");
// Overlapping matches are replaced greedily.
s = absl::StrReplaceAll("aaa", {{"aa", "x"}, {"a", "X"}});
EXPECT_EQ(s, "xX");
s = absl::StrReplaceAll("aaa", {{"a", "X"}, {"aa", "x"}});
EXPECT_EQ(s, "xX");
// Two well-known sentences
s = absl::StrReplaceAll("the quick brown fox jumped over the lazy dogs",
{
{"brown", "box"},
{"dogs", "jugs"},
{"fox", "with"},
{"jumped", "five"},
{"over", "dozen"},
{"quick", "my"},
{"the", "pack"},
{"the lazy", "liquor"},
});
EXPECT_EQ(s, "pack my box with five dozen liquor jugs");
}
// Verifies that the value-returning StrReplaceAll() accepts a std::map of C
// strings as the replacement container.
// Note: a map keyed by const char* is ordered by pointer value, not by string
// contents, so its iteration order is unspecified here; the expected result
// does not depend on that order.
TEST(StrReplaceAll, ManyReplacementsInMap) {
std::map<const char *, const char *> replacements;
replacements["$who"] = "Bob";
replacements["$count"] = "5";
replacements["#Noun"] = "Apples";
std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!",
replacements);
EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
}
// Verifies the in-place overload taking a braced list of pairs: the target
// string is modified and the number of substitutions is returned ("$who"
// matches twice, the other two patterns once each, hence 4).
TEST(StrReplaceAll, ReplacementsInPlace) {
std::string s = std::string("$who bought $count #Noun. Thanks $who!");
int count;
count = absl::StrReplaceAll({{"$count", absl::StrCat(5)},
{"$who", "Bob"},
{"#Noun", "Apples"}}, &s);
EXPECT_EQ(count, 4);
EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
}
// Same scenario as the in-place test, but the replacements come from a
// std::map of string_views instead of a braced list.
TEST(StrReplaceAll, ReplacementsInPlaceInMap) {
  const std::map<absl::string_view, absl::string_view> replacements = {
      {"$who", "Bob"},
      {"$count", "5"},
      {"#Noun", "Apples"},
  };
  std::string s("$who bought $count #Noun. Thanks $who!");
  const int count = absl::StrReplaceAll(replacements, &s);
  EXPECT_EQ(count, 4);
  EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
}
// Test-only replacement element: wraps a single string_view. The tests below
// store text of the form "old:new" in `data` and read the two halves back
// through the get<N>() helper.
struct Cont {
  absl::string_view data;

  Cont() = default;
  explicit Cont(absl::string_view src) : data(src) {}
};
// Returns the `index`-th (0-based) ':'-separated field of `c.data`.
// No bounds check is made: the iterator is advanced `index` times and then
// dereferenced, so `index` must be smaller than the number of fields.
template <int index>
absl::string_view get(const Cont& c) {
  auto fields = absl::StrSplit(c.data, ':');
  auto field = fields.begin();
  for (int remaining = index; remaining > 0; --remaining) {
    ++field;
  }
  return *field;
}
// Exercises StrReplaceAll with replacement collections of several container
// and element types, and with a replacement list that grows between calls.
TEST(StrReplaceAll, VariableNumber) {
std::string s;
{
// vector<pair<string, string>>: start empty, then add one rule at a time.
std::vector<std::pair<std::string, std::string>> replacements;
// No rules: nothing is replaced and the input is left untouched.
s = "abc";
EXPECT_EQ(0, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("abc", s);
s = "abc";
replacements.push_back({"a", "A"});
EXPECT_EQ(1, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("Abc", s);
s = "abc";
replacements.push_back({"b", "B"});
EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("ABc", s);
// A rule whose key ("d") does not occur in the input does not change the
// count or the result.
s = "abc";
replacements.push_back({"d", "D"});
EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("ABc", s);
// The value-returning overload accepts the same container.
EXPECT_EQ("ABcABc", absl::StrReplaceAll("abcabc", replacements));
}
{
// map<const char*, const char*> with overlapping keys "aa" and "a".
std::map<const char*, const char*> replacements;
replacements["aa"] = "x";
replacements["a"] = "X";
s = "aaa";
EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
EXPECT_EQ("xX", s);
EXPECT_EQ("xxX", absl::StrReplaceAll("aaaaa", replacements));
}
{
// list<pair<string_view, string_view>> built from a braced initializer.
std::list<std::pair<absl::string_view, absl::string_view>> replacements = {
{"a", "x"}, {"b", "y"}, {"c", "z"}};
std::string s = absl::StrReplaceAll("abc", replacements);
EXPECT_EQ(s, "xyz");
}
{
// vector of 3-element tuples; the expected output only depends on the first
// two elements, the trailing int has no visible effect on the result.
using X = std::tuple<absl::string_view, std::string, int>;
std::vector<X> replacements(3);
replacements[0] = X{"a", "x", 1};
replacements[1] = X{"b", "y", 0};
replacements[2] = X{"c", "z", -1};
std::string s = absl::StrReplaceAll("abc", replacements);
EXPECT_EQ(s, "xyz");
}
{
// vector of the user-defined Cont type holding "old:new" text.
// NOTE(review): presumably StrReplaceAll reaches the two halves through the
// get<N>(const Cont&) template defined above -- confirm against str_replace.h.
std::vector<Cont> replacements(3);
replacements[0] = Cont{"a:x"};
replacements[1] = Cont{"b:y"};
replacements[2] = Cont{"c:z"};
std::string s = absl::StrReplaceAll("abc", replacements);
EXPECT_EQ(s, "xyz");
}
}
// Same as above, but using the in-place variant of absl::StrReplaceAll,
// that returns the # of replacements performed.
// Each case resets `s`, applies a braced list of {old, new} pairs, and checks
// both the returned replacement count (`reps`) and the rewritten string.
TEST(StrReplaceAll, Inplace) {
std::string s;
int reps;
// Empty string.
s = "";
reps = absl::StrReplaceAll({{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}, &s);
EXPECT_EQ(reps, 0);
EXPECT_EQ(s, "");
// Empty substring.
// Rules with an empty "old" string produce no replacements here.
s = "abc";
reps = absl::StrReplaceAll({{"", ""}, {"", "y"}, {"x", ""}}, &s);
EXPECT_EQ(reps, 0);
EXPECT_EQ(s, "abc");
// Replace entire string, one char at a time
s = "abc";
reps = absl::StrReplaceAll({{"a", "x"}, {"b", "y"}, {"c", "z"}}, &s);
EXPECT_EQ(reps, 3);
EXPECT_EQ(s, "xyz");
// Replacement output is not re-scanned: "z"->"x" does not then become "y".
s = "zxy";
reps = absl::StrReplaceAll({{"z", "x"}, {"x", "y"}, {"y", "z"}}, &s);
EXPECT_EQ(reps, 3);
EXPECT_EQ(s, "xyz");
// Replace once at the start (longer matches take precedence)
s = "abc";
reps = absl::StrReplaceAll({{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}}, &s);
EXPECT_EQ(reps, 1);
EXPECT_EQ(s, "xyz");
// Replace once in the middle.
// Matching is case-sensitive: the leading 'A' is not touched by the "a" rules.
s = "Abc!";
reps = absl::StrReplaceAll(
{{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}}, &s);
EXPECT_EQ(reps, 1);
EXPECT_EQ(s, "Ayz!");
// Replace once at the end.
s = "Abc!";
reps = absl::StrReplaceAll(
{{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}}, &s);
EXPECT_EQ(reps, 1);
EXPECT_EQ(s, "Ayz?");
// Replace multiple times with varying lengths of original/replacement.
s = "ababa";
reps = absl::StrReplaceAll({{"a", "xxx"}, {"b", "XXXX"}}, &s);
EXPECT_EQ(reps, 5);
EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx");
// Overlapping matches are replaced greedily.
// The result is the same regardless of the order the rules are listed in.
s = "aaa";
reps = absl::StrReplaceAll({{"aa", "x"}, {"a", "X"}}, &s);
EXPECT_EQ(reps, 2);
EXPECT_EQ(s, "xX");
s = "aaa";
reps = absl::StrReplaceAll({{"a", "X"}, {"aa", "x"}}, &s);
EXPECT_EQ(reps, 2);
EXPECT_EQ(s, "xX");
// Two well-known sentences
// "the" is replaced once on its own and once as part of "the lazy", giving
// eight replacements in total.
s = "the quick brown fox jumped over the lazy dogs";
reps = absl::StrReplaceAll(
{
{"brown", "box"},
{"dogs", "jugs"},
{"fox", "with"},
{"jumped", "five"},
{"over", "dozen"},
{"quick", "my"},
{"the", "pack"},
{"the lazy", "liquor"},
},
&s);
EXPECT_EQ(reps, 8);
EXPECT_EQ(s, "pack my box with five dozen liquor jugs");
}

View file

@ -0,0 +1,139 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_split.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/ascii.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Search routine shared by the ByString and ByAnyChar delimiters. The
// `find_policy` argument supplies two operations:
//   * Find(text, delimiter, pos) -- position of the next match, or npos;
//   * Length(delimiter)          -- length of the matched delimiter.
// LiteralPolicy forwards to absl::string_view::find() and AnyOfPolicy to
// absl::string_view::find_first_of().
//
// Returns a view of the matched delimiter inside `text`. When nothing is
// found, a zero-length view positioned at the end of `text` is returned.
template <typename FindPolicy>
absl::string_view GenericFind(absl::string_view text,
                              absl::string_view delimiter, size_t pos,
                              FindPolicy find_policy) {
  // An empty delimiter on non-empty text "matches" with zero length one
  // character past `pos`.
  const bool empty_delimiter_case = delimiter.empty() && text.length() > 0;
  if (empty_delimiter_case) {
    return absl::string_view(text.data() + pos + 1, 0);
  }

  const size_t match_pos = find_policy.Find(text, delimiter, pos);
  if (match_pos == absl::string_view::npos) {
    // Not found: zero-length view at the end of `text`.
    return absl::string_view(text.data() + text.size(), 0);
  }
  return absl::string_view(text.data() + match_pos,
                           find_policy.Length(delimiter));
}
// GenericFind() policy that treats the delimiter as one literal substring:
// matches are located with absl::string_view::find(), so a match is as long
// as the delimiter itself.
struct LiteralPolicy {
  size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
    const size_t match_pos = text.find(delimiter, pos);
    return match_pos;
  }
  size_t Length(absl::string_view delimiter) {
    const size_t match_len = delimiter.length();
    return match_len;
  }
};
// GenericFind() policy that treats the delimiter as a set of characters:
// matches are located with absl::string_view::find_first_of(), so a match is
// always exactly one character long.
struct AnyOfPolicy {
  size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
    const size_t match_pos = text.find_first_of(delimiter, pos);
    return match_pos;
  }
  size_t Length(absl::string_view /* delimiter */) {
    constexpr size_t kSingleCharLength = 1;
    return kSingleCharLength;
  }
};
} // namespace
//
// ByString
//
// Stores a copy of `sp` as the substring to search for.
ByString::ByString(absl::string_view sp) : delimiter_(sp) {}
// Returns a view of the first occurrence of the delimiter in `text` at or
// after `pos`, or a zero-length view at the end of `text` if there is none.
absl::string_view ByString::Find(absl::string_view text, size_t pos) const {
  if (delimiter_.length() != 1) {
    return GenericFind(text, delimiter_, pos, LiteralPolicy());
  }
  // Single-character delimiter: searching for a char is much faster than
  // searching for an absl::string_view.
  const size_t match_pos = text.find(delimiter_[0], pos);
  return match_pos == absl::string_view::npos
             ? absl::string_view(text.data() + text.size(), 0)
             : text.substr(match_pos, 1);
}
//
// ByChar
//
// Returns a one-character view of the first `c_` in `text` at or after `pos`,
// or a zero-length view at the end of `text` if the character is not present.
absl::string_view ByChar::Find(absl::string_view text, size_t pos) const {
  const size_t match_pos = text.find(c_, pos);
  const bool missing = (match_pos == absl::string_view::npos);
  return missing ? absl::string_view(text.data() + text.size(), 0)
                 : text.substr(match_pos, 1);
}
//
// ByAnyChar
//
// Stores a copy of `sp`; each character in it is a possible delimiter.
ByAnyChar::ByAnyChar(absl::string_view sp) : delimiters_(sp) {}
// Delegates to GenericFind() with the any-of policy, so the first character
// of `text` at or after `pos` that appears in `delimiters_` is reported.
absl::string_view ByAnyChar::Find(absl::string_view text, size_t pos) const {
  AnyOfPolicy policy;
  return GenericFind(text, delimiters_, pos, policy);
}
//
// ByLength
//
// Records the chunk length. The length must be strictly positive, which is
// enforced with ABSL_RAW_CHECK.
ByLength::ByLength(ptrdiff_t length) : length_(length) {
ABSL_RAW_CHECK(length > 0, "");
}
// Returns a zero-length view `length_` characters past `pos`, marking where
// the next chunk boundary falls. If `length_` or fewer characters remain,
// returns a zero-length view at the end of `text` ("delimiter not found"), so
// that the remainder becomes the final chunk.
absl::string_view ByLength::Find(absl::string_view text,
                                 size_t pos) const {
  const size_t start = std::min(pos, text.size());  // clamp `pos` to the text
  const absl::string_view rest = text.substr(start);
  const size_t chunk_len = static_cast<size_t>(length_);
  if (rest.length() > chunk_len) {
    return absl::string_view(rest.data() + length_, 0);
  }
  return absl::string_view(text.data() + text.size(), 0);
}
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,513 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_split.h
// -----------------------------------------------------------------------------
//
// This file contains functions for splitting strings. It defines the main
// `StrSplit()` function, several delimiters for determining the boundaries on
// which to split the string, and predicates for filtering delimited results.
// `StrSplit()` adapts the returned collection to the type specified by the
// caller.
//
// Example:
//
// // Splits the given string on commas. Returns the results in a
// // vector of strings.
// std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
// // Can also use ","
// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See StrSplit() below for more information.
#ifndef ABSL_STRINGS_STR_SPLIT_H_
#define ABSL_STRINGS_STR_SPLIT_H_
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/str_split_internal.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
//------------------------------------------------------------------------------
// Delimiters
//------------------------------------------------------------------------------
//
// `StrSplit()` uses delimiters to define the boundaries between elements in the
// provided input. Several `Delimiter` types are defined below. If a string
// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
// were passed a `ByString` delimiter.
//
// A `Delimiter` is an object with a `Find()` function that knows how to find
// the first occurrence of itself in a given `absl::string_view`.
//
// The following `Delimiter` types are available for use within `StrSplit()`:
//
// - `ByString` (default for string arguments)
// - `ByChar` (default for a char argument)
// - `ByAnyChar`
// - `ByLength`
// - `MaxSplits`
//
// A Delimiter's `Find()` member function will be passed an input `text` that is
// to be split and a position (`pos`) to begin searching for the next delimiter
// in `text`. The returned absl::string_view should refer to the next occurrence
// (after `pos`) of the represented delimiter; this returned absl::string_view
// represents the next location where the input `text` should be broken.
//
// The returned absl::string_view may be zero-length if the Delimiter does not
// represent a part of the string (e.g., a fixed-length delimiter). If no
// delimiter is found in the input `text`, a zero-length absl::string_view
// referring to `text.end()` should be returned (e.g.,
// `text.substr(text.size())`). It is important that the returned
// absl::string_view always be within the bounds of the input `text` given as an
// argument--it must not refer to a string that is physically located outside of
// the given string.
//
// The following example is a simple Delimiter object that is created with a
// single char and will look for that char in the text passed to the `Find()`
// function:
//
// struct SimpleDelimiter {
// const char c_;
// explicit SimpleDelimiter(char c) : c_(c) {}
// absl::string_view Find(absl::string_view text, size_t pos) {
// auto found = text.find(c_, pos);
// if (found == absl::string_view::npos)
// return text.substr(text.size());
//
// return text.substr(found, 1);
// }
// };
// ByString
//
// A sub-string delimiter. If `StrSplit()` is passed a string in place of a
// `Delimiter` object, the string will be implicitly converted into a
// `ByString` delimiter.
//
// Example:
//
// // Because a string literal is converted to an `absl::ByString`,
// // the following two splits are equivalent.
//
// std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
//
// using absl::ByString;
// std::vector<std::string> v2 = absl::StrSplit("a, b, c",
// ByString(", "));
// // v1 and v2 each hold: "a", "b", "c"
class ByString {
public:
// Creates a delimiter that matches the substring `sp`.
explicit ByString(absl::string_view sp);
// Returns a view of the next occurrence of the delimiter in `text`, searching
// from `pos`; returns a zero-length view at the end of `text` if none exists.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// The delimiter text, held as an owned std::string.
const std::string delimiter_;
};
// ByChar
//
// A single character delimiter. `ByChar` is functionally equivalent to a
// 1-char string within a `ByString` delimiter, but slightly more efficient.
//
// Example:
//
// // Because a char literal is converted to a absl::ByChar,
// // the following two splits are equivalent.
// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
// using absl::ByChar;
// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
// // v1 and v2 each hold: "a", "b", "c"
//
// `ByChar` is also the default delimiter if a single character is given
// as the delimiter to `StrSplit()`. For example, the following calls are
// equivalent:
//
// std::vector<std::string> v = absl::StrSplit("a-b", '-');
//
// using absl::ByChar;
// std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
//
class ByChar {
public:
// Creates a delimiter that matches the single character `c`.
explicit ByChar(char c) : c_(c) {}
// Returns a one-character view of the next `c_` in `text`, searching from
// `pos`; returns a zero-length view at the end of `text` if none exists.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// The delimiter character.
char c_;
};
// ByAnyChar
//
// A delimiter that will match any of the given byte-sized characters within
// its provided string.
//
// Note: this delimiter works with single-byte string data, but does not work
// with variable-width encodings, such as UTF-8.
//
// Example:
//
// using absl::ByAnyChar;
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// If `ByAnyChar` is given the empty string, it behaves exactly like
// `ByString` and matches each individual character in the input string.
//
class ByAnyChar {
public:
// Creates a delimiter that matches any single character contained in `sp`.
explicit ByAnyChar(absl::string_view sp);
// Returns a view of the next delimiter character in `text`, searching from
// `pos`; returns a zero-length view at the end of `text` if none exists.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// The set of delimiter characters, held as an owned std::string.
const std::string delimiters_;
};
// ByLength
//
// A delimiter for splitting into equal-length strings. The length argument to
// the constructor must be greater than 0.
//
// Note: this delimiter works with single-byte string data, but does not work
// with variable-width encodings, such as UTF-8.
//
// Example:
//
// using absl::ByLength;
// std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
// // v[0] == "123", v[1] == "456", v[2] == "789"
//
// Note that the string does not have to be a multiple of the fixed split
// length. In such a case, the last substring will be shorter.
//
// using absl::ByLength;
// std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
//
// // v[0] == "12", v[1] == "34", v[2] == "5"
class ByLength {
public:
// Creates a delimiter that splits every `length` characters. `length` must be
// greater than 0.
explicit ByLength(ptrdiff_t length);
// Returns a zero-length view marking the next chunk boundary in `text` after
// `pos`, or a zero-length view at the end of `text` when the remaining text
// is no longer than one chunk.
absl::string_view Find(absl::string_view text, size_t pos) const;
private:
// The fixed chunk length.
const ptrdiff_t length_;
};
namespace strings_internal {
// A traits-like metafunction that maps the type of a delimiter argument to the
// Delimiter object type used to perform the split. The primary template
// simply exposes `Delimiter` itself as `type`. Specializations map a `char`
// to `ByChar` and string-like types (`char*`, `const char*`,
// `absl::string_view`, `std::string`) to `ByString`.
// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
// plain characters and strings (e.g., ',' or ", ") as delimiter arguments.
template <typename Delimiter>
struct SelectDelimiter {
using type = Delimiter;
};
// A single character selects the ByChar delimiter.
template <>
struct SelectDelimiter<char> {
using type = ByChar;
};
// The string-like types below all select the ByString delimiter.
template <>
struct SelectDelimiter<char*> {
using type = ByString;
};
template <>
struct SelectDelimiter<const char*> {
using type = ByString;
};
template <>
struct SelectDelimiter<absl::string_view> {
using type = ByString;
};
template <>
struct SelectDelimiter<std::string> {
using type = ByString;
};
// Delimiter adaptor that forwards to a wrapped delimiter but caps the number
// of matches it will report. Every call to Find() is counted; the call made
// when the count equals `limit` reports "not found" (a zero-length view at
// the end of `text`) instead of consulting the wrapped delimiter.
template <typename Delimiter>
class MaxSplitsImpl {
 public:
  MaxSplitsImpl(Delimiter delimiter, int limit)
      : delimiter_(delimiter), limit_(limit), count_(0) {}

  absl::string_view Find(absl::string_view text, size_t pos) {
    const bool limit_reached = (count_ == limit_);
    ++count_;
    if (!limit_reached) {
      return delimiter_.Find(text, pos);
    }
    // No more matches.
    return absl::string_view(text.data() + text.size(), 0);
  }

 private:
  Delimiter delimiter_;  // The wrapped delimiter.
  const int limit_;      // Maximum number of matches to report.
  int count_;            // Number of Find() calls made so far.
};
} // namespace strings_internal
// MaxSplits()
//
// Wraps `delimiter` so that at most `limit` matches are reported. The final
// element of the resulting collection holds everything that was left unsplit,
// and may therefore still contain instances of the delimiter. The collection
// will contain at most `limit` + 1 elements.
//
// `delimiter` may be a Delimiter object, or a character/string that is mapped
// to one through `strings_internal::SelectDelimiter`.
//
// Example:
//
//   using absl::MaxSplits;
//   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
//
//   // v[0] == "a", v[1] == "b,c"
template <typename Delimiter>
inline strings_internal::MaxSplitsImpl<
    typename strings_internal::SelectDelimiter<Delimiter>::type>
MaxSplits(Delimiter delimiter, int limit) {
  using ResolvedDelimiter =
      typename strings_internal::SelectDelimiter<Delimiter>::type;
  using LimitedDelimiter = strings_internal::MaxSplitsImpl<ResolvedDelimiter>;
  return LimitedDelimiter(ResolvedDelimiter(delimiter), limit);
}
//------------------------------------------------------------------------------
// Predicates
//------------------------------------------------------------------------------
//
// Predicates filter the results of a `StrSplit()` by determining whether or not
// a resultant element is included in the result set. A predicate may be passed
// as an optional third argument to the `StrSplit()` function.
//
// Predicates are unary functions (or functors) that take a single
// `absl::string_view` argument and return a bool indicating whether the
// argument should be included (`true`) or excluded (`false`).
//
// Predicates are useful when filtering out empty substrings. By default, empty
// substrings may be returned by `StrSplit()`, which is similar to the way split
// functions work in other programming languages.
// AllowEmpty()
//
// Always returns `true`, indicating that all strings--including empty
// strings--should be included in the split output. This predicate is not
// strictly needed because this is the default behavior of `StrSplit()`;
// however, it might be useful at some call sites to make the intent explicit.
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
//
// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
struct AllowEmpty {
// Keeps every piece; the argument is ignored.
bool operator()(absl::string_view) const { return true; }
};
// SkipEmpty()
//
// Predicate that rejects empty pieces: returns `false` for an empty
// `absl::string_view`, telling `StrSplit()` to leave it out of the result.
//
// Example:
//
//   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
//
//   // v[0] == "a", v[1] == "b"
//
// Note: a piece consisting only of whitespace is not empty as far as
// `SkipEmpty()` is concerned. Use the `SkipWhitespace()` predicate to drop
// such pieces as well.
struct SkipEmpty {
  bool operator()(absl::string_view sp) const { return sp.length() > 0; }
};
// SkipWhitespace()
//
// Returns `false` if the given `absl::string_view` is empty *or* contains only
// whitespace, indicating that `StrSplit()` should omit the string.
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
// ',', SkipWhitespace());
// // v[0] == " a ", v[1] == "b"
//
// // SkipEmpty() would return whitespace elements
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
// // v[0] == " a ", v[1] == " ", v[2] == "b"
struct SkipWhitespace {
bool operator()(absl::string_view sp) const {
sp = absl::StripAsciiWhitespace(sp);
return !sp.empty();
}
};
//------------------------------------------------------------------------------
// StrSplit()
//------------------------------------------------------------------------------
// StrSplit()
//
// Splits a given string based on the provided `Delimiter` object, returning the
// elements within the type specified by the caller. Optionally, you may pass a
// `Predicate` to `StrSplit()` indicating whether to include or exclude the
// resulting element within the final result set. (See the overviews for
// Delimiters and Predicates above.)
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
//
// You can also provide an explicit `Delimiter` object:
//
// Example:
//
// using absl::ByAnyChar;
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See above for more information on delimiters.
//
// By default, empty strings are included in the result set. You can optionally
// include a third `Predicate` argument to apply a test for whether the
// resultant element should be included in the result set:
//
// Example:
//
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
// ',', SkipWhitespace());
// // v[0] == " a ", v[1] == "b"
//
// See above for more information on predicates.
//
//------------------------------------------------------------------------------
// StrSplit() Return Types
//------------------------------------------------------------------------------
//
// The `StrSplit()` function adapts the returned collection to the collection
// specified by the caller (e.g. `std::vector` above). The returned collections
// may contain `std::string`, `absl::string_view` (in which case the original
// string being split must ensure that it outlives the collection), or any
// object that can be explicitly created from an `absl::string_view`. This
// behavior works for:
//
// 1) All standard STL containers including `std::vector`, `std::list`,
// `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
// 2) `std::pair` (which is not actually a container). See below.
//
// Example:
//
// // The results are returned as `absl::string_view` objects. Note that we
// // have to ensure that the input string outlives any results.
// std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
//
// // Stores results in a std::set<std::string>, which also performs
// // de-duplication and orders the elements in ascending order.
// std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
// // a contains "a", "b", "c" (in that iteration order)
//
// // `StrSplit()` can be used within a range-based for loop, in which case
// // each element will be of type `absl::string_view`.
// std::vector<std::string> v;
// for (const auto sv : absl::StrSplit("a,b,c", ',')) {
// if (sv != "b") v.emplace_back(sv);
// }
// // v[0] == "a", v[1] == "c"
//
// // Stores results in a map. The map implementation assumes that the input
// // is provided as a series of key/value pairs. For example, the 0th element
// // resulting from the split will be stored as a key to the 1st element. If
// // an odd number of elements are resolved, the last element is paired with
// // a default-constructed value (e.g., empty string).
// std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
// // m["a"] == "b", m["c"] == "" // last component value equals ""
//
// Splitting to `std::pair` is an interesting case because it can hold only two
// elements and is not a collection type. When splitting to a `std::pair` the
// first two split strings become the `std::pair` `.first` and `.second`
// members, respectively. The remaining split substrings are discarded. If there
// are fewer than two split substrings, the empty string is used for the
// corresponding `std::pair` member.
//
// Example:
//
// // Stores first two split strings as the members in a std::pair.
// std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
// // p.first == "a", p.second == "b" // "c" is omitted.
//
// The `StrSplit()` function can be used multiple times to perform more
// complicated splitting logic, such as intelligently parsing key-value pairs.
//
// Example:
//
// // The input string "a=b=c,d=e,f=,g" becomes
// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
// std::map<std::string, std::string> m;
// for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
// m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
// }
// EXPECT_EQ("b=c", m.find("a")->second);
// EXPECT_EQ("e", m.find("d")->second);
// EXPECT_EQ("", m.find("f")->second);
// EXPECT_EQ("", m.find("g")->second);
//
// WARNING: Due to a legacy bug that is maintained for backward compatibility,
// splitting the following empty string_views produces different results:
//
// absl::StrSplit(absl::string_view(""), '-'); // {""}
// absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""}
//
// Try not to depend on this distinction because the bug may one day be fixed.
// Overload without a predicate: every piece, including empty ones, is kept
// (the AllowEmpty predicate is used).
template <typename Delimiter>
strings_internal::Splitter<
    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
  using ResolvedDelimiter =
      typename strings_internal::SelectDelimiter<Delimiter>::type;
  using SplitterType =
      strings_internal::Splitter<ResolvedDelimiter, AllowEmpty>;
  return SplitterType(std::move(text), ResolvedDelimiter(d), AllowEmpty());
}
// Overload taking a predicate `p` that decides which pieces are included in
// the result.
template <typename Delimiter, typename Predicate>
strings_internal::Splitter<
    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
         Predicate p) {
  using ResolvedDelimiter =
      typename strings_internal::SelectDelimiter<Delimiter>::type;
  using SplitterType = strings_internal::Splitter<ResolvedDelimiter, Predicate>;
  return SplitterType(std::move(text), ResolvedDelimiter(d), std::move(p));
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_STR_SPLIT_H_

View file

@ -0,0 +1,180 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_split.h"
#include <iterator>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "benchmark/benchmark.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/string_view.h"
namespace {
// Builds a benchmark input of `desired_length * 25` characters: all 'x',
// with a ';' written at index 1 and then every 25 characters after that
// (1, 26, 51, ...).
//
// The size is computed and the loop is indexed in size_t so that the
// multiplication cannot overflow `int` for large `desired_length`, and so
// that the index is not compared against `size()` across signedness.
std::string MakeTestString(int desired_length) {
  static const int kAverageValueLen = 25;
  const size_t total_length =
      static_cast<size_t>(desired_length) * kAverageValueLen;
  std::string test(total_length, 'x');
  for (size_t i = 1; i < test.size(); i += kAverageValueLen) {
    test[i] = ';';
  }
  return test;
}
// Measures splitting on ';' into a freshly constructed
// std::vector<absl::string_view> on every iteration.
void BM_Split2StringView(benchmark::State& state) {
  const std::string input = MakeTestString(state.range(0));
  for (auto _ : state) {
    std::vector<absl::string_view> pieces = absl::StrSplit(input, ';');
    benchmark::DoNotOptimize(pieces);
  }
}
BENCHMARK_RANGE(BM_Split2StringView, 0, 1 << 20);
// The delimiter characters cycled through by MakeMultiDelimiterTestString().
static const absl::string_view kDelimiters = ";:,.";
// Builds a benchmark input of `desired_length * 25` characters: all 'x',
// with a delimiter written at every index that is a multiple of 25
// (0, 25, 50, ...). Successive delimiters cycle through `kDelimiters`.
//
// The size is computed and the loop is indexed in size_t so that the
// multiplications cannot overflow `int` for large `desired_length`, and so
// that signed and unsigned values are not mixed in the comparisons.
std::string MakeMultiDelimiterTestString(int desired_length) {
  static const int kAverageValueLen = 25;
  const size_t total_length =
      static_cast<size_t>(desired_length) * kAverageValueLen;
  std::string test(total_length, 'x');
  for (size_t i = 0; i * kAverageValueLen < test.size(); ++i) {
    // Cycle through a variety of delimiters.
    test[i * kAverageValueLen] = kDelimiters[i % kDelimiters.size()];
  }
  return test;
}
// Measures StrSplit with a ByAnyChar delimiter that has four characters
// (`kDelimiters`) to choose from.
void BM_Split2StringViewByAnyChar(benchmark::State& state) {
  const std::string input = MakeMultiDelimiterTestString(state.range(0));
  for (auto _ : state) {
    std::vector<absl::string_view> pieces =
        absl::StrSplit(input, absl::ByAnyChar(kDelimiters));
    benchmark::DoNotOptimize(pieces);
  }
}
BENCHMARK_RANGE(BM_Split2StringViewByAnyChar, 0, 1 << 20);
// Like BM_Split2StringView, but the destination vector is declared once
// outside the timing loop and assigned to on each iteration.
void BM_Split2StringViewLifted(benchmark::State& state) {
  const std::string input = MakeTestString(state.range(0));
  std::vector<absl::string_view> pieces;
  for (auto _ : state) {
    pieces = absl::StrSplit(input, ';');
  }
  benchmark::DoNotOptimize(pieces);
}
BENCHMARK_RANGE(BM_Split2StringViewLifted, 0, 1 << 20);
// Measures splitting on ';' into a std::vector<std::string>, i.e. with every
// piece copied into its own string.
void BM_Split2String(benchmark::State& state) {
  const std::string input = MakeTestString(state.range(0));
  for (auto _ : state) {
    std::vector<std::string> pieces = absl::StrSplit(input, ';');
    benchmark::DoNotOptimize(pieces);
  }
}
BENCHMARK_RANGE(BM_Split2String, 0, 1 << 20);
// Benchmark intended for comparing Split2 against Split1 (SplitStringUsing).
// SkipEmpty() is passed so that the behavior matches SplitStringUsing's.
void BM_Split2SplitStringUsing(benchmark::State& state) {
  const std::string input = MakeTestString(state.range(0));
  for (auto _ : state) {
    std::vector<std::string> pieces =
        absl::StrSplit(input, ';', absl::SkipEmpty());
    benchmark::DoNotOptimize(pieces);
  }
}
BENCHMARK_RANGE(BM_Split2SplitStringUsing, 0, 1 << 20);
// Measures splitting into a std::unordered_set<std::string>, skipping empty
// pieces. The input has `state.range(0)` characters: 'x' at even indices and
// ';' at odd indices ("x;x;x...").
void BM_SplitStringToUnorderedSet(benchmark::State& state) {
  const int len = state.range(0);
  std::string test(len, 'x');
  for (int i = 1; i < len; i += 2) {
    test[i] = ';';
  }
  for (auto _ : state) {
    // The delimiter must be the ';' written into the input above; a character
    // that never occurs in the input would leave nothing to split and reduce
    // the benchmark to a single whole-string insert.
    std::unordered_set<std::string> result =
        absl::StrSplit(test, ';', absl::SkipEmpty());
    benchmark::DoNotOptimize(result);
  }
}
BENCHMARK_RANGE(BM_SplitStringToUnorderedSet, 0, 1 << 20);
// Measures splitting into a std::unordered_map<std::string, std::string>,
// skipping empty pieces. The input has `state.range(0)` characters: 'x' at
// even indices and ';' at odd indices ("x;x;x...").
void BM_SplitStringToUnorderedMap(benchmark::State& state) {
  const int len = state.range(0);
  std::string test(len, 'x');
  for (int i = 1; i < len; i += 2) {
    test[i] = ';';
  }
  for (auto _ : state) {
    // The delimiter must be the ';' written into the input above; a character
    // that never occurs in the input would leave nothing to split and reduce
    // the benchmark to a single whole-string insert.
    std::unordered_map<std::string, std::string> result =
        absl::StrSplit(test, ';', absl::SkipEmpty());
    benchmark::DoNotOptimize(result);
  }
}
BENCHMARK_RANGE(BM_SplitStringToUnorderedMap, 0, 1 << 20);
// Measures splitting "x;x;x..." (a ';' at every odd index) on ';' into a
// std::vector<std::string> with the default keep-everything behavior.
void BM_SplitStringAllowEmpty(benchmark::State& state) {
  const int len = state.range(0);
  std::string input(len, 'x');
  int i = 1;
  while (i < len) {
    input[i] = ';';
    i += 2;
  }
  for (auto _ : state) {
    std::vector<std::string> pieces = absl::StrSplit(input, ';');
    benchmark::DoNotOptimize(pieces);
  }
}
BENCHMARK_RANGE(BM_SplitStringAllowEmpty, 0, 1 << 20);
// Delimiter factory that yields the character 'X' as a `char`.
struct OneCharLiteral {
  char operator()() const {
    constexpr char kDelimiter = 'X';
    return kDelimiter;
  }
};
// Delimiter factory that yields the one-character C string "X".
struct OneCharStringLiteral {
  const char* operator()() const {
    const char* delimiter = "X";
    return delimiter;
  }
};
// Measures splitting a short sentence with a one-character delimiter obtained
// from `DelimiterFactory` (either a `char` or a C string). The delimiter 'X'
// does not occur in the sentence, so each split yields exactly one piece; the
// final check verifies that the piece total equals the iteration count.
template <typename DelimiterFactory>
void BM_SplitStringWithOneChar(benchmark::State& state) {
  DelimiterFactory make_delimiter;
  const auto delimiter = make_delimiter();
  std::vector<absl::string_view> parts;
  size_t v = 0;
  for (auto _ : state) {
    parts = absl::StrSplit("The quick brown fox jumps over the lazy dog",
                           delimiter);
    v += parts.size();
  }
  ABSL_RAW_CHECK(v == state.iterations(), "");
}
BENCHMARK_TEMPLATE(BM_SplitStringWithOneChar, OneCharLiteral);
BENCHMARK_TEMPLATE(BM_SplitStringWithOneChar, OneCharStringLiteral);
// Same as BM_SplitStringWithOneChar, but walks the splitter's iterator range
// directly instead of materializing the pieces into a vector.
template <typename DelimiterFactory>
void BM_SplitStringWithOneCharNoVector(benchmark::State& state) {
  const auto delimiter = DelimiterFactory{}();
  size_t total_pieces = 0;
  for (auto _ : state) {
    auto pieces = absl::StrSplit(
        "The quick brown fox jumps over the lazy dog", delimiter);
    total_pieces += std::distance(pieces.begin(), pieces.end());
  }
  // 'X' never occurs in the sentence, so there is one piece per iteration.
  ABSL_RAW_CHECK(total_pieces == state.iterations(), "");
}
BENCHMARK_TEMPLATE(BM_SplitStringWithOneCharNoVector, OneCharLiteral);
BENCHMARK_TEMPLATE(BM_SplitStringWithOneCharNoVector, OneCharStringLiteral);
} // namespace

View file

@ -0,0 +1,953 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_split.h"
#include <deque>
#include <initializer_list>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/dynamic_annotations.h" // for RunningOnValgrind
#include "absl/base/macros.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/node_hash_map.h"
#include "absl/strings/numbers.h"
namespace {
using ::testing::ElementsAre;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
// Compile-time checks of which destination types SplitterIsConvertibleTo
// accepts and rejects.
TEST(Split, TraitsTest) {
  namespace si = absl::strings_internal;

  // Accepted: vectors and maps whose elements are std::string or
  // absl::string_view.
  static_assert(si::SplitterIsConvertibleTo<std::vector<std::string>>::value,
                "");
  static_assert(
      si::SplitterIsConvertibleTo<std::vector<absl::string_view>>::value, "");
  static_assert(
      si::SplitterIsConvertibleTo<std::map<std::string, std::string>>::value,
      "");
  static_assert(si::SplitterIsConvertibleTo<
                    std::map<absl::string_view, absl::string_view>>::value,
                "");

  // Rejected: non-container types.
  static_assert(!si::SplitterIsConvertibleTo<int>::value, "");
  static_assert(!si::SplitterIsConvertibleTo<std::string>::value, "");

  // Rejected: containers with an int element, key or mapped type.
  static_assert(!si::SplitterIsConvertibleTo<std::vector<int>>::value, "");
  static_assert(
      !si::SplitterIsConvertibleTo<std::map<int, std::string>>::value, "");
  static_assert(
      !si::SplitterIsConvertibleTo<std::map<std::string, int>>::value, "");
}
// This tests the overall split API, which is made up of the absl::StrSplit()
// function and the Delimiter objects in the absl:: namespace.
// Every absl name below is either fully qualified or introduced by a local
// using-declaration, just like callers will need to spell them.
TEST(Split, APIExamples) {
  {
    // Passes string delimiter. Assumes the default of ByString.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ",");  // NOLINT
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByString;
    v = absl::StrSplit("a,b,c", ByString(","));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
                ElementsAre("a", "b", "c"));
  }

  {
    // Same as above, but using a single character as the delimiter.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByChar;
    v = absl::StrSplit("a,b,c", ByChar(','));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the Literal string "=>" as the delimiter.
    const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // The substrings are returned as string_views, eliminating copying.
    std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Leading and trailing empty substrings.
    std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
    EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
  }

  {
    // Splits on a delimiter that is not found.
    std::vector<std::string> v = absl::StrSplit("abc", ',');
    EXPECT_THAT(v, ElementsAre("abc"));
  }

  {
    // Splits the input string into individual characters by using an empty
    // string as the delimiter.
    std::vector<std::string> v = absl::StrSplit("abc", "");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Splits string data with embedded NUL characters, using NUL as the
    // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
    // say that's the empty string when constructing the absl::string_view
    // delimiter. Instead, a non-empty string containing NUL can be used as the
    // delimiter.
    std::string embedded_nulls("a\0b\0c", 5);
    std::string null_delim("\0", 1);
    std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Stores first two split strings as the members in a std::pair.
    std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ("a", p.first);
    EXPECT_EQ("b", p.second);
    // "c" is omitted because std::pair can hold only two elements.
  }

  {
    // Results stored in std::set<std::string>
    std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses a non-const char* delimiter.
    char a[] = ",";
    char* d = a + 0;
    std::vector<std::string> v = absl::StrSplit("a,b,c", d);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Results split using either of , or ;
    using absl::ByAnyChar;
    std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the SkipWhitespace predicate.
    using absl::SkipWhitespace;
    std::vector<std::string> v =
        absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
    EXPECT_THAT(v, ElementsAre(" a ", "b"));
  }

  {
    // Uses the ByLength delimiter.
    using absl::ByLength;
    std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
    EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
  }

  {
    // Different forms of initialization / conversion.
    std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
    std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
    EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
    auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
    EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
    v3 = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
  }

  {
    // Results stored in a std::map.
    std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_EQ(2, m.size());
    EXPECT_EQ("3", m["a"]);
    EXPECT_EQ("2", m["b"]);
  }

  {
    // Results stored in a std::multimap.
    std::multimap<std::string, std::string> m =
        absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_EQ(3, m.size());
    // There are two entries with key "a". multimap::find() is allowed to
    // return any one of them, so use lower_bound(), which always returns the
    // first entry of the equal range, before walking to the second.
    auto it = m.lower_bound("a");
    EXPECT_EQ("1", it->second);
    ++it;
    EXPECT_EQ("3", it->second);
    // "b" occurs once, so find() is unambiguous here.
    it = m.find("b");
    EXPECT_EQ("2", it->second);
  }

  {
    // Demonstrates use in a range-based for loop in C++11.
    std::string s = "x,x,x,x,x,x,x";
    for (absl::string_view sp : absl::StrSplit(s, ',')) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates use with a Predicate in a range-based for loop.
    using absl::SkipWhitespace;
    std::string s = " ,x,,x,,x,x,x,,";
    for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates a "smart" split to std::map using two separate calls to
    // absl::StrSplit. One call to split the records, and another call to split
    // the keys and values. This also uses the MaxSplits delimiter so that the
    // string "a=b=c" will split to "a" -> "b=c".
    std::map<std::string, std::string> m;
    for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
      m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
    }
    EXPECT_EQ("b=c", m.find("a")->second);
    EXPECT_EQ("e", m.find("d")->second);
    EXPECT_EQ("", m.find("f")->second);
    EXPECT_EQ("", m.find("g")->second);
  }
}
//
// Tests for SplitIterator
//
// Exercises the basic iterator operations of the splitter's iterator:
// comparison, dereference (`*` and `->`), pre- and post-increment.
TEST(SplitIterator, Basics) {
  auto pieces = absl::StrSplit("a,b", ',');
  auto cursor = pieces.begin();
  auto last = pieces.end();

  // First piece, read through operator*.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("a", *cursor);

  // Pre-increment moves to the second piece, read through operator->.
  ++cursor;
  EXPECT_NE(cursor, last);
  const std::string second(cursor->data(), cursor->size());
  EXPECT_EQ("b", second);

  // Post-increment moves past the final piece.
  cursor++;
  EXPECT_EQ(cursor, last);
}
// Simple Predicate to skip a particular string.
class Skip {
public:
explicit Skip(const std::string& s) : s_(s) {}
bool operator()(absl::string_view sp) { return sp != s_; }
private:
std::string s_;
};
// Iterates a splitter that carries a user-defined predicate and checks that
// the rejected piece ("b") is never produced.
TEST(SplitIterator, Predicate) {
  auto pieces = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto cursor = pieces.begin();
  auto last = pieces.end();

  // First piece, read through operator*.
  EXPECT_NE(cursor, last);
  EXPECT_EQ("a", *cursor);

  // Pre-increment: "b" is skipped, so the iterator lands on "c".
  ++cursor;
  EXPECT_NE(cursor, last);
  const std::string after_skip(cursor->data(), cursor->size());
  EXPECT_EQ("c", after_skip);

  // Post-increment moves past the final piece.
  cursor++;
  EXPECT_EQ(cursor, last);
}
// Walks the iterator over inputs with leading, trailing and missing
// delimiters and compares each produced piece with the expected one.
TEST(SplitIterator, EdgeCases) {
  // An input and the pieces expected when it is split on ','.
  struct Case {
    std::string input;
    std::vector<std::string> pieces;
  };
  const Case kCases[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const Case& test_case : kCases) {
    SCOPED_TRACE(test_case.input);
    auto splitter = absl::StrSplit(test_case.input, ',');
    auto cursor = splitter.begin();
    auto last = splitter.end();
    for (const std::string& piece : test_case.pieces) {
      EXPECT_NE(cursor, last);
      EXPECT_EQ(piece, *cursor++);
    }
    // Nothing may be left once all expected pieces were consumed.
    EXPECT_EQ(cursor, last);
  }
}
// A const-qualified splitter can still be iterated by a matcher.
TEST(Splitter, Const) {
  const auto const_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(const_splitter, ElementsAre("a", "b", "c"));
}
TEST(Split, EmptyAndNull) {
  // Note: an empty absl::string_view and a null (default-constructed) one
  // compare equal, yet splitting them gives different results: the empty view
  // yields a single empty piece, the null view yields nothing. That is likely
  // surprising and undesirable, but it is kept for backward compatibility via
  // a small "hack" in str_split_internal.h. If that behavior is ever
  // changed/fixed, this test will need to be updated.
  const absl::string_view empty_view("");
  const absl::string_view null_view;
  EXPECT_THAT(absl::StrSplit(empty_view, '-'), ElementsAre(""));
  EXPECT_THAT(absl::StrSplit(null_view, '-'), ElementsAre());
}
// Uses a non-end iterator as the loop terminator, which only works if
// equality between two non-end SplitIterators is implemented correctly.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto current = splitter.begin();

  // Advance a copy of the iterator twice so that it refers to "c".
  auto stop = current;
  for (int step = 0; step < 2; ++step) {
    ++stop;
  }
  EXPECT_EQ("c", *stop);

  // Collect everything before `stop`; "c" itself must not be visited.
  std::vector<absl::string_view> seen;
  while (current != stop) {
    seen.push_back(*current);
    ++current;
  }
  EXPECT_THAT(seen, ElementsAre("a", "b"));
}
//
// Tests for Splitter
//
// A named splitter can be used directly in a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto abc = absl::StrSplit("a,b,c", ',');
  std::vector<absl::string_view> collected;
  for (const absl::string_view piece : abc) {
    collected.push_back(piece);
  }
  EXPECT_THAT(collected, ElementsAre("a", "b", "c"));
}
// Helper templates for testing the splitter's conversion operators.
//
// Implicitly converts `splitter` to `ContainerType` and expects the result to
// hold exactly "a", "b", "c" and "d", in any order.
template <typename ContainerType, typename Splitter>
void TestConversionOperator(const Splitter& splitter) {
  ContainerType converted = splitter;
  EXPECT_THAT(converted, UnorderedElementsAre("a", "b", "c", "d"));
}
// Implicitly converts `splitter` to the map-like `MapType` and expects the
// entries "a" -> "b" and "c" -> "d", in any order.
template <typename MapType, typename Splitter>
void TestMapConversionOperator(const Splitter& splitter) {
  MapType converted = splitter;
  EXPECT_THAT(converted, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
}
// Implicitly converts `splitter` to std::pair<FirstType, SecondType> and
// expects the pair ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  std::pair<FirstType, SecondType> converted = splitter;
  const std::pair<FirstType, SecondType> expected("a", "b");
  EXPECT_EQ(converted, expected);
}
// Converts one splitter to every supported kind of destination: sequence and
// set containers, map-like containers, and std::pair.
TEST(Splitter, ConversionOperator) {
  // Short names for the two element types used throughout.
  using View = absl::string_view;
  using Str = std::string;

  auto abcd = absl::StrSplit("a,b,c,d", ',');

  // Sequence and set containers.
  TestConversionOperator<std::vector<View>>(abcd);
  TestConversionOperator<std::vector<Str>>(abcd);
  TestConversionOperator<std::list<View>>(abcd);
  TestConversionOperator<std::list<Str>>(abcd);
  TestConversionOperator<std::deque<View>>(abcd);
  TestConversionOperator<std::deque<Str>>(abcd);
  TestConversionOperator<std::set<View>>(abcd);
  TestConversionOperator<std::set<Str>>(abcd);
  TestConversionOperator<std::multiset<View>>(abcd);
  TestConversionOperator<std::multiset<Str>>(abcd);
  TestConversionOperator<std::unordered_set<Str>>(abcd);

  // Map-like containers from the standard library.
  TestMapConversionOperator<std::map<View, View>>(abcd);
  TestMapConversionOperator<std::map<View, Str>>(abcd);
  TestMapConversionOperator<std::map<Str, View>>(abcd);
  TestMapConversionOperator<std::map<Str, Str>>(abcd);
  TestMapConversionOperator<std::multimap<View, View>>(abcd);
  TestMapConversionOperator<std::multimap<View, Str>>(abcd);
  TestMapConversionOperator<std::multimap<Str, View>>(abcd);
  TestMapConversionOperator<std::multimap<Str, Str>>(abcd);
  TestMapConversionOperator<std::unordered_map<Str, Str>>(abcd);

  // Map-like containers from absl.
  TestMapConversionOperator<absl::node_hash_map<View, View>>(abcd);
  TestMapConversionOperator<absl::node_hash_map<View, Str>>(abcd);
  TestMapConversionOperator<absl::node_hash_map<Str, View>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<View, View>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<View, Str>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<Str, View>>(abcd);

  // std::pair in all four element-type combinations.
  TestPairConversionOperator<View, View>(abcd);
  TestPairConversionOperator<View, Str>(abcd);
  TestPairConversionOperator<Str, View>(abcd);
  TestPairConversionOperator<Str, Str>(abcd);
}
// Extra coverage for conversion to std::pair. Unlike the containers, a pair
// always has exactly two slots (.first and .second), so the conversion has to
// cope with splits that produce fewer than, exactly, or more than two pieces.
TEST(Splitter, ToPair) {
  {
    // Empty input: both members end up empty.
    std::pair<std::string, std::string> halves = absl::StrSplit("", ',');
    EXPECT_EQ("", halves.first);
    EXPECT_EQ("", halves.second);
  }

  {
    // One piece: only .first is populated.
    std::pair<std::string, std::string> halves = absl::StrSplit("a", ',');
    EXPECT_EQ("a", halves.first);
    EXPECT_EQ("", halves.second);
  }

  {
    // Empty first piece: only .second is populated.
    std::pair<std::string, std::string> halves = absl::StrSplit(",b", ',');
    EXPECT_EQ("", halves.first);
    EXPECT_EQ("b", halves.second);
  }

  {
    // Exactly two pieces.
    std::pair<std::string, std::string> halves = absl::StrSplit("a,b", ',');
    EXPECT_EQ("a", halves.first);
    EXPECT_EQ("b", halves.second);
  }

  {
    // More than two pieces: everything after the second ("c") is dropped.
    std::pair<std::string, std::string> halves = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ("a", halves.first);
    EXPECT_EQ("b", halves.second);
  }
}
// Splits the same input with no predicate and with each predicate in turn
// (AllowEmpty, SkipEmpty, SkipWhitespace) and compares the surviving pieces.
TEST(Splitter, Predicates) {
  static const char kTestChars[] = ",a, ,b,";

  {
    // Without a predicate nothing is filtered; empty pieces are kept.
    auto unfiltered = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = unfiltered;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // AllowEmpty() keeps empty pieces as well...
    auto allowing = absl::StrSplit(kTestChars, ',', absl::AllowEmpty());
    std::vector<std::string> with_allow_empty = allowing;
    EXPECT_THAT(with_allow_empty, ElementsAre("", "a", " ", "b", ""));

    // ...and therefore matches the result of using no predicate at all.
    auto unfiltered = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> without_predicate = unfiltered;
    EXPECT_EQ(with_allow_empty, without_predicate);
  }

  {
    // SkipEmpty() drops empty pieces but keeps the whitespace-only one.
    auto skipping = absl::StrSplit(kTestChars, ',', absl::SkipEmpty());
    std::vector<std::string> pieces = skipping;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // SkipWhitespace() drops empty and whitespace-only pieces.
    auto skipping = absl::StrSplit(kTestChars, ',', absl::SkipWhitespace());
    std::vector<std::string> pieces = skipping;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
//
// Tests for StrSplit()
//
// Basic uses of absl::StrSplit(): discarding the result, initializing
// containers from it, and assigning it to existing containers.
TEST(Split, Basics) {
  {
    // Discarding the returned splitter is pointless, but it should work.
    absl::StrSplit("a,b,c", ',');
  }

  {
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Assignment (as opposed to initialization) has to work too. This needs a
    // little extra work with C++11 because of overloads with
    // initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
// Returns "Hello World" as an absl::string_view temporary.
absl::string_view ReturnStringView() {
  return absl::string_view("Hello World");
}
// Returns "Hello World" as a `const char*` temporary.
const char* ReturnConstCharP() {
  const char* greeting = "Hello World";
  return greeting;
}
// Returns "Hello World" as a non-const `char*` temporary. The pointee is a
// string literal; constness is cast away only to get the `char*` type.
char* ReturnCharP() {
  const char* greeting = "Hello World";
  return const_cast<char*>(greeting);
}
// StrSplit() accepts text passed as a temporary absl::string_view,
// `const char*` or `char*`.
TEST(Split, AcceptsCertainTemporaries) {
  std::vector<std::string> words;

  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
// Splits a temporary std::string and iterates the splitter after the
// temporary has been destroyed.
TEST(Split, Temporary) {
  // The text is deliberately longer than the SSO length: if the splitter only
  // kept a reference to the temporary's contents, iterating would then touch
  // freed memory rather than merely dead on-stack memory.
  const char letters[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(letters))
      << "Input should be larger than fits on the stack.";

  // In C++11 this pattern shows up most often inside a range-based for loop.
  auto first_splitter = absl::StrSplit(std::string(letters), ',');
  std::string next_letter = "a";
  for (absl::string_view piece : first_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];
  }
  // 21 letters ("a".."u") were consumed, leaving the counter at "v".
  EXPECT_EQ("v", next_letter);

  // Same check once more with a second, independent splitter.
  auto second_splitter = absl::StrSplit(std::string(letters), ',');
  next_letter = "a";
  for (absl::string_view piece : second_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];
  }
  EXPECT_EQ("v", next_letter);
}
// Copy-constructs `value` into a new heap object and returns its owner.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
// A splitter built from an lvalue string stays usable as a copy after the
// splitter it was copied from has been destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string text = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(text, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the source of the copy.
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, testing::ElementsAre("a", "b"));
}
// A splitter built from a temporary string stays usable as a copy after the
// splitter it was copied from has been destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the source of the copy.
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, testing::ElementsAre("a", "b"));
}
// Mostly a compile-time check: a splitter supports copy/move construction and
// copy/move assignment.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto original = absl::StrSplit("foo", '-');

  auto copied = original;            // Copy construct
  auto moved = std::move(original);  // Move construct

  copied = moved;             // Copy assign
  moved = std::move(copied);  // Move assign

  EXPECT_THAT(moved, ElementsAre("foo"));
}
// The delimiter may be given as a char, a std::string or an
// absl::string_view; all three split "a,b" the same way.
TEST(Split, StringDelimiter) {
  {
    // char delimiter.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // std::string delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // absl::string_view delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
// Only compiled when the char8_t feature macro is not defined, since the test
// stores u8 literals in std::string.
#if !defined(__cpp_char8_t)
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wc++2a-compat"
#endif
// Splitting UTF-8 encoded text, with ASCII and with UTF-8 delimiters.
TEST(Split, UTF8) {
  std::string greek = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";

  {
    // UTF-8 text, ASCII delimiter.
    std::string text = "a," + greek;
    std::vector<absl::string_view> pieces = absl::StrSplit(text, ',');
    EXPECT_THAT(pieces, ElementsAre("a", greek));
  }

  {
    // UTF-8 text, delimiter that itself contains UTF-8.
    std::string text = "a," + greek + ",b";
    std::string delimiter = "," + greek + ",";
    std::vector<absl::string_view> pieces = absl::StrSplit(text, delimiter);
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // UTF-8 text, ByAnyChar with ASCII characters only.
    std::vector<absl::string_view> pieces =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
#endif  // !defined(__cpp_char8_t)
// An empty string delimiter splits the text into individual characters; empty
// text still yields one empty piece.
TEST(Split, EmptyStringDelimiter) {
  {
    std::vector<std::string> chars = absl::StrSplit("", "");
    EXPECT_THAT(chars, ElementsAre(""));
  }

  {
    std::vector<std::string> chars = absl::StrSplit("a", "");
    EXPECT_THAT(chars, ElementsAre("a"));
  }

  {
    std::vector<std::string> chars = absl::StrSplit("ab", "");
    EXPECT_THAT(chars, ElementsAre("a", "b"));
  }

  {
    // Whitespace is a character like any other.
    std::vector<std::string> chars = absl::StrSplit("a b", "");
    EXPECT_THAT(chars, ElementsAre("a", " ", "b"));
  }
}
// Splitting on the two-character delimiter "//", including inputs that hold
// only part of the delimiter or overlapping occurrences of it.
TEST(Split, SubstrDelimiter) {
  std::vector<absl::string_view> pieces;
  absl::string_view slashes("//");

  pieces = absl::StrSplit("", slashes);
  EXPECT_THAT(pieces, ElementsAre(""));

  pieces = absl::StrSplit("//", slashes);
  EXPECT_THAT(pieces, ElementsAre("", ""));

  pieces = absl::StrSplit("ab", slashes);
  EXPECT_THAT(pieces, ElementsAre("ab"));

  pieces = absl::StrSplit("ab//", slashes);
  EXPECT_THAT(pieces, ElementsAre("ab", ""));

  // A single '/' is not the delimiter.
  pieces = absl::StrSplit("ab/", slashes);
  EXPECT_THAT(pieces, ElementsAre("ab/"));

  pieces = absl::StrSplit("a/b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a/b"));

  pieces = absl::StrSplit("a//b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a", "b"));

  // Three slashes: the first two form the delimiter, the third stays.
  pieces = absl::StrSplit("a///b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a", "/b"));

  // Four slashes: two delimiters with an empty piece in between.
  pieces = absl::StrSplit("a////b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a", "", "b"));
}
// Checks where empty pieces appear (leading, trailing, between adjacent
// delimiters) and that SkipEmpty() removes them.
TEST(Split, EmptyResults) {
  std::vector<absl::string_view> pieces;

  pieces = absl::StrSplit("", '#');
  EXPECT_THAT(pieces, ElementsAre(""));

  pieces = absl::StrSplit("#", '#');
  EXPECT_THAT(pieces, ElementsAre("", ""));

  pieces = absl::StrSplit("#cd", '#');
  EXPECT_THAT(pieces, ElementsAre("", "cd"));

  pieces = absl::StrSplit("ab#cd#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "cd", ""));

  pieces = absl::StrSplit("ab##cd", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", "cd"));

  pieces = absl::StrSplit("ab##", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", ""));

  pieces = absl::StrSplit("ab#ab#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "ab", ""));

  // Text made only of delimiters: one more empty piece than delimiters.
  pieces = absl::StrSplit("aaaa", 'a');
  EXPECT_THAT(pieces, ElementsAre("", "", "", "", ""));

  // With SkipEmpty() the single empty piece of an empty input disappears.
  pieces = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(pieces, ElementsAre());
}
// Runs `d.Find(text, starting_pos)` and returns true iff the returned view
// does not start at the end of `text` and starts at offset `expected_pos`
// within `text`.
template <typename Delimiter>
static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
                                 size_t starting_pos, int expected_pos) {
  const absl::string_view match = d.Find(text, starting_pos);
  const char* const text_end = text.data() + text.size();
  // A result positioned at the very end of `text` counts as "not found".
  if (match.data() == text_end) {
    return false;
  }
  return expected_pos == match.data() - text.data();
}
// Helper for testing Delimiter objects. Returns true if the given Delimiter
// is found in the given string at the given position. Two cases are checked,
// and both have to succeed:
//   1. The actual text given, starting at position 0
//   2. The text given with leading padding that should be ignored
template <typename Delimiter>
static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
  if (!IsFoundAtStartingPos(text, d, 0, expected_pos)) {
    return false;
  }
  // Prepend padding (which itself contains commas) and start the search just
  // past it; the expected position shifts by the padding length.
  const std::string padding = ",x,y,z,";
  const std::string padded_text = padding + std::string(text);
  return IsFoundAtStartingPos(padded_text, d, padding.length(),
                              expected_pos + padding.length());
}
//
// Tests for ByString
//
// Tests using any delimiter that represents a single comma.
template <typename Delimiter>
void TestComma(Delimiter d) {
EXPECT_TRUE(IsFoundAt(",", d, 0));
EXPECT_TRUE(IsFoundAt("a,", d, 1));
EXPECT_TRUE(IsFoundAt(",b", d, 0));
EXPECT_TRUE(IsFoundAt("a,b", d, 1));
EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));
EXPECT_FALSE(IsFoundAt("", d, -1));
EXPECT_FALSE(IsFoundAt(" ", d, -1));
EXPECT_FALSE(IsFoundAt("a", d, -1));
EXPECT_FALSE(IsFoundAt("a b c", d, -1));
EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
EXPECT_FALSE(IsFoundAt(";", d, -1));
}
// ByString with a comma delimiter and with the special empty delimiter.
TEST(Delimiter, ByString) {
  // As a temporary and as a named variable.
  TestComma(absl::ByString(","));
  absl::ByString named_comma(",");
  TestComma(named_comma);

  // absl::string_view::find() reports the empty string ("") at position 0,
  // as the next expectation demonstrates. If the ByString delimiter returned
  // position 0 as well, there would be an infinite loop in the SplitIterator
  // code. To avoid this, the empty string is special-cased to always return
  // the item at position 1.
  absl::string_view haystack("abc");
  EXPECT_EQ(0, haystack.find(""));  // "" is found at position 0

  absl::ByString empty_delimiter("");
  EXPECT_FALSE(IsFoundAt("", empty_delimiter, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delimiter, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delimiter, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delimiter, 1));
}
// ByChar with a comma, as a temporary and as a named variable.
TEST(Split, ByChar) {
  TestComma(absl::ByChar(','));

  absl::ByChar named_comma(',');
  TestComma(named_comma);
}
//
// Tests for ByAnyChar
//
// ByAnyChar with one delimiter character, with two, and with none.
TEST(Delimiter, ByAnyChar) {
  absl::ByAnyChar comma_only(",");

  // Inputs where the comma is found.
  EXPECT_TRUE(IsFoundAt(",", comma_only, 0));
  EXPECT_TRUE(IsFoundAt("a,", comma_only, 1));
  EXPECT_TRUE(IsFoundAt("a,b", comma_only, 1));
  EXPECT_TRUE(IsFoundAt(",b", comma_only, 0));

  // Inputs without a comma.
  EXPECT_FALSE(IsFoundAt("", comma_only, -1));
  EXPECT_FALSE(IsFoundAt(" ", comma_only, -1));
  EXPECT_FALSE(IsFoundAt("a", comma_only, -1));
  EXPECT_FALSE(IsFoundAt("a;b;c", comma_only, -1));
  EXPECT_FALSE(IsFoundAt(";", comma_only, -1));

  absl::ByAnyChar comma_or_semicolon(",;");

  // Whichever of the two characters comes first is the one reported.
  EXPECT_TRUE(IsFoundAt(",", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(",;", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";,", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(",;b", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt(";,b", comma_or_semicolon, 0));
  EXPECT_TRUE(IsFoundAt("a;,", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a,;", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a;,b", comma_or_semicolon, 1));
  EXPECT_TRUE(IsFoundAt("a,;b", comma_or_semicolon, 1));

  // Inputs containing neither character.
  EXPECT_FALSE(IsFoundAt("", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt(" ", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("a", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("a=b=c", comma_or_semicolon, -1));
  EXPECT_FALSE(IsFoundAt("=", comma_or_semicolon, -1));

  // With an empty set of characters ByAnyChar behaves just like ByString with
  // an empty delimiter: it always returns a zero-length absl::string_view
  // referring to the item at position 1, not position 0.
  absl::ByAnyChar no_chars("");
  EXPECT_FALSE(IsFoundAt("", no_chars, 0));
  EXPECT_FALSE(IsFoundAt("a", no_chars, 0));
  EXPECT_TRUE(IsFoundAt("ab", no_chars, 1));
  EXPECT_TRUE(IsFoundAt("abc", no_chars, 1));
}
//
// Tests for ByLength
//
// ByLength(4) reports a split point at offset 4 when the text is longer than
// four characters, and nothing otherwise.
TEST(Delimiter, ByLength) {
  absl::ByLength every_four(4);

  // Texts longer than four characters.
  EXPECT_TRUE(IsFoundAt("abcde", every_four, 4));
  EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", every_four, 4));
  EXPECT_TRUE(IsFoundAt("a b,c\nd", every_four, 4));

  // Texts of four characters or fewer.
  EXPECT_FALSE(IsFoundAt("", every_four, 0));
  EXPECT_FALSE(IsFoundAt("a", every_four, 0));
  EXPECT_FALSE(IsFoundAt("ab", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abc", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abcd", every_four, 0));
}
// Splits a string slightly larger than 2G. Does nothing where size_t is not
// wider than 4 bytes.
TEST(Split, WorksWithLargeStrings) {
  if (sizeof(size_t) <= 4) {
    return;
  }
  // 2G + 1 bytes of 'x', with the final byte turned into the delimiter.
  std::string huge((uint32_t{1} << 31) + 1, 'x');
  huge.back() = '-';
  std::vector<absl::string_view> halves = absl::StrSplit(huge, '-');
  EXPECT_EQ(2, halves.size());
  // The first piece holds the 2G of 'x's. Only spot-check a few characters:
  // testing::StartsWith is too slow with a 2G string.
  EXPECT_EQ('x', halves[0][0]);
  EXPECT_EQ('x', halves[0][1]);
  EXPECT_EQ('x', halves[0][3]);
  // Nothing follows the trailing delimiter.
  EXPECT_EQ("", halves[1]);
}
// Checks the internal detection traits against `int` (expected false) and a
// type that has the detected property (expected true).
TEST(SplitInternalTest, TypeTraits) {
  namespace si = absl::strings_internal;
  using IntMap = std::map<int, int>;

  EXPECT_FALSE(si::HasMappedType<int>::value);
  EXPECT_TRUE(si::HasMappedType<IntMap>::value);

  EXPECT_FALSE(si::HasValueType<int>::value);
  EXPECT_TRUE(si::HasValueType<IntMap>::value);

  EXPECT_FALSE(si::HasConstIterator<int>::value);
  EXPECT_TRUE(si::HasConstIterator<IntMap>::value);

  EXPECT_FALSE(si::IsInitializerList<int>::value);
  EXPECT_TRUE(si::IsInitializerList<std::initializer_list<int>>::value);
}
} // namespace

View file

@ -0,0 +1,235 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#ifndef ABSL_USES_STD_STRING_VIEW
#include <algorithm>
#include <climits>
#include <cstring>
#include <ostream>
#include "absl/strings/internal/memutil.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Writes `pad` copies of the stream's current fill character to `o`, emitting
// them in chunks of at most 32 characters from a small stack buffer.
void WritePadding(std::ostream& o, size_t pad) {
  constexpr size_t kChunkSize = 32;
  char chunk[kChunkSize];
  memset(chunk, o.fill(), kChunkSize);
  for (size_t remaining = pad; remaining != 0;) {
    const size_t count = std::min(remaining, kChunkSize);
    o.write(chunk, count);
    remaining -= count;
  }
}
// A set of characters with constant-time membership tests, used by the
// find_.*_of methods below. Entries are indexed by the character's value
// converted to unsigned char.
class LookupTable {
 public:
  // Marks every character of `wanted` as present.
  explicit LookupTable(string_view wanted) {
    for (auto it = wanted.begin(); it != wanted.end(); ++it) {
      table_[Index(*it)] = true;
    }
  }

  // Returns true iff `c` was among the characters given at construction.
  bool operator[](char c) const { return table_[Index(c)]; }

 private:
  // Maps a (possibly negative) char to its slot in `table_`.
  static unsigned char Index(char c) { return static_cast<unsigned char>(c); }

  // One flag per possible unsigned char value; all false initially.
  bool table_[UCHAR_MAX + 1] = {};
};
} // namespace
// Streams `piece` to `o`, honoring the stream's field width, fill character
// and left/right adjustment, and resets the width to 0 afterwards. Nothing is
// written when the stream's sentry reports failure.
std::ostream& operator<<(std::ostream& o, string_view piece) {
  std::ostream::sentry sentry(o);
  if (!sentry) {
    return o;
  }

  size_t left_pad = 0;
  size_t right_pad = 0;
  const size_t field_width = static_cast<size_t>(o.width());
  if (field_width > piece.size()) {
    const size_t pad = field_width - piece.size();
    // Left-adjusted output is padded on the right; everything else on the
    // left.
    const bool left_adjusted = (o.flags() & o.adjustfield) == o.left;
    (left_adjusted ? right_pad : left_pad) = pad;
  }

  if (left_pad != 0) WritePadding(o, left_pad);
  o.write(piece.data(), piece.size());
  if (right_pad != 0) WritePadding(o, right_pad);
  o.width(0);
  return o;
}
// Returns the offset of the first occurrence of `s` at or after `pos`, or
// npos if there is none.
string_view::size_type string_view::find(string_view s, size_type pos) const
    noexcept {
  if (empty()) {
    // In an empty view only the empty needle matches, and only at offset 0.
    return (pos == 0 && s.empty()) ? 0 : npos;
  }
  if (pos > length_) {
    return npos;
  }
  const char* const match =
      strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_);
  if (match == nullptr) {
    return npos;
  }
  return match - ptr_;
}
// Returns the offset of the first `c` at or after `pos`, or `npos` when the
// view is empty, `pos` is out of range, or `c` does not occur.
string_view::size_type string_view::find(char c, size_type pos) const noexcept {
  if (empty() || pos >= length_) return npos;
  const void* const hit = memchr(ptr_ + pos, c, length_ - pos);
  if (hit == nullptr) return npos;
  return static_cast<const char*>(hit) - ptr_;
}
// Returns the offset of the last occurrence of `s` that starts at or before
// `pos`, or `npos` when there is none. An empty `s` matches at
// `min(size, pos)`.
string_view::size_type string_view::rfind(string_view s, size_type pos) const
    noexcept {
  if (s.length_ > length_) return npos;
  if (s.empty()) return std::min(length_, pos);
  // A match may start no later than `last_start`, so the searched window ends
  // `s.length_` characters after that offset.
  const size_type last_start = std::min(length_ - s.length_, pos);
  const char* const window_end = ptr_ + last_start + s.length_;
  const char* const hit =
      std::find_end(ptr_, window_end, s.ptr_, s.ptr_ + s.length_);
  // std::find_end returns the end of the window when nothing was found.
  if (hit == window_end) return npos;
  return hit - ptr_;
}
// Scans backwards over the offsets [0..pos] (inclusive; the whole view when
// `pos == npos`) and returns the offset of the last `c`, or `npos` if `c` does
// not occur in that range.
string_view::size_type string_view::rfind(char c, size_type pos) const
    noexcept {
  // Note: memrchr() is not available on Windows.
  if (empty()) return npos;
  // `i` is one past the next offset to inspect, so the loop can stop at 0
  // without decrementing an unsigned value below zero.
  size_type i = std::min(pos, length_ - 1) + 1;
  while (i != 0) {
    --i;
    if (ptr_[i] == c) return i;
  }
  return npos;
}
// Returns the offset of the first character at or after `pos` that also
// occurs in `s`, or `npos` when there is none (including when either view is
// empty).
string_view::size_type string_view::find_first_of(string_view s,
                                                  size_type pos) const
    noexcept {
  if (empty() || s.empty()) return npos;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_first_of(s.ptr_[0], pos);
  const LookupTable wanted(s);
  while (pos < length_) {
    if (wanted[ptr_[pos]]) return pos;
    ++pos;
  }
  return npos;
}
// Returns the offset of the first character at or after `pos` that does not
// occur in `s`, or `npos` when every remaining character occurs in `s` (or the
// view is empty).
string_view::size_type string_view::find_first_not_of(string_view s,
                                                      size_type pos) const
    noexcept {
  if (empty()) return npos;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos);
  const LookupTable excluded(s);
  while (pos < length_) {
    if (!excluded[ptr_[pos]]) return pos;
    ++pos;
  }
  return npos;
}
// Returns the offset of the first character at or after `pos` that differs
// from `c`, or `npos` when there is none.
string_view::size_type string_view::find_first_not_of(char c,
                                                      size_type pos) const
    noexcept {
  if (empty()) return npos;
  size_type i = pos;
  while (i < length_) {
    if (ptr_[i] != c) return i;
    ++i;
  }
  return npos;
}
// Returns the offset of the last character at or before `pos` that also occurs
// in `s`, or `npos` when there is none (including when either view is empty).
string_view::size_type string_view::find_last_of(string_view s,
                                                 size_type pos) const noexcept {
  if (empty() || s.empty()) return npos;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_last_of(s.ptr_[0], pos);
  const LookupTable wanted(s);
  // `i` is one past the next offset to inspect, so the loop can stop at 0
  // without decrementing an unsigned value below zero.
  size_type i = std::min(pos, length_ - 1) + 1;
  while (i != 0) {
    --i;
    if (wanted[ptr_[i]]) return i;
  }
  return npos;
}
// Returns the offset of the last character at or before `pos` that does not
// occur in `s`, or `npos` when there is none. With an empty `s` every
// character qualifies, so the starting offset itself is returned.
string_view::size_type string_view::find_last_not_of(string_view s,
                                                     size_type pos) const
    noexcept {
  if (empty()) return npos;
  const size_type start = std::min(pos, length_ - 1);
  if (s.empty()) return start;
  // Avoid the cost of LookupTable() for a single-character search.
  if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos);
  const LookupTable excluded(s);
  // `remaining` counts the offsets still to inspect; the offset examined is
  // always `remaining - 1`, which keeps the unsigned arithmetic above zero.
  for (size_type remaining = start + 1; remaining != 0; --remaining) {
    const size_type i = remaining - 1;
    if (!excluded[ptr_[i]]) return i;
  }
  return npos;
}
// Returns the offset of the last character at or before `pos` that differs
// from `c`, or `npos` when there is none.
string_view::size_type string_view::find_last_not_of(char c,
                                                     size_type pos) const
    noexcept {
  if (empty()) return npos;
  // `remaining` counts the offsets still to inspect; the offset examined is
  // always `remaining - 1`, which keeps the unsigned arithmetic above zero.
  for (size_type remaining = std::min(pos, length_ - 1) + 1; remaining != 0;
       --remaining) {
    const size_type i = remaining - 1;
    if (ptr_[i] != c) return i;
  }
  return npos;
}
// MSVC has non-standard behavior that implicitly creates definitions for static
// const members. These implicit definitions conflict with explicit out-of-class
// member definitions that are required by the C++ standard, resulting in
// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks
// MSVC to choose only one definition for the symbol it decorates. See details
// at https://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx
//
// ABSL_STRING_VIEW_SELECTANY expands to `__declspec(selectany)` when
// `_MSC_VER` is defined and to nothing otherwise.
#ifdef _MSC_VER
#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany)
#else
#define ABSL_STRING_VIEW_SELECTANY
#endif
// Out-of-class definitions of the two `static constexpr` members of
// `string_view`; their values are given where they are declared in the class.
ABSL_STRING_VIEW_SELECTANY
constexpr string_view::size_type string_view::npos;
ABSL_STRING_VIEW_SELECTANY
constexpr string_view::size_type string_view::kMaxSize;
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_USES_STD_STRING_VIEW

View file

@ -0,0 +1,623 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: string_view.h
// -----------------------------------------------------------------------------
//
// This file contains the definition of the `absl::string_view` class. A
// `string_view` points to a contiguous span of characters, often part or all of
// another `std::string`, double-quoted string literal, character array, or even
// another `string_view`.
//
// This `absl::string_view` abstraction is designed to be a drop-in
// replacement for the C++17 `std::string_view` abstraction.
#ifndef ABSL_STRINGS_STRING_VIEW_H_
#define ABSL_STRINGS_STRING_VIEW_H_
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <iosfwd>
#include <iterator>
#include <limits>
#include <string>
#include "absl/base/config.h"
#include "absl/base/internal/throw_delegate.h"
#include "absl/base/macros.h"
#include "absl/base/optimization.h"
#include "absl/base/port.h"
#ifdef ABSL_USES_STD_STRING_VIEW
#include <string_view> // IWYU pragma: export
namespace absl {
ABSL_NAMESPACE_BEGIN
// When ABSL_USES_STD_STRING_VIEW is defined, `absl::string_view` is simply an
// alias for `std::string_view`, and the class definition in the `#else` branch
// of this header is not compiled.
using string_view = std::string_view;
ABSL_NAMESPACE_END
} // namespace absl
#else // ABSL_USES_STD_STRING_VIEW
// ABSL_INTERNAL_STRING_VIEW_MEMCMP names the byte-comparison routine used by
// `string_view::compare()` and `operator==` in this header. It is
// `__builtin_memcmp` when ABSL_HAVE_BUILTIN reports that builtin or when
// compiling with GCC (and not clang), and plain `memcmp` otherwise. The macro
// is #undef'ed again after the comparison operators.
// NOTE(review): the builtin is presumably preferred so that the constexpr
// comparison functions can be evaluated at compile time -- confirm.
#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \
(defined(__GNUC__) && !defined(__clang__))
#define ABSL_INTERNAL_STRING_VIEW_MEMCMP __builtin_memcmp
#else // ABSL_HAVE_BUILTIN(__builtin_memcmp)
#define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp
#endif // ABSL_HAVE_BUILTIN(__builtin_memcmp)
namespace absl {
ABSL_NAMESPACE_BEGIN
// absl::string_view
//
// A `string_view` provides a lightweight view into the string data provided by
// a `std::string`, double-quoted string literal, character array, or even
// another `string_view`. A `string_view` does *not* own the string to which it
// points, and that data cannot be modified through the view.
//
// You can use `string_view` as a function or method parameter anywhere a
// parameter can receive a double-quoted string literal, `const char*`,
// `std::string`, or another `absl::string_view` argument with no need to copy
// the string data. Systematic use of `string_view` within function arguments
// reduces data copies and `strlen()` calls.
//
// Because of its small size, prefer passing `string_view` by value:
//
// void MyFunction(absl::string_view arg);
//
// If circumstances require, you may also pass one by const reference:
//
// void MyFunction(const absl::string_view& arg); // not preferred
//
// Passing by value generates slightly smaller code for many architectures.
//
// In either case, the source data of the `string_view` must outlive the
// `string_view` itself.
//
// A `string_view` is also suitable for local variables if you know that the
// lifetime of the underlying object is longer than the lifetime of your
// `string_view` variable. However, beware of binding a `string_view` to a
// temporary value:
//
// // BAD use of string_view: lifetime problem
// absl::string_view sv = obj.ReturnAString();
//
// // GOOD use of string_view: str outlives sv
// std::string str = obj.ReturnAString();
// absl::string_view sv = str;
//
// Due to lifetime issues, a `string_view` is sometimes a poor choice for a
// return value and usually a poor choice for a data member. If you do use a
// `string_view` this way, it is your responsibility to ensure that the object
// pointed to by the `string_view` outlives the `string_view`.
//
// A `string_view` may represent a whole string or just part of a string. For
// example, when splitting a string, `std::vector<absl::string_view>` is a
// natural data type for the output.
//
// When constructed from a source which is NUL-terminated, the `string_view`
// itself will not include the NUL-terminator unless a specific size (including
// the NUL) is passed to the constructor. As a result, common idioms that work
// on NUL-terminated strings do not work on `string_view` objects. If you write
// code that scans a `string_view`, you must check its length rather than test
// for nul, for example. Note, however, that nuls may still be embedded within
// a `string_view` explicitly.
//
// You may create a null `string_view` in two ways:
//
// absl::string_view sv;
// absl::string_view sv(nullptr, 0);
//
// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
// `sv.empty() == true`. Also, if you create a `string_view` with a non-null
// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to
// signal an undefined value that is different from other `string_view` values
// in a similar fashion to how `const char* p1 = nullptr;` is different from
// `const char* p2 = "";`. However, in practice, it is not recommended to rely
// on this behavior.
//
// Be careful not to confuse a null `string_view` with an empty one. A null
// `string_view` is an empty `string_view`, but some empty `string_view`s are
// not null. Prefer checking for emptiness over checking for null.
//
// There are many ways to create an empty string_view:
//
// const char* nullcp = nullptr;
// // string_view.size() will return 0 in all cases.
// absl::string_view();
// absl::string_view(nullcp, 0);
// absl::string_view("");
// absl::string_view("", 0);
// absl::string_view("abcdef", 0);
// absl::string_view("abcdef" + 6, 0);
//
// All empty `string_view` objects, whether null or not, are equal:
//
// absl::string_view() == absl::string_view("", 0)
// absl::string_view(nullptr, 0) == absl::string_view("abcdef"+6, 0)
class string_view {
public:
using traits_type = std::char_traits<char>;
using value_type = char;
using pointer = char*;
using const_pointer = const char*;
using reference = char&;
using const_reference = const char&;
using const_iterator = const char*;
using iterator = const_iterator;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
using reverse_iterator = const_reverse_iterator;
using size_type = size_t;
using difference_type = std::ptrdiff_t;
// Sentinel value: returned by the find functions when nothing is found, and
// used as the default `n`/`pos` argument of `substr()` and the reverse find
// functions.
static constexpr size_type npos = static_cast<size_type>(-1);
// Null `string_view` constructor
constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}
// Implicit constructors
template <typename Allocator>
string_view( // NOLINT(runtime/explicit)
const std::basic_string<char, std::char_traits<char>, Allocator>&
str) noexcept
// This is implemented in terms of `string_view(p, n)` so `str.size()`
// doesn't need to be reevaluated after `ptr_` is set.
: string_view(str.data(), str.size()) {}
// Implicit constructor of a `string_view` from NUL-terminated `str`. When
// accepting possibly null strings, use `absl::NullSafeStringView(str)`
// instead (see below).
constexpr string_view(const char* str) // NOLINT(runtime/explicit)
: ptr_(str),
length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {}
// Implicit constructor of a `string_view` from a `const char*` and length.
constexpr string_view(const char* data, size_type len)
: ptr_(data), length_(CheckLengthInternal(len)) {}
// NOTE: Harmlessly omitted to work around gdb bug.
// constexpr string_view(const string_view&) noexcept = default;
// string_view& operator=(const string_view&) noexcept = default;
// Iterators
// string_view::begin()
//
// Returns an iterator pointing to the first character at the beginning of the
// `string_view`, or `end()` if the `string_view` is empty.
constexpr const_iterator begin() const noexcept { return ptr_; }
// string_view::end()
//
// Returns an iterator pointing just beyond the last character at the end of
// the `string_view`. This iterator acts as a placeholder; attempting to
// access it results in undefined behavior.
constexpr const_iterator end() const noexcept { return ptr_ + length_; }
// string_view::cbegin()
//
// Returns a const iterator pointing to the first character at the beginning
// of the `string_view`, or `end()` if the `string_view` is empty.
constexpr const_iterator cbegin() const noexcept { return begin(); }
// string_view::cend()
//
// Returns a const iterator pointing just beyond the last character at the end
// of the `string_view`. This pointer acts as a placeholder; attempting to
// access its element results in undefined behavior.
constexpr const_iterator cend() const noexcept { return end(); }
// string_view::rbegin()
//
// Returns a reverse iterator pointing to the last character at the end of the
// `string_view`, or `rend()` if the `string_view` is empty.
const_reverse_iterator rbegin() const noexcept {
return const_reverse_iterator(end());
}
// string_view::rend()
//
// Returns a reverse iterator pointing just before the first character at the
// beginning of the `string_view`. This pointer acts as a placeholder;
// attempting to access its element results in undefined behavior.
const_reverse_iterator rend() const noexcept {
return const_reverse_iterator(begin());
}
// string_view::crbegin()
//
// Returns a const reverse iterator pointing to the last character at the end
// of the `string_view`, or `crend()` if the `string_view` is empty.
const_reverse_iterator crbegin() const noexcept { return rbegin(); }
// string_view::crend()
//
// Returns a const reverse iterator pointing just before the first character
// at the beginning of the `string_view`. This pointer acts as a placeholder;
// attempting to access its element results in undefined behavior.
const_reverse_iterator crend() const noexcept { return rend(); }
// Capacity Utilities
// string_view::size()
//
// Returns the number of characters in the `string_view`.
constexpr size_type size() const noexcept {
return length_;
}
// string_view::length()
//
// Returns the number of characters in the `string_view`. Alias for `size()`.
constexpr size_type length() const noexcept { return size(); }
// string_view::max_size()
//
// Returns the maximum number of characters the `string_view` can hold.
constexpr size_type max_size() const noexcept { return kMaxSize; }
// string_view::empty()
//
// Checks if the `string_view` is empty (refers to no characters).
constexpr bool empty() const noexcept { return length_ == 0; }
// string_view::operator[]
//
// Returns the ith element of the `string_view` using the array operator.
// Note that this operator performs no bounds checking beyond the
// `ABSL_HARDENING_ASSERT` below.
constexpr const_reference operator[](size_type i) const {
return ABSL_HARDENING_ASSERT(i < size()), ptr_[i];
}
// string_view::at()
//
// Returns the ith element of the `string_view`. Bounds checking is performed,
// and an exception of type `std::out_of_range` will be thrown on invalid
// access.
constexpr const_reference at(size_type i) const {
// Written as a single conditional expression. The trailing `ptr_[i]` in the
// failure branch gives the comma expression the same type as the success
// branch.
// NOTE(review): it is presumably never evaluated because
// ThrowStdOutOfRange is expected not to return -- confirm.
return ABSL_PREDICT_TRUE(i < size())
? ptr_[i]
: ((void)base_internal::ThrowStdOutOfRange(
"absl::string_view::at"),
ptr_[i]);
}
// string_view::front()
//
// Returns the first element of a `string_view`.
constexpr const_reference front() const {
return ABSL_HARDENING_ASSERT(!empty()), ptr_[0];
}
// string_view::back()
//
// Returns the last element of a `string_view`.
constexpr const_reference back() const {
return ABSL_HARDENING_ASSERT(!empty()), ptr_[size() - 1];
}
// string_view::data()
//
// Returns a pointer to the underlying character array (which is of course
// stored elsewhere). Note that `string_view::data()` may contain embedded nul
// characters, but the returned buffer may or may not be NUL-terminated;
// therefore, do not pass `data()` to a routine that expects a NUL-terminated
// string.
constexpr const_pointer data() const noexcept { return ptr_; }
// Modifiers
// string_view::remove_prefix()
//
// Removes the first `n` characters from the `string_view`. Note that the
// underlying string is not changed, only the view.
void remove_prefix(size_type n) {
ABSL_HARDENING_ASSERT(n <= length_);
ptr_ += n;
length_ -= n;
}
// string_view::remove_suffix()
//
// Removes the last `n` characters from the `string_view`. Note that the
// underlying string is not changed, only the view.
void remove_suffix(size_type n) {
ABSL_HARDENING_ASSERT(n <= length_);
length_ -= n;
}
// string_view::swap()
//
// Swaps this `string_view` with another `string_view`.
void swap(string_view& s) noexcept {
auto t = *this;
*this = s;
s = t;
}
// Explicit conversion operators
// Converts to `std::basic_string`.
template <typename A>
explicit operator std::basic_string<char, traits_type, A>() const {
// A null view converts to a default-constructed string, so a null pointer
// is never handed to the string constructor.
if (!data()) return {};
return std::basic_string<char, traits_type, A>(data(), size());
}
// string_view::copy()
//
// Copies up to `n` characters of the `string_view`, starting at offset `pos`,
// into `buf` and returns the number of characters copied. Reports an
// out-of-range error via `ThrowStdOutOfRange` if `pos > size()`.
size_type copy(char* buf, size_type n, size_type pos = 0) const {
if (ABSL_PREDICT_FALSE(pos > length_)) {
base_internal::ThrowStdOutOfRange("absl::string_view::copy");
}
// Never copy past the end of the view.
size_type rlen = (std::min)(length_ - pos, n);
if (rlen > 0) {
const char* start = ptr_ + pos;
traits_type::copy(buf, start, rlen);
}
return rlen;
}
// string_view::substr()
//
// Returns a "substring" of the `string_view` (at offset `pos` and length
// `n`) as another string_view. The length is clipped so the result never
// extends past the end of this view. This function throws
// `std::out_of_range` if `pos > size`.
constexpr string_view substr(size_type pos, size_type n = npos) const {
return ABSL_PREDICT_FALSE(pos > length_)
? (base_internal::ThrowStdOutOfRange(
"absl::string_view::substr"),
string_view())
: string_view(ptr_ + pos, Min(n, length_ - pos));
}
// string_view::compare()
//
// Performs a lexicographical comparison between the `string_view` and
// another `absl::string_view`, returning -1 if `this` is less than, 0 if
// `this` is equal to, and 1 if `this` is greater than the passed string
// view. Note that in the case of data equality, a further comparison is made
// on the respective sizes of the two `string_view`s to determine which is
// smaller, equal, or greater.
constexpr int compare(string_view x) const noexcept {
// The memcmp is skipped entirely when the common prefix length is zero.
return CompareImpl(length_, x.length_,
Min(length_, x.length_) == 0
? 0
: ABSL_INTERNAL_STRING_VIEW_MEMCMP(
ptr_, x.ptr_, Min(length_, x.length_)));
}
// Overload of `string_view::compare()` for comparing a substring of the
// `string_view` and another `absl::string_view`.
int compare(size_type pos1, size_type count1, string_view v) const {
return substr(pos1, count1).compare(v);
}
// Overload of `string_view::compare()` for comparing a substring of the
// `string_view` and a substring of another `absl::string_view`.
int compare(size_type pos1, size_type count1, string_view v, size_type pos2,
size_type count2) const {
return substr(pos1, count1).compare(v.substr(pos2, count2));
}
// Overload of `string_view::compare()` for comparing a `string_view` and a
// C-style string `s`.
int compare(const char* s) const { return compare(string_view(s)); }
// Overload of `string_view::compare()` for comparing a substring of the
// `string_view` and a C-style string `s`.
int compare(size_type pos1, size_type count1, const char* s) const {
return substr(pos1, count1).compare(string_view(s));
}
// Overload of `string_view::compare()` for comparing a substring of the
// `string_view` and the first `count2` characters of a C-style string `s`.
int compare(size_type pos1, size_type count1, const char* s,
size_type count2) const {
return substr(pos1, count1).compare(string_view(s, count2));
}
// Find Utilities
// string_view::find()
//
// Finds the first occurrence of the substring `s` within the `string_view`,
// returning the position of the first character's match, or `npos` if no
// match was found.
size_type find(string_view s, size_type pos = 0) const noexcept;
// Overload of `string_view::find()` for finding the given character `c`
// within the `string_view`.
size_type find(char c, size_type pos = 0) const noexcept;
// string_view::rfind()
//
// Finds the last occurrence of a substring `s` within the `string_view`,
// returning the position of the first character's match, or `npos` if no
// match was found.
size_type rfind(string_view s, size_type pos = npos) const
noexcept;
// Overload of `string_view::rfind()` for finding the last given character `c`
// within the `string_view`.
size_type rfind(char c, size_type pos = npos) const noexcept;
// string_view::find_first_of()
//
// Finds the first occurrence of any of the characters in `s` within the
// `string_view`, returning the start position of the match, or `npos` if no
// match was found.
size_type find_first_of(string_view s, size_type pos = 0) const
noexcept;
// Overload of `string_view::find_first_of()` for finding a character `c`
// within the `string_view`.
size_type find_first_of(char c, size_type pos = 0) const
noexcept {
return find(c, pos);
}
// string_view::find_last_of()
//
// Finds the last occurrence of any of the characters in `s` within the
// `string_view`, returning the start position of the match, or `npos` if no
// match was found.
size_type find_last_of(string_view s, size_type pos = npos) const
noexcept;
// Overload of `string_view::find_last_of()` for finding a character `c`
// within the `string_view`.
size_type find_last_of(char c, size_type pos = npos) const
noexcept {
return rfind(c, pos);
}
// string_view::find_first_not_of()
//
// Finds the first occurrence of any of the characters not in `s` within the
// `string_view`, returning the start position of the first non-match, or
// `npos` if no non-match was found.
size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept;
// Overload of `string_view::find_first_not_of()` for finding a character
// that is not `c` within the `string_view`.
size_type find_first_not_of(char c, size_type pos = 0) const noexcept;
// string_view::find_last_not_of()
//
// Finds the last occurrence of any of the characters not in `s` within the
// `string_view`, returning the start position of the last non-match, or
// `npos` if no non-match was found.
size_type find_last_not_of(string_view s,
size_type pos = npos) const noexcept;
// Overload of `string_view::find_last_not_of()` for finding a character
// that is not `c` within the `string_view`.
size_type find_last_not_of(char c, size_type pos = npos) const
noexcept;
private:
// Largest length a `string_view` may have: the maximum value of
// `difference_type`. Reported by `max_size()`.
static constexpr size_type kMaxSize =
(std::numeric_limits<difference_type>::max)();
// Returns `len` unchanged after asserting (via ABSL_HARDENING_ASSERT) that it
// does not exceed `kMaxSize`.
static constexpr size_type CheckLengthInternal(size_type len) {
return ABSL_HARDENING_ASSERT(len <= kMaxSize), len;
}
// Returns the length of the NUL-terminated string `str`, using whichever
// implementation the branches below select for the current compiler. Only
// the final fallback branch tolerates a null `str`.
static constexpr size_type StrlenInternal(const char* str) {
#if defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__)
// MSVC 2017+ can evaluate this at compile-time.
const char* begin = str;
while (*str != '\0') ++str;
return str - begin;
#elif ABSL_HAVE_BUILTIN(__builtin_strlen) || \
(defined(__GNUC__) && !defined(__clang__))
// GCC has __builtin_strlen according to
// https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but
// ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above.
// __builtin_strlen is constexpr.
return __builtin_strlen(str);
#else
return str ? strlen(str) : 0;
#endif
}
// Returns the smaller of the two lengths.
static constexpr size_t Min(size_type length_a, size_type length_b) {
return length_a < length_b ? length_a : length_b;
}
// Turns a memcmp-style `compare_result` for the common prefix into -1, 0 or
// 1. When the prefixes compare equal, the result is decided by the lengths:
// the shorter operand orders first.
static constexpr int CompareImpl(size_type length_a, size_type length_b,
int compare_result) {
return compare_result == 0 ? static_cast<int>(length_a > length_b) -
static_cast<int>(length_a < length_b)
: (compare_result < 0 ? -1 : 1);
}
// Start of the viewed characters; null for a null `string_view`.
const char* ptr_;
// Number of characters in the view.
size_type length_;
};
// This large function is defined inline so that in a fairly common case where
// one of the arguments is a literal, the compiler can elide a lot of the
// following comparisons.
//
// Two views are equal when their sizes match and their bytes compare equal.
// The byte comparison is skipped for empty views, so null and non-null empty
// views compare equal without their data pointers being passed to memcmp.
constexpr bool operator==(string_view x, string_view y) noexcept {
return x.size() == y.size() &&
(x.empty() ||
ABSL_INTERNAL_STRING_VIEW_MEMCMP(x.data(), y.data(), x.size()) == 0);
}
// Inequality: the negation of `operator==`.
constexpr bool operator!=(string_view x, string_view y) noexcept {
return !(x == y);
}
// Lexicographical "less than", defined via `string_view::compare()`. The
// remaining relational operators are all expressed in terms of this one.
constexpr bool operator<(string_view x, string_view y) noexcept {
return x.compare(y) < 0;
}
// `x > y` is `y < x`.
constexpr bool operator>(string_view x, string_view y) noexcept {
return y < x;
}
// `x <= y` is "not (y < x)".
constexpr bool operator<=(string_view x, string_view y) noexcept {
return !(y < x);
}
// `x >= y` is "not (x < y)".
constexpr bool operator>=(string_view x, string_view y) noexcept {
return !(x < y);
}
// IO Insertion Operator
//
// Writes the characters of `piece` to the stream `o` and returns `o`. Only
// declared here; <iosfwd> is enough for the declaration.
std::ostream& operator<<(std::ostream& o, string_view piece);
ABSL_NAMESPACE_END
} // namespace absl
#undef ABSL_INTERNAL_STRING_VIEW_MEMCMP
#endif // ABSL_USES_STD_STRING_VIEW
namespace absl {
ABSL_NAMESPACE_BEGIN
// ClippedSubstr()
//
// Equivalent to `s.substr(pos, n)`, except that a `pos` larger than `s.size()`
// is first clamped to `s.size()`. Provided because
// std::string_view::substr throws if `pos > size()`.
inline string_view ClippedSubstr(string_view s, size_t pos,
                                 size_t n = string_view::npos) {
  const size_t limit = static_cast<size_t>(s.size());
  if (limit < pos) pos = limit;
  return s.substr(pos, n);
}
// NullSafeStringView()
//
// Builds an `absl::string_view` from the C-string pointer `p`, yielding a
// default-constructed (null) view when `p` is null. Use it wherever an
// `absl::string_view` has to be created from a pointer that may be null.
constexpr string_view NullSafeStringView(const char* p) {
  return p == nullptr ? string_view() : string_view(p);
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_STRING_VIEW_H_

View file

@ -0,0 +1,381 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#include <algorithm>
#include <cstdint>
#include <map>
#include <random>
#include <string>
#include <unordered_set>
#include <vector>
#include "benchmark/benchmark.h"
#include "absl/base/attributes.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/strings/str_cat.h"
namespace {
// Measures constructing an `absl::string_view` from a `std::string` of
// `state.range(0)` characters (registered below for 12 and 128).
void BM_StringViewFromString(benchmark::State& state) {
std::string s(state.range(0), 'x');
std::string* ps = &s;
// Small wrapper whose constructor performs the std::string -> string_view
// conversion being measured.
struct SV {
SV() = default;
explicit SV(const std::string& s) : sv(s) {}
absl::string_view sv;
} sv;
SV* psv = &sv;
// NOTE(review): passing both pointers through DoNotOptimize is presumably
// meant to stop the compiler from reasoning about what they point to --
// confirm against the Google Benchmark documentation.
benchmark::DoNotOptimize(ps);
benchmark::DoNotOptimize(psv);
for (auto _ : state) {
// Re-run the constructor in place on the same storage each iteration.
new (psv) SV(*ps);
benchmark::DoNotOptimize(sv);
}
}
BENCHMARK(BM_StringViewFromString)->Arg(12)->Arg(128);
// Provide a forcibly out-of-line wrapper for operator== that can be used in
// benchmarks to measure the impact of inlining. Returns `a == b`.
ABSL_ATTRIBUTE_NOINLINE
bool NonInlinedEq(absl::string_view a, absl::string_view b) { return a == b; }
// We use functions that cannot be inlined to perform the comparison loops so
// that inlining of the operator== can't optimize away *everything*.
//
// Runs the benchmark loop for `state`, evaluating `a == b` once per iteration.
ABSL_ATTRIBUTE_NOINLINE
void DoEqualityComparisons(benchmark::State& state, absl::string_view a,
absl::string_view b) {
for (auto _ : state) {
benchmark::DoNotOptimize(a == b);
}
}
// Equality of a view with itself: both operands are views of the same
// `std::string` of `state.range(0)` 'a' characters.
void BM_EqualIdentical(benchmark::State& state) {
std::string x(state.range(0), 'a');
DoEqualityComparisons(state, x, x);
}
BENCHMARK(BM_EqualIdentical)->DenseRange(0, 3)->Range(4, 1 << 10);
// Equality of two views with equal contents, where the second operand views a
// copy (`y`) of the first string rather than the same `std::string` object.
void BM_EqualSame(benchmark::State& state) {
std::string x(state.range(0), 'a');
std::string y = x;
DoEqualityComparisons(state, x, y);
}
BENCHMARK(BM_EqualSame)
->DenseRange(0, 10)
->Arg(20)
->Arg(40)
->Arg(70)
->Arg(110)
->Range(160, 4096);
// Equality of two equally sized views that differ only in their final
// character. For length 0 both views are empty, so they compare equal.
void BM_EqualDifferent(benchmark::State& state) {
const int len = state.range(0);
std::string x(len, 'a');
std::string y = x;
// Guard the write: there is no last character to change when len == 0.
if (len > 0) {
y[len - 1] = 'b';
}
DoEqualityComparisons(state, x, y);
}
BENCHMARK(BM_EqualDifferent)->DenseRange(0, 3)->Range(4, 1 << 10);
// This benchmark is intended to check that important simplifications can be
// made with absl::string_view comparisons against constant strings. The idea is
// that if constant strings cause redundant components of the comparison, the
// compiler should detect and eliminate them. Here we use 8 different strings,
// each with the same size. Provided our comparison makes the implementation
// inline-able by the compiler, it should fold all of these away into a single
// size check once per loop iteration.
//
// Each iteration compares `a` against eight three-character literals using the
// inline `operator==`.
ABSL_ATTRIBUTE_NOINLINE
void DoConstantSizeInlinedEqualityComparisons(benchmark::State& state,
absl::string_view a) {
for (auto _ : state) {
benchmark::DoNotOptimize(a == "aaa");
benchmark::DoNotOptimize(a == "bbb");
benchmark::DoNotOptimize(a == "ccc");
benchmark::DoNotOptimize(a == "ddd");
benchmark::DoNotOptimize(a == "eee");
benchmark::DoNotOptimize(a == "fff");
benchmark::DoNotOptimize(a == "ggg");
benchmark::DoNotOptimize(a == "hhh");
}
}
// Drives DoConstantSizeInlinedEqualityComparisons with a string of
// `state.range(0)` 'a' characters.
void BM_EqualConstantSizeInlined(benchmark::State& state) {
std::string x(state.range(0), 'a');
DoConstantSizeInlinedEqualityComparisons(state, x);
}
// We only need to check sizes equal to 3 and different from 3 (2 and 4), as
// this benchmark only has to do with size differences.
BENCHMARK(BM_EqualConstantSizeInlined)->DenseRange(2, 4);
// This benchmark exists purely to give context to the above timings: this is
// what they would look like if the compiler is completely unable to simplify
// between two comparisons when they are comparing against constant strings.
//
// Each iteration performs the same eight comparisons as the inlined variant,
// but routes every one through the out-of-line `NonInlinedEq`.
ABSL_ATTRIBUTE_NOINLINE
void DoConstantSizeNonInlinedEqualityComparisons(benchmark::State& state,
absl::string_view a) {
for (auto _ : state) {
// Force these out-of-line to compare with the above function.
benchmark::DoNotOptimize(NonInlinedEq(a, "aaa"));
benchmark::DoNotOptimize(NonInlinedEq(a, "bbb"));
benchmark::DoNotOptimize(NonInlinedEq(a, "ccc"));
benchmark::DoNotOptimize(NonInlinedEq(a, "ddd"));
benchmark::DoNotOptimize(NonInlinedEq(a, "eee"));
benchmark::DoNotOptimize(NonInlinedEq(a, "fff"));
benchmark::DoNotOptimize(NonInlinedEq(a, "ggg"));
benchmark::DoNotOptimize(NonInlinedEq(a, "hhh"));
}
}
// Drives DoConstantSizeNonInlinedEqualityComparisons with a string of
// `state.range(0)` 'a' characters.
void BM_EqualConstantSizeNonInlined(benchmark::State& state) {
std::string x(state.range(0), 'a');
DoConstantSizeNonInlinedEqualityComparisons(state, x);
}
// We only need to check sizes equal to 3 and different from 3 (2 and 4), as
// this benchmark only has to do with size differences.
BENCHMARK(BM_EqualConstantSizeNonInlined)->DenseRange(2, 4);
// Benchmarks `string_view::compare()` on two views with equal contents of
// `state.range(0)` 'a' characters, taken from two separate `std::string`
// objects.
void BM_CompareSame(benchmark::State& state) {
  const int len = state.range(0);
  // Same contents as appending 'a' `len` times, built in one step.
  std::string x(len, 'a');
  std::string y = x;
  absl::string_view a = x;
  absl::string_view b = y;

  for (auto _ : state) {
    benchmark::DoNotOptimize(a);
    benchmark::DoNotOptimize(b);
    benchmark::DoNotOptimize(a.compare(b));
  }
}
BENCHMARK(BM_CompareSame)->DenseRange(0, 3)->Range(4, 1 << 10);
// Benchmarks `compare()` where the first operand orders before the second:
// the strings are equal except that the last character of `y` is 'b' instead
// of 'a'. Registered only for lengths >= 1, since `y.back()` needs a
// non-empty string.
void BM_CompareFirstOneLess(benchmark::State& state) {
const int len = state.range(0);
std::string x(len, 'a');
std::string y = x;
y.back() = 'b';
absl::string_view a = x;
absl::string_view b = y;
for (auto _ : state) {
benchmark::DoNotOptimize(a);
benchmark::DoNotOptimize(b);
benchmark::DoNotOptimize(a.compare(b));
}
}
BENCHMARK(BM_CompareFirstOneLess)->DenseRange(1, 3)->Range(4, 1 << 10);
// Benchmarks `compare()` where the second operand orders before the first:
// the strings are equal except that the last character of `x` is 'b' instead
// of 'a'. Registered only for lengths >= 1, since `x.back()` needs a
// non-empty string.
void BM_CompareSecondOneLess(benchmark::State& state) {
const int len = state.range(0);
std::string x(len, 'a');
std::string y = x;
x.back() = 'b';
absl::string_view a = x;
absl::string_view b = y;
for (auto _ : state) {
benchmark::DoNotOptimize(a);
benchmark::DoNotOptimize(b);
benchmark::DoNotOptimize(a.compare(b));
}
}
BENCHMARK(BM_CompareSecondOneLess)->DenseRange(1, 3)->Range(4, 1 << 10);
// Benchmarks the substring overload of `find()` with a one-character needle
// that does not occur in a haystack of `state.range(0)` '0' characters.
void BM_find_string_view_len_one(benchmark::State& state) {
std::string haystack(state.range(0), '0');
absl::string_view s(haystack);
for (auto _ : state) {
benchmark::DoNotOptimize(s.find("x")); // not present; length 1
}
}
BENCHMARK(BM_find_string_view_len_one)->Range(1, 1 << 20);
// Measures string_view::find() with a two-character needle that never occurs
// in a haystack of `state.range(0)` '0' characters.
void BM_find_string_view_len_two(benchmark::State& state) {
  std::string text(state.range(0), '0');
  absl::string_view view(text);
  for (auto _ : state) {
    benchmark::DoNotOptimize(view.find("xx"));  // not present; length 2
  }
}
BENCHMARK(BM_find_string_view_len_two)->Range(1, 1 << 20);
// Measures the single-character overload of string_view::find() for a
// character that never occurs in a haystack of `state.range(0)` '0's.
void BM_find_one_char(benchmark::State& state) {
  std::string text(state.range(0), '0');
  absl::string_view view(text);
  for (auto _ : state) {
    benchmark::DoNotOptimize(view.find('x'));  // not present
  }
}
BENCHMARK(BM_find_one_char)->Range(1, 1 << 20);
// Measures the single-character overload of string_view::rfind() for a
// character that never occurs in a haystack of `state.range(0)` '0's.
void BM_rfind_one_char(benchmark::State& state) {
  std::string text(state.range(0), '0');
  absl::string_view view(text);
  for (auto _ : state) {
    benchmark::DoNotOptimize(view.rfind('x'));  // not present
  }
}
BENCHMARK(BM_rfind_one_char)->Range(1, 1 << 20);
// Shared body for the find_first_of benchmarks.
//
// Builds a needle of `state.range(0)` consecutive characters starting at 'a'
// and searches for any of them in a haystack of `haystack_len` '0'
// characters. For the needle lengths registered in this file no needle
// character equals '0', so every call fails to find a match.
void BM_worst_case_find_first_of(benchmark::State& state, int haystack_len) {
  const int needle_len = state.range(0);
  std::string needle;
  for (int i = 0; i < needle_len; ++i) {
    // 'a' + i is an int; make the narrowing back to char explicit.
    needle += static_cast<char>('a' + i);
  }
  std::string haystack(haystack_len, '0');  // `haystack_len` '0' characters.
  absl::string_view s(haystack);
  for (auto _ : state) {
    benchmark::DoNotOptimize(s.find_first_of(needle));
  }
}
// find_first_of over a 10-character haystack; the benchmark argument is the
// needle length.
void BM_find_first_of_short(benchmark::State& state) {
  const int kHaystackLen = 10;
  BM_worst_case_find_first_of(state, kHaystackLen);
}
BENCHMARK(BM_find_first_of_short)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32);

// find_first_of over a 100-character haystack; the benchmark argument is the
// needle length.
void BM_find_first_of_medium(benchmark::State& state) {
  const int kHaystackLen = 100;
  BM_worst_case_find_first_of(state, kHaystackLen);
}
BENCHMARK(BM_find_first_of_medium)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32);

// find_first_of over a 1000-character haystack; the benchmark argument is the
// needle length.
void BM_find_first_of_long(benchmark::State& state) {
  const int kHaystackLen = 1000;
  BM_worst_case_find_first_of(state, kHaystackLen);
}
BENCHMARK(BM_find_first_of_long)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32);
// std::map keyed by absl::string_view, with a constructor that accepts (and
// ignores) a size. StringViewMapBenchmark constructs its map as
// `Map h(table_size)`, which a plain std::map would not support.
struct EasyMap : std::map<absl::string_view, uint64_t> {
  explicit EasyMap(size_t /*unused*/) {}
};
// This templated benchmark helper function is intended to stress operator== or
// operator< in a realistic test.  It surely isn't entirely realistic, but it's
// a start.  The test creates a map of type Map, a template arg, and populates
// it with table_size key/value pairs. Each key has WordsPerKey words.  After
// creating the map, a number of lookups are done in random order.  Some keys
// are used much more frequently than others in this phase of the test.
//
// Template parameters:
//   Map         - map type constructible from a size, supporting operator[]
//                 with the generated keys, and size().
//   WordsPerKey - number of space-separated words in every generated key.
//
// `state.range(0)` supplies table_size. Key generation and the lookup
// schedule are built once, outside the timed loop.
template <typename Map, int WordsPerKey>
void StringViewMapBenchmark(benchmark::State& state) {
  const int table_size = state.range(0);
  const double kFractionOfKeysThatAreHot = 0.2;
  const int kNumLookupsOfHotKeys = 20;
  const int kNumLookupsOfColdKeys = 1;
  const char* words[] = {"the",   "quick",  "brown",    "fox",      "jumped",
                         "over",  "the",    "lazy",     "dog",      "and",
                         "found", "a",      "large",    "mushroom", "and",
                         "a",     "couple", "crickets", "eating",   "pie"};
  // Create some keys that consist of words in random order.
  std::random_device r;
  std::seed_seq seed({r(), r(), r(), r(), r(), r(), r(), r()});
  std::mt19937 rng(seed);
  std::vector<std::string> keys(table_size);
  std::vector<int> all_indices;
  const int kBlockSize = 1 << 12;
  std::unordered_set<std::string> t(kBlockSize);
  std::uniform_int_distribution<int> uniform(
      0, static_cast<int>(ABSL_ARRAYSIZE(words)) - 1);
  for (int i = 0; i < table_size; i++) {
    all_indices.push_back(i);
    // Regenerate the key until it is one we have not produced before. This
    // only terminates if table_size does not exceed the number of distinct
    // WordsPerKey-word keys that `words` can form.
    do {
      keys[i].clear();
      for (int j = 0; j < WordsPerKey; j++) {
        absl::StrAppend(&keys[i], j > 0 ? " " : "", words[uniform(rng)]);
      }
    } while (!t.insert(keys[i]).second);
  }
  // Create a list of strings to lookup: a permutation of the array of
  // keys we just created, with repeats.  "Hot" keys get repeated more.
  std::shuffle(all_indices.begin(), all_indices.end(), rng);
  // Truncation toward zero is intended here.
  const int num_hot =
      static_cast<int>(table_size * kFractionOfKeysThatAreHot);
  const int num_cold = table_size - num_hot;
  std::vector<int> hot_indices(all_indices.begin(),
                               all_indices.begin() + num_hot);
  std::vector<int> indices;
  // Every key (hot or cold) is looked up kNumLookupsOfColdKeys times...
  for (int i = 0; i < kNumLookupsOfColdKeys; i++) {
    indices.insert(indices.end(), all_indices.begin(), all_indices.end());
  }
  // ...and hot keys get the remaining lookups on top of that.
  for (int i = 0; i < kNumLookupsOfHotKeys - kNumLookupsOfColdKeys; i++) {
    indices.insert(indices.end(), hot_indices.begin(), hot_indices.end());
  }
  std::shuffle(indices.begin(), indices.end(), rng);
  ABSL_RAW_CHECK(
      static_cast<size_t>(num_cold * kNumLookupsOfColdKeys +
                          num_hot * kNumLookupsOfHotKeys) == indices.size(),
      "");
  // After constructing the array we probe it with absl::string_views built from
  // test_strings.  This means operator== won't see equal pointers, so
  // it'll have to check for equal lengths and equal characters.
  std::vector<std::string> test_strings(indices.size());
  for (size_t i = 0; i < indices.size(); i++) {
    test_strings[i] = keys[indices[i]];
  }
  // Run the benchmark. It includes map construction but is mostly
  // map lookups.
  for (auto _ : state) {
    Map h(table_size);
    for (int i = 0; i < table_size; i++) {
      h[keys[i]] = i * 2;
    }
    ABSL_RAW_CHECK(h.size() == static_cast<size_t>(table_size), "");
    uint64_t sum = 0;
    for (const std::string& probe : test_strings) {
      sum += h[probe];
    }
    benchmark::DoNotOptimize(sum);
  }
}
// Map benchmark over EasyMap with four-word keys; the benchmark argument is
// the table size.
void BM_StdMap_4(benchmark::State& state) {
  constexpr int kWordsPerKey = 4;
  StringViewMapBenchmark<EasyMap, kWordsPerKey>(state);
}
BENCHMARK(BM_StdMap_4)->Range(1 << 10, 1 << 16);
// Map benchmark over EasyMap with eight-word keys; the benchmark argument is
// the table size.
void BM_StdMap_8(benchmark::State& state) {
  constexpr int kWordsPerKey = 8;
  StringViewMapBenchmark<EasyMap, kWordsPerKey>(state);
}
BENCHMARK(BM_StdMap_8)->Range(1 << 10, 1 << 16);
// Measures copying a string_view of `state.range(0)` characters into an
// existing std::string via the iterator-range overload of assign().
void BM_CopyToStringNative(benchmark::State& state) {
  std::string source(state.range(0), 'x');
  absl::string_view view(source);
  std::string target;
  for (auto _ : state) {
    target.assign(view.begin(), view.end());
  }
}
BENCHMARK(BM_CopyToStringNative)->Range(1 << 3, 1 << 12);
// Measures appending a string_view of `state.range(0)` characters to a
// std::string via the iterator-range overload of insert(). The destination
// is cleared at the start of every iteration.
void BM_AppendToStringNative(benchmark::State& state) {
  std::string source(state.range(0), 'x');
  absl::string_view view(source);
  std::string target;
  for (auto _ : state) {
    target.clear();
    target.insert(target.end(), view.begin(), view.end());
  }
}
BENCHMARK(BM_AppendToStringNative)->Range(1 << 3, 1 << 12);
} // namespace

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: strip.h
// -----------------------------------------------------------------------------
//
// This file contains various functions for stripping substrings from a string.
#ifndef ABSL_STRINGS_STRIP_H_
#define ABSL_STRINGS_STRIP_H_
#include <cstddef>
#include <string>
#include "absl/base/macros.h"
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// ConsumePrefix()
//
// If `*str` begins with `expected`, removes that prefix from `*str` in place
// and returns `true`. Otherwise leaves `*str` untouched and returns `false`.
//
// Example:
//
//   absl::string_view input("abc");
//   EXPECT_TRUE(absl::ConsumePrefix(&input, "a"));
//   EXPECT_EQ(input, "bc");
inline bool ConsumePrefix(absl::string_view* str, absl::string_view expected) {
  const bool matched = absl::StartsWith(*str, expected);
  if (matched) {
    str->remove_prefix(expected.size());
  }
  return matched;
}
// ConsumeSuffix()
//
// If `*str` ends with `expected`, removes that suffix from `*str` in place
// and returns `true`. Otherwise leaves `*str` untouched and returns `false`.
//
// Example:
//
//   absl::string_view input("abcdef");
//   EXPECT_TRUE(absl::ConsumeSuffix(&input, "def"));
//   EXPECT_EQ(input, "abc");
inline bool ConsumeSuffix(absl::string_view* str, absl::string_view expected) {
  const bool matched = absl::EndsWith(*str, expected);
  if (matched) {
    str->remove_suffix(expected.size());
  }
  return matched;
}
// StripPrefix()
//
// Returns a view of `str` with a leading `prefix` removed. The caller's
// string is not modified: `str` is taken by value and only the returned view
// is shortened. If `str` does not start with `prefix`, the returned view is
// `str` unchanged.
ABSL_MUST_USE_RESULT inline absl::string_view StripPrefix(
    absl::string_view str, absl::string_view prefix) {
  if (!absl::StartsWith(str, prefix)) {
    return str;
  }
  str.remove_prefix(prefix.size());
  return str;
}
// StripSuffix()
//
// Returns a view of `str` with a trailing `suffix` removed. The caller's
// string is not modified: `str` is taken by value and only the returned view
// is shortened. If `str` does not end with `suffix`, the returned view is
// `str` unchanged.
ABSL_MUST_USE_RESULT inline absl::string_view StripSuffix(
    absl::string_view str, absl::string_view suffix) {
  if (!absl::EndsWith(str, suffix)) {
    return str;
  }
  str.remove_suffix(suffix.size());
  return str;
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_STRIP_H_

Some files were not shown because too many files have changed in this diff Show more