- 64db19b773134c6c8004e3b23c9ca892efbf8bae Move SpinLock's adaptive spin count computation from a st... by Derek Mauro <dmauro@google.com>
  - 6f9533fb44a52485a7c2bbb9b4efc7bf8d6c359a Import of CCTZ from GitHub. by Abseil Team <absl-team@google.com>
  - a211d7255c986e8dd4ceada362c0d054a6a1969a Cleanup exception flags by Abseil Team <absl-team@google.com>
  - babdb29c590126fe9bba5229fe91034b5b5c358a Release time benchmarks. by Alex Strelnikov <strel@google.com>
  - 5803b32a3ff123d1fb57a0c471d199c818357c9f Release memutil microbenchmarks. by Alex Strelnikov <strel@google.com>
  - 5357d4890d30e80c53beb05af32500fb20e9402b Add parens around expansion of ABSL_PREDICT_{FALSE,TRUE} ... by Abseil Team <absl-team@google.com>
  - 32023f61a239a5f6b1c59e577bfe81b179bbcd2d Reformat build rule tag. by Alex Strelnikov <strel@google.com>
  - 833758ecf2b0cf7a42bbd50b5b127e416425c168 Release uint128 microbenchmarks. by Alex Strelnikov <strel@google.com>
  - c115a9bca1f944b90fdc78a56b2de176466b124f Disambiguate bitwise-not of size_type by Abseil Team <absl-team@google.com>
  - f6905f5b5f6e425792de646edafde440548d9346 Updates ConstructorTracker and TrackedObjects with 1) a m... by Abseil Team <absl-team@google.com>
  - 147c553bdd5d2db20a38f75c4d1ef973d6c709c5 Changes the absl::Duration factory functions to disallow ... by Greg Miller <jgm@google.com>
  - dba2b96d11b5264546b283ba452f2de1303b0f07 White space fix by Alex Strelnikov <strel@google.com>
GitOrigin-RevId: abacaab4b11a69dd4db627bd183571d7cabb8def
Change-Id: I6fa34f20d0b2f898e7b5475a603111413bb80a67
		
	
			
		
			
				
	
	
		
			323 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			323 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2018 The Abseil Authors.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //      http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| #include "absl/strings/internal/memutil.h"
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <cstdlib>
 | |
| 
 | |
| #include "benchmark/benchmark.h"
 | |
| #include "absl/strings/ascii.h"
 | |
| 
 | |
| // We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
 | |
| // That gives us:
 | |
| // - an easy search: 'b'
 | |
| // - a medium search: 'ab'.  That means every letter is a possible match.
 | |
| // - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haystack)
 | |
| // We benchmark case-sensitive and case-insensitive versions of
 | |
| // three memmem implementations:
 | |
| // - memmem() from memutil.h
 | |
| // - search() from STL
 | |
| // - memmatch(), a custom implementation using memchr and memcmp.
 | |
| // Here are sample results:
 | |
| //
 | |
| // Run on (12 X 3800 MHz CPU s)
 | |
| // CPU Caches:
 | |
| //   L1 Data 32K (x6)
 | |
| //   L1 Instruction 32K (x6)
 | |
| //   L2 Unified 256K (x6)
 | |
| //   L3 Unified 15360K (x1)
 | |
| // ----------------------------------------------------------------
 | |
| // Benchmark                           Time          CPU Iterations
 | |
| // ----------------------------------------------------------------
 | |
| // BM_Memmem                        3583 ns      3582 ns     196469  2.59966GB/s
 | |
| // BM_MemmemMedium                 13743 ns     13742 ns      50901  693.986MB/s
 | |
| // BM_MemmemPathological        13695030 ns  13693977 ns         51  713.133kB/s
 | |
| // BM_Memcasemem                    3299 ns      3299 ns     212942  2.82309GB/s
 | |
| // BM_MemcasememMedium             16407 ns     16406 ns      42170  581.309MB/s
 | |
| // BM_MemcasememPathological    17267745 ns  17266030 ns         41  565.598kB/s
 | |
| // BM_Search                        1610 ns      1609 ns     431321  5.78672GB/s
 | |
| // BM_SearchMedium                 11111 ns     11110 ns      63001  858.414MB/s
 | |
| // BM_SearchPathological        12117390 ns  12116397 ns         58  805.984kB/s
 | |
| // BM_Searchcase                    3081 ns      3081 ns     229949  3.02313GB/s
 | |
| // BM_SearchcaseMedium             16003 ns     16001 ns      44170  595.998MB/s
 | |
| // BM_SearchcasePathological    15823413 ns  15821909 ns         44  617.222kB/s
 | |
| // BM_Memmatch                       197 ns       197 ns    3584225  47.2951GB/s
 | |
| // BM_MemmatchMedium               52333 ns     52329 ns      13280  182.244MB/s
 | |
| // BM_MemmatchPathological        659799 ns    659727 ns       1058  14.4556MB/s
 | |
| // BM_Memcasematch                  5460 ns      5460 ns     127606  1.70586GB/s
 | |
| // BM_MemcasematchMedium           32861 ns     32857 ns      21258  290.248MB/s
 | |
| // BM_MemcasematchPathological  15154243 ns  15153089 ns         46  644.464kB/s
 | |
| // BM_MemmemStartup                    5 ns         5 ns  150821500
 | |
| // BM_SearchStartup                    5 ns         5 ns  150644203
 | |
| // BM_MemmatchStartup                  7 ns         7 ns   97068802
 | |
| //
 | |
| // Conclusions:
 | |
| //
 | |
| // The following recommendations are based on the sample results above. However,
 | |
| // we have found that the performance of STL search can vary significantly
 | |
| // depending on compiler and standard library implementation. We recommend you
 | |
| // run the benchmarks for yourself on relevant platforms.
 | |
| //
 | |
| // If you need case-insensitive, STL search is slightly better than memmem for
 | |
| // all cases.
 | |
| //
 | |
| // Case-sensitive is more subtle:
 | |
| // Custom memmatch is _very_ fast at scanning, so if you have very few possible
 | |
| // matches in your haystack, that's the way to go. Performance drops
 | |
| // significantly with more matches.
 | |
| //
 | |
| // STL search is slightly faster than memmem in the medium and pathological
 | |
| // benchmarks. However, the performance of memmem is currently more dependable
 | |
| // across platforms and build configurations.
 | |
| 
 | |
| namespace {
 | |
| 
 | |
// Number of bytes in the shared benchmark haystack.
constexpr int kHaystackSize = 10000;
// Same size as a 64-bit value, used when computing bytes-processed totals.
constexpr int64_t kHaystackSize64 = kHaystackSize;

// Allocates the haystack: (kHaystackSize - 1) copies of 'a' followed by a
// single 'b'.  The buffer is NOT NUL-terminated and is never freed by this
// file; callers must always pass an explicit length.
const char* MakeHaystack() {
  char* buffer = new char[kHaystackSize];
  std::fill_n(buffer, kHaystackSize - 1, 'a');
  buffer[kHaystackSize - 1] = 'b';
  return buffer;
}

// The single haystack instance shared by every benchmark below.
const char* const kHaystack = MakeHaystack();
 | |
| 
 | |
| void BM_Memmem(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(
 | |
|         absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_Memmem);
 | |
| 
 | |
| void BM_MemmemMedium(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(
 | |
|         absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemmemMedium);
 | |
| 
 | |
| void BM_MemmemPathological(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(absl::strings_internal::memmem(
 | |
|         kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
 | |
|         kHaystackSize - kHaystackSize / 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemmemPathological);
 | |
| 
 | |
| void BM_Memcasemem(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(
 | |
|         absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_Memcasemem);
 | |
| 
 | |
| void BM_MemcasememMedium(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(
 | |
|         absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemcasememMedium);
 | |
| 
 | |
| void BM_MemcasememPathological(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
 | |
|         kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
 | |
|         kHaystackSize - kHaystackSize / 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemcasememPathological);
 | |
| 
 | |
| bool case_eq(const char a, const char b) {
 | |
|   return absl::ascii_tolower(a) == absl::ascii_tolower(b);
 | |
| }
 | |
| 
 | |
| void BM_Search(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
 | |
|                                          kHaystack + kHaystackSize - 1,
 | |
|                                          kHaystack + kHaystackSize));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_Search);
 | |
| 
 | |
| void BM_SearchMedium(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
 | |
|                                          kHaystack + kHaystackSize - 2,
 | |
|                                          kHaystack + kHaystackSize));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_SearchMedium);
 | |
| 
 | |
| void BM_SearchPathological(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
 | |
|                                          kHaystack + kHaystackSize / 2,
 | |
|                                          kHaystack + kHaystackSize));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_SearchPathological);
 | |
| 
 | |
| void BM_Searchcase(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
 | |
|                                          kHaystack + kHaystackSize - 1,
 | |
|                                          kHaystack + kHaystackSize, case_eq));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_Searchcase);
 | |
| 
 | |
| void BM_SearchcaseMedium(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
 | |
|                                          kHaystack + kHaystackSize - 2,
 | |
|                                          kHaystack + kHaystackSize, case_eq));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_SearchcaseMedium);
 | |
| 
 | |
| void BM_SearchcasePathological(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
 | |
|                                          kHaystack + kHaystackSize / 2,
 | |
|                                          kHaystack + kHaystackSize, case_eq));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_SearchcasePathological);
 | |
| 
 | |
| char* memcasechr(const char* s, int c, size_t slen) {
 | |
|   c = absl::ascii_tolower(c);
 | |
|   for (; slen; ++s, --slen) {
 | |
|     if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
 | |
|   }
 | |
|   return nullptr;
 | |
| }
 | |
| 
 | |
| const char* memcasematch(const char* phaystack, size_t haylen,
 | |
|                          const char* pneedle, size_t neelen) {
 | |
|   if (0 == neelen) {
 | |
|     return phaystack;  // even if haylen is 0
 | |
|   }
 | |
|   if (haylen < neelen) return nullptr;
 | |
| 
 | |
|   const char* match;
 | |
|   const char* hayend = phaystack + haylen - neelen + 1;
 | |
|   while ((match = static_cast<char*>(
 | |
|               memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
 | |
|     if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
 | |
|       return match;
 | |
|     else
 | |
|       phaystack = match + 1;
 | |
|   }
 | |
|   return nullptr;
 | |
| }
 | |
| 
 | |
| void BM_Memmatch(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(
 | |
|         absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_Memmatch);
 | |
| 
 | |
| void BM_MemmatchMedium(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(
 | |
|         absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemmatchMedium);
 | |
| 
 | |
| void BM_MemmatchPathological(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(absl::strings_internal::memmatch(
 | |
|         kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
 | |
|         kHaystackSize - kHaystackSize / 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemmatchPathological);
 | |
| 
 | |
| void BM_Memcasematch(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_Memcasematch);
 | |
| 
 | |
| void BM_MemcasematchMedium(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemcasematchMedium);
 | |
| 
 | |
| void BM_MemcasematchPathological(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
 | |
|                                           kHaystack + kHaystackSize / 2,
 | |
|                                           kHaystackSize - kHaystackSize / 2));
 | |
|   }
 | |
|   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
 | |
| }
 | |
| BENCHMARK(BM_MemcasematchPathological);
 | |
| 
 | |
| void BM_MemmemStartup(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(absl::strings_internal::memmem(
 | |
|         kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
 | |
|   }
 | |
| }
 | |
| BENCHMARK(BM_MemmemStartup);
 | |
| 
 | |
| void BM_SearchStartup(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(
 | |
|         std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
 | |
|                     kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
 | |
|   }
 | |
| }
 | |
| BENCHMARK(BM_SearchStartup);
 | |
| 
 | |
| void BM_MemmatchStartup(benchmark::State& state) {
 | |
|   for (auto _ : state) {
 | |
|     benchmark::DoNotOptimize(absl::strings_internal::memmatch(
 | |
|         kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
 | |
|   }
 | |
| }
 | |
| BENCHMARK(BM_MemmatchStartup);
 | |
| 
 | |
| }  // namespace
 |