Export of internal Abseil changes.

--
4eacae3ff1b14b1d309e8092185bc10e8a6203cf by Derek Mauro <dmauro@google.com>:

Release SwissTable - a fast, efficient, cache-friendly hash table.

https://www.youtube.com/watch?v=ncHmEUmJZf4

PiperOrigin-RevId: 214816527

--
df8c3dfab3cfb2f4365909a84d0683b193cfbb11 by Derek Mauro <dmauro@google.com>:

Internal change

PiperOrigin-RevId: 214785288

--
1eabd5266bbcebc33eecc91e5309b751856a75c8 by Abseil Team <absl-team@google.com>:

Internal change

PiperOrigin-RevId: 214722931

--
2ebbfac950f83146b46253038e7dd7dcde9f2951 by Derek Mauro <dmauro@google.com>:

Internal change

PiperOrigin-RevId: 214701684
GitOrigin-RevId: 4eacae3ff1b14b1d309e8092185bc10e8a6203cf
Change-Id: I9ba64e395b22ad7863213d157b8019b082adc19d
This commit is contained in:
Abseil Team 2018-09-27 12:24:54 -07:00 committed by Derek Mauro
parent e291c279e4
commit 48cd2c3f35
55 changed files with 18696 additions and 0 deletions

589
absl/hash/internal/city.cc Normal file
View file

@ -0,0 +1,589 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file provides CityHash64() and related functions.
//
// It's probably possible to create even faster hash functions by
// writing a program that systematically explores some of the space of
// possible hash functions, by using SIMD instructions, or by
// compromising on hash quality.
#include "absl/hash/internal/city.h"
#include <string.h> // for memcpy and memset
#include <algorithm>
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/optimization.h"
namespace absl {
namespace hash_internal {
#ifdef ABSL_IS_BIG_ENDIAN
#define uint32_in_expected_order(x) (absl::gbswap_32(x))
#define uint64_in_expected_order(x) (absl::gbswap_64(x))
#else
#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)
#endif
static uint64_t Fetch64(const char *p) {
return uint64_in_expected_order(ABSL_INTERNAL_UNALIGNED_LOAD64(p));
}
static uint32_t Fetch32(const char *p) {
return uint32_in_expected_order(ABSL_INTERNAL_UNALIGNED_LOAD32(p));
}
// Some primes between 2^63 and 2^64 for various uses.
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static const uint64_t k1 = 0xb492b66fbe98f273ULL;
static const uint64_t k2 = 0x9ae16a3b2f90404fULL;
// Magic numbers for 32-bit hashing. Copied from Murmur3.
static const uint32_t c1 = 0xcc9e2d51;
static const uint32_t c2 = 0x1b873593;
// A 32-bit to 32-bit integer hash copied from Murmur3.
static uint32_t fmix(uint32_t h) {
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
static uint32_t Rotate32(uint32_t val, int shift) {
// Avoid shifting by 32: doing so yields an undefined result.
return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
}
#undef PERMUTE3
#define PERMUTE3(a, b, c) \
do { \
std::swap(a, b); \
std::swap(a, c); \
} while (0)
static uint32_t Mur(uint32_t a, uint32_t h) {
// Helper from Murmur3 for combining two 32-bit values.
a *= c1;
a = Rotate32(a, 17);
a *= c2;
h ^= a;
h = Rotate32(h, 19);
return h * 5 + 0xe6546b64;
}
static uint32_t Hash32Len13to24(const char *s, size_t len) {
uint32_t a = Fetch32(s - 4 + (len >> 1));
uint32_t b = Fetch32(s + 4);
uint32_t c = Fetch32(s + len - 8);
uint32_t d = Fetch32(s + (len >> 1));
uint32_t e = Fetch32(s);
uint32_t f = Fetch32(s + len - 4);
uint32_t h = len;
return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
}
static uint32_t Hash32Len0to4(const char *s, size_t len) {
uint32_t b = 0;
uint32_t c = 9;
for (size_t i = 0; i < len; i++) {
signed char v = s[i];
b = b * c1 + v;
c ^= b;
}
return fmix(Mur(b, Mur(len, c)));
}
static uint32_t Hash32Len5to12(const char *s, size_t len) {
uint32_t a = len, b = len * 5, c = 9, d = b;
a += Fetch32(s);
b += Fetch32(s + len - 4);
c += Fetch32(s + ((len >> 1) & 4));
return fmix(Mur(c, Mur(b, Mur(a, d))));
}
uint32_t CityHash32(const char *s, size_t len) {
if (len <= 24) {
return len <= 12
? (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len))
: Hash32Len13to24(s, len);
}
// len > 24
uint32_t h = len, g = c1 * len, f = g;
uint32_t a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
uint32_t a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
uint32_t a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
uint32_t a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
uint32_t a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
h ^= a0;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
h ^= a2;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= a1;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
g ^= a3;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
f += a4;
f = Rotate32(f, 19);
f = f * 5 + 0xe6546b64;
size_t iters = (len - 1) / 20;
do {
uint32_t a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
uint32_t a1 = Fetch32(s + 4);
uint32_t a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
uint32_t a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
uint32_t a4 = Fetch32(s + 16);
h ^= a0;
h = Rotate32(h, 18);
h = h * 5 + 0xe6546b64;
f += a1;
f = Rotate32(f, 19);
f = f * c1;
g += a2;
g = Rotate32(g, 18);
g = g * 5 + 0xe6546b64;
h ^= a3 + a1;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= a4;
g = absl::gbswap_32(g) * 5;
h += a4 * 5;
h = absl::gbswap_32(h);
f += a0;
PERMUTE3(f, h, g);
s += 20;
} while (--iters != 0);
g = Rotate32(g, 11) * c1;
g = Rotate32(g, 17) * c1;
f = Rotate32(f, 11) * c1;
f = Rotate32(f, 17) * c1;
h = Rotate32(h + g, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
h = Rotate32(h + f, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
return h;
}
// Bitwise right rotate. Normally this will compile to a single
// instruction, especially if the shift is a manifest constant.
static uint64_t Rotate(uint64_t val, int shift) {
// Avoid shifting by 64: doing so yields an undefined result.
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}
static uint64_t ShiftMix(uint64_t val) { return val ^ (val >> 47); }
static uint64_t HashLen16(uint64_t u, uint64_t v) {
return Hash128to64(uint128(u, v));
}
static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
// Murmur-inspired hashing.
uint64_t a = (u ^ v) * mul;
a ^= (a >> 47);
uint64_t b = (v ^ a) * mul;
b ^= (b >> 47);
b *= mul;
return b;
}
static uint64_t HashLen0to16(const char *s, size_t len) {
if (len >= 8) {
uint64_t mul = k2 + len * 2;
uint64_t a = Fetch64(s) + k2;
uint64_t b = Fetch64(s + len - 8);
uint64_t c = Rotate(b, 37) * mul + a;
uint64_t d = (Rotate(a, 25) + b) * mul;
return HashLen16(c, d, mul);
}
if (len >= 4) {
uint64_t mul = k2 + len * 2;
uint64_t a = Fetch32(s);
return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
}
if (len > 0) {
uint8_t a = s[0];
uint8_t b = s[len >> 1];
uint8_t c = s[len - 1];
uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
uint32_t z = len + (static_cast<uint32_t>(c) << 2);
return ShiftMix(y * k2 ^ z * k0) * k2;
}
return k2;
}
// This probably works well for 16-byte strings as well, but it may be overkill
// in that case.
static uint64_t HashLen17to32(const char *s, size_t len) {
uint64_t mul = k2 + len * 2;
uint64_t a = Fetch64(s) * k1;
uint64_t b = Fetch64(s + 8);
uint64_t c = Fetch64(s + len - 8) * mul;
uint64_t d = Fetch64(s + len - 16) * k2;
return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
a + Rotate(b + k2, 18) + c, mul);
}
// Return a 16-byte hash for 48 bytes. Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
static std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(uint64_t w, uint64_t x,
uint64_t y, uint64_t z,
uint64_t a, uint64_t b) {
a += w;
b = Rotate(b + a + z, 21);
uint64_t c = a;
a += x;
a += y;
b += Rotate(a, 44);
return std::make_pair(a + z, b + c);
}
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
static std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(const char *s, uint64_t a,
uint64_t b) {
return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
Fetch64(s + 24), a, b);
}
// Return an 8-byte hash for 33 to 64 bytes.
static uint64_t HashLen33to64(const char *s, size_t len) {
uint64_t mul = k2 + len * 2;
uint64_t a = Fetch64(s) * k2;
uint64_t b = Fetch64(s + 8);
uint64_t c = Fetch64(s + len - 24);
uint64_t d = Fetch64(s + len - 32);
uint64_t e = Fetch64(s + 16) * k2;
uint64_t f = Fetch64(s + 24) * 9;
uint64_t g = Fetch64(s + len - 8);
uint64_t h = Fetch64(s + len - 16) * mul;
uint64_t u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
uint64_t v = ((a + g) ^ d) + f + 1;
uint64_t w = absl::gbswap_64((u + v) * mul) + h;
uint64_t x = Rotate(e + f, 42) + c;
uint64_t y = (absl::gbswap_64((v + w) * mul) + g) * mul;
uint64_t z = e + f + c;
a = absl::gbswap_64((x + z) * mul + y) + b;
b = ShiftMix((z + a) * mul + d + h) * mul;
return b + x;
}
uint64_t CityHash64(const char *s, size_t len) {
if (len <= 32) {
if (len <= 16) {
return HashLen0to16(s, len);
} else {
return HashLen17to32(s, len);
}
} else if (len <= 64) {
return HashLen33to64(s, len);
}
// For strings over 64 bytes we hash the end first, and then as we
// loop we keep 56 bytes of state: v, w, x, y, and z.
uint64_t x = Fetch64(s + len - 40);
uint64_t y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
uint64_t z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
std::pair<uint64_t, uint64_t> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
std::pair<uint64_t, uint64_t> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
x = x * k1 + Fetch64(s);
// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
len = (len - 1) & ~static_cast<size_t>(63);
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 64;
} while (len != 0);
return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
HashLen16(v.second, w.second) + x);
}
uint64_t CityHash64WithSeed(const char *s, size_t len, uint64_t seed) {
return CityHash64WithSeeds(s, len, k2, seed);
}
uint64_t CityHash64WithSeeds(const char *s, size_t len, uint64_t seed0,
uint64_t seed1) {
return HashLen16(CityHash64(s, len) - seed0, seed1);
}
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Murmur.
static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
uint64_t a = Uint128Low64(seed);
uint64_t b = Uint128High64(seed);
uint64_t c = 0;
uint64_t d = 0;
int64_t l = len - 16;
if (l <= 0) { // len <= 16
a = ShiftMix(a * k1) * k1;
c = b * k1 + HashLen0to16(s, len);
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
} else { // len > 16
c = HashLen16(Fetch64(s + len - 8) + k1, a);
d = HashLen16(b + len, c + Fetch64(s + len - 16));
a += d;
do {
a ^= ShiftMix(Fetch64(s) * k1) * k1;
a *= k1;
b ^= a;
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
c *= k1;
d ^= c;
s += 16;
l -= 16;
} while (l > 0);
}
a = HashLen16(a, c);
b = HashLen16(d, b);
return uint128(a ^ b, HashLen16(b, a));
}
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
if (len < 128) {
return CityMurmur(s, len, seed);
}
// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y, and z.
std::pair<uint64_t, uint64_t> v, w;
uint64_t x = Uint128Low64(seed);
uint64_t y = Uint128High64(seed);
uint64_t z = len * k1;
v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
w.first = Rotate(y + z, 35) * k1 + x;
w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
// This is the same inner loop as CityHash64(), manually unrolled.
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 128;
} while (ABSL_PREDICT_TRUE(len >= 128));
x += Rotate(v.first + z, 49) * k0;
y = y * k0 + Rotate(w.second, 37);
z = z * k0 + Rotate(w.first, 27);
w.first *= 9;
v.first *= k0;
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
for (size_t tail_done = 0; tail_done < len;) {
tail_done += 32;
y = Rotate(x + y, 42) * k0 + v.second;
w.first += Fetch64(s + len - tail_done + 16);
x = x * k0 + w.first;
z += w.second + Fetch64(s + len - tail_done);
w.second += v.first;
v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
v.first *= k0;
}
// At this point our 56 bytes of state should contain more than
// enough information for a strong 128-bit hash. We use two
// different 56-byte-to-8-byte hashes to get a 16-byte final result.
x = HashLen16(x, v.first);
y = HashLen16(y + z, w.first);
return uint128(HashLen16(x + v.second, w.second) + y,
HashLen16(x + w.second, y + v.second));
}
uint128 CityHash128(const char *s, size_t len) {
return len >= 16
? CityHash128WithSeed(s + 16, len - 16,
uint128(Fetch64(s), Fetch64(s + 8) + k0))
: CityHash128WithSeed(s, len, uint128(k0, k1));
}
} // namespace hash_internal
} // namespace absl
#ifdef __SSE4_2__
#include <nmmintrin.h>
#include "absl/hash/internal/city_crc.h"
namespace absl {
namespace hash_internal {
// Requires len >= 240.
static void CityHashCrc256Long(const char *s, size_t len, uint32_t seed,
uint64_t *result) {
uint64_t a = Fetch64(s + 56) + k0;
uint64_t b = Fetch64(s + 96) + k0;
uint64_t c = result[0] = HashLen16(b, len);
uint64_t d = result[1] = Fetch64(s + 120) * k0 + len;
uint64_t e = Fetch64(s + 184) + seed;
uint64_t f = 0;
uint64_t g = 0;
uint64_t h = c + d;
uint64_t x = seed;
uint64_t y = 0;
uint64_t z = 0;
// 240 bytes of input per iter.
size_t iters = len / 240;
len -= iters * 240;
do {
#undef CHUNK
#define CHUNK(r) \
PERMUTE3(x, z, y); \
b += Fetch64(s); \
c += Fetch64(s + 8); \
d += Fetch64(s + 16); \
e += Fetch64(s + 24); \
f += Fetch64(s + 32); \
a += b; \
h += f; \
b += c; \
f += d; \
g += e; \
e += z; \
g += x; \
z = _mm_crc32_u64(z, b + g); \
y = _mm_crc32_u64(y, e + h); \
x = _mm_crc32_u64(x, f + a); \
e = Rotate(e, r); \
c += e; \
s += 40
CHUNK(0);
PERMUTE3(a, h, c);
CHUNK(33);
PERMUTE3(a, h, f);
CHUNK(0);
PERMUTE3(b, h, f);
CHUNK(42);
PERMUTE3(b, h, d);
CHUNK(0);
PERMUTE3(b, h, e);
CHUNK(33);
PERMUTE3(a, h, e);
} while (--iters > 0);
while (len >= 40) {
CHUNK(29);
e ^= Rotate(a, 20);
h += Rotate(b, 30);
g ^= Rotate(c, 40);
f += Rotate(d, 34);
PERMUTE3(c, h, g);
len -= 40;
}
if (len > 0) {
s = s + len - 40;
CHUNK(33);
e ^= Rotate(a, 43);
h += Rotate(b, 42);
g ^= Rotate(c, 41);
f += Rotate(d, 40);
}
result[0] ^= h;
result[1] ^= g;
g += h;
a = HashLen16(a, g + z);
x += y << 32;
b += x;
c = HashLen16(c, z) + h;
d = HashLen16(d, e + result[0]);
g += e;
h += HashLen16(x, f);
e = HashLen16(a, d) + g;
z = HashLen16(b, c) + a;
y = HashLen16(g, h) + c;
result[0] = e + z + y + x;
a = ShiftMix((a + y) * k0) * k0 + b;
result[1] += a + result[0];
a = ShiftMix(a * k0) * k0 + c;
result[2] = a + result[1];
a = ShiftMix((a + e) * k0) * k0;
result[3] = a + result[2];
}
// Requires len < 240.
static void CityHashCrc256Short(const char *s, size_t len, uint64_t *result) {
char buf[240];
memcpy(buf, s, len);
memset(buf + len, 0, 240 - len);
CityHashCrc256Long(buf, 240, ~static_cast<uint32_t>(len), result);
}
void CityHashCrc256(const char *s, size_t len, uint64_t *result) {
if (ABSL_PREDICT_TRUE(len >= 240)) {
CityHashCrc256Long(s, len, 0, result);
} else {
CityHashCrc256Short(s, len, result);
}
}
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
if (len <= 900) {
return CityHash128WithSeed(s, len, seed);
} else {
uint64_t result[4];
CityHashCrc256(s, len, result);
uint64_t u = Uint128High64(seed) + result[0];
uint64_t v = Uint128Low64(seed) + result[1];
return uint128(HashLen16(u, v + result[2]),
HashLen16(Rotate(v, 32), u * k0 + result[3]));
}
}
uint128 CityHashCrc128(const char *s, size_t len) {
if (len <= 900) {
return CityHash128(s, len);
} else {
uint64_t result[4];
CityHashCrc256(s, len, result);
return uint128(result[2], result[3]);
}
}
} // namespace hash_internal
} // namespace absl
#endif

108
absl/hash/internal/city.h Normal file
View file

@ -0,0 +1,108 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// http://code.google.com/p/cityhash/
//
// This file provides a few functions for hashing strings. All of them are
// high-quality functions in the sense that they pass standard tests such
// as Austin Appleby's SMHasher. They are also fast.
//
// For 64-bit x86 code, on short strings, we don't know of anything faster than
// CityHash64 that is of comparable quality. We believe our nearest competitor
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
// tables and most other hashing (excluding cryptography).
//
// For 64-bit x86 code, on long strings, the picture is more complicated.
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
// CityHashCrc128 appears to be faster than all competitors of comparable
// quality. CityHash128 is also good but not quite as fast. We believe our
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
// Note that CityHashCrc128 is declared in citycrc.h.
//
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
// is of comparable quality. We believe our nearest competitor is Murmur3A.
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
//
// Functions in the CityHash family are not suitable for cryptography.
//
// Please see CityHash's README file for more details on our performance
// measurements and so on.
//
// WARNING: This code has been only lightly tested on big-endian platforms!
// It is known to work well on little-endian platforms that have a small penalty
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
// bug reports are welcome.
//
// By the way, for some hash functions, given strings a and b, the hash
// of a+b is easily derived from the hashes of a and b. This property
// doesn't hold for any hash functions in this file.
#ifndef ABSL_HASH_INTERNAL_CITY_H_
#define ABSL_HASH_INTERNAL_CITY_H_
#include <stdint.h>
#include <stdlib.h> // for size_t.
#include <utility>
namespace absl {
namespace hash_internal {
typedef std::pair<uint64_t, uint64_t> uint128;
inline uint64_t Uint128Low64(const uint128 &x) { return x.first; }
inline uint64_t Uint128High64(const uint128 &x) { return x.second; }
// Hash function for a byte array.
uint64_t CityHash64(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
uint64_t CityHash64WithSeed(const char *s, size_t len, uint64_t seed);
// Hash function for a byte array. For convenience, two seeds are also
// hashed into the result.
uint64_t CityHash64WithSeeds(const char *s, size_t len, uint64_t seed0,
uint64_t seed1);
// Hash function for a byte array.
uint128 CityHash128(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
// Hash function for a byte array. Most useful in 32-bit binaries.
uint32_t CityHash32(const char *s, size_t len);
// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
inline uint64_t Hash128to64(const uint128 &x) {
// Murmur-inspired hashing.
const uint64_t kMul = 0x9ddfea08eb382d69ULL;
uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
a ^= (a >> 47);
uint64_t b = (Uint128High64(x) ^ a) * kMul;
b ^= (b >> 47);
b *= kMul;
return b;
}
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_CITY_H_

View file

@ -0,0 +1,41 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares the subset of the CityHash functions that require
// _mm_crc32_u64(). See the CityHash README for details.
//
// Functions in the CityHash family are not suitable for cryptography.
#ifndef ABSL_HASH_INTERNAL_CITY_CRC_H_
#define ABSL_HASH_INTERNAL_CITY_CRC_H_
#include "absl/hash/internal/city.h"
namespace absl {
namespace hash_internal {
// Hash function for a byte array.
uint128 CityHashCrc128(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
// Hash function for a byte array. Sets result[0] ... result[3].
void CityHashCrc256(const char *s, size_t len, uint64_t *result);
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_CITY_CRC_H_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/hash/internal/hash.h"
namespace absl {
namespace hash_internal {
ABSL_CONST_INIT const void* const CityHashState::kSeed = &kSeed;
} // namespace hash_internal
} // namespace absl

885
absl/hash/internal/hash.h Normal file
View file

@ -0,0 +1,885 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: hash.h
// -----------------------------------------------------------------------------
//
#ifndef ABSL_HASH_INTERNAL_HASH_H_
#define ABSL_HASH_INTERNAL_HASH_H_
#include <algorithm>
#include <array>
#include <cmath>
#include <cstring>
#include <deque>
#include <forward_list>
#include <functional>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "absl/base/internal/endian.h"
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include "absl/utility/utility.h"
#include "absl/hash/internal/city.h"
namespace absl {
namespace hash_internal {
// HashStateBase
//
// A hash state object represents an intermediate state in the computation
// of an unspecified hash algorithm. `HashStateBase` provides a CRTP style
// base class for hash state implementations. Developers adding type support
// for `absl::Hash` should not rely on any parts of the state object other than
// the following member functions:
//
// * HashStateBase::combine()
// * HashStateBase::combine_contiguous()
//
// A derived hash state class of type `H` must provide a static member function
// with a signature similar to the following:
//
// `static H combine_contiguous(H state, const unsigned char*, size_t)`.
//
// `HashStateBase` will provide a complete implementations for a hash state
// object in terms of this method.
//
// Example:
//
// // Use CRTP to define your derived class.
// struct MyHashState : HashStateBase<MyHashState> {
// static H combine_contiguous(H state, const unsigned char*, size_t);
// using MyHashState::HashStateBase::combine;
// using MyHashState::HashStateBase::combine_contiguous;
// };
template <typename H>
class HashStateBase {
public:
// HashStateBase::combine()
//
// Combines an arbitrary number of values into a hash state, returning the
// updated state.
//
// Each of the value types `T` must be separately hashable by the Abseil
// hashing framework.
//
// NOTE:
//
// state = H::combine(std::move(state), value1, value2, value3);
//
// is guaranteed to produce the same hash expansion as:
//
// state = H::combine(std::move(state), value1);
// state = H::combine(std::move(state), value2);
// state = H::combine(std::move(state), value3);
template <typename T, typename... Ts>
static H combine(H state, const T& value, const Ts&... values);
static H combine(H state) { return state; }
// HashStateBase::combine_contiguous()
//
// Combines a contiguous array of `size` elements into a hash state, returning
// the updated state.
//
// NOTE:
//
// state = H::combine_contiguous(std::move(state), data, size);
//
// is NOT guaranteed to produce the same hash expansion as a for-loop (it may
// perform internal optimizations). If you need this guarantee, use the
// for-loop instead.
template <typename T>
static H combine_contiguous(H state, const T* data, size_t size);
};
// is_uniquely_represented
//
// `is_uniquely_represented<T>` is a trait class that indicates whether `T`
// is uniquely represented.
//
// A type is "uniquely represented" if two equal values of that type are
// guaranteed to have the same bytes in their underlying storage. In other
// words, if `a == b`, then `memcmp(&a, &b, sizeof(T))` is guaranteed to be
// zero. This property cannot be detected automatically, so this trait is false
// by default, but can be specialized by types that wish to assert that they are
// uniquely represented. This makes them eligible for certain optimizations.
//
// If you have any doubt whatsoever, do not specialize this template.
// The default is completely safe, and merely disables some optimizations
// that will not matter for most types. Specializing this template,
// on the other hand, can be very hazardous.
//
// To be uniquely represented, a type must not have multiple ways of
// representing the same value; for example, float and double are not
// uniquely represented, because they have distinct representations for
// +0 and -0. Furthermore, the type's byte representation must consist
// solely of user-controlled data, with no padding bits and no compiler-
// controlled data such as vptrs or sanitizer metadata. This is usually
// very difficult to guarantee, because in most cases the compiler can
// insert data and padding bits at its own discretion.
//
// If you specialize this template for a type `T`, you must do so in the file
// that defines that type (or in this file). If you define that specialization
// anywhere else, `is_uniquely_represented<T>` could have different meanings
// in different places.
//
// The Enable parameter is meaningless; it is provided as a convenience,
// to support certain SFINAE techniques when defining specializations.
template <typename T, typename Enable = void>
struct is_uniquely_represented : std::false_type {};
// is_uniquely_represented<unsigned char>
//
// unsigned char is a synonym for "byte", so it is guaranteed to be
// uniquely represented.
template <>
struct is_uniquely_represented<unsigned char> : std::true_type {};
// is_uniquely_represented for non-standard integral types
//
// Integral types other than bool should be uniquely represented on any
// platform that this will plausibly be ported to.
template <typename Integral>
struct is_uniquely_represented<
Integral, typename std::enable_if<std::is_integral<Integral>::value>::type>
: std::true_type {};
// is_uniquely_represented<bool>
//
//
template <>
struct is_uniquely_represented<bool> : std::false_type {};
// hash_bytes()
//
// Convenience function that combines `hash_state` with the byte representation
// of `value`.
template <typename H, typename T>
H hash_bytes(H hash_state, const T& value) {
const unsigned char* start = reinterpret_cast<const unsigned char*>(&value);
return H::combine_contiguous(std::move(hash_state), start, sizeof(value));
}
// -----------------------------------------------------------------------------
// AbslHashValue for Basic Types
// -----------------------------------------------------------------------------
// Note: Default `AbslHashValue` implementations live in `hash_internal`. This
// allows us to block lexical scope lookup when doing an unqualified call to
// `AbslHashValue` below. User-defined implementations of `AbslHashValue` can
// only be found via ADL.
// AbslHashValue() for hashing bool values
//
// We use SFINAE to ensure that this overload only accepts bool, not types that
// are convertible to bool.
template <typename H, typename B>
typename std::enable_if<std::is_same<B, bool>::value, H>::type AbslHashValue(
H hash_state, B value) {
return H::combine(std::move(hash_state),
static_cast<unsigned char>(value ? 1 : 0));
}
// AbslHashValue() for hashing enum values
template <typename H, typename Enum>
typename std::enable_if<std::is_enum<Enum>::value, H>::type AbslHashValue(
H hash_state, Enum e) {
// In practice, we could almost certainly just invoke hash_bytes directly,
// but it's possible that a sanitizer might one day want to
// store data in the unused bits of an enum. To avoid that risk, we
// convert to the underlying type before hashing. Hopefully this will get
// optimized away; if not, we can reopen discussion with c-toolchain-team.
return H::combine(std::move(hash_state),
static_cast<typename std::underlying_type<Enum>::type>(e));
}
// AbslHashValue() for hashing floating-point values
template <typename H, typename Float>
typename std::enable_if<std::is_floating_point<Float>::value, H>::type
AbslHashValue(H hash_state, Float value) {
return hash_internal::hash_bytes(std::move(hash_state),
value == 0 ? 0 : value);
}
// Long double has the property that it might have extra unused bytes in it.
// For example, in x86 sizeof(long double)==16 but it only really uses 80-bits
// of it. This means we can't use hash_bytes on a long double and have to
// convert it to something else first.
template <typename H>
H AbslHashValue(H hash_state, long double value) {
const int category = std::fpclassify(value);
switch (category) {
case FP_INFINITE:
// Add the sign bit to differentiate between +Inf and -Inf
hash_state = H::combine(std::move(hash_state), std::signbit(value));
break;
case FP_NAN:
case FP_ZERO:
default:
// Category is enough for these.
break;
case FP_NORMAL:
case FP_SUBNORMAL:
// We can't convert `value` directly to double because this would have
// undefined behavior if the value is out of range.
// std::frexp gives us a value in the range (-1, -.5] or [.5, 1) that is
// guaranteed to be in range for `double`. The truncation is
// implementation defined, but that works as long as it is deterministic.
int exp;
auto mantissa = static_cast<double>(std::frexp(value, &exp));
hash_state = H::combine(std::move(hash_state), mantissa, exp);
}
return H::combine(std::move(hash_state), category);
}
// AbslHashValue() for hashing pointers
template <typename H, typename T>
H AbslHashValue(H hash_state, T* ptr) {
return hash_internal::hash_bytes(std::move(hash_state), ptr);
}
// AbslHashValue() for hashing nullptr_t
template <typename H>
H AbslHashValue(H hash_state, std::nullptr_t) {
return H::combine(std::move(hash_state), static_cast<void*>(nullptr));
}
// -----------------------------------------------------------------------------
// AbslHashValue for Composite Types
// -----------------------------------------------------------------------------
// is_hashable()
//
// Trait class which returns true if T is hashable by the absl::Hash framework.
// Used for the AbslHashValue implementations for composite types below.
template <typename T>
struct is_hashable;
// AbslHashValue() for hashing pairs
template <typename H, typename T1, typename T2>
typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value,
H>::type
AbslHashValue(H hash_state, const std::pair<T1, T2>& p) {
return H::combine(std::move(hash_state), p.first, p.second);
}
// hash_tuple()
//
// Helper function for hashing a tuple. The third argument should
// be an index_sequence running from 0 to tuple_size<Tuple> - 1.
template <typename H, typename Tuple, size_t... Is>
H hash_tuple(H hash_state, const Tuple& t, absl::index_sequence<Is...>) {
return H::combine(std::move(hash_state), std::get<Is>(t)...);
}
// AbslHashValue for hashing tuples
template <typename H, typename... Ts>
#if _MSC_VER
// This SFINAE gets MSVC confused under some conditions. Let's just disable it
// for now.
H
#else
typename std::enable_if<absl::conjunction<is_hashable<Ts>...>::value, H>::type
#endif
AbslHashValue(H hash_state, const std::tuple<Ts...>& t) {
return hash_internal::hash_tuple(std::move(hash_state), t,
absl::make_index_sequence<sizeof...(Ts)>());
}
// -----------------------------------------------------------------------------
// AbslHashValue for Pointers
// -----------------------------------------------------------------------------
// AbslHashValue for hashing unique_ptr
template <typename H, typename T, typename D>
H AbslHashValue(H hash_state, const std::unique_ptr<T, D>& ptr) {
return H::combine(std::move(hash_state), ptr.get());
}
// AbslHashValue for hashing shared_ptr
template <typename H, typename T>
H AbslHashValue(H hash_state, const std::shared_ptr<T>& ptr) {
return H::combine(std::move(hash_state), ptr.get());
}
// -----------------------------------------------------------------------------
// AbslHashValue for String-Like Types
// -----------------------------------------------------------------------------
// AbslHashValue for hashing strings
//
// All the string-like types supported here provide the same hash expansion for
// the same character sequence. These types are:
//
// - `std::string` (and std::basic_string<char, std::char_traits<char>, A> for
// any allocator A)
// - `absl::string_view` and `std::string_view`
//
// For simplicity, we currently support only `char` strings. This support may
// be broadened, if necessary, but with some caution - this overload would
// misbehave in cases where the traits' `eq()` member isn't equivalent to `==`
// on the underlying character type.
template <typename H>
H AbslHashValue(H hash_state, absl::string_view str) {
return H::combine(
H::combine_contiguous(std::move(hash_state), str.data(), str.size()),
str.size());
}
// -----------------------------------------------------------------------------
// AbslHashValue for Sequence Containers
// -----------------------------------------------------------------------------
// AbslHashValue for hashing std::array
template <typename H, typename T, size_t N>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
H hash_state, const std::array<T, N>& array) {
return H::combine_contiguous(std::move(hash_state), array.data(),
array.size());
}
// AbslHashValue for hashing std::deque
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
H hash_state, const std::deque<T, Allocator>& deque) {
// TODO(gromer): investigate a more efficient implementation taking
// advantage of the chunk structure.
for (const auto& t : deque) {
hash_state = H::combine(std::move(hash_state), t);
}
return H::combine(std::move(hash_state), deque.size());
}
// AbslHashValue for hashing std::forward_list
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
H hash_state, const std::forward_list<T, Allocator>& list) {
size_t size = 0;
for (const T& t : list) {
hash_state = H::combine(std::move(hash_state), t);
++size;
}
return H::combine(std::move(hash_state), size);
}
// AbslHashValue for hashing std::list
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
H hash_state, const std::list<T, Allocator>& list) {
for (const auto& t : list) {
hash_state = H::combine(std::move(hash_state), t);
}
return H::combine(std::move(hash_state), list.size());
}
// AbslHashValue for hashing std::vector
//
// Do not use this for vector<bool>. It does not have a .data(), and a fallback
// for std::hash<> is most likely faster.
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value && !std::is_same<T, bool>::value,
H>::type
AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
return H::combine(H::combine_contiguous(std::move(hash_state), vector.data(),
vector.size()),
vector.size());
}
// -----------------------------------------------------------------------------
// AbslHashValue for Ordered Associative Containers
// -----------------------------------------------------------------------------
// AbslHashValue for hashing std::map
template <typename H, typename Key, typename T, typename Compare,
typename Allocator>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
H>::type
AbslHashValue(H hash_state, const std::map<Key, T, Compare, Allocator>& map) {
for (const auto& t : map) {
hash_state = H::combine(std::move(hash_state), t);
}
return H::combine(std::move(hash_state), map.size());
}
// AbslHashValue for hashing std::multimap
template <typename H, typename Key, typename T, typename Compare,
typename Allocator>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
H>::type
AbslHashValue(H hash_state,
const std::multimap<Key, T, Compare, Allocator>& map) {
for (const auto& t : map) {
hash_state = H::combine(std::move(hash_state), t);
}
return H::combine(std::move(hash_state), map.size());
}
// AbslHashValue for hashing std::set
template <typename H, typename Key, typename Compare, typename Allocator>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
H hash_state, const std::set<Key, Compare, Allocator>& set) {
for (const auto& t : set) {
hash_state = H::combine(std::move(hash_state), t);
}
return H::combine(std::move(hash_state), set.size());
}
// AbslHashValue for hashing std::multiset
template <typename H, typename Key, typename Compare, typename Allocator>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
H hash_state, const std::multiset<Key, Compare, Allocator>& set) {
for (const auto& t : set) {
hash_state = H::combine(std::move(hash_state), t);
}
return H::combine(std::move(hash_state), set.size());
}
// -----------------------------------------------------------------------------
// AbslHashValue for Wrapper Types
// -----------------------------------------------------------------------------
// AbslHashValue for hashing absl::optional
template <typename H, typename T>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
H hash_state, const absl::optional<T>& opt) {
if (opt) hash_state = H::combine(std::move(hash_state), *opt);
return H::combine(std::move(hash_state), opt.has_value());
}
// VariantVisitor
template <typename H>
struct VariantVisitor {
H&& hash_state;
template <typename T>
H operator()(const T& t) const {
return H::combine(std::move(hash_state), t);
}
};
// AbslHashValue for hashing absl::variant
template <typename H, typename... T>
typename std::enable_if<conjunction<is_hashable<T>...>::value, H>::type
AbslHashValue(H hash_state, const absl::variant<T...>& v) {
if (!v.valueless_by_exception()) {
hash_state = absl::visit(VariantVisitor<H>{std::move(hash_state)}, v);
}
return H::combine(std::move(hash_state), v.index());
}
// -----------------------------------------------------------------------------
// hash_range_or_bytes()
//
// Mixes all values in the range [data, data+size) into the hash state.
// This overload accepts only uniquely-represented types, and hashes them by
// hashing the entire range of bytes.
template <typename H, typename T>
typename std::enable_if<is_uniquely_represented<T>::value, H>::type
hash_range_or_bytes(H hash_state, const T* data, size_t size) {
const auto* bytes = reinterpret_cast<const unsigned char*>(data);
return H::combine_contiguous(std::move(hash_state), bytes, sizeof(T) * size);
}
// hash_range_or_bytes()
template <typename H, typename T>
typename std::enable_if<!is_uniquely_represented<T>::value, H>::type
hash_range_or_bytes(H hash_state, const T* data, size_t size) {
for (const auto end = data + size; data < end; ++data) {
hash_state = H::combine(std::move(hash_state), *data);
}
return hash_state;
}
// InvokeHashTag
//
// InvokeHash(H, const T&) invokes the appropriate hash implementation for a
// hasher of type `H` and a value of type `T`. If `T` is not hashable, there
// will be no matching overload of InvokeHash().
// Note: Some platforms (eg MSVC) do not support the detect idiom on
// std::hash. In those platforms the last fallback will be std::hash and
// InvokeHash() will always have a valid overload even if std::hash<T> is not
// valid.
//
// We try the following options in order:
// * If is_uniquely_represented, hash bytes directly.
// * ADL AbslHashValue(H, const T&) call.
// * std::hash<T>
// In MSVC we can't probe std::hash or stdext::hash because it triggers a
// static_assert instead of failing substitution.
#if defined(_MSC_VER)
#undef ABSL_HASH_INTERNAL_CAN_POISON_
#else // _MSC_VER
#define ABSL_HASH_INTERNAL_CAN_POISON_ 1
#endif // _MSC_VER
#if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \
ABSL_HASH_INTERNAL_CAN_POISON_
#define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 1
#endif
enum class InvokeHashTag {
kUniquelyRepresented,
kHashValue,
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
kLegacyHash,
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
kStdHash,
kNone
};
// HashSelect
//
// Type trait to select the appropriate hash implementation to use.
// HashSelect<T>::value is an instance of InvokeHashTag that indicates the best
// available hashing mechanism.
// See `Note` above about MSVC.
template <typename T>
struct HashSelect {
private:
struct State : HashStateBase<State> {
static State combine_contiguous(State hash_state, const unsigned char*,
size_t);
using State::HashStateBase::combine_contiguous;
};
// `Probe<V, Tag>::value` evaluates to `V<T>::value` if it is a valid
// expression, and `false` otherwise.
// `Probe<V, Tag>::tag` always evaluates to `Tag`.
template <template <typename> class V, InvokeHashTag Tag>
struct Probe {
private:
template <typename U, typename std::enable_if<V<U>::value, int>::type = 0>
static std::true_type Test(int);
template <typename U>
static std::false_type Test(char);
public:
static constexpr InvokeHashTag kTag = Tag;
static constexpr bool value = decltype(
Test<absl::remove_const_t<absl::remove_reference_t<T>>>(0))::value;
};
template <typename U>
using ProbeUniquelyRepresented = is_uniquely_represented<U>;
template <typename U>
using ProbeHashValue =
std::is_same<State, decltype(AbslHashValue(std::declval<State>(),
std::declval<const U&>()))>;
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
template <typename U>
using ProbeLegacyHash =
std::is_convertible<decltype(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<
U>()(std::declval<const U&>())),
size_t>;
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
template <typename U>
using ProbeStdHash =
#if ABSL_HASH_INTERNAL_CAN_POISON_
std::is_convertible<decltype(std::hash<U>()(std::declval<const U&>())),
size_t>;
#else // ABSL_HASH_INTERNAL_CAN_POISON_
std::true_type;
#endif // ABSL_HASH_INTERNAL_CAN_POISON_
template <typename U>
using ProbeNone = std::true_type;
public:
// Probe each implementation in order.
// disjunction provides short circuting wrt instantiation.
static constexpr InvokeHashTag value = absl::disjunction<
Probe<ProbeUniquelyRepresented, InvokeHashTag::kUniquelyRepresented>,
Probe<ProbeHashValue, InvokeHashTag::kHashValue>,
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
Probe<ProbeLegacyHash, InvokeHashTag::kLegacyHash>,
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
Probe<ProbeStdHash, InvokeHashTag::kStdHash>,
Probe<ProbeNone, InvokeHashTag::kNone>>::kTag;
};
template <typename T>
struct is_hashable : std::integral_constant<bool, HashSelect<T>::value !=
InvokeHashTag::kNone> {};
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kUniquelyRepresented,
H>
InvokeHash(H state, const T& value) {
return hash_internal::hash_bytes(std::move(state), value);
}
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kHashValue, H>
InvokeHash(H state, const T& value) {
return AbslHashValue(std::move(state), value);
}
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kLegacyHash, H>
InvokeHash(H state, const T& value) {
return hash_internal::hash_bytes(
std::move(state), ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>{}(value));
}
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kStdHash, H>
InvokeHash(H state, const T& value) {
return hash_internal::hash_bytes(std::move(state), std::hash<T>{}(value));
}
// CityHashState
class CityHashState : public HashStateBase<CityHashState> {
// absl::uint128 is not an alias or a thin wrapper around the intrinsic.
// We use the intrinsic when available to improve performance.
#ifdef ABSL_HAVE_INTRINSIC_INT128
using uint128 = __uint128_t;
#else // ABSL_HAVE_INTRINSIC_INT128
using uint128 = absl::uint128;
#endif // ABSL_HAVE_INTRINSIC_INT128
static constexpr uint64_t kMul =
sizeof(size_t) == 4 ? uint64_t{0xcc9e2d51} : uint64_t{0x9ddfea08eb382d69};
template <typename T>
using IntegralFastPath =
conjunction<std::is_integral<T>, is_uniquely_represented<T>>;
public:
// Move only
CityHashState(CityHashState&&) = default;
CityHashState& operator=(CityHashState&&) = default;
// CityHashState::combine_contiguous()
//
// Fundamental base case for hash recursion: mixes the given range of bytes
// into the hash state.
static CityHashState combine_contiguous(CityHashState hash_state,
const unsigned char* first,
size_t size) {
return CityHashState(
CombineContiguousImpl(hash_state.state_, first, size,
std::integral_constant<int, sizeof(size_t)>{}));
}
using CityHashState::HashStateBase::combine_contiguous;
// CityHashState::hash()
//
// For performance reasons in non-opt mode, we specialize this for
// integral types.
// Otherwise we would be instantiating and calling dozens of functions for
// something that is just one multiplication and a couple xor's.
// The result should be the same as running the whole algorithm, but faster.
template <typename T, absl::enable_if_t<IntegralFastPath<T>::value, int> = 0>
static size_t hash(T value) {
return static_cast<size_t>(Mix(Seed(), static_cast<uint64_t>(value)));
}
// Overload of CityHashState::hash()
template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0>
static size_t hash(const T& value) {
return static_cast<size_t>(combine(CityHashState{}, value).state_);
}
private:
// Invoked only once for a given argument; that plus the fact that this is
// move-only ensures that there is only one non-moved-from object.
CityHashState() : state_(Seed()) {}
// Workaround for MSVC bug.
// We make the type copyable to fix the calling convention, even though we
// never actually copy it. Keep it private to not affect the public API of the
// type.
CityHashState(const CityHashState&) = default;
explicit CityHashState(uint64_t state) : state_(state) {}
// Implementation of the base case for combine_contiguous where we actually
// mix the bytes into the state.
// Dispatch to different implementations of the combine_contiguous depending
// on the value of `sizeof(size_t)`.
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 4>
/* sizeof_size_t */);
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 8>
/* sizeof_size_t*/);
// Reads 9 to 16 bytes from p.
// The first 8 bytes are in .first, the rest (zero padded) bytes are in
// .second.
static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p,
size_t len) {
uint64_t high = little_endian::Load64(p + len - 8);
return {little_endian::Load64(p), high >> (128 - len * 8)};
}
// Reads 4 to 8 bytes from p. Zero pads to fill uint64_t.
static uint64_t Read4To8(const unsigned char* p, size_t len) {
return (static_cast<uint64_t>(little_endian::Load32(p + len - 4))
<< (len - 4) * 8) |
little_endian::Load32(p);
}
// Reads 1 to 3 bytes from p. Zero pads to fill uint32_t.
static uint32_t Read1To3(const unsigned char* p, size_t len) {
return static_cast<uint32_t>((p[0]) | //
(p[len / 2] << (len / 2 * 8)) | //
(p[len - 1] << ((len - 1) * 8)));
}
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) {
using MultType =
absl::conditional_t<sizeof(size_t) == 4, uint64_t, uint128>;
// We do the addition in 64-bit space to make sure the 128-bit
// multiplication is fast. If we were to do it as MultType the compiler has
// to assume that the high word is non-zero and needs to perform 2
// multiplications instead of one.
MultType m = state + v;
m *= kMul;
return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2)));
}
// Seed()
//
// A non-deterministic seed.
//
// The current purpose of this seed is to generate non-deterministic results
// and prevent having users depend on the particular hash values.
// It is not meant as a security feature right now, but it leaves the door
// open to upgrade it to a true per-process random seed. A true random seed
// costs more and we don't need to pay for that right now.
//
// On platforms with ASLR, we take advantage of it to make a per-process
// random value.
// See https://en.wikipedia.org/wiki/Address_space_layout_randomization
//
// On other platforms this is still going to be non-deterministic but most
// probably per-build and not per-process.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() {
return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(kSeed));
}
static const void* const kSeed;
uint64_t state_;
};
// CityHashState::CombineContiguousImpl()
inline uint64_t CityHashState::CombineContiguousImpl(
uint64_t state, const unsigned char* first, size_t len,
std::integral_constant<int, 4> /* sizeof_size_t */) {
// For large values we use CityHash, for small ones we just use a
// multiplicative hash.
uint64_t v;
if (len > 8) {
v = absl::hash_internal::CityHash32(reinterpret_cast<const char*>(first), len);
} else if (len >= 4) {
v = Read4To8(first, len);
} else if (len > 0) {
v = Read1To3(first, len);
} else {
// Empty ranges have no effect.
return state;
}
return Mix(state, v);
}
// Overload of CityHashState::CombineContiguousImpl()
inline uint64_t CityHashState::CombineContiguousImpl(
uint64_t state, const unsigned char* first, size_t len,
std::integral_constant<int, 8> /* sizeof_size_t */) {
// For large values we use CityHash, for small ones we just use a
// multiplicative hash.
uint64_t v;
if (len > 16) {
v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len);
} else if (len > 8) {
auto p = Read9To16(first, len);
state = Mix(state, p.first);
v = p.second;
} else if (len >= 4) {
v = Read4To8(first, len);
} else if (len > 0) {
v = Read1To3(first, len);
} else {
// Empty ranges have no effect.
return state;
}
return Mix(state, v);
}
struct AggregateBarrier {};
// HashImpl
// Add a private base class to make sure this type is not an aggregate.
// Aggregates can be aggregate initialized even if the default constructor is
// deleted.
struct PoisonedHash : private AggregateBarrier {
PoisonedHash() = delete;
PoisonedHash(const PoisonedHash&) = delete;
PoisonedHash& operator=(const PoisonedHash&) = delete;
};
template <typename T>
struct HashImpl {
size_t operator()(const T& value) const { return CityHashState::hash(value); }
};
template <typename T>
struct Hash
: absl::conditional_t<is_hashable<T>::value, HashImpl<T>, PoisonedHash> {};
template <typename H>
template <typename T, typename... Ts>
H HashStateBase<H>::combine(H state, const T& value, const Ts&... values) {
return H::combine(hash_internal::InvokeHash(std::move(state), value),
values...);
}
// HashStateBase::combine_contiguous()
template <typename H>
template <typename T>
H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) {
return hash_internal::hash_range_or_bytes(std::move(state), data, size);
}
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_HASH_H_

View file

@ -0,0 +1,23 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdlib>
#include "absl/hash/hash.h"
// Prints the hash of argv[1].
int main(int argc, char** argv) {
if (argc < 2) return 1;
printf("%zu\n", absl::Hash<int>{}(std::atoi(argv[1]))); // NOLINT
}

View file

@ -0,0 +1,218 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
#define ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
#include <ostream>
#include <string>
#include <vector>
#include "absl/hash/hash.h"
#include "absl/strings/match.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
namespace absl {
namespace hash_internal {
// SpyHashState is an implementation of the HashState API that simply
// accumulates all input bytes in an internal buffer. This makes it useful
// for testing AbslHashValue overloads (so long as they are templated on the
// HashState parameter), since it can report the exact hash representation
// that the AbslHashValue overload produces.
//
// Sample usage:
// EXPECT_EQ(SpyHashState::combine(SpyHashState(), foo),
// SpyHashState::combine(SpyHashState(), bar));
template <typename T>
class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
public:
SpyHashStateImpl()
: error_(std::make_shared<absl::optional<std::string>>()) {
static_assert(std::is_void<T>::value, "");
}
// Move-only
SpyHashStateImpl(const SpyHashStateImpl&) = delete;
SpyHashStateImpl& operator=(const SpyHashStateImpl&) = delete;
SpyHashStateImpl(SpyHashStateImpl&& other) noexcept {
*this = std::move(other);
}
SpyHashStateImpl& operator=(SpyHashStateImpl&& other) noexcept {
hash_representation_ = std::move(other.hash_representation_);
error_ = other.error_;
moved_from_ = other.moved_from_;
other.moved_from_ = true;
return *this;
}
template <typename U>
SpyHashStateImpl(SpyHashStateImpl<U>&& other) { // NOLINT
hash_representation_ = std::move(other.hash_representation_);
error_ = other.error_;
moved_from_ = other.moved_from_;
other.moved_from_ = true;
}
template <typename A, typename... Args>
static SpyHashStateImpl combine(SpyHashStateImpl s, const A& a,
const Args&... args) {
// Pass an instance of SpyHashStateImpl<A> when trying to combine `A`. This
// allows us to test that the user only uses this instance for combine calls
// and does not call AbslHashValue directly.
// See AbslHashValue implementation at the bottom.
s = SpyHashStateImpl<A>::HashStateBase::combine(std::move(s), a);
return SpyHashStateImpl::combine(std::move(s), args...);
}
static SpyHashStateImpl combine(SpyHashStateImpl s) {
if (direct_absl_hash_value_error_) {
*s.error_ = "AbslHashValue should not be invoked directly.";
} else if (s.moved_from_) {
*s.error_ = "Used moved-from instance of the hash state object.";
}
return s;
}
static void SetDirectAbslHashValueError() {
direct_absl_hash_value_error_ = true;
}
// Two SpyHashStateImpl objects are equal if they hold equal hash
// representations.
friend bool operator==(const SpyHashStateImpl& lhs,
const SpyHashStateImpl& rhs) {
return lhs.hash_representation_ == rhs.hash_representation_;
}
friend bool operator!=(const SpyHashStateImpl& lhs,
const SpyHashStateImpl& rhs) {
return !(lhs == rhs);
}
enum class CompareResult {
kEqual,
kASuffixB,
kBSuffixA,
kUnequal,
};
static CompareResult Compare(const SpyHashStateImpl& a,
const SpyHashStateImpl& b) {
const std::string a_flat = absl::StrJoin(a.hash_representation_, "");
const std::string b_flat = absl::StrJoin(b.hash_representation_, "");
if (a_flat == b_flat) return CompareResult::kEqual;
if (absl::EndsWith(a_flat, b_flat)) return CompareResult::kBSuffixA;
if (absl::EndsWith(b_flat, a_flat)) return CompareResult::kASuffixB;
return CompareResult::kUnequal;
}
// operator<< prints the hash representation as a hex and ASCII dump, to
// facilitate debugging.
friend std::ostream& operator<<(std::ostream& out,
const SpyHashStateImpl& hash_state) {
out << "[\n";
for (auto& s : hash_state.hash_representation_) {
size_t offset = 0;
for (char c : s) {
if (offset % 16 == 0) {
out << absl::StreamFormat("\n0x%04x: ", offset);
}
if (offset % 2 == 0) {
out << " ";
}
out << absl::StreamFormat("%02x", c);
++offset;
}
out << "\n";
}
return out << "]";
}
// The base case of the combine recursion, which writes raw bytes into the
// internal buffer.
static SpyHashStateImpl combine_contiguous(SpyHashStateImpl hash_state,
const unsigned char* begin,
size_t size) {
hash_state.hash_representation_.emplace_back(
reinterpret_cast<const char*>(begin), size);
return hash_state;
}
using SpyHashStateImpl::HashStateBase::combine_contiguous;
absl::optional<std::string> error() const {
if (moved_from_) {
return "Returned a moved-from instance of the hash state object.";
}
return *error_;
}
private:
template <typename U>
friend class SpyHashStateImpl;
// This is true if SpyHashStateImpl<T> has been passed to a call of
// AbslHashValue with the wrong type. This detects that the user called
// AbslHashValue directly (because the hash state type does not match).
static bool direct_absl_hash_value_error_;
std::vector<std::string> hash_representation_;
// This is a shared_ptr because we want all instances of the particular
// SpyHashState run to share the field. This way we can set the error for
// use-after-move and all the copies will see it.
std::shared_ptr<absl::optional<std::string>> error_;
bool moved_from_ = false;
};
template <typename T>
bool SpyHashStateImpl<T>::direct_absl_hash_value_error_;
template <bool& B>
struct OdrUse {
constexpr OdrUse() {}
bool& b = B;
};
template <void (*)()>
struct RunOnStartup {
static bool run;
static constexpr OdrUse<run> kOdrUse{};
};
template <void (*f)()>
bool RunOnStartup<f>::run = (f(), true);
template <
typename T, typename U,
// Only trigger for when (T != U),
absl::enable_if_t<!std::is_same<T, U>::value, int> = 0,
// This statement works in two ways:
// - First, it instantiates RunOnStartup and forces the initialization of
// `run`, which set the global variable.
// - Second, it triggers a SFINAE error disabling the overload to prevent
// compile time errors. If we didn't disable the overload we would get
// ambiguous overload errors, which we don't want.
int = RunOnStartup<SpyHashStateImpl<T>::SetDirectAbslHashValueError>::run>
void AbslHashValue(SpyHashStateImpl<T>, const U&);
using SpyHashState = SpyHashStateImpl<void>;
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_