Export of internal Abseil changes.

--
4eacae3ff1b14b1d309e8092185bc10e8a6203cf by Derek Mauro <dmauro@google.com>:

Release SwissTable - a fast, efficient, cache-friendly hash table.

https://www.youtube.com/watch?v=ncHmEUmJZf4

PiperOrigin-RevId: 214816527

--
df8c3dfab3cfb2f4365909a84d0683b193cfbb11 by Derek Mauro <dmauro@google.com>:

Internal change

PiperOrigin-RevId: 214785288

--
1eabd5266bbcebc33eecc91e5309b751856a75c8 by Abseil Team <absl-team@google.com>:

Internal change

PiperOrigin-RevId: 214722931

--
2ebbfac950f83146b46253038e7dd7dcde9f2951 by Derek Mauro <dmauro@google.com>:

Internal change

PiperOrigin-RevId: 214701684
GitOrigin-RevId: 4eacae3ff1b14b1d309e8092185bc10e8a6203cf
Change-Id: I9ba64e395b22ad7863213d157b8019b082adc19d
This commit is contained in:
Abseil Team 2018-09-27 12:24:54 -07:00 committed by Derek Mauro
parent e291c279e4
commit 48cd2c3f35
55 changed files with 18696 additions and 0 deletions

114
absl/hash/BUILD.bazel Normal file
View file

@ -0,0 +1,114 @@
#
# Copyright 2018 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
load(
"//absl:copts.bzl",
"ABSL_DEFAULT_COPTS",
"ABSL_TEST_COPTS",
)
# Targets in this package are publicly visible unless they set their own
# `visibility` attribute.
package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # Apache 2.0
# The `hash` library. Only hash.h is exported through `hdrs`; the files under
# internal/ are listed in `srcs`.
cc_library(
name = "hash",
srcs = [
"internal/hash.cc",
"internal/hash.h",
],
hdrs = ["hash.h"],
copts = ABSL_DEFAULT_COPTS,
deps = [
":city",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/container:fixed_array",
"//absl/meta:type_traits",
"//absl/numeric:int128",
"//absl/strings",
"//absl/types:optional",
"//absl/types:variant",
"//absl/utility",
],
)
# Test-only, header-only library exposing hash_testing.h. It depends on
# googletest and on the package-private `:spy_hash_state` target.
cc_library(
name = "hash_testing",
testonly = 1,
hdrs = ["hash_testing.h"],
deps = [
":spy_hash_state",
"//absl/meta:type_traits",
"//absl/strings",
"//absl/types:variant",
"@com_google_googletest//:gtest",
],
)
# Unit tests for hash.h (hash_test.cc).
#
# Same-package dependencies use the short `:name` label form, matching the
# other targets in this file.
cc_test(
    name = "hash_test",
    srcs = ["hash_test.cc"],
    copts = ABSL_TEST_COPTS,
    deps = [
        ":hash",
        ":hash_testing",
        ":spy_hash_state",
        "//absl/base:core_headers",
        "//absl/container:flat_hash_set",
        "//absl/meta:type_traits",
        "//absl/numeric:int128",
        "@com_google_googletest//:gtest_main",
    ],
)
# Test-only header library exposing internal/spy_hash_state.h. The private
# visibility restricts its use to targets in this package.
cc_library(
name = "spy_hash_state",
testonly = 1,
hdrs = ["internal/spy_hash_state.h"],
copts = ABSL_DEFAULT_COPTS,
visibility = ["//visibility:private"],
deps = [
":hash",
"//absl/strings",
"//absl/strings:str_format",
],
)
# The `city` library: built from internal/city.cc and exposing
# internal/city.h and internal/city_crc.h. `:hash` depends on it.
cc_library(
name = "city",
srcs = ["internal/city.cc"],
hdrs = [
"internal/city.h",
"internal/city_crc.h",
],
copts = ABSL_DEFAULT_COPTS,
deps = [
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
],
)
# Unit tests for the `:city` library (internal/city_test.cc).
cc_test(
name = "city_test",
srcs = ["internal/city_test.cc"],
copts = ABSL_TEST_COPTS,
deps = [
":city",
"@com_google_googletest//:gtest_main",
],
)

80
absl/hash/CMakeLists.txt Normal file
View file

@ -0,0 +1,80 @@
#
# Copyright 2018 The Abseil Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Public headers of the hash library.
list(APPEND HASH_PUBLIC_HEADERS
"hash.h"
)
# Implementation-detail headers; they live under internal/.
list(APPEND HASH_INTERNAL_HEADERS
"internal/city.h"
"internal/city_crc.h"
"internal/hash.h"
)
# absl_hash library
#
# Source list for the library: the two .cc files plus every public and
# internal header declared above.
list(APPEND HASH_SRC
"internal/city.cc"
"internal/hash.cc"
${HASH_PUBLIC_HEADERS}
${HASH_INTERNAL_HEADERS}
)
# Libraries that absl_hash depends on. `absl::hash` is deliberately NOT listed
# here: it is the export name of this very target, so including it would make
# the library depend on itself.
set(HASH_PUBLIC_LIBRARIES absl::container absl::strings absl::str_format absl::utility)

# Declares the absl_hash target, exported under the name `hash`.
absl_library(
  TARGET
    absl_hash
  SOURCES
    ${HASH_SRC}
  PUBLIC_LIBRARIES
    ${HASH_PUBLIC_LIBRARIES}
  EXPORT_NAME
    hash
)
#
## TESTS
#
# testing support
#
# Headers and link libraries shared by the test targets declared below.
set(HASH_TEST_HEADERS hash_testing.h internal/spy_hash_state.h)
set(HASH_TEST_PUBLIC_LIBRARIES absl::hash absl::container absl::numeric absl::strings absl::str_format)
# hash_test
#
# Built from hash_test.cc together with the test-support headers.
set(HASH_TEST_SRC "hash_test.cc" ${HASH_TEST_HEADERS})
absl_test(
TARGET
hash_test
SOURCES
${HASH_TEST_SRC}
PUBLIC_LIBRARIES
${HASH_TEST_PUBLIC_LIBRARIES}
)
# city_test
#
# Built from internal/city_test.cc; links the same library set as hash_test.
set(CITY_TEST_SRC "internal/city_test.cc")
absl_test(
TARGET
city_test
SOURCES
${CITY_TEST_SRC}
PUBLIC_LIBRARIES
${HASH_TEST_PUBLIC_LIBRARIES}
)

312
absl/hash/hash.h Normal file
View file

@ -0,0 +1,312 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: hash.h
// -----------------------------------------------------------------------------
//
// This header file defines the Abseil `hash` library and the Abseil hashing
// framework. This framework consists of the following:
//
// * The `absl::Hash` functor, which is used to invoke the hasher within the
// Abseil hashing framework. `absl::Hash<T>` supports most basic types and
// a number of Abseil types out of the box.
// * `AbslHashValue`, an extension point that allows you to extend types to
// support Abseil hashing without requiring you to define a hashing
// algorithm.
// * `HashState`, a type-erased class which implements the manipulation of the
// hash state (H) itself. It contains the member functions `combine()` and
// `combine_contiguous()`, which you can use to contribute to an existing
// hash state when hashing your types.
//
// Unlike `std::hash` or other hashing frameworks, the Abseil hashing framework
// provides most of its utility by abstracting away the hash algorithm (and its
// implementation) entirely. Instead, a type invokes the Abseil hashing
// framework by simply combining its state with the state of known, hashable
// types. Hashing of that combined state is separately done by `absl::Hash`.
//
// Example:
//
// // Suppose we have a class `Circle` for which we want to add hashing
// class Circle {
// public:
// ...
// private:
// std::pair<int, int> center_;
// int radius_;
// };
//
// // To add hashing support to `Circle`, we simply need to add an ordinary
// // function `AbslHashValue()`, and return the combined hash state of the
// // existing hash state and the class state:
//
// template <typename H>
// friend H AbslHashValue(H h, const Circle& c) {
// return H::combine(std::move(h), c.center_, c.radius_);
// }
//
// For more information, see Adding Type Support to `absl::Hash` below.
//
#ifndef ABSL_HASH_HASH_H_
#define ABSL_HASH_HASH_H_
#include "absl/hash/internal/hash.h"
namespace absl {
// -----------------------------------------------------------------------------
// `absl::Hash`
// -----------------------------------------------------------------------------
//
// `absl::Hash<T>` is a convenient general-purpose hash functor for a type `T`
// satisfying any of the following conditions (in order):
//
// * T is an arithmetic or pointer type
// * T defines an overload for `AbslHashValue(H, const T&)` for an arbitrary
// hash state `H`.
// * T defines a specialization of `HASH_NAMESPACE::hash<T>`
// * T defines a specialization of `std::hash<T>`
//
// `absl::Hash` intrinsically supports the following types:
//
// * All integral types (including bool)
// * All enum types
// * All floating-point types (although hashing them is discouraged)
// * All pointer types, including nullptr_t
// * std::pair<T1, T2>, if T1 and T2 are hashable
// * std::tuple<Ts...>, if all the Ts... are hashable
// * std::unique_ptr and std::shared_ptr
// * All string-like types including:
// * std::string
// * std::string_view (as well as any instance of std::basic_string that
// uses char and std::char_traits)
// * All the standard sequence containers (provided the elements are hashable)
// * All the standard ordered associative containers (provided the elements are
// hashable)
// * absl types such as the following:
// * absl::string_view
// * absl::InlinedVector
// * absl::FixedArray
// * absl::uint128
// * absl::Time, absl::Duration, and absl::TimeZone
//
// Note: the list above is not meant to be exhaustive. Additional type support
// may be added, in which case the above list will be updated.
//
// -----------------------------------------------------------------------------
// absl::Hash Invocation Evaluation
// -----------------------------------------------------------------------------
//
// When invoked, `absl::Hash<T>` searches for supplied hash functions in the
// following order:
//
// * Natively supported types out of the box (see above)
// * Types for which an `AbslHashValue()` overload is provided (such as
// user-defined types). See "Adding Type Support to `absl::Hash`" below.
// * Types which define a `HASH_NAMESPACE::hash<T>` specialization (aka
// `__gnu_cxx::hash<T>` for gcc/Clang or `stdext::hash<T>` for MSVC)
// * Types which define a `std::hash<T>` specialization
//
// The fallback to legacy hash functions exists mainly for backwards
// compatibility. If you have a choice, prefer defining an `AbslHashValue`
// overload instead of specializing any legacy hash functors.
//
// -----------------------------------------------------------------------------
// The Hash State Concept, and using `HashState` for Type Erasure
// -----------------------------------------------------------------------------
//
// The `absl::Hash` framework relies on the Concept of a "hash state." Such a
// hash state is used in several places:
//
// * Within existing implementations of `absl::Hash<T>` to store the hashed
// state of an object. Note that it is up to the implementation how it stores
// such state. A hash table, for example, may mix the state to produce an
// integer value; a testing framework may simply hold a vector of that state.
// * Within implementations of `AbslHashValue()` used to extend user-defined
// types. (See "Adding Type Support to absl::Hash" below.)
// * Inside a `HashState`, providing type erasure for the concept of a hash
// state, which you can use to extend the `absl::Hash` framework for types
// that are otherwise difficult to extend using `AbslHashValue()`. (See the
// `HashState` class below.)
//
// The "hash state" concept contains two member functions for mixing hash state:
//
// * `H::combine()`
//
// Combines an arbitrary number of values into a hash state, returning the
// updated state. Note that the existing hash state is move-only and must be
// passed by value.
//
// Each of the value types T must be hashable by H.
//
// NOTE:
//
// state = H::combine(std::move(state), value1, value2, value3);
//
// must be guaranteed to produce the same hash expansion as
//
// state = H::combine(std::move(state), value1);
// state = H::combine(std::move(state), value2);
// state = H::combine(std::move(state), value3);
//
// * `H::combine_contiguous()`
//
// Combines a contiguous array of `size` elements into a hash state,
// returning the updated state. Note that the existing hash state is
// move-only and must be passed by value.
//
// NOTE:
//
// state = H::combine_contiguous(std::move(state), data, size);
//
// need NOT be guaranteed to produce the same hash expansion as a loop
// (it may perform internal optimizations). If you need this guarantee, use a
// loop instead.
//
// -----------------------------------------------------------------------------
// Adding Type Support to `absl::Hash`
// -----------------------------------------------------------------------------
//
// To add support for your user-defined type, add a proper `AbslHashValue()`
// overload as a free (non-member) function. The overload will take an
// existing hash state and should combine that state with state from the type.
//
// Example:
//
// template <typename H>
// H AbslHashValue(H state, const MyType& v) {
// return H::combine(std::move(state), v.field1, ..., v.fieldN);
// }
//
// where `(field1, ..., fieldN)` are the members you would use on your
// `operator==` to define equality.
//
// Notice that `AbslHashValue` is not a class member, but an ordinary function.
// An `AbslHashValue` overload for a type should only be declared in the same
// file and namespace as said type. The proper `AbslHashValue` implementation
// for a given type will be discovered via ADL.
//
// Note: unlike `std::hash`, `absl::Hash` should never be specialized. It must
// only be extended by adding `AbslHashValue()` overloads.
//
// Public spelling of the hash functor: `absl::Hash<T>` is an alias for the
// implementation type `absl::hash_internal::Hash<T>`.
template <typename T>
using Hash = absl::hash_internal::Hash<T>;
// HashState
//
// A type erased version of the hash state concept, for use in user-defined
// `AbslHashValue` implementations that can't use templates (such as PImpl
// classes, virtual functions, etc.). The type erasure adds overhead so it
// should be avoided unless necessary.
//
// Note: This wrapper will only erase calls to:
// combine_contiguous(H, const unsigned char*, size_t)
//
// All other calls will be handled internally and will not invoke overloads
// provided by the wrapped class.
//
// Users of this class should still define a template `AbslHashValue` function,
// but can use `absl::HashState::Create(&state)` to erase the type of the hash
// state and dispatch to their private hashing logic.
//
// This state can be used like any other hash state. In particular, you can call
// `HashState::combine()` and `HashState::combine_contiguous()` on it.
//
// Example:
//
// class Interface {
// public:
// template <typename H>
// friend H AbslHashValue(H state, const Interface& value) {
// state = H::combine(std::move(state), std::type_index(typeid(value)));
// value.HashValue(absl::HashState::Create(&state));
// return state;
// }
// private:
// virtual void HashValue(absl::HashState state) const = 0;
// };
//
// class Impl : Interface {
// private:
// void HashValue(absl::HashState state) const override {
// absl::HashState::combine(std::move(state), v1_, v2_);
// }
// int v1_;
// string v2_;
// };
class HashState : public hash_internal::HashStateBase<HashState> {
 public:
  // HashState::Create()
  //
  // Returns a `HashState` that wraps `*state`. Every `combine()` and
  // `combine_contiguous()` call made through the wrapper is forwarded to the
  // wrapped object, so `state` must remain alive for as long as the returned
  // `HashState` is in use.
  template <typename T>
  static HashState Create(T* state) {
    HashState erased;
    erased.Init(state);
    return erased;
  }

  // Copying is disabled; moving uses the compiler-generated operations.
  HashState(const HashState&) = delete;
  HashState& operator=(const HashState&) = delete;
  HashState(HashState&&) = default;
  HashState& operator=(HashState&&) = default;

  // HashState::combine()
  //
  // Mixes any number of values into the hash state and returns the updated
  // state. Inherited unchanged from `HashStateBase`.
  using HashState::HashStateBase::combine;

  // HashState::combine_contiguous()
  //
  // Mixes `size` contiguous bytes starting at `first` into the wrapped state
  // through the stored type-erased callback, then hands the wrapper back.
  static HashState combine_contiguous(HashState hash_state,
                                      const unsigned char* first, size_t size) {
    (*hash_state.combine_contiguous_)(hash_state.state_, first, size);
    return hash_state;
  }
  // Keep the base-class overloads of combine_contiguous visible as well.
  using HashState::HashStateBase::combine_contiguous;

 private:
  // Instances are only produced by Create().
  HashState() = default;

  // Trampoline stored in `combine_contiguous_`: recovers the concrete state
  // type `T` from the opaque pointer and updates that object in place.
  template <typename T>
  static void CombineContiguousImpl(void* p, const unsigned char* first,
                                    size_t size) {
    T* const wrapped = static_cast<T*>(p);
    *wrapped = T::combine_contiguous(std::move(*wrapped), first, size);
  }

  // Records the wrapped state and the trampoline matching its type.
  template <typename T>
  void Init(T* state) {
    combine_contiguous_ = &CombineContiguousImpl<T>;
    state_ = state;
  }

  // Do not erase an already erased state: share the inner state pointer and
  // callback instead of wrapping the wrapper.
  void Init(HashState* state) {
    combine_contiguous_ = state->combine_contiguous_;
    state_ = state->state_;
  }

  // Opaque pointer to the wrapped hash state.
  void* state_;
  // Callback that applies combine_contiguous to `state_`.
  void (*combine_contiguous_)(void*, const unsigned char*, size_t);
};
} // namespace absl
#endif // ABSL_HASH_HASH_H_

425
absl/hash/hash_test.cc Normal file
View file

@ -0,0 +1,425 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/hash/hash.h"
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <deque>
#include <forward_list>
#include <functional>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <numeric>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash_testing.h"
#include "absl/hash/internal/spy_hash_state.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
namespace {
using absl::Hash;
using absl::hash_internal::SpyHashState;
// Typed-test fixture for the SpyHash-based integer tests. It carries no state;
// the type parameter only selects the integer type under test.
template <typename T>
class HashValueIntTest : public testing::Test {
};
TYPED_TEST_CASE_P(HashValueIntTest);
// Combines `value` into a freshly constructed SpyHashState and returns the
// resulting state, so tests can compare the states produced by two values.
template <typename T>
SpyHashState SpyHash(const T& value) {
  auto spied = SpyHashState::combine(SpyHashState(), value);
  return spied;
}
// Helper trait to verify if T is hashable. We use absl::Hash's poison status to
// detect it: the trait is true exactly when `absl::Hash<T>` is default
// constructible.
template <typename T>
using is_hashable = std::is_default_constructible<absl::Hash<T>>;
// Equal integers must produce equal spy states; different integers (including
// the two extremes of the type) must produce different ones.
TYPED_TEST_P(HashValueIntTest, BasicUsage) {
  using Limits = std::numeric_limits<TypeParam>;

  EXPECT_TRUE((is_hashable<TypeParam>::value));

  TypeParam sample = 42;
  EXPECT_EQ(SpyHash(sample), SpyHash(TypeParam{42}));
  EXPECT_NE(SpyHash(sample), SpyHash(TypeParam{0}));
  EXPECT_NE(SpyHash(Limits::max()), SpyHash(Limits::min()));
}
// Test the fast-path to make sure the values are the same: hashing an integer
// directly must agree with hashing a one-element tuple that holds it.
TYPED_TEST_P(HashValueIntTest, FastPath) {
  using Wrapped = std::tuple<TypeParam>;

  TypeParam sample = 42;
  EXPECT_EQ(absl::Hash<TypeParam>{}(sample),
            absl::Hash<Wrapped>{}(Wrapped(sample)));
}
// Register both typed tests and instantiate them for each integer type listed
// in IntTypes.
REGISTER_TYPED_TEST_CASE_P(HashValueIntTest, BasicUsage, FastPath);
using IntTypes = testing::Types<unsigned char, char, int, int32_t, int64_t, uint32_t,
uint64_t, size_t>;
INSTANTIATE_TYPED_TEST_CASE_P(My, HashValueIntTest, IntTypes);
// Detection trait: true when an `absl::Hash<T>` object can be called with a
// `const T&` argument. The partial specialization is only selected when that
// call expression is well-formed; otherwise the primary (false) template wins.
template <typename T, typename = void>
struct IsHashCallble : std::false_type {};
template <typename T>
struct IsHashCallble<T, absl::void_t<decltype(std::declval<absl::Hash<T>>()(
std::declval<const T&>()))>> : std::true_type {};
// Detection trait: true when the expression `T{}` is well-formed. Note that
// the check is on empty-brace initialization, whatever kind of initialization
// that performs for T.
template <typename T, typename = void>
struct IsAggregateInitializable : std::false_type {};
template <typename T>
struct IsAggregateInitializable<T, absl::void_t<decltype(T{})>>
: std::true_type {};
// For a supported type the hasher behaves like a regular value type: it is
// default constructible, copyable, movable, callable and `{}`-initializable.
TEST(IsHashableTest, ValidHash) {
  using IntHash = absl::Hash<int>;

  EXPECT_TRUE((is_hashable<int>::value));
  EXPECT_TRUE(std::is_default_constructible<IntHash>::value);
  EXPECT_TRUE(std::is_copy_constructible<IntHash>::value);
  EXPECT_TRUE(std::is_move_constructible<IntHash>::value);
  EXPECT_TRUE(absl::is_copy_assignable<IntHash>::value);
  EXPECT_TRUE(absl::is_move_assignable<IntHash>::value);
  EXPECT_TRUE(IsHashCallble<int>::value);
  EXPECT_TRUE(IsAggregateInitializable<IntHash>::value);
}
#if ABSL_HASH_INTERNAL_CAN_POISON_ && !defined(__APPLE__)
// For a type with no hashing hook the hasher is "poisoned": every construction,
// assignment and call check below must report false.
TEST(IsHashableTest, PoisonHash) {
  struct X {};
  using XHash = absl::Hash<X>;

  EXPECT_FALSE((is_hashable<X>::value));
  EXPECT_FALSE(std::is_default_constructible<XHash>::value);
  EXPECT_FALSE(std::is_copy_constructible<XHash>::value);
  EXPECT_FALSE(std::is_move_constructible<XHash>::value);
  EXPECT_FALSE(absl::is_copy_assignable<XHash>::value);
  EXPECT_FALSE(absl::is_move_assignable<XHash>::value);
  EXPECT_FALSE(IsHashCallble<X>::value);
  EXPECT_FALSE(IsAggregateInitializable<XHash>::value);
}
#endif  // ABSL_HASH_INTERNAL_CAN_POISON_ && !defined(__APPLE__)
// Hashable types
//
// These types exist simply to exercise various AbslHashValue behaviors, so
// they are named by what their AbslHashValue overload does.
// Hashable type whose AbslHashValue overload contributes nothing: the incoming
// hash state is handed back untouched.
struct NoOp {
  template <typename HashCode>
  friend HashCode AbslHashValue(HashCode h, NoOp n) {
    // `h` is a by-value parameter, so returning it by name already moves it;
    // wrapping it in std::move would be redundant.
    return h;
  }
};
// Hashable type whose AbslHashValue overload calls `combine` with the state
// alone, i.e. with an empty list of values.
struct EmptyCombine {
  template <typename HashCode>
  friend HashCode AbslHashValue(HashCode state, EmptyCombine unused) {
    HashCode combined = HashCode::combine(std::move(state));
    return combined;
  }
};
// Hashable type that feeds Int(0) .. Int(4) into the hash state one value per
// `combine` call, in increasing order.
template <typename Int>
struct CombineIterative {
  template <typename HashCode>
  friend HashCode AbslHashValue(HashCode state, CombineIterative unused) {
    int next = 0;
    while (next < 5) {
      state = HashCode::combine(std::move(state), Int(next));
      ++next;
    }
    return state;
  }
};
// Hashable type that feeds Int(0) .. Int(4) into the hash state with a single
// variadic `combine` call.
template <typename Int>
struct CombineVariadic {
  template <typename HashCode>
  friend HashCode AbslHashValue(HashCode state, CombineVariadic unused) {
    HashCode combined = HashCode::combine(std::move(state), Int(0), Int(1),
                                          Int(2), Int(3), Int(4));
    return combined;
  }
};
// Short names for the framework's hook-selection enum and for a compile-time
// constant holding one of its values.
using InvokeTag = absl::hash_internal::InvokeHashTag;
template <InvokeTag T>
using InvokeTagConstant = std::integral_constant<InvokeTag, T>;
// MinTag<Tags...>::value is the smallest tag in the pack. The recursive
// specialization replaces the first two tags with the smaller of them until a
// single tag is left. There is no definition for an empty pack.
template <InvokeTag... Tags>
struct MinTag;
template <InvokeTag a, InvokeTag b, InvokeTag... Tags>
struct MinTag<a, b, Tags...> : MinTag<(a < b ? a : b), Tags...> {};
template <InvokeTag a>
struct MinTag<a> : InvokeTagConstant<a> {};
// Test type parameterized by a set of tags. The hashing hooks defined further
// down are each enabled only when their tag appears in `Tags`. The only data
// is a single size_t payload.
template <InvokeTag... Tags>
struct CustomHashType {
size_t value;
};
// SFINAE helper: the nested `::type` exists only when `allowed` is equal to at
// least one element of `tags`.
template <InvokeTag allowed, InvokeTag... tags>
struct EnableIfContained
: std::enable_if<absl::disjunction<
std::integral_constant<bool, allowed == tags>...>::value> {};
// AbslHashValue hook for CustomHashType. It takes part in overload resolution
// only when kHashValue is among `Tags`. The static_assert fires if this hook is
// instantiated while a smaller tag is also present. The hashed value is offset
// by the tag's integer value so tests can tell which hook produced it.
template <
typename H, InvokeTag... Tags,
typename = typename EnableIfContained<InvokeTag::kHashValue, Tags...>::type>
H AbslHashValue(H state, CustomHashType<Tags...> t) {
static_assert(MinTag<Tags...>::value == InvokeTag::kHashValue, "");
return H::combine(std::move(state),
t.value + static_cast<int>(InvokeTag::kHashValue));
}
} // namespace
namespace absl {
namespace hash_internal {
// Marks CustomHashType<Tags...> as uniquely represented whenever
// kUniquelyRepresented is among its tags.
template <InvokeTag... Tags>
struct is_uniquely_represented<
CustomHashType<Tags...>,
typename EnableIfContained<InvokeTag::kUniquelyRepresented, Tags...>::type>
: std::true_type {};
} // namespace hash_internal
} // namespace absl
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
namespace ABSL_INTERNAL_LEGACY_HASH_NAMESPACE {
// Legacy-namespace hash specialization for CustomHashType. The call operator is
// itself a template so that it is only usable when kLegacyHash is among the
// tags. The result is offset by the tag's integer value so tests can tell which
// hook produced it.
template <InvokeTag... Tags>
struct hash<CustomHashType<Tags...>> {
template <InvokeTag... TagsIn, typename = typename EnableIfContained<
InvokeTag::kLegacyHash, TagsIn...>::type>
size_t operator()(CustomHashType<TagsIn...> t) const {
static_assert(MinTag<Tags...>::value == InvokeTag::kLegacyHash, "");
return t.value + static_cast<int>(InvokeTag::kLegacyHash);
}
};
} // namespace ABSL_INTERNAL_LEGACY_HASH_NAMESPACE
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
namespace std {
// std::hash specialization for CustomHashType. The call operator is a template
// so that it is only usable when kStdHash is among the tags. The result is
// offset by the tag's integer value so tests can tell which hook produced it.
template <InvokeTag... Tags> // NOLINT
struct hash<CustomHashType<Tags...>> {
template <InvokeTag... TagsIn, typename = typename EnableIfContained<
InvokeTag::kStdHash, TagsIn...>::type>
size_t operator()(CustomHashType<TagsIn...> t) const {
static_assert(MinTag<Tags...>::value == InvokeTag::kStdHash, "");
return t.value + static_cast<int>(InvokeTag::kStdHash);
}
};
} // namespace std
namespace {
// Terminal case with at least one tag selected. The type must be hashable (also
// when const-qualified or a const reference), and its spy state must equal the
// spy state of `7 + offset`, where `offset` is the integer value of the
// smallest selected tag.
template <typename... T>
void TestCustomHashType(InvokeTagConstant<InvokeTag::kNone>, T...) {
using type = CustomHashType<T::value...>;
SCOPED_TRACE(testing::PrintToString(std::vector<InvokeTag>{T::value...}));
EXPECT_TRUE(is_hashable<type>());
EXPECT_TRUE(is_hashable<const type>());
EXPECT_TRUE(is_hashable<const type&>());
const size_t offset = static_cast<int>(std::min({T::value...}));
EXPECT_EQ(SpyHash(type{7}), SpyHash(size_t{7 + offset}));
}
// Terminal case with no tag selected. This non-template overload is preferred
// over the template above when the call passes no extra arguments.
void TestCustomHashType(InvokeTagConstant<InvokeTag::kNone>) {
#if ABSL_HASH_INTERNAL_CAN_POISON_
// is_hashable is false if we don't support any of the hooks.
using type = CustomHashType<>;
EXPECT_FALSE(is_hashable<type>());
EXPECT_FALSE(is_hashable<const type>());
EXPECT_FALSE(is_hashable<const type&>());
#endif // ABSL_HASH_INTERNAL_CAN_POISON_
}
// Recursive case: for the current tag, recurse once with the tag added to the
// selection and once without it, each time advancing to the next enumerator.
// NOTE(review): termination relies on kNone being reached by repeatedly adding
// 1 to the starting tag - confirm against the InvokeHashTag definition.
template <InvokeTag Tag, typename... T>
void TestCustomHashType(InvokeTagConstant<Tag> tag, T... t) {
constexpr auto next = static_cast<InvokeTag>(static_cast<int>(Tag) + 1);
TestCustomHashType(InvokeTagConstant<next>(), tag, t...);
TestCustomHashType(InvokeTagConstant<next>(), t...);
}
// Starts the tag enumeration at the value-initialized tag (`InvokeTag{}`) with
// nothing selected yet.
TEST(HashTest, CustomHashType) {
TestCustomHashType(InvokeTagConstant<InvokeTag{}>());
}
// Returning the state untouched and calling `combine` with no values must hash
// to the same result.
TEST(HashTest, NoOpsAreEquivalent) {
EXPECT_EQ(Hash<NoOp>()({}), Hash<NoOp>()({}));
EXPECT_EQ(Hash<NoOp>()({}), Hash<EmptyCombine>()({}));
}
// Typed-test fixture for the integer tests that go through absl::Hash directly.
// It carries no state.
template <typename T>
class HashIntTest : public testing::Test {
};
TYPED_TEST_CASE_P(HashIntTest);
// Hashing an integer must differ from hashing nothing at all, and combining
// values one at a time must match combining them in a single variadic call.
TYPED_TEST_P(HashIntTest, BasicUsage) {
  using Limits = std::numeric_limits<TypeParam>;

  EXPECT_NE(Hash<NoOp>()({}), Hash<TypeParam>()(0));
  EXPECT_NE(Hash<NoOp>()({}), Hash<TypeParam>()(Limits::max()));
  // For unsigned types min() is 0, which the first expectation already covers.
  if (Limits::min() != 0) {
    EXPECT_NE(Hash<NoOp>()({}), Hash<TypeParam>()(Limits::min()));
  }

  EXPECT_EQ(Hash<CombineIterative<TypeParam>>()({}),
            Hash<CombineVariadic<TypeParam>>()({}));
}
// Register the typed test and instantiate it for each integer type listed in
// IntTypes.
REGISTER_TYPED_TEST_CASE_P(HashIntTest, BasicUsage);
using IntTypes = testing::Types<unsigned char, char, int, int32_t, int64_t, uint32_t,
uint64_t, size_t>;
INSTANTIATE_TYPED_TEST_CASE_P(My, HashIntTest, IntTypes);
// A `char` followed by an `int`. Only the two fields are fed to the hash state,
// never the raw object bytes.
struct StructWithPadding {
  char c;
  int i;

  template <typename H>
  friend H AbslHashValue(H hash_state, const StructWithPadding& s) {
    H combined = H::combine(std::move(hash_state), s.c, s.i);
    return combined;
  }
};
// Compile-time guards for the padding test below: the struct must be larger
// than the sum of its fields (so padding exists) and be standard-layout.
static_assert(sizeof(StructWithPadding) > sizeof(char) + sizeof(int),
"StructWithPadding doesn't have padding");
static_assert(std::is_standard_layout<StructWithPadding>::value, "");
// This check has to be disabled because libstdc++ doesn't support it.
// static_assert(std::is_trivially_constructible<StructWithPadding>::value, "");
// Non-owning [begin, end) range that hashes its elements one at a time, in
// order, with a separate `combine` call per element.
template <typename T>
struct ArraySlice {
  T* begin;
  T* end;

  template <typename H>
  friend H AbslHashValue(H hash_state, const ArraySlice& slice) {
    T* cursor = slice.begin;
    while (cursor != slice.end) {
      hash_state = H::combine(std::move(hash_state), *cursor);
      ++cursor;
    }
    return hash_state;
  }
};
// Objects that compare equal field by field must hash equally even when their
// padding bytes differ.
TEST(HashTest, HashNonUniquelyRepresentedType) {
  // Create equal StructWithPadding objects that are known to have non-equal
  // padding bytes: one buffer is pre-filled with 0x00, the other with 0xFF.
  static const size_t kNumStructs = 10;

  // The byte buffers are accessed as StructWithPadding objects, so they must
  // carry that type's alignment; a plain unsigned char array guarantees none.
  alignas(StructWithPadding) unsigned char
      buffer1[kNumStructs * sizeof(StructWithPadding)];
  std::memset(buffer1, 0, sizeof(buffer1));
  auto* s1 = reinterpret_cast<StructWithPadding*>(buffer1);

  alignas(StructWithPadding) unsigned char
      buffer2[kNumStructs * sizeof(StructWithPadding)];
  std::memset(buffer2, 255, sizeof(buffer2));
  auto* s2 = reinterpret_cast<StructWithPadding*>(buffer2);

  // size_t index: avoids a signed/unsigned comparison against kNumStructs.
  for (size_t i = 0; i < kNumStructs; ++i) {
    SCOPED_TRACE(i);
    s1[i].c = s2[i].c = static_cast<char>('0' + i);
    s1[i].i = s2[i].i = static_cast<int>(i);
    // Sanity-check the fixture itself: the raw bytes must differ, otherwise
    // the expectations below would not exercise the padding at all.
    ASSERT_FALSE(std::memcmp(buffer1 + i * sizeof(StructWithPadding),
                             buffer2 + i * sizeof(StructWithPadding),
                             sizeof(StructWithPadding)) == 0)
        << "Bug in test code: objects do not have unequal"
        << " object representations";
  }

  EXPECT_EQ(Hash<StructWithPadding>()(s1[0]), Hash<StructWithPadding>()(s2[0]));
  EXPECT_EQ(Hash<ArraySlice<StructWithPadding>>()({s1, s1 + kNumStructs}),
            Hash<ArraySlice<StructWithPadding>>()({s2, s2 + kNumStructs}));
}
// absl::Hash can be plugged into a standard unordered container as its hasher.
TEST(HashTest, StandardHashContainerUsage) {
  using Table = std::unordered_map<int, std::string, Hash<int>>;

  Table table = {{0, "foo"}, {42, "bar"}};

  EXPECT_NE(table.find(0), table.end());
  EXPECT_EQ(table.find(1), table.end());
  // An unsigned key converts to the map's int key type before lookup.
  EXPECT_NE(table.find(0u), table.end());
}
// Implicitly constructible from NoOp. Unlike NoOp, its hash contributes the
// constant 1.
struct ConvertibleFromNoOp {
  ConvertibleFromNoOp(NoOp) {}  // NOLINT(runtime/explicit)

  template <typename H>
  friend H AbslHashValue(H hash_state, ConvertibleFromNoOp) {
    H combined = H::combine(std::move(hash_state), 1);
    return combined;
  }
};
// Calling Hash<ConvertibleFromNoOp> with a NoOp argument must give a different
// result from Hash<NoOp> on the same argument.
TEST(HashTest, HeterogeneousCall) {
EXPECT_NE(Hash<ConvertibleFromNoOp>()(NoOp()),
Hash<NoOp>()(NoOp()));
}
// Spot-checks the is_uniquely_represented trait: true for unsigned char and
// int, false for bool and for pointers.
TEST(IsUniquelyRepresentedTest, SanityTest) {
using absl::hash_internal::is_uniquely_represented;
EXPECT_TRUE(is_uniquely_represented<unsigned char>::value);
EXPECT_TRUE(is_uniquely_represented<int>::value);
EXPECT_FALSE(is_uniquely_represented<bool>::value);
EXPECT_FALSE(is_uniquely_represented<int*>::value);
}
// An int paired with a string. The string is combined first, then the int.
struct IntAndString {
  int i;
  std::string s;

  // Takes the object by const reference so that hashing does not copy the
  // std::string member on every call.
  template <typename H>
  friend H AbslHashValue(H hash_state, const IntAndString& int_and_string) {
    return H::combine(std::move(hash_state), int_and_string.s,
                      int_and_string.i);
  }
};
// Hashes a 63-character string followed by an int. The result is discarded, so
// the test only verifies that the call completes.
TEST(HashTest, SmallValueOn64ByteBoundary) {
Hash<IntAndString>()(IntAndString{0, std::string(63, '0')});
}
// Exercises absl::HashState: the templated AbslHashValue hook wraps whatever
// state it receives and forwards to a non-template member function.
struct TypeErased {
size_t n;
template <typename H>
friend H AbslHashValue(H hash_state, const TypeErased& v) {
// The wrapper refers to `hash_state`, which is returned afterwards.
v.HashValue(absl::HashState::Create(&hash_state));
return hash_state;
}
void HashValue(absl::HashState state) const {
// The returned HashState is intentionally dropped.
absl::HashState::combine(std::move(state), n);
}
};
// Hashing through the type-erased wrapper must produce the same spy state as
// hashing the wrapped size_t directly, both on its own and inside a pair.
TEST(HashTest, TypeErased) {
EXPECT_TRUE((is_hashable<TypeErased>::value));
EXPECT_TRUE((is_hashable<std::pair<TypeErased, int>>::value));
EXPECT_EQ(SpyHash(TypeErased{7}), SpyHash(size_t{7}));
EXPECT_NE(SpyHash(TypeErased{7}), SpyHash(size_t{13}));
EXPECT_EQ(SpyHash(std::make_pair(TypeErased{7}, 17)),
SpyHash(std::make_pair(size_t{7}, 17)));
}
} // namespace

372
absl/hash/hash_testing.h Normal file
View file

@ -0,0 +1,372 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_HASH_HASH_TESTING_H_
#define ABSL_HASH_HASH_TESTING_H_
#include <initializer_list>
#include <tuple>
#include <type_traits>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/hash/internal/spy_hash_state.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/str_cat.h"
#include "absl/types/variant.h"
namespace absl {
// Run the absl::Hash algorithm over all the elements passed in and verify that
// their hash expansion is congruent with their `==` operator.
//
// It is used in conjunction with EXPECT_TRUE. Failures will output information
// on what requirement failed and on which objects.
//
// Users should pass a collection of types as either an initializer list or a
// container of cases.
//
// EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly(
// {v1, v2, ..., vN}));
//
// std::vector<MyType> cases;
// // Fill cases...
// EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly(cases));
//
// Users can pass a variety of types for testing heterogeneous lookup with
// `std::make_tuple`:
//
// EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly(
// std::make_tuple(v1, v2, ..., vN)));
//
//
// Ideally, the values passed should provide enough coverage of the `==`
// operator and the AbslHashValue implementations.
// For dynamically sized types, the empty state should usually be included in
// the values.
//
// The function accepts an optional comparator function, in case that `==` is
// not enough for the values provided.
//
// Usage:
//
// EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly(
// std::make_tuple(v1, v2, ..., vN), MyCustomEq{}));
//
// It checks the following requirements:
// 1. The expansion for a value is deterministic.
// 2. For any two objects `a` and `b` in the sequence, if `a == b` evaluates
// to true, then their hash expansion must be equal.
// 3. If `a == b` evaluates to false their hash expansion must be unequal.
// 4. If `a == b` evaluates to false neither hash expansion can be a
// suffix of the other.
// 5. AbslHashValue overloads should not be called by the user. They are only
// meant to be called by the framework. Users should call H::combine() and
// H::combine_contiguous().
// 6. No moved-from instance of the hash state is used in the implementation
// of AbslHashValue.
//
// The values do not have to have the same type. This can be useful for
// equivalent types that support heterogeneous lookup.
//
// A possible reason for breaking (2) is combining state in the hash expansion
// that was not used in `==`.
// For example:
//
// struct Bad2 {
// int a, b;
// template <typename H>
// friend H AbslHashValue(H state, Bad2 x) {
// // Uses a and b.
// return H::combine(std::move(state), x.a, x.b);
// }
// friend bool operator==(Bad2 x, Bad2 y) {
// // Only uses a.
// return x.a == y.a;
// }
// };
//
// As for (3), breaking this usually means that there is state being passed to
// the `==` operator that is not used in the hash expansion.
// For example:
//
// struct Bad3 {
// int a, b;
// template <typename H>
// friend H AbslHashValue(H state, Bad3 x) {
// // Only uses a.
// return H::combine(std::move(state), x.a);
// }
// friend bool operator==(Bad3 x, Bad3 y) {
// // Uses a and b.
// return x.a == y.a && x.b == y.b;
// }
// };
//
// Finally, a common way to break 4 is by combining dynamic ranges without
// combining the size of the range.
// For example:
//
// struct Bad4 {
// int *p, size;
// template <typename H>
// friend H AbslHashValue(H state, Bad4 x) {
// return H::combine_contiguous(std::move(state), x.p, x.size);
// }
// friend bool operator==(Bad4 x, Bad4 y) {
// return std::equal(x.p, x.p + x.size, y.p, y.p + y.size);
// }
// };
//
// An easy solution to this is to combine the size after combining the range,
// like so:
// template <typename H>
// friend H AbslHashValue(H state, Bad4 x) {
// return H::combine(
// H::combine_contiguous(std::move(state), x.p, x.size), x.size);
// }
//
// Forward declarations of the four public overloads; the definitions follow
// the `hash_internal` helpers below.
//
// NOTE(review): the leading `int&...` pack (named `ExplicitBarrier` on the
// first two overloads) appears intended to keep callers from supplying
// explicit template arguments -- confirm.
//
// Container (or std::tuple) of values, compared with `==`.
template <int&... ExplicitBarrier, typename Container>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(const Container& values);
// Container (or std::tuple) of values, compared with the caller's `equals`.
template <int&... ExplicitBarrier, typename Container, typename Eq>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(const Container& values, Eq equals);
// Braced list of values of a single type, compared with `==`.
template <int&..., typename T>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(std::initializer_list<T> values);
// Braced list of values of a single type, compared with the caller's `equals`.
template <int&..., typename T, typename Eq>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(std::initializer_list<T> values,
Eq equals);
namespace hash_internal {
// Visitor that renders the pointed-to value as "#<index>(<printed value>)",
// where <index> is the position stored in the visitor.
struct PrintVisitor {
  size_t index;

  template <typename T>
  std::string operator()(const T* value) const {
    const std::string printed = testing::PrintToString(*value);
    return absl::StrCat("#", index, "(", printed, ")");
  }
};
// Visitor that dereferences two (possibly differently typed) pointers and
// hands the pointees to the stored equality functor.
template <typename Eq>
struct EqVisitor {
  Eq eq;

  template <typename Lhs, typename Rhs>
  bool operator()(const Lhs* lhs, const Rhs* rhs) const {
    const Lhs& left = *lhs;
    const Rhs& right = *rhs;
    return eq(left, right);
  }
};
// Visitor that combines the pointed-to value into a freshly constructed
// SpyHashState and returns the resulting state.
struct ExpandVisitor {
  template <typename T>
  SpyHashState operator()(const T* value) const {
    const T& target = *value;
    return SpyHashState::combine(SpyHashState(), target);
  }
};
// Core implementation shared by the public overloads.
//
// `values` is a container of variants holding pointers to the objects under
// test; `equals` decides which objects are equivalent. The values are first
// partitioned into equivalence classes, then the hash expansion of every
// value (as recorded by SpyHashState) is checked for determinism, for
// agreement within a class, and for inequality / non-suffix relationships
// between classes. Returns the first failure found, or success.
template <typename Container, typename Eq>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(const Container& values, Eq equals) {
  using Value = typename Container::value_type;

  // One tested value together with its position in the input, which is used
  // when printing diagnostics.
  struct Entry {
    const Value& value;
    size_t index;
    std::string ToString() const {
      return absl::visit(PrintVisitor{index}, value);
    }
    SpyHashState expand() const {
      return absl::visit(ExpandVisitor{}, value);
    }
  };
  using Group = std::vector<Entry>;
  std::vector<Group> groups;

  // Partition the values into equivalence classes, comparing each new value
  // against the first member of every existing class.
  size_t position = 0;
  for (const auto& candidate : values) {
    Group* home = nullptr;
    for (auto& group : groups) {
      const bool matches =
          absl::visit(EqVisitor<Eq>{equals}, candidate, group[0].value);
      if (!matches) continue;
      home = &group;
      break;
    }
    if (home == nullptr) {
      groups.emplace_back();
      home = &groups.back();
    }
    home->push_back({candidate, position});
    ++position;

    // Surface any error SpyHashState recorded while expanding this value.
    if (auto error = home->back().expand().error()) {
      return testing::AssertionFailure() << *error;
    }
  }

  if (groups.size() < 2) {
    return testing::AssertionFailure()
           << "At least two equivalence classes are expected.";
  }

  // Equality is taken as ground truth; from here on only AbslHashValue is
  // being judged.
  for (const auto& group : groups) {
    const Entry& representative = group[0];
    const SpyHashState expected = representative.expand();

    // Every member of a class must expand deterministically and identically.
    for (const Entry& member : group) {
      if (member.expand() != member.expand()) {
        return testing::AssertionFailure()
               << "Hash expansion for " << member.ToString()
               << " is non-deterministic.";
      }
      if (member.expand() != expected) {
        return testing::AssertionFailure()
               << "Values " << representative.ToString() << " and "
               << member.ToString()
               << " evaluate as equal but have an unequal hash expansion.";
      }
    }

    // Members of different classes must expand differently, and neither
    // expansion may be a suffix of the other.
    for (const auto& other : groups) {
      if (&other == &group) continue;
      const Entry& rival = other[0];
      const SpyHashState rival_hash = rival.expand();
      const auto relation = SpyHashState::Compare(expected, rival_hash);
      if (relation == SpyHashState::CompareResult::kEqual) {
        return testing::AssertionFailure()
               << "Values " << representative.ToString() << " and "
               << rival.ToString()
               << " evaluate as unequal but have an equal hash expansion.";
      }
      if (relation == SpyHashState::CompareResult::kBSuffixA) {
        return testing::AssertionFailure()
               << "Hash expansion of " << rival.ToString()
               << " is a suffix of the hash expansion of "
               << representative.ToString() << ".";
      }
      if (relation == SpyHashState::CompareResult::kASuffixB) {
        return testing::AssertionFailure()
               << "Hash expansion of " << representative.ToString()
               << " is a suffix of the hash expansion of " << rival.ToString()
               << ".";
      }
      // Remaining case is kUnequal, which is the desired outcome.
    }
  }
  return testing::AssertionSuccess();
}
// A compile-time set of types.
//   * `Insert<U>::type` is the set with `U` added; it is the same set when
//     `U` is already a member.
//   * `apply<C>` instantiates the variadic template `C` with the members.
template <typename... Members>
struct TypeSet {
  // General case: `Candidate` is not a member yet, so it is prepended.
  template <typename Candidate,
            bool = disjunction<std::is_same<Members, Candidate>...>::value>
  struct Insert {
    using type = TypeSet<Candidate, Members...>;
  };

  // `Candidate` is already a member: the set stays as it is.
  template <typename Candidate>
  struct Insert<Candidate, true> {
    using type = TypeSet;
  };

  template <template <typename...> class Target>
  using apply = Target<Members...>;
};
// Builds a TypeSet from a type list, dropping duplicates.
// Base case: an empty list yields the empty set.
template <typename... Ts>
struct MakeTypeSet : TypeSet<> {};

// Recursive case: build the set for the tail, then insert the head into it.
template <typename Head, typename... Tail>
struct MakeTypeSet<Head, Tail...>
    : MakeTypeSet<Tail...>::template Insert<Head>::type {};
// An absl::variant over `const Decayed*` for each distinct decayed type in
// `Ts...` (duplicates are collapsed through MakeTypeSet).
template <typename... Ts>
using VariantForTypes = typename MakeTypeSet<
    const typename std::decay<Ts>::type*...>::template apply<absl::variant>;
// Adapts the caller's collection into a std::vector of variants holding
// pointers to the original elements.
//
// Primary template: any range whose elements all have type
// `Container::value_type`.
template <typename Container>
struct ContainerAsVector {
  using V = absl::variant<const typename Container::value_type*>;
  using Out = std::vector<V>;

  static Out Do(const Container& values) {
    Out pointers;
    for (const auto& element : values) {
      pointers.push_back(&element);
    }
    return pointers;
  }
};

// std::tuple: the elements may have different types, so the variant covers
// every distinct element type.
template <typename... T>
struct ContainerAsVector<std::tuple<T...>> {
  using V = VariantForTypes<T...>;
  using Out = std::vector<V>;

  template <size_t... I>
  static Out DoImpl(const std::tuple<T...>& tuple, absl::index_sequence<I...>) {
    Out pointers = {&std::get<I>(tuple)...};
    return pointers;
  }

  static Out Do(const std::tuple<T...>& values) {
    const absl::index_sequence_for<T...> indices{};
    return DoImpl(values, indices);
  }
};

// Empty tuple: there is nothing to point at, so return an empty vector of an
// arbitrary variant type.
template <>
struct ContainerAsVector<std::tuple<>> {
  static std::vector<VariantForTypes<int>> Do(std::tuple<>) {
    std::vector<VariantForTypes<int>> none;
    return none;
  }
};
// Equality functor used when the caller supplies none: applies `==` to the
// two operands, which may have different types.
struct DefaultEquals {
  template <typename Lhs, typename Rhs>
  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
    const bool same = (lhs == rhs);
    return same;
  }
};
} // namespace hash_internal
// Container (or std::tuple) overload using `==` for equivalence.
template <int&..., typename Container>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(const Container& values) {
  using Adapter = hash_internal::ContainerAsVector<Container>;
  const auto cases = Adapter::Do(values);
  return hash_internal::VerifyTypeImplementsAbslHashCorrectly(
      cases, hash_internal::DefaultEquals{});
}
// Container (or std::tuple) overload using the caller's `equals` functor for
// equivalence.
template <int&..., typename Container, typename Eq>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(const Container& values, Eq equals) {
  using Adapter = hash_internal::ContainerAsVector<Container>;
  const auto cases = Adapter::Do(values);
  return hash_internal::VerifyTypeImplementsAbslHashCorrectly(cases, equals);
}
// Braced-list overload using `==` for equivalence.
template <int&..., typename T>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(std::initializer_list<T> values) {
  using Adapter = hash_internal::ContainerAsVector<std::initializer_list<T>>;
  const auto cases = Adapter::Do(values);
  return hash_internal::VerifyTypeImplementsAbslHashCorrectly(
      cases, hash_internal::DefaultEquals{});
}
// Braced-list overload using the caller's `equals` functor for equivalence.
template <int&..., typename T, typename Eq>
ABSL_MUST_USE_RESULT testing::AssertionResult
VerifyTypeImplementsAbslHashCorrectly(std::initializer_list<T> values,
                                      Eq equals) {
  using Adapter = hash_internal::ContainerAsVector<std::initializer_list<T>>;
  const auto cases = Adapter::Do(values);
  return hash_internal::VerifyTypeImplementsAbslHashCorrectly(cases, equals);
}
} // namespace absl
#endif // ABSL_HASH_HASH_TESTING_H_

589
absl/hash/internal/city.cc Normal file
View file

@ -0,0 +1,589 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file provides CityHash64() and related functions.
//
// It's probably possible to create even faster hash functions by
// writing a program that systematically explores some of the space of
// possible hash functions, by using SIMD instructions, or by
// compromising on hash quality.
#include "absl/hash/internal/city.h"
#include <string.h> // for memcpy and memset
#include <algorithm>
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/optimization.h"
namespace absl {
namespace hash_internal {
// Byte-order normalisation for words loaded from the input. When
// ABSL_IS_BIG_ENDIAN is defined the loaded word is byte-swapped; otherwise it
// is used unchanged. Either way the value matches what a little-endian load
// of the same bytes would produce.
#ifdef ABSL_IS_BIG_ENDIAN
#define uint32_in_expected_order(x) (absl::gbswap_32(x))
#define uint64_in_expected_order(x) (absl::gbswap_64(x))
#else
#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)
#endif
// Reads 8 bytes starting at `p` with the unaligned-load macro and returns
// them in the byte order the hash functions expect.
static uint64_t Fetch64(const char *p) {
  const uint64_t raw = ABSL_INTERNAL_UNALIGNED_LOAD64(p);
  return uint64_in_expected_order(raw);
}
// Reads 4 bytes starting at `p` with the unaligned-load macro and returns
// them in the byte order the hash functions expect.
static uint32_t Fetch32(const char *p) {
  const uint32_t raw = ABSL_INTERNAL_UNALIGNED_LOAD32(p);
  return uint32_in_expected_order(raw);
}
// Some primes between 2^63 and 2^64 for various uses.
// In this file they appear as multipliers and additive constants in the
// 64-bit, 128-bit and CRC-based routines.
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static const uint64_t k1 = 0xb492b66fbe98f273ULL;
static const uint64_t k2 = 0x9ae16a3b2f90404fULL;
// Magic numbers for 32-bit hashing. Copied from Murmur3.
// Used as multipliers by Mur(), Hash32Len0to4() and CityHash32().
static const uint32_t c1 = 0xcc9e2d51;
static const uint32_t c2 = 0x1b873593;
// Final 32-bit avalanche step taken from Murmur3: three xor-shift rounds
// separated by two multiplications (all arithmetic wraps modulo 2^32).
static uint32_t fmix(uint32_t h) {
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}
// Rotates the 32-bit value `val` right by `shift` bits.
static uint32_t Rotate32(uint32_t val, int shift) {
  // A zero shift is handled separately: the complementary left shift would
  // be by 32 bits, which is undefined.
  if (shift == 0) {
    return val;
  }
  const uint32_t low_part = val >> shift;
  const uint32_t high_part = val << (32 - shift);
  return low_part | high_part;
}
// Cyclically permutes three variables in place using two swaps: after
// PERMUTE3(a, b, c), `a` holds the old `c`, `b` holds the old `a`, and `c`
// holds the old `b`. The do/while(0) wrapper makes the macro usable as a
// single statement.
#undef PERMUTE3
#define PERMUTE3(a, b, c) \
do { \
std::swap(a, b); \
std::swap(a, c); \
} while (0)
// Murmur3-style combining step: scrambles `a` (multiply, rotate, multiply)
// and folds it into the running 32-bit state `h`.
static uint32_t Mur(uint32_t a, uint32_t h) {
  const uint32_t scrambled = Rotate32(a * c1, 17) * c2;
  const uint32_t folded = Rotate32(h ^ scrambled, 19);
  return folded * 5 + 0xe6546b64;
}
// 32-bit hash for inputs of 13 to 24 bytes (the range CityHash32 routes
// here). Six 4-byte words taken from the start, middle and end of the input
// are folded, in a fixed order, into a state seeded with the length.
static uint32_t Hash32Len13to24(const char *s, size_t len) {
  const size_t half = len >> 1;
  // Listed in the order in which they are mixed into the state.
  const uint32_t words[] = {
      Fetch32(s + (half - 4)),
      Fetch32(s + 4),
      Fetch32(s + len - 8),
      Fetch32(s + half),
      Fetch32(s),
      Fetch32(s + len - 4),
  };
  uint32_t state = static_cast<uint32_t>(len);
  for (const uint32_t word : words) {
    state = Mur(word, state);
  }
  return fmix(state);
}
static uint32_t Hash32Len0to4(const char *s, size_t len) {
uint32_t b = 0;
uint32_t c = 9;
for (size_t i = 0; i < len; i++) {
signed char v = s[i];
b = b * c1 + v;
c ^= b;
}
return fmix(Mur(b, Mur(len, c)));
}
// 32-bit hash for inputs of 5 to 12 bytes (the range CityHash32 routes
// here). Combines the first word, the last word and a word at offset 0 or 4
// (chosen by `(len >> 1) & 4`) with length-derived seeds.
static uint32_t Hash32Len5to12(const char *s, size_t len) {
  const uint32_t seed = static_cast<uint32_t>(len * 5);
  const uint32_t head = static_cast<uint32_t>(len) + Fetch32(s);
  const uint32_t tail = seed + Fetch32(s + len - 4);
  const uint32_t middle = 9 + Fetch32(s + ((len >> 1) & 4));
  uint32_t state = Mur(head, seed);
  state = Mur(tail, state);
  state = Mur(middle, state);
  return fmix(state);
}
// Computes a 32-bit hash of the `len` bytes starting at `s`.
//
// Inputs of at most 24 bytes are handled by the length-specialised helpers.
// Longer inputs keep three 32-bit lanes (h, g, f): the lanes are first seeded
// from the length and from the last 20 bytes, then the input is consumed
// from the front in 20-byte steps, and finally the lanes are folded into h.
uint32_t CityHash32(const char *s, size_t len) {
if (len <= 24) {
return len <= 12
? (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len))
: Hash32Len13to24(s, len);
}
// len > 24
uint32_t h = len, g = c1 * len, f = g;
// Pre-scrambled words from the last 20 bytes of the input.
uint32_t a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
uint32_t a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
uint32_t a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
uint32_t a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
uint32_t a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
// Seed the lanes: h absorbs a0 and a2, g absorbs a1 and a3, f absorbs a4.
h ^= a0;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
h ^= a2;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= a1;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
g ^= a3;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
f += a4;
f = Rotate32(f, 19);
f = f * 5 + 0xe6546b64;
// Main loop: 20 bytes per iteration from the front. len > 24 here, so
// iters is at least 1 and the do/while body always runs.
size_t iters = (len - 1) / 20;
do {
// These locals intentionally shadow the tail words above.
uint32_t a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
uint32_t a1 = Fetch32(s + 4);
uint32_t a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
uint32_t a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
uint32_t a4 = Fetch32(s + 16);
h ^= a0;
h = Rotate32(h, 18);
h = h * 5 + 0xe6546b64;
f += a1;
f = Rotate32(f, 19);
f = f * c1;
g += a2;
g = Rotate32(g, 18);
g = g * 5 + 0xe6546b64;
h ^= a3 + a1;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= a4;
g = absl::gbswap_32(g) * 5;
h += a4 * 5;
h = absl::gbswap_32(h);
f += a0;
// Rotate the roles of the three lanes for the next iteration.
PERMUTE3(f, h, g);
s += 20;
} while (--iters != 0);
// Finalisation: scramble g and f, then fold both into h.
g = Rotate32(g, 11) * c1;
g = Rotate32(g, 17) * c1;
f = Rotate32(f, 11) * c1;
f = Rotate32(f, 17) * c1;
h = Rotate32(h + g, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
h = Rotate32(h + f, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
return h;
}
// Rotates the 64-bit value `val` right by `shift` bits. Compilers normally
// turn this into a single rotate instruction, in particular when `shift` is
// a compile-time constant.
static uint64_t Rotate(uint64_t val, int shift) {
  // A zero shift is handled separately: the complementary left shift would
  // be by 64 bits, which is undefined.
  if (shift == 0) {
    return val;
  }
  const uint64_t low_part = val >> shift;
  const uint64_t high_part = val << (64 - shift);
  return low_part | high_part;
}
// Xors the top 17 bits of `val` (bits 47..63) into its low bits.
static uint64_t ShiftMix(uint64_t val) {
  const uint64_t high_bits = val >> 47;
  return val ^ high_bits;
}
static uint64_t HashLen16(uint64_t u, uint64_t v) {
return Hash128to64(uint128(u, v));
}
// Murmur-inspired mix of `u` and `v` into 64 bits, using the caller-supplied
// multiplier `mul` in every multiplication step.
static uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
  uint64_t first = (u ^ v) * mul;
  first ^= first >> 47;
  uint64_t second = (v ^ first) * mul;
  second ^= second >> 47;
  return second * mul;
}
// 64-bit hash for inputs of 0 to 16 bytes. Four length bands are handled
// separately: empty, 1-3 bytes, 4-7 bytes and 8-16 bytes.
static uint64_t HashLen0to16(const char *s, size_t len) {
  if (len == 0) {
    return k2;
  }
  if (len < 4) {
    // 1-3 bytes: sample the first, middle and last byte.
    const uint8_t first = s[0];
    const uint8_t middle = s[len >> 1];
    const uint8_t last = s[len - 1];
    const uint32_t y =
        static_cast<uint32_t>(first) + (static_cast<uint32_t>(middle) << 8);
    const uint32_t z = len + (static_cast<uint32_t>(last) << 2);
    return ShiftMix(y * k2 ^ z * k0) * k2;
  }
  const uint64_t mul = k2 + len * 2;
  if (len < 8) {
    // 4-7 bytes: the first and last 32-bit words (they may overlap).
    const uint64_t head = Fetch32(s);
    return HashLen16(len + (head << 3), Fetch32(s + len - 4), mul);
  }
  // 8-16 bytes: the first and last 64-bit words (they may overlap).
  const uint64_t head = Fetch64(s) + k2;
  const uint64_t tail = Fetch64(s + len - 8);
  const uint64_t c = Rotate(tail, 37) * mul + head;
  const uint64_t d = (Rotate(head, 25) + tail) * mul;
  return HashLen16(c, d, mul);
}
// 64-bit hash for inputs of 17 to 32 bytes, built from the first 16 and the
// last 16 bytes. This probably works well for 16-byte strings as well, but
// it may be overkill in that case.
static uint64_t HashLen17to32(const char *s, size_t len) {
  const uint64_t mul = k2 + len * 2;
  const uint64_t head0 = Fetch64(s) * k1;
  const uint64_t head1 = Fetch64(s + 8);
  const uint64_t tail1 = Fetch64(s + len - 8) * mul;
  const uint64_t tail0 = Fetch64(s + len - 16) * k2;
  const uint64_t u = Rotate(head0 + head1, 43) + Rotate(tail1, 30) + tail0;
  const uint64_t v = head0 + Rotate(head1 + k2, 18) + tail1;
  return HashLen16(u, v, mul);
}
// Returns a 16-byte hash of 48 bytes of input: the four words w, x, y, z and
// the two seeds a, b. Quick and dirty. Callers do best to use
// "random-looking" values for a and b.
static std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(uint64_t w, uint64_t x,
                                                            uint64_t y, uint64_t z,
                                                            uint64_t a, uint64_t b) {
  const uint64_t a_w = a + w;
  const uint64_t a_wxy = a_w + x + y;
  const uint64_t b_mixed = Rotate(b + a_w + z, 21) + Rotate(a_wxy, 44);
  return std::make_pair(a_wxy + z, b_mixed + a_w);
}
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
static std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(const char *s, uint64_t a,
uint64_t b) {
return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
Fetch64(s + 24), a, b);
}
// Return an 8-byte hash for 33 to 64 bytes.
//
// Reads the first 32 bytes (offsets 0, 8, 16, 24) and the last 32 bytes
// (offsets len-32 ... len-8); these two windows overlap when len < 64. The
// multiplier `mul` depends on the length.
static uint64_t HashLen33to64(const char *s, size_t len) {
uint64_t mul = k2 + len * 2;
// a, b, e, f come from the front of the input; c, d, g, h from the back.
uint64_t a = Fetch64(s) * k2;
uint64_t b = Fetch64(s + 8);
uint64_t c = Fetch64(s + len - 24);
uint64_t d = Fetch64(s + len - 32);
uint64_t e = Fetch64(s + 16) * k2;
uint64_t f = Fetch64(s + 24) * 9;
uint64_t g = Fetch64(s + len - 8);
uint64_t h = Fetch64(s + len - 16) * mul;
// Mixing rounds; each line depends on the ones before it, so the order
// matters.
uint64_t u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
uint64_t v = ((a + g) ^ d) + f + 1;
uint64_t w = absl::gbswap_64((u + v) * mul) + h;
uint64_t x = Rotate(e + f, 42) + c;
uint64_t y = (absl::gbswap_64((v + w) * mul) + g) * mul;
uint64_t z = e + f + c;
// a and b are reused as scratch for the final combination.
a = absl::gbswap_64((x + z) * mul + y) + b;
b = ShiftMix((z + a) * mul + d + h) * mul;
return b + x;
}
// Computes a 64-bit hash of the `len` bytes starting at `s`.
//
// Inputs of at most 64 bytes are dispatched to the length-specialised
// helpers. Longer inputs seed the state from the last 64 bytes and then
// consume the input from the front in 64-byte chunks.
uint64_t CityHash64(const char *s, size_t len) {
if (len <= 32) {
if (len <= 16) {
return HashLen0to16(s, len);
} else {
return HashLen17to32(s, len);
}
} else if (len <= 64) {
return HashLen33to64(s, len);
}
// For strings over 64 bytes we hash the end first, and then as we
// loop we keep 56 bytes of state: v, w, x, y, and z.
uint64_t x = Fetch64(s + len - 40);
uint64_t y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
uint64_t z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
std::pair<uint64_t, uint64_t> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
std::pair<uint64_t, uint64_t> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
x = x * k1 + Fetch64(s);
// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
// Using (len - 1) means an exact multiple of 64 is reduced by a full chunk;
// len > 64 here, so the result is at least 64 and the loop runs at least
// once.
len = (len - 1) & ~static_cast<size_t>(63);
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 64;
} while (len != 0);
// Collapse the five state words into the 64-bit result.
return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
HashLen16(v.second, w.second) + x);
}
// Seeded 64-bit hash: forwards to CityHash64WithSeeds() with k2 as the first
// seed and `seed` as the second.
uint64_t CityHash64WithSeed(const char *s, size_t len, uint64_t seed) {
  const uint64_t first_seed = k2;
  return CityHash64WithSeeds(s, len, first_seed, seed);
}
// Two-seed 64-bit hash: subtracts `seed0` from the unseeded CityHash64()
// value and mixes the difference with `seed1` through HashLen16().
uint64_t CityHash64WithSeeds(const char *s, size_t len, uint64_t seed0,
                             uint64_t seed1) {
  const uint64_t unseeded = CityHash64(s, len);
  return HashLen16(unseeded - seed0, seed1);
}
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Murmur.
//
// In this file it is called by CityHash128WithSeed() for inputs shorter than
// 128 bytes. The low and high halves of `seed` initialise a and b.
static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
uint64_t a = Uint128Low64(seed);
uint64_t b = Uint128High64(seed);
uint64_t c = 0;
uint64_t d = 0;
// Signed on purpose: it is non-positive for len <= 16 and is counted down
// past zero by the loop below.
int64_t l = len - 16;
if (l <= 0) { // len <= 16
a = ShiftMix(a * k1) * k1;
c = b * k1 + HashLen0to16(s, len);
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
} else { // len > 16
// Seed c and d from the last 16 bytes, then walk the input from the front
// in 16-byte steps.
c = HashLen16(Fetch64(s + len - 8) + k1, a);
d = HashLen16(b + len, c + Fetch64(s + len - 16));
a += d;
do {
a ^= ShiftMix(Fetch64(s) * k1) * k1;
a *= k1;
b ^= a;
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
c *= k1;
d ^= c;
s += 16;
l -= 16;
} while (l > 0);
}
a = HashLen16(a, c);
b = HashLen16(d, b);
return uint128(a ^ b, HashLen16(b, a));
}
// Computes a 128-bit hash of the `len` bytes starting at `s`, mixing in
// `seed`.
//
// Inputs shorter than 128 bytes are delegated to CityMurmur(). Longer inputs
// are consumed in 128-byte steps (two 64-byte rounds per iteration), after
// which up to four 32-byte chunks taken from the end cover the remainder.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
if (len < 128) {
return CityMurmur(s, len, seed);
}
// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y, and z.
std::pair<uint64_t, uint64_t> v, w;
uint64_t x = Uint128Low64(seed);
uint64_t y = Uint128High64(seed);
uint64_t z = len * k1;
v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
w.first = Rotate(y + z, 35) * k1 + x;
w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
// This is the same inner loop as CityHash64(), manually unrolled.
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 128;
} while (ABSL_PREDICT_TRUE(len >= 128));
// From here on `len` is the number of unprocessed bytes (< 128) and `s`
// points at the first of them.
x += Rotate(v.first + z, 49) * k0;
y = y * k0 + Rotate(w.second, 37);
z = z * k0 + Rotate(w.first, 27);
w.first *= 9;
v.first *= k0;
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
// The chunks are addressed backwards from s + len, so they can reach into
// bytes the main loop has already consumed.
for (size_t tail_done = 0; tail_done < len;) {
tail_done += 32;
y = Rotate(x + y, 42) * k0 + v.second;
w.first += Fetch64(s + len - tail_done + 16);
x = x * k0 + w.first;
z += w.second + Fetch64(s + len - tail_done);
w.second += v.first;
v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
v.first *= k0;
}
// At this point our 56 bytes of state should contain more than
// enough information for a strong 128-bit hash. We use two
// different 56-byte-to-8-byte hashes to get a 16-byte final result.
x = HashLen16(x, v.first);
y = HashLen16(y + z, w.first);
return uint128(HashLen16(x + v.second, w.second) + y,
HashLen16(x + w.second, y + v.second));
}
// Unseeded 128-bit hash. Inputs of at least 16 bytes use their first 16
// bytes (plus k0 on the high half) as the seed and hash the remainder;
// shorter inputs are hashed whole with the fixed seed (k0, k1).
uint128 CityHash128(const char *s, size_t len) {
  if (len < 16) {
    return CityHash128WithSeed(s, len, uint128(k0, k1));
  }
  const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
  return CityHash128WithSeed(s + 16, len - 16, seed);
}
} // namespace hash_internal
} // namespace absl
#ifdef __SSE4_2__
#include <nmmintrin.h>
#include "absl/hash/internal/city_crc.h"
namespace absl {
namespace hash_internal {
// Requires len >= 240.
//
// Computes a 256-bit hash of the `len` bytes at `s`, mixing in `seed`, and
// stores it in result[0] ... result[3] (so `result` must have room for four
// values). The input is consumed in 40-byte chunks: six chunks (240 bytes)
// per main-loop iteration, then any remaining whole chunks, then one final
// chunk aligned to the end of the input.
static void CityHashCrc256Long(const char *s, size_t len, uint32_t seed,
uint64_t *result) {
uint64_t a = Fetch64(s + 56) + k0;
uint64_t b = Fetch64(s + 96) + k0;
// result[0] and result[1] double as scratch storage until the final
// assignments at the bottom of the function.
uint64_t c = result[0] = HashLen16(b, len);
uint64_t d = result[1] = Fetch64(s + 120) * k0 + len;
uint64_t e = Fetch64(s + 184) + seed;
uint64_t f = 0;
uint64_t g = 0;
uint64_t h = c + d;
uint64_t x = seed;
uint64_t y = 0;
uint64_t z = 0;
// 240 bytes of input per iter.
// After this, `len` is the number of bytes left over once the main loop is
// done. iters >= 1 because of the len >= 240 precondition.
size_t iters = len / 240;
len -= iters * 240;
do {
// CHUNK(r) absorbs the 40 bytes at `s` into a..h, updates the three CRC
// accumulators x, y, z with _mm_crc32_u64, rotates e by r, and advances
// `s` by 40.
#undef CHUNK
#define CHUNK(r) \
PERMUTE3(x, z, y); \
b += Fetch64(s); \
c += Fetch64(s + 8); \
d += Fetch64(s + 16); \
e += Fetch64(s + 24); \
f += Fetch64(s + 32); \
a += b; \
h += f; \
b += c; \
f += d; \
g += e; \
e += z; \
g += x; \
z = _mm_crc32_u64(z, b + g); \
y = _mm_crc32_u64(y, e + h); \
x = _mm_crc32_u64(x, f + a); \
e = Rotate(e, r); \
c += e; \
s += 40
CHUNK(0);
PERMUTE3(a, h, c);
CHUNK(33);
PERMUTE3(a, h, f);
CHUNK(0);
PERMUTE3(b, h, f);
CHUNK(42);
PERMUTE3(b, h, d);
CHUNK(0);
PERMUTE3(b, h, e);
CHUNK(33);
PERMUTE3(a, h, e);
} while (--iters > 0);
// Remaining whole 40-byte chunks.
while (len >= 40) {
CHUNK(29);
e ^= Rotate(a, 20);
h += Rotate(b, 30);
g ^= Rotate(c, 40);
f += Rotate(d, 34);
PERMUTE3(c, h, g);
len -= 40;
}
if (len > 0) {
// Fewer than 40 bytes remain: step back so the last chunk ends exactly at
// the end of the input, re-reading some already-processed bytes.
s = s + len - 40;
CHUNK(33);
e ^= Rotate(a, 43);
h += Rotate(b, 42);
g ^= Rotate(c, 41);
f += Rotate(d, 40);
}
// Final mixing of the working state into the four output words.
result[0] ^= h;
result[1] ^= g;
g += h;
a = HashLen16(a, g + z);
x += y << 32;
b += x;
c = HashLen16(c, z) + h;
d = HashLen16(d, e + result[0]);
g += e;
h += HashLen16(x, f);
e = HashLen16(a, d) + g;
z = HashLen16(b, c) + a;
y = HashLen16(g, h) + c;
result[0] = e + z + y + x;
a = ShiftMix((a + y) * k0) * k0 + b;
result[1] += a + result[0];
a = ShiftMix(a * k0) * k0 + c;
result[2] = a + result[1];
a = ShiftMix((a + e) * k0) * k0;
result[3] = a + result[2];
}
// Requires len < 240.
//
// Copies the input into a 240-byte buffer, zero-fills the remainder, and
// hashes the buffer with CityHashCrc256Long(), using the bitwise complement
// of the original length as the seed.
static void CityHashCrc256Short(const char *s, size_t len, uint64_t *result) {
  char padded[240];
  memcpy(padded, s, len);
  memset(padded + len, 0, sizeof(padded) - len);
  const uint32_t seed = ~static_cast<uint32_t>(len);
  CityHashCrc256Long(padded, sizeof(padded), seed, result);
}
// Writes a 256-bit hash of the `len` bytes at `s` into result[0..3],
// choosing the long or the short implementation by length. The long path
// (len >= 240) is marked as the expected one and uses seed 0.
void CityHashCrc256(const char *s, size_t len, uint64_t *result) {
  if (ABSL_PREDICT_TRUE(len >= 240)) {
    CityHashCrc256Long(s, len, 0, result);
    return;
  }
  CityHashCrc256Short(s, len, result);
}
// Seeded 128-bit hash. Inputs of at most 900 bytes use
// CityHash128WithSeed(); longer inputs compute the 256-bit CRC hash and
// fold it together with the seed into 128 bits.
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len <= 900) {
    return CityHash128WithSeed(s, len, seed);
  }
  uint64_t digest[4];
  CityHashCrc256(s, len, digest);
  const uint64_t u = Uint128High64(seed) + digest[0];
  const uint64_t v = Uint128Low64(seed) + digest[1];
  const uint64_t low = HashLen16(u, v + digest[2]);
  const uint64_t high = HashLen16(Rotate(v, 32), u * k0 + digest[3]);
  return uint128(low, high);
}
// Unseeded 128-bit hash. Inputs of at most 900 bytes use CityHash128();
// longer inputs return the last two words of the 256-bit CRC hash.
uint128 CityHashCrc128(const char *s, size_t len) {
  if (len <= 900) {
    return CityHash128(s, len);
  }
  uint64_t digest[4];
  CityHashCrc256(s, len, digest);
  return uint128(digest[2], digest[3]);
}
} // namespace hash_internal
} // namespace absl
#endif

108
absl/hash/internal/city.h Normal file
View file

@ -0,0 +1,108 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// http://code.google.com/p/cityhash/
//
// This file provides a few functions for hashing strings. All of them are
// high-quality functions in the sense that they pass standard tests such
// as Austin Appleby's SMHasher. They are also fast.
//
// For 64-bit x86 code, on short strings, we don't know of anything faster than
// CityHash64 that is of comparable quality. We believe our nearest competitor
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
// tables and most other hashing (excluding cryptography).
//
// For 64-bit x86 code, on long strings, the picture is more complicated.
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
// CityHashCrc128 appears to be faster than all competitors of comparable
// quality. CityHash128 is also good but not quite as fast. We believe our
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
// Note that CityHashCrc128 is declared in city_crc.h.
//
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
// is of comparable quality. We believe our nearest competitor is Murmur3A.
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
//
// Functions in the CityHash family are not suitable for cryptography.
//
// Please see CityHash's README file for more details on our performance
// measurements and so on.
//
// WARNING: This code has been only lightly tested on big-endian platforms!
// It is known to work well on little-endian platforms that have a small penalty
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
// bug reports are welcome.
//
// By the way, for some hash functions, given strings a and b, the hash
// of a+b is easily derived from the hashes of a and b. This property
// doesn't hold for any hash functions in this file.
#ifndef ABSL_HASH_INTERNAL_CITY_H_
#define ABSL_HASH_INTERNAL_CITY_H_
#include <stdint.h>
#include <stdlib.h> // for size_t.
#include <utility>
namespace absl {
namespace hash_internal {
// A 128-bit value represented as a pair of 64-bit halves: `first` is the low
// half and `second` is the high half.
using uint128 = std::pair<uint64_t, uint64_t>;

// Returns the low 64 bits of `x`.
inline uint64_t Uint128Low64(const uint128 &x) { return std::get<0>(x); }

// Returns the high 64 bits of `x`.
inline uint64_t Uint128High64(const uint128 &x) { return std::get<1>(x); }
// Hash function for a byte array: returns a 64-bit hash of the `len` bytes
// starting at `s`.
uint64_t CityHash64(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
uint64_t CityHash64WithSeed(const char *s, size_t len, uint64_t seed);
// Hash function for a byte array. For convenience, two seeds are also
// hashed into the result.
uint64_t CityHash64WithSeeds(const char *s, size_t len, uint64_t seed0,
uint64_t seed1);
// Hash function for a byte array: returns a 128-bit hash (see `uint128`
// above) of the `len` bytes starting at `s`.
uint128 CityHash128(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
// Hash function for a byte array. Most useful in 32-bit binaries.
// Returns a 32-bit hash of the `len` bytes starting at `s`.
uint32_t CityHash32(const char *s, size_t len);
// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
// Murmur-inspired: two multiply / xor-shift rounds followed by a final
// multiplication, all using the same constant.
inline uint64_t Hash128to64(const uint128 &x) {
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  const uint64_t low = Uint128Low64(x);
  const uint64_t high = Uint128High64(x);
  uint64_t first = (low ^ high) * kMul;
  first ^= first >> 47;
  uint64_t second = (high ^ first) * kMul;
  second ^= second >> 47;
  return second * kMul;
}
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_CITY_H_

View file

@ -0,0 +1,41 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares the subset of the CityHash functions that require
// _mm_crc32_u64(). See the CityHash README for details.
//
// Functions in the CityHash family are not suitable for cryptography.
#ifndef ABSL_HASH_INTERNAL_CITY_CRC_H_
#define ABSL_HASH_INTERNAL_CITY_CRC_H_
#include "absl/hash/internal/city.h"
namespace absl {
namespace hash_internal {
// NOTE: in city.cc the definitions of these functions are compiled only when
// __SSE4_2__ is defined.
//
// Hash function for a byte array. Returns a 128-bit hash of the `len` bytes
// starting at `s`.
uint128 CityHashCrc128(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
// Hash function for a byte array. Sets result[0] ... result[3].
// `result` must therefore point to storage for at least four uint64_t values.
void CityHashCrc256(const char *s, size_t len, uint64_t *result);
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_CITY_CRC_H_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/hash/internal/hash.h"
namespace absl {
namespace hash_internal {
// Out-of-line definition of the static member `kSeed`, initialised with its
// own address.
// NOTE(review): presumably the address is used so that the seed can differ
// between processes/builds -- confirm against its use in hash.h.
ABSL_CONST_INIT const void* const CityHashState::kSeed = &kSeed;
} // namespace hash_internal
} // namespace absl

885
absl/hash/internal/hash.h Normal file
View file

@ -0,0 +1,885 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: hash.h
// -----------------------------------------------------------------------------
//
#ifndef ABSL_HASH_INTERNAL_HASH_H_
#define ABSL_HASH_INTERNAL_HASH_H_
#include <algorithm>
#include <array>
#include <cmath>
#include <cstring>
#include <deque>
#include <forward_list>
#include <functional>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "absl/base/internal/endian.h"
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include "absl/utility/utility.h"
#include "absl/hash/internal/city.h"
namespace absl {
namespace hash_internal {
// HashStateBase
//
// A hash state object represents an intermediate state in the computation
// of an unspecified hash algorithm. `HashStateBase` provides a CRTP style
// base class for hash state implementations. Developers adding type support
// for `absl::Hash` should not rely on any parts of the state object other than
// the following member functions:
//
// * HashStateBase::combine()
// * HashStateBase::combine_contiguous()
//
// A derived hash state class of type `H` must provide a static member function
// with a signature similar to the following:
//
// `static H combine_contiguous(H state, const unsigned char*, size_t)`.
//
// `HashStateBase` will provide a complete implementation for a hash state
// object in terms of this method.
//
// Example:
//
// // Use CRTP to define your derived class.
// struct MyHashState : HashStateBase<MyHashState> {
// static MyHashState combine_contiguous(MyHashState state,
// const unsigned char*, size_t);
// using MyHashState::HashStateBase::combine;
// using MyHashState::HashStateBase::combine_contiguous;
// };
template <typename H>
class HashStateBase {
public:
// HashStateBase::combine()
//
// Combines an arbitrary number of values into a hash state, returning the
// updated state.
//
// Each of the value types `T` must be separately hashable by the Abseil
// hashing framework.
//
// NOTE:
//
// state = H::combine(std::move(state), value1, value2, value3);
//
// is guaranteed to produce the same hash expansion as:
//
// state = H::combine(std::move(state), value1);
// state = H::combine(std::move(state), value2);
// state = H::combine(std::move(state), value3);
//
// The variadic overload is defined out of line near the end of this file.
template <typename T, typename... Ts>
static H combine(H state, const T& value, const Ts&... values);
// Base case of the variadic overload: with no values left to mix in, the
// state is returned unchanged.
static H combine(H state) { return state; }
// HashStateBase::combine_contiguous()
//
// Combines a contiguous array of `size` elements into a hash state, returning
// the updated state.
//
// NOTE:
//
// state = H::combine_contiguous(std::move(state), data, size);
//
// is NOT guaranteed to produce the same hash expansion as a for-loop (it may
// perform internal optimizations). If you need this guarantee, use the
// for-loop instead.
//
// Defined out of line near the end of this file.
template <typename T>
static H combine_contiguous(H state, const T* data, size_t size);
};
// is_uniquely_represented
//
// `is_uniquely_represented<T>` is a trait class that indicates whether `T`
// is uniquely represented.
//
// A type is "uniquely represented" if two equal values of that type are
// guaranteed to have the same bytes in their underlying storage. In other
// words, if `a == b`, then `memcmp(&a, &b, sizeof(T))` is guaranteed to be
// zero. This property cannot be detected automatically, so this trait is false
// by default, but can be specialized by types that wish to assert that they are
// uniquely represented. This makes them eligible for certain optimizations.
//
// If you have any doubt whatsoever, do not specialize this template.
// The default is completely safe, and merely disables some optimizations
// that will not matter for most types. Specializing this template,
// on the other hand, can be very hazardous.
//
// To be uniquely represented, a type must not have multiple ways of
// representing the same value; for example, float and double are not
// uniquely represented, because they have distinct representations for
// +0 and -0. Furthermore, the type's byte representation must consist
// solely of user-controlled data, with no padding bits and no compiler-
// controlled data such as vptrs or sanitizer metadata. This is usually
// very difficult to guarantee, because in most cases the compiler can
// insert data and padding bits at its own discretion.
//
// If you specialize this template for a type `T`, you must do so in the file
// that defines that type (or in this file). If you define that specialization
// anywhere else, `is_uniquely_represented<T>` could have different meanings
// in different places.
//
// The Enable parameter is meaningless; it is provided as a convenience,
// to support certain SFINAE techniques when defining specializations.
// Primary template: a type is treated as NOT uniquely represented unless one
// of the specializations below (or a user-provided one) says otherwise.
template <typename T, typename Enable = void>
struct is_uniquely_represented : std::false_type {};
// is_uniquely_represented<unsigned char>
//
// unsigned char is a synonym for "byte", so it is guaranteed to be
// uniquely represented.
template <>
struct is_uniquely_represented<unsigned char> : std::true_type {};
// is_uniquely_represented for integral types
//
// Integral types other than bool should be uniquely represented on any
// platform that this will plausibly be ported to. This partial specialization
// matches every type for which `std::is_integral` is true; bool is carved out
// again by the explicit specialization that follows.
template <typename Integral>
struct is_uniquely_represented<
Integral, typename std::enable_if<std::is_integral<Integral>::value>::type>
: std::true_type {};
// is_uniquely_represented<bool>
//
// bool is explicitly reported as not uniquely represented, overriding the
// integral specialization above. bool values are instead hashed through the
// dedicated `AbslHashValue` overload in this file, which first normalizes the
// value to 0 or 1.
template <>
struct is_uniquely_represented<bool> : std::false_type {};
// hash_bytes()
//
// Convenience function that combines `hash_state` with the byte representation
// of `value`.
template <typename H, typename T>
H hash_bytes(H hash_state, const T& value) {
  // View the object as raw storage and hand every one of its bytes to the
  // hash state's contiguous-range primitive.
  const auto* const raw = reinterpret_cast<const unsigned char*>(&value);
  constexpr size_t kByteCount = sizeof(T);
  return H::combine_contiguous(std::move(hash_state), raw, kByteCount);
}
// -----------------------------------------------------------------------------
// AbslHashValue for Basic Types
// -----------------------------------------------------------------------------
// Note: Default `AbslHashValue` implementations live in `hash_internal`. This
// allows us to block lexical scope lookup when doing an unqualified call to
// `AbslHashValue` below. User-defined implementations of `AbslHashValue` can
// only be found via ADL.
// AbslHashValue() for hashing bool values
//
// We use SFINAE to ensure that this overload only accepts bool, not types that
// are convertible to bool.
template <typename H, typename B>
typename std::enable_if<std::is_same<B, bool>::value, H>::type AbslHashValue(
    H hash_state, B value) {
  // Collapse the bool to a single byte holding exactly 0 or 1 and hash that
  // byte instead of the bool object itself.
  const unsigned char as_byte = static_cast<unsigned char>(value ? 1 : 0);
  return H::combine(std::move(hash_state), as_byte);
}
// AbslHashValue() for hashing enum values
template <typename H, typename Enum>
typename std::enable_if<std::is_enum<Enum>::value, H>::type AbslHashValue(
    H hash_state, Enum e) {
  // Hashing the enum's bytes directly would almost certainly work, but a
  // sanitizer might one day want to stash data in the unused bits of an enum.
  // Converting to the underlying integer type first sidesteps that risk; the
  // conversion is expected to be optimized away.
  using Underlying = typename std::underlying_type<Enum>::type;
  const Underlying numeric = static_cast<Underlying>(e);
  return H::combine(std::move(hash_state), numeric);
}
// AbslHashValue() for hashing floating-point values
template <typename H, typename Float>
typename std::enable_if<std::is_floating_point<Float>::value, H>::type
AbslHashValue(H hash_state, Float value) {
  // Every value that compares equal to zero is replaced by a plain zero of
  // the same type before its bytes are hashed, so all zeros hash alike.
  const Float canonical = (value == 0) ? Float(0) : value;
  return hash_internal::hash_bytes(std::move(hash_state), canonical);
}
// Long double has the property that it might have extra unused bytes in it.
// For example, in x86 sizeof(long double)==16 but it only really uses 80-bits
// of it. This means we can't use hash_bytes on a long double and have to
// convert it to something else first.
template <typename H>
H AbslHashValue(H hash_state, long double value) {
  const int category = std::fpclassify(value);
  if (category == FP_INFINITE) {
    // Add the sign bit to differentiate between +Inf and -Inf.
    hash_state = H::combine(std::move(hash_state), std::signbit(value));
  } else if (category == FP_NORMAL || category == FP_SUBNORMAL) {
    // Converting `value` straight to double would be undefined behavior when
    // it is out of range. std::frexp yields a fraction in (-1, -.5] or
    // [.5, 1), which always fits in a double. The truncation is
    // implementation defined, which is fine as long as it is deterministic.
    int exponent;
    auto fraction = static_cast<double>(std::frexp(value, &exponent));
    hash_state = H::combine(std::move(hash_state), fraction, exponent);
  }
  // For NaN, zero and any other category, the category alone is enough.
  return H::combine(std::move(hash_state), category);
}
// AbslHashValue() for hashing pointers
template <typename H, typename T>
H AbslHashValue(H hash_state, T* ptr) {
  // Only the pointer value itself is hashed; the pointee is never read.
  T* const address = ptr;
  return hash_internal::hash_bytes(std::move(hash_state), address);
}
// AbslHashValue() for hashing nullptr_t
template <typename H>
H AbslHashValue(H hash_state, std::nullptr_t) {
  // nullptr is hashed exactly like a null `void*`.
  void* const null_address = nullptr;
  return H::combine(std::move(hash_state), null_address);
}
// -----------------------------------------------------------------------------
// AbslHashValue for Composite Types
// -----------------------------------------------------------------------------
// is_hashable()
//
// Trait class which returns true if T is hashable by the absl::Hash framework.
// Used for the AbslHashValue implementations for composite types below.
// Forward declaration only: the composite-type overloads below use it in their
// SFINAE conditions, and the definition follows `HashSelect` later in this
// file.
template <typename T>
struct is_hashable;
// AbslHashValue() for hashing pairs
template <typename H, typename T1, typename T2>
typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value,
                        H>::type
AbslHashValue(H hash_state, const std::pair<T1, T2>& p) {
  // A pair hashes as its two members, in order.
  const auto& first_member = p.first;
  const auto& second_member = p.second;
  return H::combine(std::move(hash_state), first_member, second_member);
}
// hash_tuple()
//
// Helper function for hashing a tuple. The third argument should
// be an index_sequence running from 0 to tuple_size<Tuple> - 1.
template <typename H, typename Tuple, size_t... Indices>
H hash_tuple(H hash_state, const Tuple& elements,
             absl::index_sequence<Indices...>) {
  // Expand the index pack so every tuple element is combined, in index order,
  // by a single variadic combine() call.
  return H::combine(std::move(hash_state), std::get<Indices>(elements)...);
}
// AbslHashValue for hashing tuples
// The return type is selected by the preprocessor: on MSVC it is plain `H`
// (no SFINAE constraint); elsewhere the overload participates only when every
// element type `Ts` is hashable.
template <typename H, typename... Ts>
#if _MSC_VER
// This SFINAE gets MSVC confused under some conditions. Let's just disable it
// for now.
H
#else
typename std::enable_if<absl::conjunction<is_hashable<Ts>...>::value, H>::type
#endif
AbslHashValue(H hash_state, const std::tuple<Ts...>& t) {
// Delegate to hash_tuple() with the index sequence 0 .. sizeof...(Ts) - 1 so
// that each element is combined in order.
return hash_internal::hash_tuple(std::move(hash_state), t,
absl::make_index_sequence<sizeof...(Ts)>());
}
// -----------------------------------------------------------------------------
// AbslHashValue for Pointers
// -----------------------------------------------------------------------------
// AbslHashValue for hashing unique_ptr
template <typename H, typename T, typename D>
H AbslHashValue(H hash_state, const std::unique_ptr<T, D>& ptr) {
  // A unique_ptr hashes as the raw pointer it holds; the pointee is not read.
  const auto raw = ptr.get();
  return H::combine(std::move(hash_state), raw);
}
// AbslHashValue for hashing shared_ptr
template <typename H, typename T>
H AbslHashValue(H hash_state, const std::shared_ptr<T>& ptr) {
  // A shared_ptr hashes as the raw pointer it holds; the pointee is not read.
  const auto raw = ptr.get();
  return H::combine(std::move(hash_state), raw);
}
// -----------------------------------------------------------------------------
// AbslHashValue for String-Like Types
// -----------------------------------------------------------------------------
// AbslHashValue for hashing strings
//
// All the string-like types supported here provide the same hash expansion for
// the same character sequence. These types are:
//
// - `std::string` (and std::basic_string<char, std::char_traits<char>, A> for
// any allocator A)
// - `absl::string_view` and `std::string_view`
//
// For simplicity, we currently support only `char` strings. This support may
// be broadened, if necessary, but with some caution - this overload would
// misbehave in cases where the traits' `eq()` member isn't equivalent to `==`
// on the underlying character type.
template <typename H>
H AbslHashValue(H hash_state, absl::string_view str) {
  // First mix in the characters as one contiguous range...
  hash_state =
      H::combine_contiguous(std::move(hash_state), str.data(), str.size());
  // ...then mix in the length of the string.
  return H::combine(std::move(hash_state), str.size());
}
// -----------------------------------------------------------------------------
// AbslHashValue for Sequence Containers
// -----------------------------------------------------------------------------
// AbslHashValue for hashing std::array
template <typename H, typename T, size_t N>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::array<T, N>& array) {
  // The N elements are hashed as one contiguous range; unlike the
  // variable-size containers below, no size is mixed in afterwards.
  const T* const elements = array.data();
  return H::combine_contiguous(std::move(hash_state), elements, N);
}
// AbslHashValue for hashing std::deque
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::deque<T, Allocator>& deque) {
  // TODO(gromer): investigate a more efficient implementation taking
  // advantage of the chunk structure.
  // Elements are combined one at a time, front to back, followed by the size.
  for (auto it = deque.begin(), last = deque.end(); it != last; ++it) {
    hash_state = H::combine(std::move(hash_state), *it);
  }
  return H::combine(std::move(hash_state), deque.size());
}
// AbslHashValue for hashing std::forward_list
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::forward_list<T, Allocator>& list) {
  // The element count is tallied during the traversal and mixed in last.
  size_t count = 0;
  for (auto it = list.begin(), last = list.end(); it != last; ++it, ++count) {
    hash_state = H::combine(std::move(hash_state), *it);
  }
  return H::combine(std::move(hash_state), count);
}
// AbslHashValue for hashing std::list
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::list<T, Allocator>& list) {
  // Combine every element in list order, then the number of elements.
  auto cursor = list.begin();
  const auto last = list.end();
  while (cursor != last) {
    hash_state = H::combine(std::move(hash_state), *cursor);
    ++cursor;
  }
  return H::combine(std::move(hash_state), list.size());
}
// AbslHashValue for hashing std::vector
//
// Do not use this for vector<bool>. It does not have a .data(), and a fallback
// for std::hash<> is most likely faster.
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value && !std::is_same<T, bool>::value,
                        H>::type
AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
  // `auto` keeps the container's own size_type, so the value mixed in at the
  // end has the same type as `vector.size()`.
  const auto count = vector.size();
  hash_state =
      H::combine_contiguous(std::move(hash_state), vector.data(), count);
  return H::combine(std::move(hash_state), count);
}
// -----------------------------------------------------------------------------
// AbslHashValue for Ordered Associative Containers
// -----------------------------------------------------------------------------
// AbslHashValue for hashing std::map
template <typename H, typename Key, typename T, typename Compare,
          typename Allocator>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
                        H>::type
AbslHashValue(H hash_state, const std::map<Key, T, Compare, Allocator>& map) {
  // Each key/value pair is combined in iteration order, then the entry count.
  for (auto it = map.begin(), last = map.end(); it != last; ++it) {
    hash_state = H::combine(std::move(hash_state), *it);
  }
  return H::combine(std::move(hash_state), map.size());
}
// AbslHashValue for hashing std::multimap
template <typename H, typename Key, typename T, typename Compare,
          typename Allocator>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
                        H>::type
AbslHashValue(H hash_state,
              const std::multimap<Key, T, Compare, Allocator>& map) {
  // Each key/value pair is combined in iteration order, then the entry count.
  for (auto it = map.begin(), last = map.end(); it != last; ++it) {
    hash_state = H::combine(std::move(hash_state), *it);
  }
  return H::combine(std::move(hash_state), map.size());
}
// AbslHashValue for hashing std::set
template <typename H, typename Key, typename Compare, typename Allocator>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
    H hash_state, const std::set<Key, Compare, Allocator>& set) {
  // Keys are combined in iteration order, followed by the number of keys.
  for (auto it = set.begin(), last = set.end(); it != last; ++it) {
    hash_state = H::combine(std::move(hash_state), *it);
  }
  return H::combine(std::move(hash_state), set.size());
}
// AbslHashValue for hashing std::multiset
template <typename H, typename Key, typename Compare, typename Allocator>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
    H hash_state, const std::multiset<Key, Compare, Allocator>& set) {
  // Keys are combined in iteration order, followed by the number of keys.
  for (auto it = set.begin(), last = set.end(); it != last; ++it) {
    hash_state = H::combine(std::move(hash_state), *it);
  }
  return H::combine(std::move(hash_state), set.size());
}
// -----------------------------------------------------------------------------
// AbslHashValue for Wrapper Types
// -----------------------------------------------------------------------------
// AbslHashValue for hashing absl::optional
template <typename H, typename T>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const absl::optional<T>& opt) {
  // The contained value (if any) is combined first; the engaged/disengaged
  // flag is always combined last.
  const bool engaged = opt.has_value();
  if (engaged) {
    hash_state = H::combine(std::move(hash_state), *opt);
  }
  return H::combine(std::move(hash_state), engaged);
}
// VariantVisitor
template <typename H>
struct VariantVisitor {
  // Reference to the caller's hash state; each call moves out of it.
  H&& hash_state;

  // Combines whichever alternative the variant currently holds into the
  // referenced state and returns the resulting state.
  template <typename Alternative>
  H operator()(const Alternative& alternative) const {
    return H::combine(std::move(hash_state), alternative);
  }
};
// AbslHashValue for hashing absl::variant
template <typename H, typename... T>
typename std::enable_if<conjunction<is_hashable<T>...>::value, H>::type
AbslHashValue(H hash_state, const absl::variant<T...>& v) {
  // A valueless variant contributes only its index; otherwise the active
  // alternative is combined first and the index afterwards.
  const bool holds_value = !v.valueless_by_exception();
  if (holds_value) {
    hash_state = absl::visit(VariantVisitor<H>{std::move(hash_state)}, v);
  }
  return H::combine(std::move(hash_state), v.index());
}
// -----------------------------------------------------------------------------
// hash_range_or_bytes()
//
// Mixes all values in the range [data, data+size) into the hash state.
// This overload accepts only uniquely-represented types, and hashes them by
// hashing the entire range of bytes.
template <typename H, typename T>
typename std::enable_if<is_uniquely_represented<T>::value, H>::type
hash_range_or_bytes(H hash_state, const T* data, size_t size) {
  // Uniquely represented elements can be hashed as one flat run of bytes
  // covering the whole array.
  const size_t byte_count = sizeof(T) * size;
  return H::combine_contiguous(std::move(hash_state),
                               reinterpret_cast<const unsigned char*>(data),
                               byte_count);
}
// hash_range_or_bytes()
template <typename H, typename T>
typename std::enable_if<!is_uniquely_represented<T>::value, H>::type
hash_range_or_bytes(H hash_state, const T* data, size_t size) {
  // Elements that are not uniquely represented are combined one by one, in
  // order, through the regular per-value hashing path.
  for (size_t i = 0; i != size; ++i) {
    hash_state = H::combine(std::move(hash_state), data[i]);
  }
  return hash_state;
}
// InvokeHashTag
//
// InvokeHash(H, const T&) invokes the appropriate hash implementation for a
// hasher of type `H` and a value of type `T`. If `T` is not hashable, there
// will be no matching overload of InvokeHash().
// Note: Some platforms (eg MSVC) do not support the detect idiom on
// std::hash. In those platforms the last fallback will be std::hash and
// InvokeHash() will always have a valid overload even if std::hash<T> is not
// valid.
//
// We try the following options in order:
// * If is_uniquely_represented, hash bytes directly.
// * ADL AbslHashValue(H, const T&) call.
// * std::hash<T>
// In MSVC we can't probe std::hash or stdext::hash because it triggers a
// static_assert instead of failing substitution.
// ABSL_HASH_INTERNAL_CAN_POISON_ is defined to 1 on every compiler except
// MSVC, where it is explicitly left undefined.
#if defined(_MSC_VER)
#undef ABSL_HASH_INTERNAL_CAN_POISON_
#else // _MSC_VER
#define ABSL_HASH_INTERNAL_CAN_POISON_ 1
#endif // _MSC_VER
// ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ is defined only when a legacy hash
// namespace has been configured and the hash functors can be probed (see
// above).
#if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \
ABSL_HASH_INTERNAL_CAN_POISON_
#define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 1
#endif
// Identifies which hashing mechanism `HashSelect` picked for a type. The
// enumerators are listed in the order in which the mechanisms are probed.
enum class InvokeHashTag {
// The type's bytes are hashed directly.
kUniquelyRepresented,
// An `AbslHashValue` overload is called.
kHashValue,
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
// The hash functor from ABSL_INTERNAL_LEGACY_HASH_NAMESPACE is used.
kLegacyHash,
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
// `std::hash<T>` is used.
kStdHash,
// No mechanism applies; the type is not hashable.
kNone
};
// HashSelect
//
// Type trait to select the appropriate hash implementation to use.
// HashSelect<T>::value is an instance of InvokeHashTag that indicates the best
// available hashing mechanism.
// See `Note` above about MSVC.
template <typename T>
struct HashSelect {
private:
// Declaration-only hash state. It is used solely inside `decltype` below to
// ask whether a matching `AbslHashValue` overload exists for `T`.
struct State : HashStateBase<State> {
static State combine_contiguous(State hash_state, const unsigned char*,
size_t);
using State::HashStateBase::combine_contiguous;
};
// `Probe<V, Tag>::value` evaluates to `V<T>::value` if it is a valid
// expression, and `false` otherwise.
// `Probe<V, Tag>::kTag` always evaluates to `Tag`.
template <template <typename> class V, InvokeHashTag Tag>
struct Probe {
private:
// Preferred overload (exact match on the `int` argument); it is viable only
// when `V<U>::value` is well-formed and true.
template <typename U, typename std::enable_if<V<U>::value, int>::type = 0>
static std::true_type Test(int);
// Fallback overload, selected when the overload above drops out.
template <typename U>
static std::false_type Test(char);
public:
static constexpr InvokeHashTag kTag = Tag;
// `T` is probed with its reference and const qualifiers stripped.
static constexpr bool value = decltype(
Test<absl::remove_const_t<absl::remove_reference_t<T>>>(0))::value;
};
// True when `U` is declared uniquely represented.
template <typename U>
using ProbeUniquelyRepresented = is_uniquely_represented<U>;
// True when an unqualified call `AbslHashValue(State, const U&)` is valid and
// returns `State`.
template <typename U>
using ProbeHashValue =
std::is_same<State, decltype(AbslHashValue(std::declval<State>(),
std::declval<const U&>()))>;
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
// True when the legacy hash functor can be invoked on `const U&` and its
// result converts to `size_t`.
template <typename U>
using ProbeLegacyHash =
std::is_convertible<decltype(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<
U>()(std::declval<const U&>())),
size_t>;
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
// True when `std::hash<U>` can be invoked on `const U&` and its result
// converts to `size_t`. Where probing is unavailable (see the `Note` above
// about MSVC) this is unconditionally true.
template <typename U>
using ProbeStdHash =
#if ABSL_HASH_INTERNAL_CAN_POISON_
std::is_convertible<decltype(std::hash<U>()(std::declval<const U&>())),
size_t>;
#else // ABSL_HASH_INTERNAL_CAN_POISON_
std::true_type;
#endif // ABSL_HASH_INTERNAL_CAN_POISON_
// Always true: the final catch-all entry, so the disjunction below always
// selects something.
template <typename U>
using ProbeNone = std::true_type;
public:
// Probe each implementation in order.
// disjunction provides short circuiting wrt instantiation.
static constexpr InvokeHashTag value = absl::disjunction<
Probe<ProbeUniquelyRepresented, InvokeHashTag::kUniquelyRepresented>,
Probe<ProbeHashValue, InvokeHashTag::kHashValue>,
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
Probe<ProbeLegacyHash, InvokeHashTag::kLegacyHash>,
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
Probe<ProbeStdHash, InvokeHashTag::kStdHash>,
Probe<ProbeNone, InvokeHashTag::kNone>>::kTag;
};
// is_hashable<T>::value is true whenever HashSelect found some hashing
// mechanism for `T`, i.e. the selected tag is anything other than kNone.
template <typename T>
struct is_hashable : std::integral_constant<bool, HashSelect<T>::value !=
InvokeHashTag::kNone> {};
// InvokeHash() overload for types selected as kUniquelyRepresented: the bytes
// of `value` are hashed directly.
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kUniquelyRepresented,
H>
InvokeHash(H state, const T& value) {
return hash_internal::hash_bytes(std::move(state), value);
}
// InvokeHash() overload for types selected as kHashValue. The call to
// `AbslHashValue` is deliberately unqualified so that overloads supplied
// alongside user-defined types can be found via ADL.
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kHashValue, H>
InvokeHash(H state, const T& value) {
return AbslHashValue(std::move(state), value);
}
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kLegacyHash, H>
InvokeHash(H state, const T& value) {
  // Run the legacy hash functor first, then feed the bytes of its result
  // into the hash state.
  const auto legacy_digest =
      ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>{}(value);
  return hash_internal::hash_bytes(std::move(state), legacy_digest);
}
#endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
template <typename H, typename T>
absl::enable_if_t<HashSelect<T>::value == InvokeHashTag::kStdHash, H>
InvokeHash(H state, const T& value) {
  // Run std::hash first, then feed the bytes of its result into the hash
  // state.
  const auto std_digest = std::hash<T>{}(value);
  return hash_internal::hash_bytes(std::move(state), std_digest);
}
// CityHashState
// Hash state used by HashImpl in this file. It carries a single 64-bit
// running value (`state_`) that starts out as Seed() and is updated by Mix().
class CityHashState : public HashStateBase<CityHashState> {
// absl::uint128 is not an alias or a thin wrapper around the intrinsic.
// We use the intrinsic when available to improve performance.
#ifdef ABSL_HAVE_INTRINSIC_INT128
using uint128 = __uint128_t;
#else // ABSL_HAVE_INTRINSIC_INT128
using uint128 = absl::uint128;
#endif // ABSL_HAVE_INTRINSIC_INT128
// Multiplier used by Mix(): a 32-bit constant when size_t is 4 bytes wide,
// a 64-bit constant otherwise.
static constexpr uint64_t kMul =
sizeof(size_t) == 4 ? uint64_t{0xcc9e2d51} : uint64_t{0x9ddfea08eb382d69};
// True for integral types that are also uniquely represented; selects the
// single-Mix() overload of hash() below.
template <typename T>
using IntegralFastPath =
conjunction<std::is_integral<T>, is_uniquely_represented<T>>;
public:
// Move only
CityHashState(CityHashState&&) = default;
CityHashState& operator=(CityHashState&&) = default;
// CityHashState::combine_contiguous()
//
// Fundamental base case for hash recursion: mixes the given range of bytes
// into the hash state.
static CityHashState combine_contiguous(CityHashState hash_state,
const unsigned char* first,
size_t size) {
// The integral_constant tag picks the CombineContiguousImpl overload that
// matches sizeof(size_t) on this platform.
return CityHashState(
CombineContiguousImpl(hash_state.state_, first, size,
std::integral_constant<int, sizeof(size_t)>{}));
}
using CityHashState::HashStateBase::combine_contiguous;
// CityHashState::hash()
//
// For performance reasons in non-opt mode, we specialize this for
// integral types.
// Otherwise we would be instantiating and calling dozens of functions for
// something that is just one multiplication and a couple xor's.
// The result should be the same as running the whole algorithm, but faster.
template <typename T, absl::enable_if_t<IntegralFastPath<T>::value, int> = 0>
static size_t hash(T value) {
return static_cast<size_t>(Mix(Seed(), static_cast<uint64_t>(value)));
}
// Overload of CityHashState::hash()
//
// General case: combines `value` into a freshly seeded state and returns the
// resulting state truncated to size_t.
template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0>
static size_t hash(const T& value) {
return static_cast<size_t>(combine(CityHashState{}, value).state_);
}
private:
// Invoked only once for a given argument; that plus the fact that this is
// move-only ensures that there is only one non-moved-from object.
CityHashState() : state_(Seed()) {}
// Workaround for MSVC bug.
// We make the type copyable to fix the calling convention, even though we
// never actually copy it. Keep it private to not affect the public API of the
// type.
CityHashState(const CityHashState&) = default;
// Wraps an already-mixed 64-bit value in a state object.
explicit CityHashState(uint64_t state) : state_(state) {}
// Implementation of the base case for combine_contiguous where we actually
// mix the bytes into the state.
// Dispatch to different implementations of the combine_contiguous depending
// on the value of `sizeof(size_t)`.
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 4>
/* sizeof_size_t */);
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 8>
/* sizeof_size_t*/);
// Reads 9 to 16 bytes from p.
// The first 8 bytes are in .first, the rest (zero padded) bytes are in
// .second.
static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p,
size_t len) {
// Load the final 8 bytes, then shift out the (16 - len) bytes that were
// already covered by the load of the first 8 bytes.
uint64_t high = little_endian::Load64(p + len - 8);
return {little_endian::Load64(p), high >> (128 - len * 8)};
}
// Reads 4 to 8 bytes from p. Zero pads to fill uint64_t.
static uint64_t Read4To8(const unsigned char* p, size_t len) {
// ORs the first 4 bytes with the last 4 bytes shifted up by (len - 4)
// bytes; the two loads overlap whenever len < 8.
return (static_cast<uint64_t>(little_endian::Load32(p + len - 4))
<< (len - 4) * 8) |
little_endian::Load32(p);
}
// Reads 1 to 3 bytes from p. Zero pads to fill uint32_t.
static uint32_t Read1To3(const unsigned char* p, size_t len) {
// Reads the bytes at index 0, len / 2 and len - 1, each shifted to its own
// byte position; for len < 3 some of these name the same byte.
return static_cast<uint32_t>((p[0]) | //
(p[len / 2] << (len / 2 * 8)) | //
(p[len - 1] << ((len - 1) * 8)));
}
// Folds `v` into `state`: adds the two, multiplies by kMul in a type twice
// as wide as size_t, and xors the upper half of the product into the lower.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) {
using MultType =
absl::conditional_t<sizeof(size_t) == 4, uint64_t, uint128>;
// We do the addition in 64-bit space to make sure the 128-bit
// multiplication is fast. If we were to do it as MultType the compiler has
// to assume that the high word is non-zero and needs to perform 2
// multiplications instead of one.
MultType m = state + v;
m *= kMul;
return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2)));
}
// Seed()
//
// A non-deterministic seed.
//
// The current purpose of this seed is to generate non-deterministic results
// and prevent having users depend on the particular hash values.
// It is not meant as a security feature right now, but it leaves the door
// open to upgrade it to a true per-process random seed. A true random seed
// costs more and we don't need to pay for that right now.
//
// On platforms with ASLR, we take advantage of it to make a per-process
// random value.
// See https://en.wikipedia.org/wiki/Address_space_layout_randomization
//
// On other platforms this is still going to be non-deterministic but most
// probably per-build and not per-process.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() {
return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(kSeed));
}
// Pointer whose numeric value Seed() returns; only declared here.
static const void* const kSeed;
// The running hash value.
uint64_t state_;
};
// CityHashState::CombineContiguousImpl()
inline uint64_t CityHashState::CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 4> /* sizeof_size_t */) {
  // Empty ranges have no effect.
  if (len == 0) return state;
  // Inputs of up to 8 bytes are read straight into an integer; anything
  // longer is first reduced with CityHash32. The result is then folded into
  // the state with one multiplicative Mix().
  uint64_t word;
  if (len <= 3) {
    word = Read1To3(first, len);
  } else if (len <= 8) {
    word = Read4To8(first, len);
  } else {
    word = absl::hash_internal::CityHash32(
        reinterpret_cast<const char*>(first), len);
  }
  return Mix(state, word);
}
// Overload of CityHashState::CombineContiguousImpl()
inline uint64_t CityHashState::CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 8> /* sizeof_size_t */) {
  // Empty ranges have no effect.
  if (len == 0) return state;
  // Large inputs are reduced with CityHash64 before the final Mix().
  if (len > 16) {
    return Mix(state, absl::hash_internal::CityHash64(
                          reinterpret_cast<const char*>(first), len));
  }
  // 9 to 16 bytes: read as two 64-bit words, mixed in one after the other.
  if (len > 8) {
    const auto halves = Read9To16(first, len);
    return Mix(Mix(state, halves.first), halves.second);
  }
  // 4 to 8 bytes fit into a single 64-bit word.
  if (len >= 4) {
    return Mix(state, Read4To8(first, len));
  }
  // 1 to 3 bytes.
  return Mix(state, Read1To3(first, len));
}
// Empty tag type; exists only to serve as the private base of PoisonedHash.
struct AggregateBarrier {};
// PoisonedHash
//
// Stand-in base used by `Hash<T>` when `T` is not hashable: it can be neither
// default-constructed nor copied.
// Add a private base class to make sure this type is not an aggregate.
// Aggregates can be aggregate initialized even if the default constructor is
// deleted.
struct PoisonedHash : private AggregateBarrier {
PoisonedHash() = delete;
PoisonedHash(const PoisonedHash&) = delete;
PoisonedHash& operator=(const PoisonedHash&) = delete;
};
// HashImpl
//
// Function object that hashes a `T` by delegating to CityHashState::hash().
template <typename T>
struct HashImpl {
size_t operator()(const T& value) const { return CityHashState::hash(value); }
};
// Hash
//
// Derives from HashImpl<T> when `T` is hashable, and from PoisonedHash
// otherwise, in which case the resulting functor cannot be default-constructed
// or copied.
template <typename T>
struct Hash
: absl::conditional_t<is_hashable<T>::value, HashImpl<T>, PoisonedHash> {};
// HashStateBase::combine()
template <typename H>
template <typename T, typename... Ts>
H HashStateBase<H>::combine(H state, const T& value, const Ts&... values) {
  // Hash the leading value, then recurse on the remaining ones. The
  // zero-value overload of combine() ends the recursion.
  H after_first = hash_internal::InvokeHash(std::move(state), value);
  return H::combine(std::move(after_first), values...);
}
// HashStateBase::combine_contiguous()
// Forwards to hash_range_or_bytes(), whose overloads either hash the whole
// array as raw bytes (uniquely represented `T`) or combine the elements one
// at a time.
template <typename H>
template <typename T>
H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) {
return hash_internal::hash_range_or_bytes(std::move(state), data, size);
}
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_HASH_H_

View file

@ -0,0 +1,23 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdio>
#include <cstdlib>
#include "absl/hash/hash.h"
// Prints the hash of argv[1].
//
// argv[1] is parsed as an int with std::atoi and hashed with absl::Hash<int>;
// the result is written to stdout followed by a newline. Exits with status 1
// when no argument is given, and 0 otherwise.
int main(int argc, char** argv) {
  if (argc < 2) return 1;
  const int parsed = std::atoi(argv[1]);
  // std::printf comes from <cstdio>, which this file includes directly rather
  // than relying on another header to pull it in.
  std::printf("%zu\n", absl::Hash<int>{}(parsed));  // NOLINT
  return 0;
}

View file

@ -0,0 +1,218 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
#define ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
#include <ostream>
#include <string>
#include <vector>
#include "absl/hash/hash.h"
#include "absl/strings/match.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
namespace absl {
namespace hash_internal {
// SpyHashStateImpl implements the HashState API but, rather than mixing its
// input into a hash value, it records the raw bytes it is handed: each
// combine_contiguous() call appends one string to an internal list. That makes
// the exact hash representation produced by an AbslHashValue overload visible
// to a test, as long as the overload is templated on the HashState parameter.
//
// The spy additionally flags two kinds of misuse (see error()):
//   - combining into, or returning, a moved-from hash state;
//   - calling AbslHashValue directly instead of going through combine().
//
// Sample usage:
//   EXPECT_EQ(SpyHashState::combine(SpyHashState(), foo),
//             SpyHashState::combine(SpyHashState(), bar));
template <typename T>
class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
 public:
  // Only SpyHashStateImpl<void> may be default-constructed; any other
  // instantiation has to be obtained through the converting constructor.
  SpyHashStateImpl()
      : error_(std::make_shared<absl::optional<std::string>>()) {
    static_assert(std::is_void<T>::value, "");
  }

  // Move-only
  SpyHashStateImpl(const SpyHashStateImpl&) = delete;
  SpyHashStateImpl& operator=(const SpyHashStateImpl&) = delete;

  SpyHashStateImpl(SpyHashStateImpl&& other) noexcept { AdoptFrom(other); }

  SpyHashStateImpl& operator=(SpyHashStateImpl&& other) noexcept {
    AdoptFrom(other);
    return *this;
  }

  // Converting move constructor: lets a spy tagged with one type be turned
  // into a spy tagged with another while carrying the recording along.
  template <typename U>
  SpyHashStateImpl(SpyHashStateImpl<U>&& other) {  // NOLINT
    AdoptFrom(other);
  }

  // Combines `a`, then the remaining `args`, into `s`.
  template <typename A, typename... Args>
  static SpyHashStateImpl combine(SpyHashStateImpl s, const A& a,
                                  const Args&... args) {
    // While `a` is being hashed the state is re-tagged as SpyHashStateImpl<A>.
    // A user overload that is handed this instance is expected to use it only
    // for combine calls; if it calls AbslHashValue directly with some other
    // value type, the trap overload declared at the bottom of this file is
    // selected and the mistake gets recorded.
    s = SpyHashStateImpl<A>::HashStateBase::combine(std::move(s), a);
    return SpyHashStateImpl::combine(std::move(s), args...);
  }

  // End of the combine recursion. Records an error in the shared error slot
  // if a direct AbslHashValue call was detected for this instantiation, or
  // else if `s` had already been moved from.
  static SpyHashStateImpl combine(SpyHashStateImpl s) {
    if (direct_absl_hash_value_error_) {
      *s.error_ = "AbslHashValue should not be invoked directly.";
    } else if (s.moved_from_) {
      *s.error_ = "Used moved-from instance of the hash state object.";
    }
    return s;
  }

  // Marks this instantiation as having seen a direct AbslHashValue call.
  static void SetDirectAbslHashValueError() {
    direct_absl_hash_value_error_ = true;
  }

  // Equality looks only at the recorded hash representations; the error slot
  // and the moved-from flag do not participate.
  friend bool operator==(const SpyHashStateImpl& lhs,
                         const SpyHashStateImpl& rhs) {
    return lhs.hash_representation_ == rhs.hash_representation_;
  }

  friend bool operator!=(const SpyHashStateImpl& lhs,
                         const SpyHashStateImpl& rhs) {
    return !(lhs == rhs);
  }

  // How the concatenated byte streams of two states relate to each other.
  enum class CompareResult {
    kEqual,
    kASuffixB,
    kBSuffixA,
    kUnequal,
  };

  // Concatenates each state's recorded chunks and reports whether the two
  // byte streams are equal, whether one ends with the other, or neither.
  static CompareResult Compare(const SpyHashStateImpl& a,
                               const SpyHashStateImpl& b) {
    const std::string flat_a = absl::StrJoin(a.hash_representation_, "");
    const std::string flat_b = absl::StrJoin(b.hash_representation_, "");
    if (flat_a == flat_b) return CompareResult::kEqual;
    if (absl::EndsWith(flat_a, flat_b)) return CompareResult::kBSuffixA;
    if (absl::EndsWith(flat_b, flat_a)) return CompareResult::kASuffixB;
    return CompareResult::kUnequal;
  }

  // Streams a hex dump of the recording to facilitate debugging. Every
  // recorded chunk is dumped separately: a new row, prefixed with the offset
  // inside the chunk, starts every 16 bytes, and a space precedes every pair
  // of bytes.
  friend std::ostream& operator<<(std::ostream& out,
                                  const SpyHashStateImpl& hash_state) {
    out << "[\n";
    for (const std::string& chunk : hash_state.hash_representation_) {
      size_t position = 0;
      for (const char byte : chunk) {
        if (position % 16 == 0) {
          out << absl::StreamFormat("\n0x%04x: ", position);
        }
        if (position % 2 == 0) {
          out << " ";
        }
        out << absl::StreamFormat("%02x", byte);
        ++position;
      }
      out << "\n";
    }
    out << "]";
    return out;
  }

  // The base case of the combine recursion: appends the `size` bytes starting
  // at `begin` to the recording as one new chunk.
  static SpyHashStateImpl combine_contiguous(SpyHashStateImpl hash_state,
                                             const unsigned char* begin,
                                             size_t size) {
    hash_state.hash_representation_.emplace_back(
        reinterpret_cast<const char*>(begin), size);
    return hash_state;
  }

  using SpyHashStateImpl::HashStateBase::combine_contiguous;

  // Returns the misuse detected so far, if any. A state that has itself been
  // moved from always reports an error; otherwise the shared error slot is
  // returned as-is (empty when nothing went wrong).
  absl::optional<std::string> error() const {
    if (moved_from_) {
      return "Returned a moved-from instance of the hash state object.";
    }
    return *error_;
  }

 private:
  template <typename U>
  friend class SpyHashStateImpl;

  // Shared implementation of the move operations: takes over `source`'s
  // recording, shares its error slot, inherits its moved-from flag, and then
  // marks `source` as moved-from.
  template <typename U>
  void AdoptFrom(SpyHashStateImpl<U>& source) noexcept {
    hash_representation_ = std::move(source.hash_representation_);
    error_ = source.error_;
    moved_from_ = source.moved_from_;
    source.moved_from_ = true;
  }

  // This is true if SpyHashStateImpl<T> has been passed to a call of
  // AbslHashValue with the wrong type. This detects that the user called
  // AbslHashValue directly (because the hash state type does not match).
  static bool direct_absl_hash_value_error_;

  // One entry per combine_contiguous() call, in call order.
  std::vector<std::string> hash_representation_;

  // Held through a shared_ptr so that every instance taking part in one
  // SpyHashState run refers to the same slot: an error recorded through any
  // of them (e.g. use-after-move) is visible through all of them.
  std::shared_ptr<absl::optional<std::string>> error_;

  bool moved_from_ = false;
};

template <typename T>
bool SpyHashStateImpl<T>::direct_absl_hash_value_error_;
// Helper that binds a reference member to the template argument `B`, which
// odr-uses `B`. RunOnStartup instantiates it with its static `run` flag;
// presumably this is what guarantees `run` (and its initializer) is emitted.
template <bool& B>
struct OdrUse {
constexpr OdrUse() {}
// Default member initializer: refers to the bool named by the template
// argument.
bool& b = B;
};
// RunOnStartup<f> owns a static flag `run` whose initializer calls `f()` and
// then evaluates to true, so `f` executes as a side effect of initializing
// `run`.
template <void (*)()>
struct RunOnStartup {
static bool run;
// Names `run` as a template argument of OdrUse, which binds a reference to
// it.
static constexpr OdrUse<run> kOdrUse{};
};
// Comma expression: invoke `f`, then initialize `run` with true.
template <void (*f)()>
bool RunOnStartup<f>::run = (f(), true);
// Trap overload for AbslHashValue calls that pair a SpyHashStateImpl<T> with a
// value whose type U differs from T. SpyHashStateImpl::combine() re-tags the
// state as SpyHashStateImpl<A> while hashing an `A`, so a mismatch means the
// user's code called AbslHashValue directly instead of using combine().
// Only this declaration exists in this header; it has no definition here.
template <
typename T, typename U,
// Only trigger for when (T != U),
absl::enable_if_t<!std::is_same<T, U>::value, int> = 0,
// This statement works in two ways:
// - First, it instantiates RunOnStartup and forces the initialization of
// `run`, which sets the global variable.
// - Second, it triggers a SFINAE error disabling the overload to prevent
// compile time errors. If we didn't disable the overload we would get
// ambiguous overload errors, which we don't want.
int = RunOnStartup<SpyHashStateImpl<T>::SetDirectAbslHashValueError>::run>
void AbslHashValue(SpyHashStateImpl<T>, const U&);
// The spy type that tests create directly. SpyHashStateImpl's default
// constructor static_asserts that its template argument is void, so this is
// the only instantiation that can be default-constructed.
using SpyHashState = SpyHashStateImpl<void>;
} // namespace hash_internal
} // namespace absl
#endif // ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_