Export of internal Abseil changes.
-- 7a6ff16a85beb730c172d5d25cf1b5e1be885c56 by Laramie Leavitt <lar@google.com>: Internal change. PiperOrigin-RevId: 254454546 -- ff8f9bafaefc26d451f576ea4a06d150aed63f6f by Andy Soffer <asoffer@google.com>: Internal changes PiperOrigin-RevId: 254451562 -- deefc5b651b479ce36f0b4ef203e119c0c8936f2 by CJ Johnson <johnsoncj@google.com>: Account for subtracting unsigned values from the size of InlinedVector PiperOrigin-RevId: 254450625 -- 3c677316a27bcadc17e41957c809ca472d5fef14 by Andy Soffer <asoffer@google.com>: Add C++17's std::make_from_tuple to absl/utility/utility.h PiperOrigin-RevId: 254411573 -- 4ee3536a918830eeec402a28fc31a62c7c90b940 by CJ Johnson <johnsoncj@google.com>: Adds benchmark for the rest of the InlinedVector public API PiperOrigin-RevId: 254408378 -- e5a21a00700ee83498ff1efbf649169756463ee4 by CJ Johnson <johnsoncj@google.com>: Updates the definition of InlinedVector::shrink_to_fit() to be exception safe and adds exception safety tests for it. PiperOrigin-RevId: 254401387 -- 2ea82e72b86d82d78b4e4712a63a55981b53c64b by Laramie Leavitt <lar@google.com>: Use absl::InsecureBitGen in place of std::mt19937 in tests absl/random/...distribution_test.cc PiperOrigin-RevId: 254289444 -- fa099e02c413a7ffda732415e8105cad26a90337 by Andy Soffer <asoffer@google.com>: Internal changes PiperOrigin-RevId: 254286334 -- ce34b7f36933b30cfa35b9c9a5697a792b5666e4 by Andy Soffer <asoffer@google.com>: Internal changes PiperOrigin-RevId: 254273059 -- 6f9c473da7c2090c2e85a37c5f00622e8a912a89 by Jorg Brown <jorg@google.com>: Change absl::container_internal::CompressedTuple to instantiate its internal Storage class with the name of the type it's holding, rather than the name of the Tuple. This is not an externally-visible change, other than less compiler memory is used and less debug information is generated. PiperOrigin-RevId: 254269285 -- 8bd3c186bf2fc0c55d8a2dd6f28a5327502c9fba by Andy Soffer <asoffer@google.com>: Adding short-hand IntervalClosed for IntervalClosedClosed and IntervalOpen for IntervalOpenOpen. PiperOrigin-RevId: 254252419 -- ea957f99b6a04fccd42aa05605605f3b44b1ecfd by Abseil Team <absl-team@google.com>: Do not directly use __SIZEOF_INT128__. In order to avoid linker errors when building with clang-cl (__fixunsdfti, __udivti3 and __fixunssfti are undefined), this CL uses ABSL_HAVE_INTRINSIC_INT128 which is not defined for clang-cl. PiperOrigin-RevId: 254250739 -- 89ab385cd26b34d64130bce856253aaba96d2345 by Andy Soffer <asoffer@google.com>: Internal changes PiperOrigin-RevId: 254242321 -- cffc793d93eca6d6bdf7de733847b6ab4a255ae9 by CJ Johnson <johnsoncj@google.com>: Adds benchmark for InlinedVector::reserve(size_type) PiperOrigin-RevId: 254199226 -- c90c7a9fa3c8f0c9d5114036979548b055ea2f2a by Gennadiy Rozental <rogeeff@google.com>: Import of CCTZ from GitHub. PiperOrigin-RevId: 254072387 -- c4c388beae016c9570ab54ffa1d52660e4a85b7b by Laramie Leavitt <lar@google.com>: Internal cleanup. PiperOrigin-RevId: 254062381 -- d3c992e221cc74e5372d0c8fa410170b6a43c062 by Tom Manshreck <shreck@google.com>: Update distributions.h to Abseil standards PiperOrigin-RevId: 254054946 -- d15ad0035c34ef11b14fadc5a4a2d3ec415f5518 by CJ Johnson <johnsoncj@google.com>: Removes functions with only one caller from the implementation details of InlinedVector by manually inlining the definitions PiperOrigin-RevId: 254005427 -- 2f37e807efc3a8ef1f4b539bdd379917d4151520 by Andy Soffer <asoffer@google.com>: Initial release of Abseil Random PiperOrigin-RevId: 253999861 -- 24ed1694b6430791d781ed533a8f8ccf6cac5856 by CJ Johnson <johnsoncj@google.com>: Updates the definition of InlinedVector::assign(...)/InlinedVector::operator=(...) to new, exception-safe implementations with exception safety tests to boot PiperOrigin-RevId: 253993691 -- 5613d95f5a7e34a535cfaeadce801441e990843e by CJ Johnson <johnsoncj@google.com>: Adds benchmarks for InlinedVector::shrink_to_fit() PiperOrigin-RevId: 253989647 -- 2a96ddfdac40bbb8cb6a7f1aeab90917067c6e63 by Abseil Team <absl-team@google.com>: Initial release of Abseil Random PiperOrigin-RevId: 253927497 -- bf1aff8fc9ffa921ad74643e9525ecf25b0d8dc1 by Andy Soffer <asoffer@google.com>: Initial release of Abseil Random PiperOrigin-RevId: 253920512 -- bfc03f4a3dcda3cf3a4b84bdb84cda24e3394f41 by Laramie Leavitt <lar@google.com>: Internal change. PiperOrigin-RevId: 253886486 -- 05036cfcc078ca7c5f581a00dfb0daed568cbb69 by Eric Fiselier <ericwf@google.com>: Don't include `winsock2.h` because it drags in `windows.h` and friends, and they define awful macros like OPAQUE, ERROR, and more. This has the potential to break abseil users. Instead we only forward declare `timeval` and require Windows users include `winsock2.h` themselves. This is both inconsistent and poor QoI, but so including 'windows.h' is bad too. PiperOrigin-RevId: 253852615 GitOrigin-RevId: 7a6ff16a85beb730c172d5d25cf1b5e1be885c56 Change-Id: Icd6aff87da26f29ec8915da856f051129987cef6
This commit is contained in:
		
							parent
							
								
									43ef2148c0
								
							
						
					
					
						commit
						e9324d926a
					
				
					 120 changed files with 22842 additions and 340 deletions
				
			
		
							
								
								
									
										565
									
								
								absl/random/poisson_distribution_test.cc
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										565
									
								
								absl/random/poisson_distribution_test.cc
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,565 @@ | |||
| // Copyright 2017 The Abseil Authors.
 | ||||
| //
 | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License.
 | ||||
| // You may obtain a copy of the License at
 | ||||
| //
 | ||||
| //      https://www.apache.org/licenses/LICENSE-2.0
 | ||||
| //
 | ||||
| // Unless required by applicable law or agreed to in writing, software
 | ||||
| // distributed under the License is distributed on an "AS IS" BASIS,
 | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||||
| // See the License for the specific language governing permissions and
 | ||||
| // limitations under the License.
 | ||||
| 
 | ||||
| #include "absl/random/poisson_distribution.h" | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <cstddef> | ||||
| #include <cstdint> | ||||
| #include <iterator> | ||||
| #include <random> | ||||
| #include <sstream> | ||||
| #include <string> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "gmock/gmock.h" | ||||
| #include "gtest/gtest.h" | ||||
| #include "absl/base/internal/raw_logging.h" | ||||
| #include "absl/base/macros.h" | ||||
| #include "absl/container/flat_hash_map.h" | ||||
| #include "absl/random/internal/chi_square.h" | ||||
| #include "absl/random/internal/distribution_test_util.h" | ||||
| #include "absl/random/internal/sequence_urbg.h" | ||||
| #include "absl/random/random.h" | ||||
| #include "absl/strings/str_cat.h" | ||||
| #include "absl/strings/str_format.h" | ||||
| #include "absl/strings/str_replace.h" | ||||
| #include "absl/strings/strip.h" | ||||
| 
 | ||||
| // Notes about generating poisson variates:
 | ||||
| //
 | ||||
| // It is unlikely that any implementation of std::poisson_distribution
 | ||||
| // will be stable over time and across library implementations. For instance
 | ||||
| // the three different poisson variate generators listed below all differ:
 | ||||
| //
 | ||||
| // https://github.com/ampl/gsl/tree/master/randist/poisson.c
 | ||||
| // * GSL uses a gamma + binomial + knuth method to compute poisson variates.
 | ||||
| //
 | ||||
| // https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/random.tcc
 | ||||
| // * GCC uses the Devroye rejection algorithm, based on
 | ||||
| // Devroye, L. Non-Uniform Random Variates Generation. Springer-Verlag,
 | ||||
| // New York, 1986, Ch. X, Sects. 3.3 & 3.4 (+ Errata!), ~p.511
 | ||||
| //   http://www.nrbook.com/devroye/
 | ||||
| //
 | ||||
| // https://github.com/llvm-mirror/libcxx/blob/master/include/random
 | ||||
| // * CLANG uses a different rejection method, which appears to include a
 | ||||
| // normal-distribution approximation and an exponential distribution to
 | ||||
| // compute the threshold, including a similar factorial approximation to this
 | ||||
| // one, but it is unclear where the algorithm comes from, exactly.
 | ||||
| //
 | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| using absl::random_internal::kChiSquared; | ||||
| 
 | ||||
| // The PoissonDistributionInterfaceTest provides a basic test that
 | ||||
| // absl::poisson_distribution conforms to the interface and serialization
 | ||||
| // requirements imposed by [rand.req.dist] for the common integer types.
 | ||||
| 
 | ||||
| template <typename IntType> | ||||
| class PoissonDistributionInterfaceTest : public ::testing::Test {}; | ||||
| 
 | ||||
| using IntTypes = ::testing::Types<int, int8_t, int16_t, int32_t, int64_t, | ||||
|                                   uint8_t, uint16_t, uint32_t, uint64_t>; | ||||
| TYPED_TEST_CASE(PoissonDistributionInterfaceTest, IntTypes); | ||||
| 
 | ||||
| TYPED_TEST(PoissonDistributionInterfaceTest, SerializeTest) { | ||||
|   using param_type = typename absl::poisson_distribution<TypeParam>::param_type; | ||||
|   const double kMax = | ||||
|       std::min(1e10 /* assertion limit */, | ||||
|                static_cast<double>(std::numeric_limits<TypeParam>::max())); | ||||
| 
 | ||||
|   const double kParams[] = { | ||||
|       // Cases around 1.
 | ||||
|       1,                         //
 | ||||
|       std::nextafter(1.0, 0.0),  // 1 - epsilon
 | ||||
|       std::nextafter(1.0, 2.0),  // 1 + epsilon
 | ||||
|       // Arbitrary values.
 | ||||
|       1e-8, 1e-4, | ||||
|       0.0000005,  // ~7.2e-7
 | ||||
|       0.2,        // ~0.2x
 | ||||
|       0.5,        // 0.72
 | ||||
|       2,          // ~2.8
 | ||||
|       20,         // 3x ~9.6
 | ||||
|       100, 1e4, 1e8, 1.5e9, 1e20, | ||||
|       // Boundary cases.
 | ||||
|       std::numeric_limits<double>::max(), | ||||
|       std::numeric_limits<double>::epsilon(), | ||||
|       std::nextafter(std::numeric_limits<double>::min(), | ||||
|                      1.0),                        // min + epsilon
 | ||||
|       std::numeric_limits<double>::min(),         // smallest normal
 | ||||
|       std::numeric_limits<double>::denorm_min(),  // smallest denorm
 | ||||
|       std::numeric_limits<double>::min() / 2,     // denorm
 | ||||
|       std::nextafter(std::numeric_limits<double>::min(), | ||||
|                      0.0),  // denorm_max
 | ||||
|   }; | ||||
| 
 | ||||
| 
 | ||||
|   constexpr int kCount = 1000; | ||||
|   absl::InsecureBitGen gen; | ||||
|   for (const double m : kParams) { | ||||
|     const double mean = std::min(kMax, m); | ||||
|     const param_type param(mean); | ||||
| 
 | ||||
|     // Validate parameters.
 | ||||
|     absl::poisson_distribution<TypeParam> before(mean); | ||||
|     EXPECT_EQ(before.mean(), param.mean()); | ||||
| 
 | ||||
|     { | ||||
|       absl::poisson_distribution<TypeParam> via_param(param); | ||||
|       EXPECT_EQ(via_param, before); | ||||
|       EXPECT_EQ(via_param.param(), before.param()); | ||||
|     } | ||||
| 
 | ||||
|     // Smoke test.
 | ||||
|     auto sample_min = before.max(); | ||||
|     auto sample_max = before.min(); | ||||
|     for (int i = 0; i < kCount; i++) { | ||||
|       auto sample = before(gen); | ||||
|       EXPECT_GE(sample, before.min()); | ||||
|       EXPECT_LE(sample, before.max()); | ||||
|       if (sample > sample_max) sample_max = sample; | ||||
|       if (sample < sample_min) sample_min = sample; | ||||
|     } | ||||
| 
 | ||||
|     ABSL_INTERNAL_LOG(INFO, absl::StrCat("Range {", param.mean(), "}: ", | ||||
|                                          +sample_min, ", ", +sample_max)); | ||||
| 
 | ||||
|     // Validate stream serialization.
 | ||||
|     std::stringstream ss; | ||||
|     ss << before; | ||||
| 
 | ||||
|     absl::poisson_distribution<TypeParam> after(3.8); | ||||
| 
 | ||||
|     EXPECT_NE(before.mean(), after.mean()); | ||||
|     EXPECT_NE(before.param(), after.param()); | ||||
|     EXPECT_NE(before, after); | ||||
| 
 | ||||
|     ss >> after; | ||||
| 
 | ||||
|     EXPECT_EQ(before.mean(), after.mean())  //
 | ||||
|         << ss.str() << " "                  //
 | ||||
|         << (ss.good() ? "good " : "")       //
 | ||||
|         << (ss.bad() ? "bad " : "")         //
 | ||||
|         << (ss.eof() ? "eof " : "")         //
 | ||||
|         << (ss.fail() ? "fail " : ""); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda366j.htm
 | ||||
| 
 | ||||
| class PoissonModel { | ||||
|  public: | ||||
|   explicit PoissonModel(double mean) : mean_(mean) {} | ||||
| 
 | ||||
|   double mean() const { return mean_; } | ||||
|   double variance() const { return mean_; } | ||||
|   double stddev() const { return std::sqrt(variance()); } | ||||
|   double skew() const { return 1.0 / mean_; } | ||||
|   double kurtosis() const { return 3.0 + 1.0 / mean_; } | ||||
| 
 | ||||
|   // InitCDF() initializes the CDF for the distribution parameters.
 | ||||
|   void InitCDF(); | ||||
| 
 | ||||
|   // The InverseCDF, or the Percent-point function returns x, P(x) < v.
 | ||||
|   struct CDF { | ||||
|     size_t index; | ||||
|     double pmf; | ||||
|     double cdf; | ||||
|   }; | ||||
|   CDF InverseCDF(double p) { | ||||
|     CDF target{0, 0, p}; | ||||
|     auto it = std::upper_bound( | ||||
|         std::begin(cdf_), std::end(cdf_), target, | ||||
|         [](const CDF& a, const CDF& b) { return a.cdf < b.cdf; }); | ||||
|     return *it; | ||||
|   } | ||||
| 
 | ||||
|   void LogCDF() { | ||||
|     ABSL_INTERNAL_LOG(INFO, absl::StrCat("CDF (mean = ", mean_, ")")); | ||||
|     for (const auto c : cdf_) { | ||||
|       ABSL_INTERNAL_LOG(INFO, | ||||
|                         absl::StrCat(c.index, ": pmf=", c.pmf, " cdf=", c.cdf)); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|  private: | ||||
|   const double mean_; | ||||
| 
 | ||||
|   std::vector<CDF> cdf_; | ||||
| }; | ||||
| 
 | ||||
| // The goal is to compute an InverseCDF function, or percent point function for
 | ||||
| // the poisson distribution, and use that to partition our output into equal
 | ||||
| // range buckets.  However there is no closed form solution for the inverse cdf
 | ||||
| // for poisson distributions (the closest is the incomplete gamma function).
 | ||||
| // Instead, `InitCDF` iteratively computes the PMF and the CDF. This enables
 | ||||
| // searching for the bucket points.
 | ||||
| void PoissonModel::InitCDF() { | ||||
|   if (!cdf_.empty()) { | ||||
|     // State already initialized.
 | ||||
|     return; | ||||
|   } | ||||
|   ABSL_ASSERT(mean_ < 201.0); | ||||
| 
 | ||||
|   const size_t max_i = 50 * stddev() + mean(); | ||||
|   const double e_neg_mean = std::exp(-mean()); | ||||
|   ABSL_ASSERT(e_neg_mean > 0); | ||||
| 
 | ||||
|   double d = 1; | ||||
|   double last_result = e_neg_mean; | ||||
|   double cumulative = e_neg_mean; | ||||
|   if (e_neg_mean > 1e-10) { | ||||
|     cdf_.push_back({0, e_neg_mean, cumulative}); | ||||
|   } | ||||
|   for (size_t i = 1; i < max_i; i++) { | ||||
|     d *= (mean() / i); | ||||
|     double result = e_neg_mean * d; | ||||
|     cumulative += result; | ||||
|     if (result < 1e-10 && result < last_result && cumulative > 0.999999) { | ||||
|       break; | ||||
|     } | ||||
|     if (result > 1e-7) { | ||||
|       cdf_.push_back({i, result, cumulative}); | ||||
|     } | ||||
|     last_result = result; | ||||
|   } | ||||
|   ABSL_ASSERT(!cdf_.empty()); | ||||
| } | ||||
| 
 | ||||
| // PoissonDistributionZTest implements a z-test for the poisson distribution.
 | ||||
| 
 | ||||
| struct ZParam { | ||||
|   double mean; | ||||
|   double p_fail;   // Z-Test probability of failure.
 | ||||
|   int trials;      // Z-Test trials.
 | ||||
|   size_t samples;  // Z-Test samples.
 | ||||
| }; | ||||
| 
 | ||||
| class PoissonDistributionZTest : public testing::TestWithParam<ZParam>, | ||||
|                                  public PoissonModel { | ||||
|  public: | ||||
|   PoissonDistributionZTest() : PoissonModel(GetParam().mean) {} | ||||
| 
 | ||||
|   // ZTestImpl provides a basic z-squared test of the mean vs. expected
 | ||||
|   // mean for data generated by the poisson distribution.
 | ||||
|   template <typename D> | ||||
|   bool SingleZTest(const double p, const size_t samples); | ||||
| 
 | ||||
|   absl::InsecureBitGen rng_; | ||||
| }; | ||||
| 
 | ||||
| template <typename D> | ||||
| bool PoissonDistributionZTest::SingleZTest(const double p, | ||||
|                                            const size_t samples) { | ||||
|   D dis(mean()); | ||||
| 
 | ||||
|   absl::flat_hash_map<int32_t, int> buckets; | ||||
|   std::vector<double> data; | ||||
|   data.reserve(samples); | ||||
|   for (int j = 0; j < samples; j++) { | ||||
|     const auto x = dis(rng_); | ||||
|     buckets[x]++; | ||||
|     data.push_back(x); | ||||
|   } | ||||
| 
 | ||||
|   // The null-hypothesis is that the distribution is a poisson distribution with
 | ||||
|   // the provided mean (not estimated from the data).
 | ||||
|   const auto m = absl::random_internal::ComputeDistributionMoments(data); | ||||
|   const double max_err = absl::random_internal::MaxErrorTolerance(p); | ||||
|   const double z = absl::random_internal::ZScore(mean(), m); | ||||
|   const bool pass = absl::random_internal::Near("z", z, 0.0, max_err); | ||||
| 
 | ||||
|   if (!pass) { | ||||
|     ABSL_INTERNAL_LOG( | ||||
|         INFO, absl::StrFormat("p=%f max_err=%f\n" | ||||
|                               " mean=%f vs. %f\n" | ||||
|                               " stddev=%f vs. %f\n" | ||||
|                               " skewness=%f vs. %f\n" | ||||
|                               " kurtosis=%f vs. %f\n" | ||||
|                               " z=%f", | ||||
|                               p, max_err, m.mean, mean(), std::sqrt(m.variance), | ||||
|                               stddev(), m.skewness, skew(), m.kurtosis, | ||||
|                               kurtosis(), z)); | ||||
|   } | ||||
|   return pass; | ||||
| } | ||||
| 
 | ||||
| TEST_P(PoissonDistributionZTest, AbslPoissonDistribution) { | ||||
|   const auto& param = GetParam(); | ||||
|   const int expected_failures = | ||||
|       std::max(1, static_cast<int>(std::ceil(param.trials * param.p_fail))); | ||||
|   const double p = absl::random_internal::RequiredSuccessProbability( | ||||
|       param.p_fail, param.trials); | ||||
| 
 | ||||
|   int failures = 0; | ||||
|   for (int i = 0; i < param.trials; i++) { | ||||
|     failures += | ||||
|         SingleZTest<absl::poisson_distribution<int32_t>>(p, param.samples) ? 0 | ||||
|                                                                            : 1; | ||||
|   } | ||||
|   EXPECT_LE(failures, expected_failures); | ||||
| } | ||||
| 
 | ||||
| std::vector<ZParam> GetZParams() { | ||||
|   // These values have been adjusted from the "exact" computed values to reduce
 | ||||
|   // failure rates.
 | ||||
|   //
 | ||||
|   // It turns out that the actual values are not as close to the expected values
 | ||||
|   // as would be ideal.
 | ||||
|   return std::vector<ZParam>({ | ||||
|       // Knuth method.
 | ||||
|       ZParam{0.5, 0.01, 100, 1000}, | ||||
|       ZParam{1.0, 0.01, 100, 1000}, | ||||
|       ZParam{10.0, 0.01, 100, 5000}, | ||||
|       // Split-knuth method.
 | ||||
|       ZParam{20.0, 0.01, 100, 10000}, | ||||
|       ZParam{50.0, 0.01, 100, 10000}, | ||||
|       // Ratio of gaussians method.
 | ||||
|       ZParam{51.0, 0.01, 100, 10000}, | ||||
|       ZParam{200.0, 0.05, 10, 100000}, | ||||
|       ZParam{100000.0, 0.05, 10, 1000000}, | ||||
|   }); | ||||
| } | ||||
| 
 | ||||
| std::string ZParamName(const ::testing::TestParamInfo<ZParam>& info) { | ||||
|   const auto& p = info.param; | ||||
|   std::string name = absl::StrCat("mean_", absl::SixDigits(p.mean)); | ||||
|   return absl::StrReplaceAll(name, {{"+", "_"}, {"-", "_"}, {".", "_"}}); | ||||
| } | ||||
| 
 | ||||
| INSTANTIATE_TEST_SUITE_P(, PoissonDistributionZTest, | ||||
|                          ::testing::ValuesIn(GetZParams()), ZParamName); | ||||
| 
 | ||||
| // The PoissonDistributionChiSquaredTest class provides a basic test framework
 | ||||
| // for variates generated by a conforming poisson_distribution.
 | ||||
| class PoissonDistributionChiSquaredTest : public testing::TestWithParam<double>, | ||||
|                                           public PoissonModel { | ||||
|  public: | ||||
|   PoissonDistributionChiSquaredTest() : PoissonModel(GetParam()) {} | ||||
| 
 | ||||
|   // The ChiSquaredTestImpl provides a chi-squared goodness of fit test for data
 | ||||
|   // generated by the poisson distribution.
 | ||||
|   template <typename D> | ||||
|   double ChiSquaredTestImpl(); | ||||
| 
 | ||||
|  private: | ||||
|   void InitChiSquaredTest(const double buckets); | ||||
| 
 | ||||
|   absl::InsecureBitGen rng_; | ||||
|   std::vector<size_t> cutoffs_; | ||||
|   std::vector<double> expected_; | ||||
| }; | ||||
| 
 | ||||
| void PoissonDistributionChiSquaredTest::InitChiSquaredTest( | ||||
|     const double buckets) { | ||||
|   if (!cutoffs_.empty() && !expected_.empty()) { | ||||
|     return; | ||||
|   } | ||||
|   InitCDF(); | ||||
| 
 | ||||
|   // The code below finds cuttoffs that yield approximately equally-sized
 | ||||
|   // buckets to the extent that it is possible. However for poisson
 | ||||
|   // distributions this is particularly challenging for small mean parameters.
 | ||||
|   // Track the expected proportion of items in each bucket.
 | ||||
|   double last_cdf = 0; | ||||
|   const double inc = 1.0 / buckets; | ||||
|   for (double p = inc; p <= 1.0; p += inc) { | ||||
|     auto result = InverseCDF(p); | ||||
|     if (!cutoffs_.empty() && cutoffs_.back() == result.index) { | ||||
|       continue; | ||||
|     } | ||||
|     double d = result.cdf - last_cdf; | ||||
|     cutoffs_.push_back(result.index); | ||||
|     expected_.push_back(d); | ||||
|     last_cdf = result.cdf; | ||||
|   } | ||||
|   cutoffs_.push_back(std::numeric_limits<size_t>::max()); | ||||
|   expected_.push_back(std::max(0.0, 1.0 - last_cdf)); | ||||
| } | ||||
| 
 | ||||
| template <typename D> | ||||
| double PoissonDistributionChiSquaredTest::ChiSquaredTestImpl() { | ||||
|   const int kSamples = 2000; | ||||
|   const int kBuckets = 50; | ||||
| 
 | ||||
|   // The poisson CDF fails for large mean values, since e^-mean exceeds the
 | ||||
|   // machine precision. For these cases, using a normal approximation would be
 | ||||
|   // appropriate.
 | ||||
|   ABSL_ASSERT(mean() <= 200); | ||||
|   InitChiSquaredTest(kBuckets); | ||||
| 
 | ||||
|   D dis(mean()); | ||||
| 
 | ||||
|   std::vector<int32_t> counts(cutoffs_.size(), 0); | ||||
|   for (int j = 0; j < kSamples; j++) { | ||||
|     const size_t x = dis(rng_); | ||||
|     auto it = std::lower_bound(std::begin(cutoffs_), std::end(cutoffs_), x); | ||||
|     counts[std::distance(cutoffs_.begin(), it)]++; | ||||
|   } | ||||
| 
 | ||||
|   // Normalize the counts.
 | ||||
|   std::vector<int32_t> e(expected_.size(), 0); | ||||
|   for (int i = 0; i < e.size(); i++) { | ||||
|     e[i] = kSamples * expected_[i]; | ||||
|   } | ||||
| 
 | ||||
|   // The null-hypothesis is that the distribution is a poisson distribution with
 | ||||
|   // the provided mean (not estimated from the data).
 | ||||
|   const int dof = static_cast<int>(counts.size()) - 1; | ||||
| 
 | ||||
|   // The threshold for logging is 1-in-50.
 | ||||
|   const double threshold = absl::random_internal::ChiSquareValue(dof, 0.98); | ||||
| 
 | ||||
|   const double chi_square = absl::random_internal::ChiSquare( | ||||
|       std::begin(counts), std::end(counts), std::begin(e), std::end(e)); | ||||
| 
 | ||||
|   const double p = absl::random_internal::ChiSquarePValue(chi_square, dof); | ||||
| 
 | ||||
|   // Log if the chi_squared value is above the threshold.
 | ||||
|   if (chi_square > threshold) { | ||||
|     LogCDF(); | ||||
| 
 | ||||
|     ABSL_INTERNAL_LOG(INFO, absl::StrCat("VALUES  buckets=", counts.size(), | ||||
|                                          "  samples=", kSamples)); | ||||
|     for (size_t i = 0; i < counts.size(); i++) { | ||||
|       ABSL_INTERNAL_LOG( | ||||
|           INFO, absl::StrCat(cutoffs_[i], ": ", counts[i], " vs. E=", e[i])); | ||||
|     } | ||||
| 
 | ||||
|     ABSL_INTERNAL_LOG( | ||||
|         INFO, | ||||
|         absl::StrCat(kChiSquared, "(data, dof=", dof, ") = ", chi_square, " (", | ||||
|                      p, ")\n", " vs.\n", kChiSquared, " @ 0.98 = ", threshold)); | ||||
|   } | ||||
|   return p; | ||||
| } | ||||
| 
 | ||||
| TEST_P(PoissonDistributionChiSquaredTest, AbslPoissonDistribution) { | ||||
|   const int kTrials = 20; | ||||
| 
 | ||||
|   // Large values are not yet supported -- this requires estimating the cdf
 | ||||
|   // using the normal distribution instead of the poisson in this case.
 | ||||
|   ASSERT_LE(mean(), 200.0); | ||||
|   if (mean() > 200.0) { | ||||
|     return; | ||||
|   } | ||||
| 
 | ||||
|   int failures = 0; | ||||
|   for (int i = 0; i < kTrials; i++) { | ||||
|     double p_value = ChiSquaredTestImpl<absl::poisson_distribution<int32_t>>(); | ||||
|     if (p_value < 0.005) { | ||||
|       failures++; | ||||
|     } | ||||
|   } | ||||
|   // There is a 0.10% chance of producing at least one failure, so raise the
 | ||||
|   // failure threshold high enough to allow for a flake rate < 10,000.
 | ||||
|   EXPECT_LE(failures, 4); | ||||
| } | ||||
| 
 | ||||
| INSTANTIATE_TEST_SUITE_P(, PoissonDistributionChiSquaredTest, | ||||
|                          ::testing::Values(0.5, 1.0, 2.0, 10.0, 50.0, 51.0, | ||||
|                                            200.0)); | ||||
| 
 | ||||
| // NOTE: absl::poisson_distribution is not guaranteed to be stable.
 | ||||
| TEST(PoissonDistributionTest, StabilityTest) { | ||||
|   using testing::ElementsAre; | ||||
|   // absl::poisson_distribution stability relies on stability of
 | ||||
|   // std::exp, std::log, std::sqrt, std::ceil, std::floor, and
 | ||||
|   // absl::FastUniformBits, absl::StirlingLogFactorial, absl::RandU64ToDouble.
 | ||||
|   absl::random_internal::sequence_urbg urbg({ | ||||
|       0x035b0dc7e0a18acfull, 0x06cebe0d2653682eull, 0x0061e9b23861596bull, | ||||
|       0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull, | ||||
|       0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull, | ||||
|       0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull, | ||||
|       0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull, | ||||
|       0x4864f22c059bf29eull, 0x247856d8b862665cull, 0xe46e86e9a1337e10ull, | ||||
|       0xd8c8541f3519b133ull, 0xe75b5162c567b9e4ull, 0xf732e5ded7009c5bull, | ||||
|       0xb170b98353121eacull, 0x1ec2e8986d2362caull, 0x814c8e35fe9a961aull, | ||||
|       0x0c3cd59c9b638a02ull, 0xcb3bb6478a07715cull, 0x1224e62c978bbc7full, | ||||
|       0x671ef2cb04e81f6eull, 0x3c1cbd811eaf1808ull, 0x1bbc23cfa8fac721ull, | ||||
|       0xa4c2cda65e596a51ull, 0xb77216fad37adf91ull, 0x836d794457c08849ull, | ||||
|       0xe083df03475f49d7ull, 0xbc9feb512e6b0d6cull, 0xb12d74fdd718c8c5ull, | ||||
|       0x12ff09653bfbe4caull, 0x8dd03a105bc4ee7eull, 0x5738341045ba0d85ull, | ||||
|       0xf3fd722dc65ad09eull, 0xfa14fd21ea2a5705ull, 0xffe6ea4d6edb0c73ull, | ||||
|       0xD07E9EFE2BF11FB4ull, 0x95DBDA4DAE909198ull, 0xEAAD8E716B93D5A0ull, | ||||
|       0xD08ED1D0AFC725E0ull, 0x8E3C5B2F8E7594B7ull, 0x8FF6E2FBF2122B64ull, | ||||
|       0x8888B812900DF01Cull, 0x4FAD5EA0688FC31Cull, 0xD1CFF191B3A8C1ADull, | ||||
|       0x2F2F2218BE0E1777ull, 0xEA752DFE8B021FA1ull, 0xE5A0CC0FB56F74E8ull, | ||||
|       0x18ACF3D6CE89E299ull, 0xB4A84FE0FD13E0B7ull, 0x7CC43B81D2ADA8D9ull, | ||||
|       0x165FA26680957705ull, 0x93CC7314211A1477ull, 0xE6AD206577B5FA86ull, | ||||
|       0xC75442F5FB9D35CFull, 0xEBCDAF0C7B3E89A0ull, 0xD6411BD3AE1E7E49ull, | ||||
|       0x00250E2D2071B35Eull, 0x226800BB57B8E0AFull, 0x2464369BF009B91Eull, | ||||
|       0x5563911D59DFA6AAull, 0x78C14389D95A537Full, 0x207D5BA202E5B9C5ull, | ||||
|       0x832603766295CFA9ull, 0x11C819684E734A41ull, 0xB3472DCA7B14A94Aull, | ||||
|   }); | ||||
| 
 | ||||
|   std::vector<int> output(10); | ||||
| 
 | ||||
|   // Method 1.
 | ||||
|   { | ||||
|     absl::poisson_distribution<int> dist(5); | ||||
|     std::generate(std::begin(output), std::end(output), | ||||
|                   [&] { return dist(urbg); }); | ||||
|   } | ||||
|   EXPECT_THAT(output,  // mean = 4.2
 | ||||
|               ElementsAre(1, 0, 0, 4, 2, 10, 3, 3, 7, 12)); | ||||
| 
 | ||||
|   // Method 2.
 | ||||
|   { | ||||
|     urbg.reset(); | ||||
|     absl::poisson_distribution<int> dist(25); | ||||
|     std::generate(std::begin(output), std::end(output), | ||||
|                   [&] { return dist(urbg); }); | ||||
|   } | ||||
|   EXPECT_THAT(output,  // mean = 19.8
 | ||||
|               ElementsAre(9, 35, 18, 10, 35, 18, 10, 35, 18, 10)); | ||||
| 
 | ||||
|   // Method 3.
 | ||||
|   { | ||||
|     urbg.reset(); | ||||
|     absl::poisson_distribution<int> dist(121); | ||||
|     std::generate(std::begin(output), std::end(output), | ||||
|                   [&] { return dist(urbg); }); | ||||
|   } | ||||
|   EXPECT_THAT(output,  // mean = 124.1
 | ||||
|               ElementsAre(161, 122, 129, 124, 112, 112, 117, 120, 130, 114)); | ||||
| } | ||||
| 
 | ||||
| TEST(PoissonDistributionTest, AlgorithmExpectedValue_1) { | ||||
|   // This tests small values of the Knuth method.
 | ||||
|   // The underlying uniform distribution will generate exactly 0.5.
 | ||||
|   absl::random_internal::sequence_urbg urbg({0x8000000000000001ull}); | ||||
|   absl::poisson_distribution<int> dist(5); | ||||
|   EXPECT_EQ(7, dist(urbg)); | ||||
| } | ||||
| 
 | ||||
| TEST(PoissonDistributionTest, AlgorithmExpectedValue_2) { | ||||
|   // This tests larger values of the Knuth method.
 | ||||
|   // The underlying uniform distribution will generate exactly 0.5.
 | ||||
|   absl::random_internal::sequence_urbg urbg({0x8000000000000001ull}); | ||||
|   absl::poisson_distribution<int> dist(25); | ||||
|   EXPECT_EQ(36, dist(urbg)); | ||||
| } | ||||
| 
 | ||||
| TEST(PoissonDistributionTest, AlgorithmExpectedValue_3) { | ||||
|   // This variant uses the ratio of uniforms method.
 | ||||
|   absl::random_internal::sequence_urbg urbg( | ||||
|       {0x7fffffffffffffffull, 0x8000000000000000ull}); | ||||
| 
 | ||||
|   absl::poisson_distribution<int> dist(121); | ||||
|   EXPECT_EQ(121, dist(urbg)); | ||||
| } | ||||
| 
 | ||||
| }  // namespace
 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue