src/gromacs/random/tabulatednormaldistribution.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2018,2019,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \file
  37  * \brief Tabulated normal distribution
  38  *
  39  * A very fast normal distribution, but with limited resolution.
  40  *
  41  * \author Erik Lindahl <erik.lindahl@gmail.com>
  42  * \inpublicapi
  43  * \ingroup module_random
  44  */
  45
  46 #ifndef GMX_RANDOM_TABULATEDNORMALDISTRIBUTION_H
  47 #define GMX_RANDOM_TABULATEDNORMALDISTRIBUTION_H
  48
  49 #include <cmath>
  50
  51 #include <array>
  52 #include <limits>
  53 #include <memory>
  54
  55 #include "gromacs/math/functions.h"
  56 #include "gromacs/math/utilities.h"
  57 #include "gromacs/utility/basedefinitions.h"
  58 #include "gromacs/utility/classhelpers.h"
  59 #include "gromacs/utility/gmxassert.h"
  60 #include "gromacs/utility/real.h"
  61
  62 namespace gmx
  63 {
  64
  65 namespace detail
  66 {
  67
  68 //! Number of bits that determines the resolution of the lookup table for the normal distribution.
  69 constexpr int c_TabulatedNormalDistributionDefaultBits = 14;
  70
  71 } // namespace detail
  72
  73 /*! \brief Tabulated normal random distribution
  74  *
  75  *  Random distribution compatible with C++11 distributions - it can be
  76  *  used with any C++11 random engine.
  77  *
  78  *  \tparam RealType  Type of the return value. Float or double. Note that
  79  *                    GROMACS uses "real" type by default in contrast to the C++11
  80  *                    standard library, to avoid double/float conversions.
  81  *  \tparam tableBits Size of the table, specified in bits. The storage
  82  *                    space required is sizeof(RealType)*2^tableBits. To
  83  *                    keep things sane this is limited to 24 bits.
  84  *
  85  *  Some stochastic integrators depend on drawing a lot of normal
  86  *  distribution random numbers quickly, but in many cases the only
  87  *  important property is the distribution - given the noise in forces
  88  *  we do not need very high resolution.
  89  *  This distribution uses an internal table to return samples from a
  90  *  normal distribution with limited resolution. By default the table
  91  *  uses c_TabulatedNormalDistributionDefaultBits bits, but this is
  92  *  specified with a template parameter.
  93  *
  94  *  Since this distribution only uses tableBits bits per value generated,
  95  *  the values draw from the random engine are used for several results.
  96  *  To make sure you get a reproducible result when using counter-based
  97  *  random engines (such as ThreeFry2x64), remember to call the reset()
  98  *  method to cancel the internal memory of the distribution.
  99  *
 100  *  \note For modern NUMA systems, you likely want to use separate
 101  *        distributions for each thread, and make sure they are initialized
 102  *        on the CPU where they will run, so the table is placed in that
 103  *        NUMA memory pool.
 104  *  \note The finite table resolution means this distribution will NOT
 105  *        return arbitrarily small/large values, but with e.g. 14 bits
 106  *        the results are limited to roughly +/- 4 standard deviations.
 107  */
 108 template<class RealType = real, unsigned int tableBits = detail::c_TabulatedNormalDistributionDefaultBits>
 109 class TabulatedNormalDistribution
 110 {
 111     static_assert(tableBits <= 24,
 112                   "Normal distribution table is limited to 24bits (64MB in single precision)");
 113
 114 public:
 115     /*! \brief  Type of normal distribution results */
 116     typedef RealType result_type;
 117
 118     /*! \brief  Normal distribution parameter class (mean and stddev) */
 119     class param_type
 120     {
 121     public:
 122         /*! \brief The type of distribution the parameters describe */
 123         typedef TabulatedNormalDistribution distribution_type;
 124
 125         /*! \brief Constructor. Default is classical distr. with mean 0, stddev 1.
 126          *
 127          * \param mean     Expectation value.
 128          * \param stddev   Standard deviation.
 129          *
 130          */
 131         explicit param_type(result_type mean = 0.0, result_type stddev = 1.0) :
 132             mean_(mean), stddev_(stddev)
 133         {
 134         }
 135
 136         /*! \brief Return mean parameter of normal distribution */
 137         result_type mean() const { return mean_; }
 138
 139         /*! \brief Return standard deviation parameter of normal distribution */
 140         result_type stddev() const { return stddev_; }
 141
 142         /*! \brief True if two sets of normal distributions parameters are identical
 143          *
 144          * \param x Instance to compare with.
 145          */
 146         bool operator==(const param_type& x) const
 147         {
 148             return (mean_ == x.mean_ && stddev_ == x.stddev_);
 149         }
 150
 151         /*! \brief True if two sets of normal distributions parameters are different.
 152          *
 153          * \param x Instance to compare with.
 154          */
 155         bool operator!=(const param_type& x) const { return !operator==(x); }
 156
 157     private:
 158         /*! \brief Internal storage for mean of normal distribution */
 159         result_type mean_;
 160         /*! \brief Internal storage for standard deviation of normal distribution */
 161         result_type stddev_;
 162     };
 163
 164     /*! \brief Fill the table with values for the normal distribution
 165      *
 166      *  This routine returns a new a std::array with the table data.
 167      *
 168      *  This routine is used to help construct objects of this class,
 169      *  and is exposed only to permit testing. Normal code should not
 170      *  need to call this function.
 171      */
 172     static std::array<RealType, 1 << tableBits> makeTable()
 173     {
 174         /* Fill the table with the integral of a gaussian distribution, which
 175          * corresponds to the inverse error function.
 176          * We avoid integrating a gaussian numerically, since that leads to
 177          * some loss-of-precision which also accumulates so it is worse for
 178          * larger indices in the table. */
 179         constexpr std::size_t tableSize   = 1 << tableBits;
 180         constexpr std::size_t halfSize    = tableSize / 2;
 181         constexpr double      invHalfSize = 1.0 / halfSize;
 182
 183         std::array<RealType, tableSize> table;
 184
 185         // Fill in all but the extremal entries of the table
 186         for (std::size_t i = 0; i < halfSize - 1; i++)
 187         {
 188             double r = (i + 0.5) * invHalfSize;
 189             double x = std::sqrt(2.0) * erfinv(r);
 190
 191             table.at(halfSize - 1 - i) = -x;
 192             table.at(halfSize + i)     = x;
 193         }
 194         // We want to fill in the extremal table entries with
 195         // values that make the total variance equal to 1, so
 196         // measure the variance by summing the squares of the
 197         // other values of the distribution, starting from the
 198         // smallest values.
 199         double sumOfSquares = 0;
 200         for (std::size_t i = 1; i < halfSize; i++)
 201         {
 202             double value = table.at(i);
 203             sumOfSquares += value * value;
 204         }
 205         double missingVariance = 1.0 - 2.0 * sumOfSquares / tableSize;
 206         GMX_RELEASE_ASSERT(missingVariance > 0,
 207                            "Incorrect computation of tabulated normal distribution");
 208         double extremalValue = std::sqrt(0.5 * missingVariance * tableSize);
 209         table.at(0)          = -extremalValue;
 210         table.back()         = extremalValue;
 211
 212         return table;
 213     }
 214
 215     /*! \brief Construct new normal distribution with specified mean & stdddev.
 216      *
 217      *  \param mean    Mean value of tabulated normal distribution
 218      *  \param stddev  Standard deviation of tabulated normal distribution
 219      */
 220     explicit TabulatedNormalDistribution(result_type mean = 0.0, result_type stddev = 1.0) :
 221         param_(param_type(mean, stddev)), savedRandomBits_(0), savedRandomBitsLeft_(0)
 222     {
 223     }
 224
 225     /*! \brief Construct new normal distribution from parameter type.
 226      *
 227      *  \param param Parameter class containing mean and standard deviation.
 228      */
 229     explicit TabulatedNormalDistribution(const param_type& param) :
 230         param_(param), savedRandomBits_(0), savedRandomBitsLeft_(0)
 231     {
 232     }
 233
 234     /*! \brief Smallest value that can be generated in normal distrubiton.
 235      *
 236      * \note The smallest value is not -infinity with a table, but it
 237      *       depends on the table resolution. With 14 bits, this is roughly
 238      *       four standard deviations below the mean.
 239      */
 240     result_type min() const { return c_table_[0]; }
 241
 242     /*! \brief Largest value that can be generated in normal distribution.
 243      *
 244      * \note The largest value is not infinity with a table, but it
 245      *       depends on the table resolution. With 14 bits, this is roughly
 246      *       four standard deviations above the mean.
 247      */
 248     result_type max() const { return c_table_[c_table_.size() - 1]; }
 249
 250     /*! \brief Mean of the present normal distribution */
 251     result_type mean() const { return param_.mean(); }
 252
 253     /*! \brief Standard deviation of the present normal distribution */
 254
 255     result_type stddev() const { return param_.stddev(); }
 256
 257     /*! \brief The parameter class (mean & stddev) of the normal distribution */
 258     param_type param() const { return param_; }
 259
 260     /*! \brief Clear all internal saved random bits from the random engine */
 261     void reset() { savedRandomBitsLeft_ = 0; }
 262
 263     /*! \brief Return normal distribution value specified by internal parameters.
 264      *
 265      * \tparam Rng   Random engine type used to provide uniform random bits.
 266      * \param  g     Random engine of class Rng. For normal GROMACS usage
 267      *               you likely want to use ThreeFry2x64.
 268      */
 269     template<class Rng>
 270     result_type operator()(Rng& g)
 271     {
 272         return (*this)(g, param_);
 273     }
 274
 275     /*! \brief Return normal distribution value specified by given parameters
 276      *
 277      * \tparam Rng   Random engine type used to provide uniform random bits.
 278      * \param  g     Random engine of class Rng. For normal GROMACS usage
 279      *               you likely want to use ThreeFry2x64.
 280      * \param  param Parameters used to specify normal distribution.
 281      */
 282     template<class Rng>
 283     result_type operator()(Rng& g, const param_type& param)
 284     {
 285         if (savedRandomBitsLeft_ < tableBits)
 286         {
 287             // We do not know whether the generator g returns 64 or 32 bits,
 288             // since g is not known when we construct this class.
 289             // To keep things simple, we always draw one random number,
 290             // store it in our 64-bit value, and set the number of active bits.
 291             // For tableBits up to 16 this will be as efficient both with 32
 292             // and 64 bit random engines when drawing multiple numbers
 293             // (our default value is
 294             // c_TabulatedNormalDistributionDefaultBits == 14). It
 295             // also avoids drawing multiple 32-bit random numbers
 296             // even if we just call this routine for a single
 297             // result.
 298             savedRandomBits_     = static_cast<uint64_t>(g());
 299             savedRandomBitsLeft_ = std::numeric_limits<typename Rng::result_type>::digits;
 300         }
 301         result_type value = c_table_[savedRandomBits_ & ((1ULL << tableBits) - 1)];
 302         savedRandomBits_ >>= tableBits;
 303         savedRandomBitsLeft_ -= tableBits;
 304         return param.mean() + value * param.stddev();
 305     }
 306
 307     /*!\brief Check if two tabulated normal distributions have identical states.
 308      *
 309      * \param  x     Instance to compare with.
 310      */
 311     bool operator==(const TabulatedNormalDistribution<RealType, tableBits>& x) const
 312     {
 313         return (param_ == x.param_ && savedRandomBits_ == x.savedRandomBits_
 314                 && savedRandomBitsLeft_ == x.savedRandomBitsLeft_);
 315     }
 316
 317     /*!\brief Check if two tabulated normal distributions have different states.
 318      *
 319      * \param  x     Instance to compare with.
 320      */
 321     bool operator!=(const TabulatedNormalDistribution<RealType, tableBits>& x) const
 322     {
 323         return !operator==(x);
 324     }
 325
 326 private:
 327     /*! \brief Parameters of normal distribution (mean and stddev) */
 328     param_type param_;
 329     /*! \brief Array with tabluated values of normal distribution */
 330     static const std::array<RealType, 1 << tableBits> c_table_;
 331     /*! \brief Saved output from random engine, shifted tableBits right each time */
 332     uint64_t savedRandomBits_;
 333     /*! \brief Number of valid bits remaining i savedRandomBits_ */
 334     unsigned int savedRandomBitsLeft_;
 335
 336     GMX_DISALLOW_COPY_AND_ASSIGN(TabulatedNormalDistribution);
 337 };
 338
 339 // MSVC does not handle extern template class members correctly even in MSVC 2015,
 340 // so in that case we have to instantiate in every object using it. In addition,
 341 // doxygen is convinced this defines a function (which leads to crashes in our python
 342 // scripts), so to avoid confusion we hide it from doxygen too.
 343 #if !defined(_MSC_VER) && !defined(DOXYGEN)
 344 // Declaration of template specialization
 345 template<>
 346 const std::array<real, 1 << detail::c_TabulatedNormalDistributionDefaultBits> TabulatedNormalDistribution<>::c_table_;
 347
 348 extern template const std::array<real, 1 << detail::c_TabulatedNormalDistributionDefaultBits>
 349         TabulatedNormalDistribution<>::c_table_;
 350 #endif
 351
 352 // Instantiation for all tables without specialization
 353 template<class RealType, unsigned int tableBits>
 354 const std::array<RealType, 1 << tableBits> TabulatedNormalDistribution<RealType, tableBits>::c_table_ =
 355         TabulatedNormalDistribution<RealType, tableBits>::makeTable();
 356
 357 } // namespace gmx
 358
 359 #endif // GMX_RANDOM_TABULATEDNORMALDISTRIBUTION_H