src/gromacs/random/tabulatednormaldistribution.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \file
  37  * \brief Tabulated normal distribution
  38  *
  39  * A very fast normal distribution, but with limited resolution.
  40  *
  41  * \author Erik Lindahl <erik.lindahl@gmail.com>
  42  * \inpublicapi
  43  * \ingroup module_random
  44  */
  45
  46 #ifndef GMX_RANDOM_TABULATEDNORMALDISTRIBUTION_H
  47 #define GMX_RANDOM_TABULATEDNORMALDISTRIBUTION_H
  48
  49 #include <cmath>
  50
  51 #include <array>
  52 #include <limits>
  53
  54 #include "gromacs/math/functions.h"
  55 #include "gromacs/math/utilities.h"
  56 #include "gromacs/utility/basedefinitions.h"
  57 #include "gromacs/utility/classhelpers.h"
  58 #include "gromacs/utility/gmxassert.h"
  59 #include "gromacs/utility/real.h"
  60
  61 namespace gmx
  62 {
  63
  64 namespace
  65 {
  66
  67 //! Number of bits that determines the resolution of the lookup table for the normal distribution.
  68 const int c_TabulatedNormalDistributionDefaultBits = 14;
  69
  70 }
  71
  72 /*! \brief Tabulated normal random distribution
  73  *
  74  *  Random distribution compatible with C++11 distributions - it can be
  75  *  used with any C++11 random engine.
  76  *
  77  *  \tparam RealType  Type of the return value. Float or double. Note that
  78  *                    GROMACS uses "real" type by default in contrast to the C++11
  79  *                    standard library, to avoid double/float conversions.
  80  *  \tparam tableBits Size of the table, specified in bits. The storage
  81  *                    space required is sizeof(RealType)*2^tableBits. To
  82  *                    keep things sane this is limited to 24 bits.
  83  *
  84  *  Some stochastic integrators depend on drawing a lot of normal
  85  *  distribution random numbers quickly, but in many cases the only
  86  *  important property is the distribution - given the noise in forces
  87  *  we do not need very high resolution.
  88  *  This distribution uses an internal table to return samples from a
  89  *  normal distribution with limited resolution. By default the table
  90  *  uses c_TabulatedNormalDistributionDefaultBits bits, but this is
  91  *  specified with a template parameter.
  92  *
  93  *  Since this distribution only uses tableBits bits per value generated,
  94  *  the values draw from the random engine are used for several results.
  95  *  To make sure you get a reproducible result when using counter-based
  96  *  random engines (such as ThreeFry2x64), remember to call the reset()
  97  *  method to cancel the internal memory of the distribution.
  98  *
  99  *  \note For modern NUMA systems, you likely want to use separate
 100  *        distributions for each thread, and make sure they are initialized
 101  *        on the CPU where they will run, so the table is placed in that
 102  *        NUMA memory pool.
 103  *  \note The finite table resolution means this distribution will NOT
 104  *        return arbitrarily small/large values, but with e.g. 14 bits
 105  *        the results are limited to roughly +/- 4 standard deviations.
 106  */
 107 template<class RealType = real, unsigned int tableBits = c_TabulatedNormalDistributionDefaultBits>
 108 class TabulatedNormalDistribution
 109 {
 110     static_assert(tableBits <= 24, "Normal distribution table is limited to 24bits (64MB in single precision)");
 111
 112     public:
 113         /*! \brief  Type of normal distribution results */
 114         typedef RealType result_type;
 115
 116         /*! \brief  Normal distribution parameter class (mean and stddev) */
 117         class param_type
 118         {
 119             public:
 120                 /*! \brief The type of distribution the parameters describe */
 121                 typedef TabulatedNormalDistribution distribution_type;
 122
 123                 /*! \brief Constructor. Default is classical distr. with mean 0, stddev 1.
 124                  *
 125                  * \param mean     Expectation value.
 126                  * \param stddev   Standard deviation.
 127                  *
 128                  */
 129                 explicit param_type(result_type mean = 0.0, result_type stddev = 1.0)
 130                     : mean_(mean), stddev_(stddev) {}
 131
 132                 /*! \brief Return mean parameter of normal distribution */
 133                 result_type
 134                 mean() const { return mean_; }
 135
 136                 /*! \brief Return standard deviation parameter of normal distribution */
 137                 result_type
 138                 stddev() const { return stddev_; }
 139
 140                 /*! \brief True if two sets of normal distributions parameters are identical
 141                  *
 142                  * \param x Instance to compare with.
 143                  */
 144                 bool
 145                 operator==(const param_type &x) const
 146                 {
 147                     return (mean_ == x.mean_ && stddev_ == x.stddev_);
 148                 }
 149
 150                 /*! \brief True if two sets of normal distributions parameters are different.
 151                  *
 152                  * \param x Instance to compare with.
 153                  */
 154                 bool
 155                 operator!=(const param_type &x) const { return !operator==(x); }
 156
 157             private:
 158                 /*! \brief Internal storage for mean of normal distribution */
 159                 result_type mean_;
 160                 /*! \brief Internal storage for standard deviation of normal distribution */
 161                 result_type stddev_;
 162         };
 163
 164         /*! \brief Fill the table with values for the normal distribution
 165          *
 166          *  This routine returns a new a std::array with the table data.
 167          *
 168          *  This routine is used to help construct objects of this class,
 169          *  and is exposed only to permit testing. Normal code should not
 170          *  need to call this function.
 171          */
 172         static const
 173         std::array<RealType, 1<<tableBits>
 174         // cppcheck-suppress unusedPrivateFunction
 175         makeTable()
 176         {
 177             /* Fill the table with the integral of a gaussian distribution, which
 178              * corresponds to the inverse error function.
 179              * We avoid integrating a gaussian numerically, since that leads to
 180              * some loss-of-precision which also accumulates so it is worse for
 181              * larger indices in the table. */
 182             constexpr std::size_t            tableSize        = 1 << tableBits;
 183             constexpr std::size_t            halfSize         = tableSize/2;
 184             constexpr double                 invHalfSize      = 1.0/halfSize;
 185
 186             std::array<RealType, tableSize>  table;
 187
 188             // Fill in all but the extremal entries of the table
 189             for (std::size_t i = 0; i < halfSize-1; i++)
 190             {
 191                 double r = (i + 0.5) * invHalfSize;
 192                 double x = std::sqrt(2.0) * erfinv(r);
 193
 194                 table.at(halfSize-1-i) = -x;
 195                 table.at(halfSize+i)   =  x;
 196             }
 197             // We want to fill in the extremal table entries with
 198             // values that make the total variance equal to 1, so
 199             // measure the variance by summing the squares of the
 200             // other values of the distribution, starting from the
 201             // smallest values.
 202             double sumOfSquares = 0;
 203             for (std::size_t i = 1; i < halfSize; i++)
 204             {
 205                 double value = table.at(i);
 206                 sumOfSquares += value * value;
 207             }
 208             double missingVariance = 1.0 - 2.0*sumOfSquares/tableSize;
 209             GMX_RELEASE_ASSERT(missingVariance > 0, "Incorrect computation of tabulated normal distribution");
 210             double extremalValue   = std::sqrt(0.5*missingVariance*tableSize);
 211             table.at(0)  = -extremalValue;
 212             table.back() = extremalValue;
 213
 214             return table;
 215         }
 216
 217     public:
 218
 219         /*! \brief Construct new normal distribution with specified mean & stdddev.
 220          *
 221          *  \param mean    Mean value of tabulated normal distribution
 222          *  \param stddev  Standard deviation of tabulated normal distribution
 223          */
 224         explicit TabulatedNormalDistribution(result_type mean = 0.0, result_type stddev = 1.0 )
 225             : param_(param_type(mean, stddev)), savedRandomBits_(0), savedRandomBitsLeft_(0)
 226         {
 227         }
 228
 229         /*! \brief Construct new normal distribution from parameter type.
 230          *
 231          *  \param param Parameter class containing mean and standard deviation.
 232          */
 233         explicit TabulatedNormalDistribution(  const param_type &param )
 234             : param_(param), savedRandomBits_(0), savedRandomBitsLeft_(0)
 235         {
 236         }
 237
 238         /*! \brief Smallest value that can be generated in normal distrubiton.
 239          *
 240          * \note The smallest value is not -infinity with a table, but it
 241          *       depends on the table resolution. With 14 bits, this is roughly
 242          *       four standard deviations below the mean.
 243          */
 244         result_type
 245         min() const
 246         {
 247             return c_table_[0];
 248         }
 249
 250         /*! \brief Largest value that can be generated in normal distribution.
 251          *
 252          * \note The largest value is not infinity with a table, but it
 253          *       depends on the table resolution. With 14 bits, this is roughly
 254          *       four standard deviations above the mean.
 255          */
 256         result_type
 257         max() const
 258         {
 259             return c_table_[c_table_.size()-1];
 260         }
 261
 262         /*! \brief Mean of the present normal distribution */
 263         result_type
 264         mean() const
 265         {
 266             return param_.mean();
 267         }
 268
 269         /*! \brief Standard deviation of the present normal distribution */
 270
 271         result_type
 272         stddev() const
 273         {
 274             return param_.stddev();
 275         }
 276
 277         /*! \brief The parameter class (mean & stddev) of the normal distribution */
 278         param_type
 279         param() const
 280         {
 281             return param_;
 282         }
 283
 284         /*! \brief Clear all internal saved random bits from the random engine */
 285         void
 286         reset()
 287         {
 288             savedRandomBitsLeft_ = 0;
 289         }
 290
 291         /*! \brief Return normal distribution value specified by internal parameters.
 292          *
 293          * \tparam Rng   Random engine type used to provide uniform random bits.
 294          * \param  g     Random engine of class Rng. For normal GROMACS usage
 295          *               you likely want to use ThreeFry2x64.
 296          */
 297         template<class Rng>
 298         result_type
 299         operator()(Rng &g)
 300         {
 301             return (*this)(g, param_);
 302         }
 303
 304         /*! \brief Return normal distribution value specified by given parameters
 305          *
 306          * \tparam Rng   Random engine type used to provide uniform random bits.
 307          * \param  g     Random engine of class Rng. For normal GROMACS usage
 308          *               you likely want to use ThreeFry2x64.
 309          * \param  param Parameters used to specify normal distribution.
 310          */
 311         template<class Rng>
 312         result_type
 313         operator()(Rng &g, const param_type &param)
 314         {
 315             if (savedRandomBitsLeft_ < tableBits)
 316             {
 317                 // We do not know whether the generator g returns 64 or 32 bits,
 318                 // since g is not known when we construct this class.
 319                 // To keep things simple, we always draw one random number,
 320                 // store it in our 64-bit value, and set the number of active bits.
 321                 // For tableBits up to 16 this will be as efficient both with 32
 322                 // and 64 bit random engines when drawing multiple numbers
 323                 // (our default value is
 324                 // c_TabulatedNormalDistributionDefaultBits == 14). It
 325                 // also avoids drawing multiple 32-bit random numbers
 326                 // even if we just call this routine for a single
 327                 // result.
 328                 savedRandomBits_     = static_cast<uint64_t>(g());
 329                 savedRandomBitsLeft_ = std::numeric_limits<typename Rng::result_type>::digits;
 330             }
 331             result_type value        = c_table_[savedRandomBits_ & ( (1ULL << tableBits) - 1 ) ];
 332             savedRandomBits_       >>= tableBits;
 333             savedRandomBitsLeft_    -= tableBits;
 334             return param.mean() + value * param.stddev();
 335         }
 336
 337         /*!\brief Check if two tabulated normal distributions have identical states.
 338          *
 339          * \param  x     Instance to compare with.
 340          */
 341         bool
 342         operator==(const TabulatedNormalDistribution<RealType, tableBits> &x) const
 343         {
 344             return (param_ == x.param_ &&
 345                     savedRandomBits_ == x.savedRandomBits_ &&
 346                     savedRandomBitsLeft_ == x.savedRandomBitsLeft_);
 347         }
 348
 349         /*!\brief Check if two tabulated normal distributions have different states.
 350          *
 351          * \param  x     Instance to compare with.
 352          */
 353         bool
 354         operator!=(const TabulatedNormalDistribution<RealType, tableBits> &x) const
 355         {
 356             return !operator==(x);
 357         }
 358
 359     private:
 360         /*! \brief Parameters of normal distribution (mean and stddev) */
 361         param_type                                                   param_;
 362         /*! \brief Array with tabluated values of normal distribution */
 363         static const std::array<RealType, 1 << tableBits>            c_table_;
 364         /*! \brief Saved output from random engine, shifted tableBits right each time */
 365         uint64_t                                                     savedRandomBits_;
 366         /*! \brief Number of valid bits remaining i savedRandomBits_ */
 367         unsigned int                                                 savedRandomBitsLeft_;
 368
 369         GMX_DISALLOW_COPY_AND_ASSIGN(TabulatedNormalDistribution);
 370 };
 371
 372 // MSVC does not handle extern template class members correctly even in MSVC 2015,
 373 // so in that case we have to instantiate in every object using it. In addition,
 374 // doxygen is convinced this defines a function (which leads to crashes in our python
 375 // scripts), so to avoid confusion we hide it from doxygen too.
 376 #if !defined(_MSC_VER) && !defined(DOXYGEN)
 377 // Declaration of template specialization
 378 template<>
 379 const std::array<real, 1<<c_TabulatedNormalDistributionDefaultBits> TabulatedNormalDistribution<real, c_TabulatedNormalDistributionDefaultBits>::c_table_;
 380
 381 extern template
 382 const std::array<real, 1<<c_TabulatedNormalDistributionDefaultBits> TabulatedNormalDistribution<real, c_TabulatedNormalDistributionDefaultBits>::c_table_;
 383 #endif
 384
 385 // Instantiation for all tables without specialization
 386 template<class RealType, unsigned int tableBits>
 387 const std::array<RealType, 1<<tableBits> TabulatedNormalDistribution<RealType, tableBits>::c_table_ = TabulatedNormalDistribution<RealType, tableBits>::makeTable();
 388
 389 }      // namespace gmx
 390
 391 #endif // GMX_RANDOM_TABULATEDNORMALDISTRIBUTION_H