src/gromacs/applied_forces/awh/biasstate.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements the BiasState class.
  39  *
  40  * \author Viveca Lindahl
  41  * \author Berk Hess <hess@kth.se>
  42  * \ingroup module_awh
  43  */
  44
  45 #include "gmxpre.h"
  46
  47 #include "biasstate.h"
  48
  49 #include <cassert>
  50 #include <cmath>
  51 #include <cstdio>
  52 #include <cstdlib>
  53 #include <cstring>
  54
  55 #include <algorithm>
  56 #include <optional>
  57
  58 #include "gromacs/fileio/gmxfio.h"
  59 #include "gromacs/fileio/xvgr.h"
  60 #include "gromacs/gmxlib/network.h"
  61 #include "gromacs/math/utilities.h"
  62 #include "gromacs/mdrunutility/multisim.h"
  63 #include "gromacs/mdtypes/awh_history.h"
  64 #include "gromacs/mdtypes/awh_params.h"
  65 #include "gromacs/mdtypes/commrec.h"
  66 #include "gromacs/simd/simd.h"
  67 #include "gromacs/simd/simd_math.h"
  68 #include "gromacs/utility/arrayref.h"
  69 #include "gromacs/utility/exceptions.h"
  70 #include "gromacs/utility/gmxassert.h"
  71 #include "gromacs/utility/smalloc.h"
  72 #include "gromacs/utility/stringutil.h"
  73
  74 #include "biasgrid.h"
  75 #include "pointstate.h"
  76
  77 namespace gmx
  78 {
  79
  80 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
  81 {
  82     GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
  83
  84     /* The PMF is just the negative of the log of the sampled PMF histogram.
  85      * Points with zero target weight are ignored, they will mostly contain noise.
  86      */
  87     for (size_t i = 0; i < points_.size(); i++)
  88     {
  89         pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
  90     }
  91 }
  92
  93 namespace
  94 {
  95
  96 /*! \brief
  97  * Sum an array over all simulations on the master rank of each simulation.
  98  *
  99  * \param[in,out] arrayRef      The data to sum.
 100  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 101  */
 102 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
 103 {
 104     gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
 105 }
 106
 107 /*! \brief
 108  * Sum an array over all simulations on the master rank of each simulation.
 109  *
 110  * \param[in,out] arrayRef      The data to sum.
 111  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 112  */
 113 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
 114 {
 115     gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
 116 }
 117
 118 /*! \brief
 119  * Sum an array over all simulations on all ranks of each simulation.
 120  *
 121  * This assumes the data is identical on all ranks within each simulation.
 122  *
 123  * \param[in,out] arrayRef      The data to sum.
 124  * \param[in]     commRecord    Struct for intra-simulation communication.
 125  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 126  */
 127 template<typename T>
 128 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
 129 {
 130     if (MASTER(commRecord))
 131     {
 132         sumOverSimulations(arrayRef, multiSimComm);
 133     }
 134     if (commRecord->nnodes > 1)
 135     {
 136         gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord->mpi_comm_mygroup);
 137     }
 138 }
 139
 140 /*! \brief
 141  * Sum PMF over multiple simulations, when requested.
 142  *
 143  * \param[in,out] pointState         The state of the points in the bias.
 144  * \param[in]     numSharedUpdate    The number of biases sharing the histogram.
 145  * \param[in]     commRecord         Struct for intra-simulation communication.
 146  * \param[in]     multiSimComm       Struct for multi-simulation communication.
 147  */
 148 void sumPmf(gmx::ArrayRef<PointState> pointState,
 149             int                       numSharedUpdate,
 150             const t_commrec*          commRecord,
 151             const gmx_multisim_t*     multiSimComm)
 152 {
 153     if (numSharedUpdate == 1)
 154     {
 155         return;
 156     }
 157     GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->numSimulations_ == 0,
 158                "numSharedUpdate should be a multiple of multiSimComm->numSimulations_");
 159     GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
 160                "Sharing within a simulation is not implemented (yet)");
 161
 162     std::vector<double> buffer(pointState.size());
 163
 164     /* Need to temporarily exponentiate the log weights to sum over simulations */
 165     for (size_t i = 0; i < buffer.size(); i++)
 166     {
 167         buffer[i] = pointState[i].inTargetRegion() ? std::exp(-pointState[i].logPmfSum()) : 0;
 168     }
 169
 170     sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
 171
 172     /* Take log again to get (non-normalized) PMF */
 173     double normFac = 1.0 / numSharedUpdate;
 174     for (gmx::index i = 0; i < pointState.ssize(); i++)
 175     {
 176         if (pointState[i].inTargetRegion())
 177         {
 178             pointState[i].setLogPmfSum(-std::log(buffer[i] * normFac));
 179         }
 180     }
 181 }
 182
 183 /*! \brief
 184  * Find the minimum free energy value.
 185  *
 186  * \param[in] pointState  The state of the points.
 187  * \returns the minimum free energy value.
 188  */
 189 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
 190 {
 191     double fMin = GMX_FLOAT_MAX;
 192
 193     for (auto const& ps : pointState)
 194     {
 195         if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
 196         {
 197             fMin = ps.freeEnergy();
 198         }
 199     }
 200
 201     return fMin;
 202 }
 203
 204 /*! \brief
 205  * Find and return the log of the probability weight of a point given a coordinate value.
 206  *
 207  * The unnormalized weight is given by
 208  * w(point|value) = exp(bias(point) - U(value,point)),
 209  * where U is a harmonic umbrella potential.
 210  *
 211  * \param[in] dimParams              The bias dimensions parameters
 212  * \param[in] points                 The point state.
 213  * \param[in] grid                   The grid.
 214  * \param[in] pointIndex             Point to evaluate probability weight for.
 215  * \param[in] pointBias              Bias for the point (as a log weight).
 216  * \param[in] value                  Coordinate value.
 217  * \param[in] neighborLambdaEnergies The energy of the system in neighboring lambdas states. Can be
 218  * empty when there are no free energy lambda state dimensions.
 219  * \param[in] gridpointIndex         The index of the current grid point.
 220  * \returns the log of the biased probability weight.
 221  */
 222 double biasedLogWeightFromPoint(const std::vector<DimParams>&  dimParams,
 223                                 const std::vector<PointState>& points,
 224                                 const BiasGrid&                grid,
 225                                 int                            pointIndex,
 226                                 double                         pointBias,
 227                                 const awh_dvec                 value,
 228                                 gmx::ArrayRef<const double>    neighborLambdaEnergies,
 229                                 int                            gridpointIndex)
 230 {
 231     double logWeight = detail::c_largeNegativeExponent;
 232
 233     /* Only points in the target region have non-zero weight */
 234     if (points[pointIndex].inTargetRegion())
 235     {
 236         logWeight = pointBias;
 237
 238         /* Add potential for all parameter dimensions */
 239         for (size_t d = 0; d < dimParams.size(); d++)
 240         {
 241             if (dimParams[d].isFepLambdaDimension())
 242             {
 243                 /* If this is not a sampling step or if this function is called from
 244                  * calcConvolvedBias(), when writing energy subblocks, neighborLambdaEnergies will
 245                  * be empty. No convolution is required along the lambda dimension. */
 246                 if (!neighborLambdaEnergies.empty())
 247                 {
 248                     const int pointLambdaIndex     = grid.point(pointIndex).coordValue[d];
 249                     const int gridpointLambdaIndex = grid.point(gridpointIndex).coordValue[d];
 250                     logWeight -= dimParams[d].fepDimParams().beta
 251                                  * (neighborLambdaEnergies[pointLambdaIndex]
 252                                     - neighborLambdaEnergies[gridpointLambdaIndex]);
 253                 }
 254             }
 255             else
 256             {
 257                 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
 258                 logWeight -= 0.5 * dimParams[d].pullDimParams().betak * dev * dev;
 259             }
 260         }
 261     }
 262     return logWeight;
 263 }
 264
 265 /*! \brief
 266  * Calculates the marginal distribution (marginal probability) for each value along
 267  * a free energy lambda axis.
 268  * The marginal distribution of one coordinate dimension value is the sum of the probability
 269  * distribution of all values (herein all neighbor values) with the same value in the dimension
 270  * of interest.
 271  * \param[in] grid               The bias grid.
 272  * \param[in] neighbors          The points to use for the calculation of the marginal distribution.
 273  * \param[in] probWeightNeighbor Probability weights of the neighbors.
 274  * \returns The calculated marginal distribution in a 1D array with
 275  * as many elements as there are points along the axis of interest.
 276  */
 277 std::vector<double> calculateFELambdaMarginalDistribution(const BiasGrid&          grid,
 278                                                           gmx::ArrayRef<const int> neighbors,
 279                                                           gmx::ArrayRef<const double> probWeightNeighbor)
 280 {
 281     const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
 282     GMX_RELEASE_ASSERT(lambdaAxisIndex,
 283                        "There must be a free energy lambda axis in order to calculate the free "
 284                        "energy lambda marginal distribution.");
 285     const int           numFepLambdaStates = grid.numFepLambdaStates();
 286     std::vector<double> lambdaMarginalDistribution(numFepLambdaStates, 0);
 287
 288     for (size_t i = 0; i < neighbors.size(); i++)
 289     {
 290         const int neighbor    = neighbors[i];
 291         const int lambdaState = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
 292         lambdaMarginalDistribution[lambdaState] += probWeightNeighbor[i];
 293     }
 294     return lambdaMarginalDistribution;
 295 }
 296
 297 } // namespace
 298
 299 void BiasState::calcConvolvedPmf(const std::vector<DimParams>& dimParams,
 300                                  const BiasGrid&               grid,
 301                                  std::vector<float>*           convolvedPmf) const
 302 {
 303     size_t numPoints = grid.numPoints();
 304
 305     convolvedPmf->resize(numPoints);
 306
 307     /* Get the PMF to convolve. */
 308     std::vector<float> pmf(numPoints);
 309     getPmf(pmf);
 310
 311     for (size_t m = 0; m < numPoints; m++)
 312     {
 313         double           freeEnergyWeights = 0;
 314         const GridPoint& point             = grid.point(m);
 315         for (auto& neighbor : point.neighbor)
 316         {
 317             /* Do not convolve the bias along a lambda axis - only use the pmf from the current point */
 318             if (!pointsHaveDifferentLambda(grid, m, neighbor))
 319             {
 320                 /* The negative PMF is a positive bias. */
 321                 double biasNeighbor = -pmf[neighbor];
 322
 323                 /* Add the convolved PMF weights for the neighbors of this point.
 324                 Note that this function only adds point within the target > 0 region.
 325                 Sum weights, take the logarithm last to get the free energy. */
 326                 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
 327                                                             biasNeighbor, point.coordValue, {}, m);
 328                 freeEnergyWeights += std::exp(logWeight);
 329             }
 330         }
 331
 332         GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
 333                            "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
 334         (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
 335     }
 336 }
 337
 338 namespace
 339 {
 340
 341 /*! \brief
 342  * Updates the target distribution for all points.
 343  *
 344  * The target distribution is always updated for all points
 345  * at the same time.
 346  *
 347  * \param[in,out] pointState  The state of all points.
 348  * \param[in]     params      The bias parameters.
 349  */
 350 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState, const BiasParams& params)
 351 {
 352     double freeEnergyCutoff = 0;
 353     if (params.eTarget == eawhtargetCUTOFF)
 354     {
 355         freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
 356     }
 357
 358     double sumTarget = 0;
 359     for (PointState& ps : pointState)
 360     {
 361         sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
 362     }
 363     GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
 364
 365     /* Normalize to 1 */
 366     double invSum = 1.0 / sumTarget;
 367     for (PointState& ps : pointState)
 368     {
 369         ps.scaleTarget(invSum);
 370     }
 371 }
 372
 373 /*! \brief
 374  * Puts together a string describing a grid point.
 375  *
 376  * \param[in] grid         The grid.
 377  * \param[in] point        BiasGrid point index.
 378  * \returns a string for the point.
 379  */
 380 std::string gridPointValueString(const BiasGrid& grid, int point)
 381 {
 382     std::string pointString;
 383
 384     pointString += "(";
 385
 386     for (int d = 0; d < grid.numDimensions(); d++)
 387     {
 388         pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
 389         if (d < grid.numDimensions() - 1)
 390         {
 391             pointString += ",";
 392         }
 393         else
 394         {
 395             pointString += ")";
 396         }
 397     }
 398
 399     return pointString;
 400 }
 401
 402 } // namespace
 403
 404 int BiasState::warnForHistogramAnomalies(const BiasGrid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
 405 {
 406     GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
 407     const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
 408
 409     /* Sum up the histograms and get their normalization */
 410     double sumVisits  = 0;
 411     double sumWeights = 0;
 412     for (auto& pointState : points_)
 413     {
 414         if (pointState.inTargetRegion())
 415         {
 416             sumVisits += pointState.numVisitsTot();
 417             sumWeights += pointState.weightSumTot();
 418         }
 419     }
 420     GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
 421     GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
 422     double invNormVisits = 1.0 / sumVisits;
 423     double invNormWeight = 1.0 / sumWeights;
 424
 425     /* Check all points for warnings */
 426     int    numWarnings = 0;
 427     size_t numPoints   = grid.numPoints();
 428     for (size_t m = 0; m < numPoints; m++)
 429     {
 430         /* Skip points close to boundary or non-target region */
 431         const GridPoint& gridPoint = grid.point(m);
 432         bool             skipPoint = false;
 433         for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
 434         {
 435             int neighbor = gridPoint.neighbor[n];
 436             skipPoint    = !points_[neighbor].inTargetRegion();
 437             for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
 438             {
 439                 const GridPoint& neighborPoint = grid.point(neighbor);
 440                 skipPoint                      = neighborPoint.index[d] == 0
 441                             || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
 442             }
 443         }
 444
 445         /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
 446         const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
 447         const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
 448         if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
 449         {
 450             std::string pointValueString = gridPointValueString(grid, m);
 451             std::string warningMessage   = gmx::formatString(
 452                     "\nawh%d warning: "
 453                     "at t = %g ps the obtained coordinate distribution at coordinate value %s "
 454                     "is less than a fraction %g of the reference distribution at that point. "
 455                     "If you are not certain about your settings you might want to increase your "
 456                     "pull force constant or "
 457                     "modify your sampling region.\n",
 458                     biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
 459             gmx::TextLineWrapper wrapper;
 460             wrapper.settings().setLineLength(c_linewidth);
 461             fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
 462
 463             numWarnings++;
 464         }
 465         if (numWarnings >= maxNumWarnings)
 466         {
 467             break;
 468         }
 469     }
 470
 471     return numWarnings;
 472 }
 473
 474 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams>& dimParams,
 475                                                 const BiasGrid&               grid,
 476                                                 int                           point,
 477                                                 ArrayRef<const double>        neighborLambdaDhdl,
 478                                                 gmx::ArrayRef<double>         force) const
 479 {
 480     double potential = 0;
 481     for (size_t d = 0; d < dimParams.size(); d++)
 482     {
 483         if (dimParams[d].isFepLambdaDimension())
 484         {
 485             if (!neighborLambdaDhdl.empty())
 486             {
 487                 const int coordpointLambdaIndex = grid.point(point).coordValue[d];
 488                 force[d]                        = neighborLambdaDhdl[coordpointLambdaIndex];
 489                 /* The potential should not be affected by the lambda dimension. */
 490             }
 491         }
 492         else
 493         {
 494             double deviation =
 495                     getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
 496             double k = dimParams[d].pullDimParams().k;
 497
 498             /* Force from harmonic potential 0.5*k*dev^2 */
 499             force[d] = -k * deviation;
 500             potential += 0.5 * k * deviation * deviation;
 501         }
 502     }
 503
 504     return potential;
 505 }
 506
 507 void BiasState::calcConvolvedForce(const std::vector<DimParams>& dimParams,
 508                                    const BiasGrid&               grid,
 509                                    gmx::ArrayRef<const double>   probWeightNeighbor,
 510                                    ArrayRef<const double>        neighborLambdaDhdl,
 511                                    gmx::ArrayRef<double>         forceWorkBuffer,
 512                                    gmx::ArrayRef<double>         force) const
 513 {
 514     for (size_t d = 0; d < dimParams.size(); d++)
 515     {
 516         force[d] = 0;
 517     }
 518
 519     /* Only neighboring points have non-negligible contribution. */
 520     const std::vector<int>& neighbor          = grid.point(coordState_.gridpointIndex()).neighbor;
 521     gmx::ArrayRef<double>   forceFromNeighbor = forceWorkBuffer;
 522     for (size_t n = 0; n < neighbor.size(); n++)
 523     {
 524         double weightNeighbor = probWeightNeighbor[n];
 525         int    indexNeighbor  = neighbor[n];
 526
 527         /* Get the umbrella force from this point. The returned potential is ignored here. */
 528         calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, neighborLambdaDhdl, forceFromNeighbor);
 529
 530         /* Add the weighted umbrella force to the convolved force. */
 531         for (size_t d = 0; d < dimParams.size(); d++)
 532         {
 533             force[d] += forceFromNeighbor[d] * weightNeighbor;
 534         }
 535     }
 536 }
 537
 538 double BiasState::moveUmbrella(const std::vector<DimParams>& dimParams,
 539                                const BiasGrid&               grid,
 540                                gmx::ArrayRef<const double>   probWeightNeighbor,
 541                                ArrayRef<const double>        neighborLambdaDhdl,
 542                                gmx::ArrayRef<double>         biasForce,
 543                                int64_t                       step,
 544                                int64_t                       seed,
 545                                int                           indexSeed,
 546                                bool                          onlySampleUmbrellaGridpoint)
 547 {
 548     /* Generate and set a new coordinate reference value */
 549     coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor,
 550                                         step, seed, indexSeed);
 551
 552     if (onlySampleUmbrellaGridpoint)
 553     {
 554         return 0;
 555     }
 556
 557     std::vector<double> newForce(dimParams.size());
 558     double              newPotential = calcUmbrellaForceAndPotential(
 559             dimParams, grid, coordState_.umbrellaGridpoint(), neighborLambdaDhdl, newForce);
 560
 561     /*  A modification of the reference value at time t will lead to a different
 562         force over t-dt/2 to t and over t to t+dt/2. For high switching rates
 563         this means the force and velocity will change signs roughly as often.
 564         To avoid any issues we take the average of the previous and new force
 565         at steps when the reference value has been moved. E.g. if the ref. value
 566         is set every step to (coord dvalue +/- delta) would give zero force.
 567      */
 568     for (gmx::index d = 0; d < biasForce.ssize(); d++)
 569     {
 570         /* Average of the current and new force */
 571         biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
 572     }
 573
 574     return newPotential;
 575 }
 576
 577 namespace
 578 {
 579
 580 /*! \brief
 581  * Sets the histogram rescaling factors needed to control the histogram size.
 582  *
 583  * For sake of robustness, the reference weight histogram can grow at a rate
 584  * different from the actual sampling rate. Typically this happens for a limited
 585  * initial time, alternatively growth is scaled down by a constant factor for all
 586  * times. Since the size of the reference histogram sets the size of the free
 587  * energy update this should be reflected also in the PMF. Thus the PMF histogram
 588  * needs to be rescaled too.
 589  *
 590  * This function should only be called by the bias update function or wrapped by a function that
 591  * knows what scale factors should be applied when, e.g,
 592  * getSkippedUpdateHistogramScaleFactors().
 593  *
 594  * \param[in]  params             The bias parameters.
 595  * \param[in]  newHistogramSize   New reference weight histogram size.
 596  * \param[in]  oldHistogramSize   Previous reference weight histogram size (before adding new samples).
 597  * \param[out] weightHistScaling  Scaling factor for the reference weight histogram.
 598  * \param[out] logPmfSumScaling   Log of the scaling factor for the PMF histogram.
 599  */
 600 void setHistogramUpdateScaleFactors(const BiasParams& params,
 601                                     double            newHistogramSize,
 602                                     double            oldHistogramSize,
 603                                     double*           weightHistScaling,
 604                                     double*           logPmfSumScaling)
 605 {
 606
 607     /* The two scaling factors below are slightly different (ignoring the log factor) because the
 608        reference and the PMF histogram apply weight scaling differently. The weight histogram
 609        applies is  locally, i.e. each sample is scaled down meaning all samples get equal weight.
 610        It is done this way because that is what target type local Boltzmann (for which
 611        target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
 612        by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
 613        1) empirically this is necessary for converging the PMF; 2) since the extraction of
 614        the PMF is theoretically only valid for a constant bias, new samples should get more
 615        weight than old ones for which the bias is fluctuating more. */
 616     *weightHistScaling =
 617             newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
 618     *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
 619 }
 620
 621 } // namespace
 622
 623 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
 624                                                       double*           weightHistScaling,
 625                                                       double*           logPmfSumScaling) const
 626 {
 627     GMX_ASSERT(params.skipUpdates(),
 628                "Calling function for skipped updates when skipping updates is not allowed");
 629
 630     if (inInitialStage())
 631     {
 632         /* In between global updates the reference histogram size is kept constant so we trivially
 633            know what the histogram size was at the time of the skipped update. */
 634         double histogramSize = histogramSize_.histogramSize();
 635         setHistogramUpdateScaleFactors(params, histogramSize, histogramSize, weightHistScaling,
 636                                        logPmfSumScaling);
 637     }
 638     else
 639     {
 640         /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
 641         *weightHistScaling = 1;
 642         *logPmfSumScaling  = 0;
 643     }
 644 }
 645
 646 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
 647 {
 648     double weightHistScaling;
 649     double logPmfsumScaling;
 650
 651     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
 652
 653     for (auto& pointState : points_)
 654     {
 655         bool didUpdate = pointState.performPreviouslySkippedUpdates(
 656                 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
 657
 658         /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
 659         if (didUpdate)
 660         {
 661             pointState.updateBias();
 662         }
 663     }
 664 }
 665
 666 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const BiasGrid& grid)
 667 {
 668     double weightHistScaling;
 669     double logPmfsumScaling;
 670
 671     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
 672
 673     /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
 674     const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
 675     for (auto& neighbor : neighbors)
 676     {
 677         bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
 678                 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
 679
 680         if (didUpdate)
 681         {
 682             points_[neighbor].updateBias();
 683         }
 684     }
 685 }
 686
 687 namespace
 688 {
 689
 690 /*! \brief
 691  * Merge update lists from multiple sharing simulations.
 692  *
 693  * \param[in,out] updateList    Update list for this simulation (assumed >= npoints long).
 694  * \param[in]     numPoints     Total number of points.
 695  * \param[in]     commRecord    Struct for intra-simulation communication.
 696  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 697  */
 698 void mergeSharedUpdateLists(std::vector<int>*     updateList,
 699                             int                   numPoints,
 700                             const t_commrec*      commRecord,
 701                             const gmx_multisim_t* multiSimComm)
 702 {
 703     std::vector<int> numUpdatesOfPoint;
 704
 705     /* Flag the update points of this sim.
 706        TODO: we can probably avoid allocating this array and just use the input array. */
 707     numUpdatesOfPoint.resize(numPoints, 0);
 708     for (auto& pointIndex : *updateList)
 709     {
 710         numUpdatesOfPoint[pointIndex] = 1;
 711     }
 712
 713     /* Sum over the sims to get all the flagged points */
 714     sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
 715
 716     /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
 717     updateList->clear();
 718     for (int m = 0; m < numPoints; m++)
 719     {
 720         if (numUpdatesOfPoint[m] > 0)
 721         {
 722             updateList->push_back(m);
 723         }
 724     }
 725 }
 726
 727 /*! \brief
 728  * Generate an update list of points sampled since the last update.
 729  *
 730  * \param[in] grid              The AWH bias.
 731  * \param[in] points            The point state.
 732  * \param[in] originUpdatelist  The origin of the rectangular region that has been sampled since
 733  * last update. \param[in] endUpdatelist     The end of the rectangular that has been sampled since
 734  * last update. \param[in,out] updateList    Local update list to set (assumed >= npoints long).
 735  */
 736 void makeLocalUpdateList(const BiasGrid&                grid,
 737                          const std::vector<PointState>& points,
 738                          const awh_ivec                 originUpdatelist,
 739                          const awh_ivec                 endUpdatelist,
 740                          std::vector<int>*              updateList)
 741 {
 742     awh_ivec origin;
 743     awh_ivec numPoints;
 744
 745     /* Define the update search grid */
 746     for (int d = 0; d < grid.numDimensions(); d++)
 747     {
 748         origin[d]    = originUpdatelist[d];
 749         numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
 750
 751         /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
 752            This helps for calculating the distance/number of points but should be removed and fixed when the way of
 753            updating origin/end updatelist is changed (see sampleProbabilityWeights). */
 754         numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
 755     }
 756
 757     /* Make the update list */
 758     updateList->clear();
 759     int  pointIndex  = -1;
 760     bool pointExists = true;
 761     while (pointExists)
 762     {
 763         pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
 764
 765         if (pointExists && points[pointIndex].inTargetRegion())
 766         {
 767             updateList->push_back(pointIndex);
 768         }
 769     }
 770 }
 771
 772 } // namespace
 773
 774 void BiasState::resetLocalUpdateRange(const BiasGrid& grid)
 775 {
 776     const int gridpointIndex = coordState_.gridpointIndex();
 777     for (int d = 0; d < grid.numDimensions(); d++)
 778     {
 779         /* This gives the  minimum range consisting only of the current closest point. */
 780         originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
 781         endUpdatelist_[d]    = grid.point(gridpointIndex).index[d];
 782     }
 783 }
 784
 785 namespace
 786 {
 787
 788 /*! \brief
 789  * Add partial histograms (accumulating between updates) to accumulating histograms.
 790  *
 791  * \param[in,out] pointState         The state of the points in the bias.
 792  * \param[in,out] weightSumCovering  The weights for checking covering.
 793  * \param[in]     numSharedUpdate    The number of biases sharing the histrogram.
 794  * \param[in]     commRecord         Struct for intra-simulation communication.
 795  * \param[in]     multiSimComm       Struct for multi-simulation communication.
 796  * \param[in]     localUpdateList    List of points with data.
 797  */
 798 void sumHistograms(gmx::ArrayRef<PointState> pointState,
 799                    gmx::ArrayRef<double>     weightSumCovering,
 800                    int                       numSharedUpdate,
 801                    const t_commrec*          commRecord,
 802                    const gmx_multisim_t*     multiSimComm,
 803                    const std::vector<int>&   localUpdateList)
 804 {
 805     /* The covering checking histograms are added before summing over simulations, so that the
 806        weights from different simulations are kept distinguishable. */
 807     for (int globalIndex : localUpdateList)
 808     {
 809         weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
 810     }
 811
 812     /* Sum histograms over multiple simulations if needed. */
 813     if (numSharedUpdate > 1)
 814     {
 815         GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
 816                    "Sharing within a simulation is not implemented (yet)");
 817
 818         /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
 819         std::vector<double> weightSum;
 820         std::vector<double> coordVisits;
 821
 822         weightSum.resize(localUpdateList.size());
 823         coordVisits.resize(localUpdateList.size());
 824
 825         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
 826         {
 827             const PointState& ps = pointState[localUpdateList[localIndex]];
 828
 829             weightSum[localIndex]   = ps.weightSumIteration();
 830             coordVisits[localIndex] = ps.numVisitsIteration();
 831         }
 832
 833         sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
 834         sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
 835
 836         /* Transfer back the result */
 837         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
 838         {
 839             PointState& ps = pointState[localUpdateList[localIndex]];
 840
 841             ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
 842         }
 843     }
 844
 845     /* Now add the partial counts and weights to the accumulating histograms.
 846        Note: we still need to use the weights for the update so we wait
 847        with resetting them until the end of the update. */
 848     for (int globalIndex : localUpdateList)
 849     {
 850         pointState[globalIndex].addPartialWeightAndCount();
 851     }
 852 }
 853
 854 /*! \brief
 855  * Label points along an axis as covered or not.
 856  *
 857  * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
 858  *
 859  * \param[in]     visited        Visited? For each point.
 860  * \param[in]     checkCovering  Check for covering? For each point.
 861  * \param[in]     numPoints      The number of grid points along this dimension.
 862  * \param[in]     period         Period in number of points.
 863  * \param[in]     coverRadius    Cover radius, in points, needed for defining a point as covered.
 864  * \param[in,out] covered        In this array elements are 1 for covered points and 0 for
 865  * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
 866  */
 867 void labelCoveredPoints(const std::vector<bool>& visited,
 868                         const std::vector<bool>& checkCovering,
 869                         int                      numPoints,
 870                         int                      period,
 871                         int                      coverRadius,
 872                         gmx::ArrayRef<int>       covered)
 873 {
 874     GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
 875
 876     bool haveFirstNotVisited = false;
 877     int  firstNotVisited     = -1;
 878     int  notVisitedLow       = -1;
 879     int  notVisitedHigh      = -1;
 880
 881     for (int n = 0; n < numPoints; n++)
 882     {
 883         if (checkCovering[n] && !visited[n])
 884         {
 885             if (!haveFirstNotVisited)
 886             {
 887                 notVisitedLow       = n;
 888                 firstNotVisited     = n;
 889                 haveFirstNotVisited = true;
 890             }
 891             else
 892             {
 893                 notVisitedHigh = n;
 894
 895                 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
 896                    by unvisited points. The unvisted end points affect the coveredness of the
 897                    visited with a reach equal to the cover radius. */
 898                 int notCoveredLow  = notVisitedLow + coverRadius;
 899                 int notCoveredHigh = notVisitedHigh - coverRadius;
 900                 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
 901                 {
 902                     covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
 903                 }
 904
 905                 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
 906                    the notVisitedLow of the next. */
 907                 notVisitedLow = notVisitedHigh;
 908             }
 909         }
 910     }
 911
 912     /* Have labelled all the internal points. Now take care of the boundary regions. */
 913     if (!haveFirstNotVisited)
 914     {
 915         /* No non-visited points <=> all points visited => all points covered. */
 916
 917         for (int n = 0; n < numPoints; n++)
 918         {
 919             covered[n] = 1;
 920         }
 921     }
 922     else
 923     {
 924         int lastNotVisited = notVisitedLow;
 925
 926         /* For periodic boundaries, non-visited points can influence points
 927            on the other side of the boundary so we need to wrap around. */
 928
 929         /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
 930            For non-periodic boundaries there is no lower end point so a dummy value is used. */
 931         int notVisitedHigh = firstNotVisited;
 932         int notVisitedLow  = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
 933
 934         int notCoveredLow  = notVisitedLow + coverRadius;
 935         int notCoveredHigh = notVisitedHigh - coverRadius;
 936
 937         for (int i = 0; i <= notVisitedHigh; i++)
 938         {
 939             /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
 940             covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
 941         }
 942
 943         /* Upper end. Same as for lower end but in the other direction. */
 944         notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
 945         notVisitedLow  = lastNotVisited;
 946
 947         notCoveredLow  = notVisitedLow + coverRadius;
 948         notCoveredHigh = notVisitedHigh - coverRadius;
 949
 950         for (int i = notVisitedLow; i <= numPoints - 1; i++)
 951         {
 952             /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
 953             covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
 954         }
 955     }
 956 }
 957
 958 } // namespace
 959
 960 bool BiasState::isSamplingRegionCovered(const BiasParams&             params,
 961                                         const std::vector<DimParams>& dimParams,
 962                                         const BiasGrid&               grid,
 963                                         const t_commrec*              commRecord,
 964                                         const gmx_multisim_t*         multiSimComm) const
 965 {
 966     /* Allocate and initialize arrays: one for checking visits along each dimension,
 967        one for keeping track of which points to check and one for the covered points.
 968        Possibly these could be kept as AWH variables to avoid these allocations. */
 969     struct CheckDim
 970     {
 971         std::vector<bool> visited;
 972         std::vector<bool> checkCovering;
 973         // We use int for the covering array since we might use gmx_sumi_sim.
 974         std::vector<int> covered;
 975     };
 976
 977     std::vector<CheckDim> checkDim;
 978     checkDim.resize(grid.numDimensions());
 979
 980     for (int d = 0; d < grid.numDimensions(); d++)
 981     {
 982         const size_t numPoints = grid.axis(d).numPoints();
 983         checkDim[d].visited.resize(numPoints, false);
 984         checkDim[d].checkCovering.resize(numPoints, false);
 985         checkDim[d].covered.resize(numPoints, 0);
 986     }
 987
 988     /* Set visited points along each dimension and which points should be checked for covering.
 989        Specifically, points above the free energy cutoff (if there is one) or points outside
 990        of the target region are ignored. */
 991
 992     /* Set the free energy cutoff */
 993     double maxFreeEnergy = GMX_FLOAT_MAX;
 994
 995     if (params.eTarget == eawhtargetCUTOFF)
 996     {
 997         maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
 998     }
 999
1000     /* Set the threshold weight for a point to be considered visited. */
1001     double weightThreshold = 1;
1002     for (int d = 0; d < grid.numDimensions(); d++)
1003     {
1004         if (grid.axis(d).isFepLambdaAxis())
1005         {
1006             /* Do not modify the weight threshold based on a FEP lambda axis. The spread
1007              * of the sampling weights is not depending on a Gaussian distribution (like
1008              * below). */
1009             weightThreshold *= 1.0;
1010         }
1011         else
1012         {
1013             /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
1014              * approximately (given that the spacing can be modified if the dimension is periodic)
1015              * proportional to sqrt(1/(2*pi)). */
1016             weightThreshold *= grid.axis(d).spacing()
1017                                * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
1018         }
1019     }
1020
1021     /* Project the sampling weights onto each dimension */
1022     for (size_t m = 0; m < grid.numPoints(); m++)
1023     {
1024         const PointState& pointState = points_[m];
1025
1026         for (int d = 0; d < grid.numDimensions(); d++)
1027         {
1028             int n = grid.point(m).index[d];
1029
1030             /* Is visited if it was already visited or if there is enough weight at the current point */
1031             checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
1032
1033             /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
1034             checkDim[d].checkCovering[n] =
1035                     checkDim[d].checkCovering[n]
1036                     || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
1037         }
1038     }
1039
1040     /* Label each point along each dimension as covered or not. */
1041     for (int d = 0; d < grid.numDimensions(); d++)
1042     {
1043         labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(),
1044                            grid.axis(d).numPointsInPeriod(), params.coverRadius()[d], checkDim[d].covered);
1045     }
1046
1047     /* Now check for global covering. Each dimension needs to be covered separately.
1048        A dimension is covered if each point is covered.  Multiple simulations collectively
1049        cover the points, i.e. a point is covered if any of the simulations covered it.
1050        However, visited points are not shared, i.e. if a point is covered or not is
1051        determined by the visits of a single simulation. In general the covering criterion is
1052        all points covered => all points are surrounded by visited points up to a radius = coverRadius.
1053        For 1 simulation, all points covered <=> all points visited. For multiple simulations
1054        however, all points visited collectively !=> all points covered, except for coverRadius = 0.
1055        In the limit of large coverRadius, all points covered => all points visited by at least one
1056        simulation (since no point will be covered until all points have been visited by a
1057        single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
1058        needs with surrounding points before sharing covering information with other simulations. */
1059
1060     /* Communicate the covered points between sharing simulations if needed. */
1061     if (params.numSharedUpdate > 1)
1062     {
1063         /* For multiple dimensions this may not be the best way to do it. */
1064         for (int d = 0; d < grid.numDimensions(); d++)
1065         {
1066             sumOverSimulations(
1067                     gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
1068                     commRecord, multiSimComm);
1069         }
1070     }
1071
1072     /* Now check if for each dimension all points are covered. Break if not true. */
1073     bool allPointsCovered = true;
1074     for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
1075     {
1076         for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
1077         {
1078             allPointsCovered = (checkDim[d].covered[n] != 0);
1079         }
1080     }
1081
1082     return allPointsCovered;
1083 }
1084
1085 /*! \brief
1086  * Normalizes the free energy and PMF sum.
1087  *
1088  * \param[in] pointState  The state of the points.
1089  */
1090 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1091 {
1092     double minF = freeEnergyMinimumValue(*pointState);
1093
1094     for (PointState& ps : *pointState)
1095     {
1096         ps.normalizeFreeEnergyAndPmfSum(minF);
1097     }
1098 }
1099
1100 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams>& dimParams,
1101                                                          const BiasGrid&               grid,
1102                                                          const BiasParams&             params,
1103                                                          const t_commrec*              commRecord,
1104                                                          const gmx_multisim_t*         multiSimComm,
1105                                                          double                        t,
1106                                                          int64_t                       step,
1107                                                          FILE*                         fplog,
1108                                                          std::vector<int>*             updateList)
1109 {
1110     /* Note hat updateList is only used in this scope and is always
1111      * re-initialized. We do not use a local vector, because that would
1112      * cause reallocation every time this funtion is called and the vector
1113      * can be the size of the whole grid.
1114      */
1115
1116     /* Make a list of all local points, i.e. those that could have been touched since
1117        the last update. These are the points needed for summing histograms below
1118        (non-local points only add zeros). For local updates, this will also be the
1119        final update list. */
1120     makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1121     if (params.numSharedUpdate > 1)
1122     {
1123         mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1124     }
1125
1126     /* Reset the range for the next update */
1127     resetLocalUpdateRange(grid);
1128
1129     /* Add samples to histograms for all local points and sync simulations if needed */
1130     sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1131
1132     sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1133
1134     /* Renormalize the free energy if values are too large. */
1135     bool needToNormalizeFreeEnergy = false;
1136     for (int& globalIndex : *updateList)
1137     {
1138         /* We want to keep the absolute value of the free energies to be less
1139            c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1140            ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1141            in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1142            this requirement would be broken. */
1143         if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1144         {
1145             needToNormalizeFreeEnergy = true;
1146             break;
1147         }
1148     }
1149
1150     /* Update target distribution? */
1151     bool needToUpdateTargetDistribution =
1152             (params.eTarget != eawhtargetCONSTANT && params.isUpdateTargetStep(step));
1153
1154     /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1155     bool detectedCovering = false;
1156     if (inInitialStage())
1157     {
1158         detectedCovering =
1159                 (params.isCheckCoveringStep(step)
1160                  && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1161     }
1162
1163     /* The weighthistogram size after this update. */
1164     double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_,
1165                                                               weightSumCovering_, fplog);
1166
1167     /* Make the update list. Usually we try to only update local points,
1168      * but if the update has non-trivial or non-deterministic effects
1169      * on non-local points a global update is needed. This is the case when:
1170      * 1) a covering occurred in the initial stage, leading to non-trivial
1171      *    histogram rescaling factors; or
1172      * 2) the target distribution will be updated, since we don't make any
1173      *    assumption on its form; or
1174      * 3) the AWH parameters are such that we never attempt to skip non-local
1175      *    updates; or
1176      * 4) the free energy values have grown so large that a renormalization
1177      *    is needed.
1178      */
1179     if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1180     {
1181         /* Global update, just add all points. */
1182         updateList->clear();
1183         for (size_t m = 0; m < points_.size(); m++)
1184         {
1185             if (points_[m].inTargetRegion())
1186             {
1187                 updateList->push_back(m);
1188             }
1189         }
1190     }
1191
1192     /* Set histogram scale factors. */
1193     double weightHistScalingSkipped = 0;
1194     double logPmfsumScalingSkipped  = 0;
1195     if (params.skipUpdates())
1196     {
1197         getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1198     }
1199     double weightHistScalingNew;
1200     double logPmfsumScalingNew;
1201     setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1202                                    &weightHistScalingNew, &logPmfsumScalingNew);
1203
1204     /* Update free energy and reference weight histogram for points in the update list. */
1205     for (int pointIndex : *updateList)
1206     {
1207         PointState* pointStateToUpdate = &points_[pointIndex];
1208
1209         /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1210         if (params.skipUpdates())
1211         {
1212             pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(),
1213                                                                 weightHistScalingSkipped,
1214                                                                 logPmfsumScalingSkipped);
1215         }
1216
1217         /* Now do an update with new sampling data. */
1218         pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(),
1219                                                   weightHistScalingNew, logPmfsumScalingNew);
1220     }
1221
1222     /* Only update the histogram size after we are done with the local point updates */
1223     histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1224
1225     if (needToNormalizeFreeEnergy)
1226     {
1227         normalizeFreeEnergyAndPmfSum(&points_);
1228     }
1229
1230     if (needToUpdateTargetDistribution)
1231     {
1232         /* The target distribution is always updated for all points at once. */
1233         updateTargetDistribution(points_, params);
1234     }
1235
1236     /* Update the bias. The bias is updated separately and last since it simply a function of
1237        the free energy and the target distribution and we want to avoid doing extra work. */
1238     for (int pointIndex : *updateList)
1239     {
1240         points_[pointIndex].updateBias();
1241     }
1242
1243     /* Increase the update counter. */
1244     histogramSize_.incrementNumUpdates();
1245 }
1246
1247 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>& dimParams,
1248                                                            const BiasGrid&               grid,
1249                                                            gmx::ArrayRef<const double> neighborLambdaEnergies,
1250                                                            std::vector<double, AlignedAllocator<double>>* weight) const
1251 {
1252     /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1253     const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1254
1255 #if GMX_SIMD_HAVE_DOUBLE
1256     typedef SimdDouble PackType;
1257     constexpr int      packSize = GMX_SIMD_DOUBLE_WIDTH;
1258 #else
1259     typedef double PackType;
1260     constexpr int  packSize = 1;
1261 #endif
1262     /* Round the size of the weight array up to packSize */
1263     const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1264     weight->resize(weightSize);
1265
1266     double* gmx_restrict weightData = weight->data();
1267     PackType             weightSumPack(0.0);
1268     for (size_t i = 0; i < neighbors.size(); i += packSize)
1269     {
1270         for (size_t n = i; n < i + packSize; n++)
1271         {
1272             if (n < neighbors.size())
1273             {
1274                 const int neighbor = neighbors[n];
1275                 (*weight)[n]       = biasedLogWeightFromPoint(
1276                         dimParams, points_, grid, neighbor, points_[neighbor].bias(),
1277                         coordState_.coordValue(), neighborLambdaEnergies, coordState_.gridpointIndex());
1278             }
1279             else
1280             {
1281                 /* Pad with values that don't affect the result */
1282                 (*weight)[n] = detail::c_largeNegativeExponent;
1283             }
1284         }
1285         PackType weightPack = load<PackType>(weightData + i);
1286         weightPack          = gmx::exp(weightPack);
1287         weightSumPack       = weightSumPack + weightPack;
1288         store(weightData + i, weightPack);
1289     }
1290     /* Sum of probability weights */
1291     double weightSum = reduce(weightSumPack);
1292     GMX_RELEASE_ASSERT(weightSum > 0,
1293                        "zero probability weight when updating AWH probability weights.");
1294
1295     /* Normalize probabilities to sum to 1 */
1296     double invWeightSum = 1 / weightSum;
1297
1298     /* When there is a free energy lambda state axis remove the convolved contributions along that
1299      * axis from the total bias. This must be done after calculating invWeightSum (since weightSum
1300      * will be modified), but before normalizing the weights (below). */
1301     if (grid.hasLambdaAxis())
1302     {
1303         /* If there is only one axis the bias will not be convolved in any dimension. */
1304         if (grid.axis().size() == 1)
1305         {
1306             weightSum = gmx::exp(points_[coordState_.gridpointIndex()].bias());
1307         }
1308         else
1309         {
1310             for (size_t i = 0; i < neighbors.size(); i++)
1311             {
1312                 const int neighbor = neighbors[i];
1313                 if (pointsHaveDifferentLambda(grid, coordState_.gridpointIndex(), neighbor))
1314                 {
1315                     weightSum -= weightData[i];
1316                 }
1317             }
1318         }
1319     }
1320
1321     for (double& w : *weight)
1322     {
1323         w *= invWeightSum;
1324     }
1325
1326     /* Return the convolved bias */
1327     return std::log(weightSum);
1328 }
1329
1330 double BiasState::calcConvolvedBias(const std::vector<DimParams>& dimParams,
1331                                     const BiasGrid&               grid,
1332                                     const awh_dvec&               coordValue) const
1333 {
1334     int              point     = grid.nearestIndex(coordValue);
1335     const GridPoint& gridPoint = grid.point(point);
1336
1337     /* Sum the probability weights from the neighborhood of the given point */
1338     double weightSum = 0;
1339     for (int neighbor : gridPoint.neighbor)
1340     {
1341         /* No convolution is required along the lambda dimension. */
1342         if (pointsHaveDifferentLambda(grid, point, neighbor))
1343         {
1344             continue;
1345         }
1346         double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1347                                                     points_[neighbor].bias(), coordValue, {}, point);
1348         weightSum += std::exp(logWeight);
1349     }
1350
1351     /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1352     return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1353 }
1354
1355 void BiasState::sampleProbabilityWeights(const BiasGrid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1356 {
1357     const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1358
1359     /* Save weights for next update */
1360     for (size_t n = 0; n < neighbor.size(); n++)
1361     {
1362         points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1363     }
1364
1365     /* Update the local update range. Two corner points define this rectangular
1366      * domain. We need to choose two new corner points such that the new domain
1367      * contains both the old update range and the current neighborhood.
1368      * In the simplest case when an update is performed every sample,
1369      * the update range would simply equal the current neighborhood.
1370      */
1371     int neighborStart = neighbor[0];
1372     int neighborLast  = neighbor[neighbor.size() - 1];
1373     for (int d = 0; d < grid.numDimensions(); d++)
1374     {
1375         int origin = grid.point(neighborStart).index[d];
1376         int last   = grid.point(neighborLast).index[d];
1377
1378         if (origin > last)
1379         {
1380             /* Unwrap if wrapped around the boundary (only happens for periodic
1381              * boundaries). This has been already for the stored index interval.
1382              */
1383             /* TODO: what we want to do is to find the smallest the update
1384              * interval that contains all points that need to be updated.
1385              * This amounts to combining two intervals, the current
1386              * [origin, end] update interval and the new touched neighborhood
1387              * into a new interval that contains all points from both the old
1388              * intervals.
1389              *
1390              * For periodic boundaries it becomes slightly more complicated
1391              * than for closed boundaries because then it needs not be
1392              * true that origin < end (so one can't simply relate the origin/end
1393              * in the min()/max() below). The strategy here is to choose the
1394              * origin closest to a reference point (index 0) and then unwrap
1395              * the end index if needed and choose the largest end index.
1396              * This ensures that both intervals are in the new interval
1397              * but it's not necessarily the smallest.
1398              * Currently we solve this by going through each possibility
1399              * and checking them.
1400              */
1401             last += grid.axis(d).numPointsInPeriod();
1402         }
1403
1404         originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1405         endUpdatelist_[d]    = std::max(endUpdatelist_[d], last);
1406     }
1407 }
1408
1409 void BiasState::sampleCoordAndPmf(const std::vector<DimParams>& dimParams,
1410                                   const BiasGrid&               grid,
1411                                   gmx::ArrayRef<const double>   probWeightNeighbor,
1412                                   double                        convolvedBias)
1413 {
1414     /* Sampling-based deconvolution extracting the PMF.
1415      * Update the PMF histogram with the current coordinate value.
1416      *
1417      * Because of the finite width of the harmonic potential, the free energy
1418      * defined for each coordinate point does not exactly equal that of the
1419      * actual coordinate, the PMF. However, the PMF can be estimated by applying
1420      * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1421      * reweighted histogram of the coordinate value. Strictly, this relies on
1422      * the unknown normalization constant Z being either constant or known. Here,
1423      * neither is true except in the long simulation time limit. Empirically however,
1424      * it works (mainly because how the PMF histogram is rescaled).
1425      */
1426
1427     const int                gridPointIndex  = coordState_.gridpointIndex();
1428     const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
1429
1430     /* Update the PMF of points along a lambda axis with their bias. */
1431     if (lambdaAxisIndex)
1432     {
1433         const std::vector<int>& neighbors = grid.point(gridPointIndex).neighbor;
1434
1435         std::vector<double> lambdaMarginalDistribution =
1436                 calculateFELambdaMarginalDistribution(grid, neighbors, probWeightNeighbor);
1437
1438         awh_dvec coordValueAlongLambda = { coordState_.coordValue()[0], coordState_.coordValue()[1],
1439                                            coordState_.coordValue()[2], coordState_.coordValue()[3] };
1440         for (size_t i = 0; i < neighbors.size(); i++)
1441         {
1442             const int neighbor = neighbors[i];
1443             double    bias;
1444             if (pointsAlongLambdaAxis(grid, gridPointIndex, neighbor))
1445             {
1446                 const double neighborLambda = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
1447                 if (neighbor == gridPointIndex)
1448                 {
1449                     bias = convolvedBias;
1450                 }
1451                 else
1452                 {
1453                     coordValueAlongLambda[lambdaAxisIndex.value()] = neighborLambda;
1454                     bias = calcConvolvedBias(dimParams, grid, coordValueAlongLambda);
1455                 }
1456
1457                 const double probWeight = lambdaMarginalDistribution[neighborLambda];
1458                 const double weightedBias = bias - std::log(std::max(probWeight, GMX_DOUBLE_MIN)); // avoid log(0)
1459
1460                 if (neighbor == gridPointIndex && grid.covers(coordState_.coordValue()))
1461                 {
1462                     points_[neighbor].samplePmf(weightedBias);
1463                 }
1464                 else
1465                 {
1466                     points_[neighbor].updatePmfUnvisited(weightedBias);
1467                 }
1468             }
1469         }
1470     }
1471     else
1472     {
1473         /* Only save coordinate data that is in range (the given index is always
1474          * in range even if the coordinate value is not).
1475          */
1476         if (grid.covers(coordState_.coordValue()))
1477         {
1478             /* Save PMF sum and keep a histogram of the sampled coordinate values */
1479             points_[gridPointIndex].samplePmf(convolvedBias);
1480         }
1481     }
1482
1483     /* Save probability weights for the update */
1484     sampleProbabilityWeights(grid, probWeightNeighbor);
1485 }
1486
1487 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1488 {
1489     biasHistory->pointState.resize(points_.size());
1490 }
1491
1492 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const BiasGrid& grid) const
1493 {
1494     GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1495                        "The AWH history setup does not match the AWH state.");
1496
1497     AwhBiasStateHistory* stateHistory = &biasHistory->state;
1498     stateHistory->umbrellaGridpoint   = coordState_.umbrellaGridpoint();
1499
1500     for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1501     {
1502         AwhPointStateHistory* psh = &biasHistory->pointState[m];
1503
1504         points_[m].storeState(psh);
1505
1506         psh->weightsum_covering = weightSumCovering_[m];
1507     }
1508
1509     histogramSize_.storeState(stateHistory);
1510
1511     stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1512     stateHistory->end_index_updatelist    = multiDimGridIndexToLinear(grid, endUpdatelist_);
1513 }
1514
1515 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const BiasGrid& grid)
1516 {
1517     const AwhBiasStateHistory& stateHistory = biasHistory.state;
1518
1519     coordState_.restoreFromHistory(stateHistory);
1520
1521     if (biasHistory.pointState.size() != points_.size())
1522     {
1523         GMX_THROW(
1524                 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1525                                   "Likely you provided a checkpoint from a different simulation."));
1526     }
1527     for (size_t m = 0; m < points_.size(); m++)
1528     {
1529         points_[m].setFromHistory(biasHistory.pointState[m]);
1530     }
1531
1532     for (size_t m = 0; m < weightSumCovering_.size(); m++)
1533     {
1534         weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1535     }
1536
1537     histogramSize_.restoreFromHistory(stateHistory);
1538
1539     linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1540     linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1541 }
1542
1543 void BiasState::broadcast(const t_commrec* commRecord)
1544 {
1545     gmx_bcast(sizeof(coordState_), &coordState_, commRecord->mpi_comm_mygroup);
1546
1547     gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord->mpi_comm_mygroup);
1548
1549     gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(),
1550               commRecord->mpi_comm_mygroup);
1551
1552     gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord->mpi_comm_mygroup);
1553 }
1554
1555 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>& dimParams, const BiasGrid& grid)
1556 {
1557     std::vector<float> convolvedPmf;
1558
1559     calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1560
1561     for (size_t m = 0; m < points_.size(); m++)
1562     {
1563         points_[m].setFreeEnergy(convolvedPmf[m]);
1564     }
1565 }
1566
1567 /*! \brief
1568  * Count trailing data rows containing only zeros.
1569  *
1570  * \param[in] data        2D data array.
1571  * \param[in] numRows     Number of rows in array.
1572  * \param[in] numColumns  Number of cols in array.
1573  * \returns the number of trailing zero rows.
1574  */
1575 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1576 {
1577     int numZeroRows = 0;
1578     for (int m = numRows - 1; m >= 0; m--)
1579     {
1580         bool rowIsZero = true;
1581         for (int d = 0; d < numColumns; d++)
1582         {
1583             if (data[d][m] != 0)
1584             {
1585                 rowIsZero = false;
1586                 break;
1587             }
1588         }
1589
1590         if (!rowIsZero)
1591         {
1592             /* At a row with non-zero data */
1593             break;
1594         }
1595         else
1596         {
1597             /* Still at a zero data row, keep checking rows higher up. */
1598             numZeroRows++;
1599         }
1600     }
1601
1602     return numZeroRows;
1603 }
1604
1605 /*! \brief
1606  * Initializes the PMF and target with data read from an input table.
1607  *
1608  * \param[in]     dimParams   The dimension parameters.
1609  * \param[in]     grid        The grid.
1610  * \param[in]     filename    The filename to read PMF and target from.
1611  * \param[in]     numBias     Number of biases.
1612  * \param[in]     biasIndex   The index of the bias.
1613  * \param[in,out] pointState  The state of the points in this bias.
1614  */
1615 static void readUserPmfAndTargetDistribution(const std::vector<DimParams>& dimParams,
1616                                              const BiasGrid&               grid,
1617                                              const std::string&            filename,
1618                                              int                           numBias,
1619                                              int                           biasIndex,
1620                                              std::vector<PointState>*      pointState)
1621 {
1622     /* Read the PMF and target distribution.
1623        From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1624        base on the force constant. The free energy and target together determine the bias.
1625      */
1626     std::string filenameModified(filename);
1627     if (numBias > 1)
1628     {
1629         size_t n = filenameModified.rfind('.');
1630         GMX_RELEASE_ASSERT(n != std::string::npos,
1631                            "The filename should contain an extension starting with .");
1632         filenameModified.insert(n, formatString("%d", biasIndex));
1633     }
1634
1635     std::string correctFormatMessage = formatString(
1636             "%s is expected in the following format. "
1637             "The first ndim column(s) should contain the coordinate values for each point, "
1638             "each column containing values of one dimension (in ascending order). "
1639             "For a multidimensional coordinate, points should be listed "
1640             "in the order obtained by traversing lower dimensions first. "
1641             "E.g. for two-dimensional grid of size nxn: "
1642             "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1643             "Column ndim +  1 should contain the PMF value for each coordinate value. "
1644             "The target distribution values should be in column ndim + 2  or column ndim + 5. "
1645             "Make sure the input file ends with a new line but has no trailing new lines.",
1646             filename.c_str());
1647     gmx::TextLineWrapper wrapper;
1648     wrapper.settings().setLineLength(c_linewidth);
1649     correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1650
1651     double** data;
1652     int      numColumns;
1653     int      numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1654
1655     /* Check basic data properties here. BiasGrid takes care of more complicated things. */
1656
1657     if (numRows <= 0)
1658     {
1659         std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(),
1660                                              correctFormatMessage.c_str());
1661         GMX_THROW(InvalidInputError(mesg));
1662     }
1663
1664     /* Less than 2 points is not useful for PMF or target. */
1665     if (numRows < 2)
1666     {
1667         std::string mesg = gmx::formatString(
1668                 "%s contains too few data points (%d)."
1669                 "The minimum number of points is 2.",
1670                 filename.c_str(), numRows);
1671         GMX_THROW(InvalidInputError(mesg));
1672     }
1673
1674     /* Make sure there are enough columns of data.
1675
1676        Two formats are allowed. Either with columns  {coords, PMF, target} or
1677        {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1678        is how AWH output is written (x, y, z being other AWH variables). For this format,
1679        trailing columns are ignored.
1680      */
1681     int columnIndexTarget;
1682     int numColumnsMin  = dimParams.size() + 2;
1683     int columnIndexPmf = dimParams.size();
1684     if (numColumns == numColumnsMin)
1685     {
1686         columnIndexTarget = columnIndexPmf + 1;
1687     }
1688     else
1689     {
1690         columnIndexTarget = columnIndexPmf + 4;
1691     }
1692
1693     if (numColumns < numColumnsMin)
1694     {
1695         std::string mesg = gmx::formatString(
1696                 "The number of columns in %s should be at least %d."
1697                 "\n\n%s",
1698                 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1699         GMX_THROW(InvalidInputError(mesg));
1700     }
1701
1702     /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1703        since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1704     int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1705     if (numZeroRows > 1)
1706     {
1707         std::string mesg = gmx::formatString(
1708                 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1709                 "try again.",
1710                 numZeroRows, filename.c_str());
1711         GMX_THROW(InvalidInputError(mesg));
1712     }
1713
1714     /* Convert from user units to internal units before sending the data of to grid. */
1715     for (size_t d = 0; d < dimParams.size(); d++)
1716     {
1717         double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1718         if (scalingFactor == 1)
1719         {
1720             continue;
1721         }
1722         for (size_t m = 0; m < pointState->size(); m++)
1723         {
1724             data[d][m] *= scalingFactor;
1725         }
1726     }
1727
1728     /* Get a data point for each AWH grid point so that they all get data. */
1729     std::vector<int> gridIndexToDataIndex(grid.numPoints());
1730     mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1731
1732     /* Extract the data for each grid point.
1733      * We check if the target distribution is zero for all points.
1734      */
1735     bool targetDistributionIsZero = true;
1736     for (size_t m = 0; m < pointState->size(); m++)
1737     {
1738         (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1739         double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1740
1741         /* Check if the values are allowed. */
1742         if (target < 0)
1743         {
1744             std::string mesg = gmx::formatString(
1745                     "Target distribution weight at point %zu (%g) in %s is negative.", m, target,
1746                     filename.c_str());
1747             GMX_THROW(InvalidInputError(mesg));
1748         }
1749         if (target > 0)
1750         {
1751             targetDistributionIsZero = false;
1752         }
1753         (*pointState)[m].setTargetConstantWeight(target);
1754     }
1755
1756     if (targetDistributionIsZero)
1757     {
1758         std::string mesg =
1759                 gmx::formatString("The target weights given in column %d in %s are all 0",
1760                                   columnIndexTarget, filename.c_str());
1761         GMX_THROW(InvalidInputError(mesg));
1762     }
1763
1764     /* Free the arrays. */
1765     for (int m = 0; m < numColumns; m++)
1766     {
1767         sfree(data[m]);
1768     }
1769     sfree(data);
1770 }
1771
1772 void BiasState::normalizePmf(int numSharingSims)
1773 {
1774     /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1775        Approximately (for large enough force constant) we should have:
1776        sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1777      */
1778
1779     /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1780     double expSumPmf = 0;
1781     double expSumF   = 0;
1782     for (const PointState& pointState : points_)
1783     {
1784         if (pointState.inTargetRegion())
1785         {
1786             expSumPmf += std::exp(pointState.logPmfSum());
1787             expSumF += std::exp(-pointState.freeEnergy());
1788         }
1789     }
1790     double numSamples = histogramSize_.histogramSize() / numSharingSims;
1791
1792     /* Renormalize */
1793     double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1794     for (PointState& pointState : points_)
1795     {
1796         if (pointState.inTargetRegion())
1797         {
1798             pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1799         }
1800     }
1801 }
1802
1803 void BiasState::initGridPointState(const AwhBiasParams&          awhBiasParams,
1804                                    const std::vector<DimParams>& dimParams,
1805                                    const BiasGrid&               grid,
1806                                    const BiasParams&             params,
1807                                    const std::string&            filename,
1808                                    int                           numBias)
1809 {
1810     /* Modify PMF, free energy and the constant target distribution factor
1811      * to user input values if there is data given.
1812      */
1813     if (awhBiasParams.bUserData)
1814     {
1815         readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1816         setFreeEnergyToConvolvedPmf(dimParams, grid);
1817     }
1818
1819     /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1820     GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN || points_[0].weightSumRef() != 0,
1821                        "AWH reference weight histogram not initialized properly with local "
1822                        "Boltzmann target distribution.");
1823
1824     updateTargetDistribution(points_, params);
1825
1826     for (PointState& pointState : points_)
1827     {
1828         if (pointState.inTargetRegion())
1829         {
1830             pointState.updateBias();
1831         }
1832         else
1833         {
1834             /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1835             pointState.setTargetToZero();
1836         }
1837     }
1838
1839     /* Set the initial reference weighthistogram. */
1840     const double histogramSize = histogramSize_.histogramSize();
1841     for (auto& pointState : points_)
1842     {
1843         pointState.setInitialReferenceWeightHistogram(histogramSize);
1844     }
1845
1846     /* Make sure the pmf is normalized consistently with the histogram size.
1847        Note: the target distribution and free energy need to be set here. */
1848     normalizePmf(params.numSharedUpdate);
1849 }
1850
1851 BiasState::BiasState(const AwhBiasParams&          awhBiasParams,
1852                      double                        histogramSizeInitial,
1853                      const std::vector<DimParams>& dimParams,
1854                      const BiasGrid&               grid) :
1855     coordState_(awhBiasParams, dimParams, grid),
1856     points_(grid.numPoints()),
1857     weightSumCovering_(grid.numPoints()),
1858     histogramSize_(awhBiasParams, histogramSizeInitial)
1859 {
1860     /* The minimum and maximum multidimensional point indices that are affected by the next update */
1861     for (size_t d = 0; d < dimParams.size(); d++)
1862     {
1863         int index            = grid.point(coordState_.gridpointIndex()).index[d];
1864         originUpdatelist_[d] = index;
1865         endUpdatelist_[d]    = index;
1866     }
1867 }
1868
1869 } // namespace gmx