src/gromacs/awh/biasstate.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements the BiasState class.
  39  *
  40  * \author Viveca Lindahl
  41  * \author Berk Hess <hess@kth.se>
  42  * \ingroup module_awh
  43  */
  44
  45 #include "gmxpre.h"
  46
  47 #include "biasstate.h"
  48
  49 #include <cassert>
  50 #include <cmath>
  51 #include <cstdio>
  52 #include <cstdlib>
  53 #include <cstring>
  54
  55 #include <algorithm>
  56
  57 #include "gromacs/fileio/gmxfio.h"
  58 #include "gromacs/fileio/xvgr.h"
  59 #include "gromacs/gmxlib/network.h"
  60 #include "gromacs/math/utilities.h"
  61 #include "gromacs/mdrunutility/multisim.h"
  62 #include "gromacs/mdtypes/awh_history.h"
  63 #include "gromacs/mdtypes/awh_params.h"
  64 #include "gromacs/mdtypes/commrec.h"
  65 #include "gromacs/simd/simd.h"
  66 #include "gromacs/simd/simd_math.h"
  67 #include "gromacs/utility/arrayref.h"
  68 #include "gromacs/utility/exceptions.h"
  69 #include "gromacs/utility/gmxassert.h"
  70 #include "gromacs/utility/smalloc.h"
  71 #include "gromacs/utility/stringutil.h"
  72
  73 #include "grid.h"
  74 #include "pointstate.h"
  75
  76 namespace gmx
  77 {
  78
  79 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
  80 {
  81     GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
  82
  83     /* The PMF is just the negative of the log of the sampled PMF histogram.
  84      * Points with zero target weight are ignored, they will mostly contain noise.
  85      */
  86     for (size_t i = 0; i < points_.size(); i++)
  87     {
  88         pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
  89     }
  90 }
  91
  92 namespace
  93 {
  94
  95 /*! \brief
  96  * Sum an array over all simulations on the master rank of each simulation.
  97  *
  98  * \param[in,out] arrayRef      The data to sum.
  99  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 100  */
 101 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
 102 {
 103     gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
 104 }
 105
 106 /*! \brief
 107  * Sum an array over all simulations on the master rank of each simulation.
 108  *
 109  * \param[in,out] arrayRef      The data to sum.
 110  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 111  */
 112 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
 113 {
 114     gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
 115 }
 116
 117 /*! \brief
 118  * Sum an array over all simulations on all ranks of each simulation.
 119  *
 120  * This assumes the data is identical on all ranks within each simulation.
 121  *
 122  * \param[in,out] arrayRef      The data to sum.
 123  * \param[in]     commRecord    Struct for intra-simulation communication.
 124  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 125  */
 126 template<typename T>
 127 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
 128 {
 129     if (MASTER(commRecord))
 130     {
 131         sumOverSimulations(arrayRef, multiSimComm);
 132     }
 133     if (commRecord->nnodes > 1)
 134     {
 135         gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord);
 136     }
 137 }
 138
 139 /*! \brief
 140  * Sum PMF over multiple simulations, when requested.
 141  *
 142  * \param[in,out] pointState         The state of the points in the bias.
 143  * \param[in]     numSharedUpdate    The number of biases sharing the histogram.
 144  * \param[in]     commRecord         Struct for intra-simulation communication.
 145  * \param[in]     multiSimComm       Struct for multi-simulation communication.
 146  */
 147 void sumPmf(gmx::ArrayRef<PointState> pointState,
 148             int                       numSharedUpdate,
 149             const t_commrec*          commRecord,
 150             const gmx_multisim_t*     multiSimComm)
 151 {
 152     if (numSharedUpdate == 1)
 153     {
 154         return;
 155     }
 156     GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->nsim == 0,
 157                "numSharedUpdate should be a multiple of multiSimComm->nsim");
 158     GMX_ASSERT(numSharedUpdate == multiSimComm->nsim,
 159                "Sharing within a simulation is not implemented (yet)");
 160
 161     std::vector<double> buffer(pointState.size());
 162
 163     /* Need to temporarily exponentiate the log weights to sum over simulations */
 164     for (size_t i = 0; i < buffer.size(); i++)
 165     {
 166         buffer[i] = pointState[i].inTargetRegion() ? std::exp(pointState[i].logPmfSum()) : 0;
 167     }
 168
 169     sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
 170
 171     /* Take log again to get (non-normalized) PMF */
 172     double normFac = 1.0 / numSharedUpdate;
 173     for (gmx::index i = 0; i < pointState.ssize(); i++)
 174     {
 175         if (pointState[i].inTargetRegion())
 176         {
 177             pointState[i].setLogPmfSum(std::log(buffer[i] * normFac));
 178         }
 179     }
 180 }
 181
 182 /*! \brief
 183  * Find the minimum free energy value.
 184  *
 185  * \param[in] pointState  The state of the points.
 186  * \returns the minimum free energy value.
 187  */
 188 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
 189 {
 190     double fMin = GMX_FLOAT_MAX;
 191
 192     for (auto const& ps : pointState)
 193     {
 194         if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
 195         {
 196             fMin = ps.freeEnergy();
 197         }
 198     }
 199
 200     return fMin;
 201 }
 202
 203 /*! \brief
 204  * Find and return the log of the probability weight of a point given a coordinate value.
 205  *
 206  * The unnormalized weight is given by
 207  * w(point|value) = exp(bias(point) - U(value,point)),
 208  * where U is a harmonic umbrella potential.
 209  *
 210  * \param[in] dimParams     The bias dimensions parameters
 211  * \param[in] points        The point state.
 212  * \param[in] grid          The grid.
 213  * \param[in] pointIndex    Point to evaluate probability weight for.
 214  * \param[in] pointBias     Bias for the point (as a log weight).
 215  * \param[in] value         Coordinate value.
 216  * \returns the log of the biased probability weight.
 217  */
 218 double biasedLogWeightFromPoint(const std::vector<DimParams>&  dimParams,
 219                                 const std::vector<PointState>& points,
 220                                 const Grid&                    grid,
 221                                 int                            pointIndex,
 222                                 double                         pointBias,
 223                                 const awh_dvec                 value)
 224 {
 225     double logWeight = detail::c_largeNegativeExponent;
 226
 227     /* Only points in the target reigon have non-zero weight */
 228     if (points[pointIndex].inTargetRegion())
 229     {
 230         logWeight = pointBias;
 231
 232         /* Add potential for all parameter dimensions */
 233         for (size_t d = 0; d < dimParams.size(); d++)
 234         {
 235             double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
 236             logWeight -= 0.5 * dimParams[d].betak * dev * dev;
 237         }
 238     }
 239
 240     return logWeight;
 241 }
 242
 243 } // namespace
 244
 245 void BiasState::calcConvolvedPmf(const std::vector<DimParams>& dimParams,
 246                                  const Grid&                   grid,
 247                                  std::vector<float>*           convolvedPmf) const
 248 {
 249     size_t numPoints = grid.numPoints();
 250
 251     convolvedPmf->resize(numPoints);
 252
 253     /* Get the PMF to convolve. */
 254     std::vector<float> pmf(numPoints);
 255     getPmf(pmf);
 256
 257     for (size_t m = 0; m < numPoints; m++)
 258     {
 259         double           freeEnergyWeights = 0;
 260         const GridPoint& point             = grid.point(m);
 261         for (auto& neighbor : point.neighbor)
 262         {
 263             /* The negative PMF is a positive bias. */
 264             double biasNeighbor = -pmf[neighbor];
 265
 266             /* Add the convolved PMF weights for the neighbors of this point.
 267                Note that this function only adds point within the target > 0 region.
 268                Sum weights, take the logarithm last to get the free energy. */
 269             double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
 270                                                         biasNeighbor, point.coordValue);
 271             freeEnergyWeights += std::exp(logWeight);
 272         }
 273
 274         GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
 275                            "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
 276         (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
 277     }
 278 }
 279
 280 namespace
 281 {
 282
 283 /*! \brief
 284  * Updates the target distribution for all points.
 285  *
 286  * The target distribution is always updated for all points
 287  * at the same time.
 288  *
 289  * \param[in,out] pointState  The state of all points.
 290  * \param[in]     params      The bias parameters.
 291  */
 292 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState, const BiasParams& params)
 293 {
 294     double freeEnergyCutoff = 0;
 295     if (params.eTarget == eawhtargetCUTOFF)
 296     {
 297         freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
 298     }
 299
 300     double sumTarget = 0;
 301     for (PointState& ps : pointState)
 302     {
 303         sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
 304     }
 305     GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
 306
 307     /* Normalize to 1 */
 308     double invSum = 1.0 / sumTarget;
 309     for (PointState& ps : pointState)
 310     {
 311         ps.scaleTarget(invSum);
 312     }
 313 }
 314
 315 /*! \brief
 316  * Puts together a string describing a grid point.
 317  *
 318  * \param[in] grid         The grid.
 319  * \param[in] point        Grid point index.
 320  * \returns a string for the point.
 321  */
 322 std::string gridPointValueString(const Grid& grid, int point)
 323 {
 324     std::string pointString;
 325
 326     pointString += "(";
 327
 328     for (int d = 0; d < grid.numDimensions(); d++)
 329     {
 330         pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
 331         if (d < grid.numDimensions() - 1)
 332         {
 333             pointString += ",";
 334         }
 335         else
 336         {
 337             pointString += ")";
 338         }
 339     }
 340
 341     return pointString;
 342 }
 343
 344 } // namespace
 345
 346 int BiasState::warnForHistogramAnomalies(const Grid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
 347 {
 348     GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
 349     const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
 350
 351     /* Sum up the histograms and get their normalization */
 352     double sumVisits  = 0;
 353     double sumWeights = 0;
 354     for (auto& pointState : points_)
 355     {
 356         if (pointState.inTargetRegion())
 357         {
 358             sumVisits += pointState.numVisitsTot();
 359             sumWeights += pointState.weightSumTot();
 360         }
 361     }
 362     GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
 363     GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
 364     double invNormVisits = 1.0 / sumVisits;
 365     double invNormWeight = 1.0 / sumWeights;
 366
 367     /* Check all points for warnings */
 368     int    numWarnings = 0;
 369     size_t numPoints   = grid.numPoints();
 370     for (size_t m = 0; m < numPoints; m++)
 371     {
 372         /* Skip points close to boundary or non-target region */
 373         const GridPoint& gridPoint = grid.point(m);
 374         bool             skipPoint = false;
 375         for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
 376         {
 377             int neighbor = gridPoint.neighbor[n];
 378             skipPoint    = !points_[neighbor].inTargetRegion();
 379             for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
 380             {
 381                 const GridPoint& neighborPoint = grid.point(neighbor);
 382                 skipPoint                      = neighborPoint.index[d] == 0
 383                             || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
 384             }
 385         }
 386
 387         /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
 388         const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
 389         const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
 390         if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
 391         {
 392             std::string pointValueString = gridPointValueString(grid, m);
 393             std::string warningMessage   = gmx::formatString(
 394                     "\nawh%d warning: "
 395                     "at t = %g ps the obtained coordinate distribution at coordinate value %s "
 396                     "is less than a fraction %g of the reference distribution at that point. "
 397                     "If you are not certain about your settings you might want to increase your "
 398                     "pull force constant or "
 399                     "modify your sampling region.\n",
 400                     biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
 401             gmx::TextLineWrapper wrapper;
 402             wrapper.settings().setLineLength(c_linewidth);
 403             fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
 404
 405             numWarnings++;
 406         }
 407         if (numWarnings >= maxNumWarnings)
 408         {
 409             break;
 410         }
 411     }
 412
 413     return numWarnings;
 414 }
 415
 416 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams>& dimParams,
 417                                                 const Grid&                   grid,
 418                                                 int                           point,
 419                                                 gmx::ArrayRef<double>         force) const
 420 {
 421     double potential = 0;
 422     for (size_t d = 0; d < dimParams.size(); d++)
 423     {
 424         double deviation =
 425                 getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
 426
 427         double k = dimParams[d].k;
 428
 429         /* Force from harmonic potential 0.5*k*dev^2 */
 430         force[d] = -k * deviation;
 431         potential += 0.5 * k * deviation * deviation;
 432     }
 433
 434     return potential;
 435 }
 436
 437 void BiasState::calcConvolvedForce(const std::vector<DimParams>& dimParams,
 438                                    const Grid&                   grid,
 439                                    gmx::ArrayRef<const double>   probWeightNeighbor,
 440                                    gmx::ArrayRef<double>         forceWorkBuffer,
 441                                    gmx::ArrayRef<double>         force) const
 442 {
 443     for (size_t d = 0; d < dimParams.size(); d++)
 444     {
 445         force[d] = 0;
 446     }
 447
 448     /* Only neighboring points have non-negligible contribution. */
 449     const std::vector<int>& neighbor          = grid.point(coordState_.gridpointIndex()).neighbor;
 450     gmx::ArrayRef<double>   forceFromNeighbor = forceWorkBuffer;
 451     for (size_t n = 0; n < neighbor.size(); n++)
 452     {
 453         double weightNeighbor = probWeightNeighbor[n];
 454         int    indexNeighbor  = neighbor[n];
 455
 456         /* Get the umbrella force from this point. The returned potential is ignored here. */
 457         calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, forceFromNeighbor);
 458
 459         /* Add the weighted umbrella force to the convolved force. */
 460         for (size_t d = 0; d < dimParams.size(); d++)
 461         {
 462             force[d] += forceFromNeighbor[d] * weightNeighbor;
 463         }
 464     }
 465 }
 466
 467 double BiasState::moveUmbrella(const std::vector<DimParams>& dimParams,
 468                                const Grid&                   grid,
 469                                gmx::ArrayRef<const double>   probWeightNeighbor,
 470                                gmx::ArrayRef<double>         biasForce,
 471                                int64_t                       step,
 472                                int64_t                       seed,
 473                                int                           indexSeed)
 474 {
 475     /* Generate and set a new coordinate reference value */
 476     coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor,
 477                                         step, seed, indexSeed);
 478
 479     std::vector<double> newForce(dimParams.size());
 480     double              newPotential =
 481             calcUmbrellaForceAndPotential(dimParams, grid, coordState_.umbrellaGridpoint(), newForce);
 482
 483     /*  A modification of the reference value at time t will lead to a different
 484         force over t-dt/2 to t and over t to t+dt/2. For high switching rates
 485         this means the force and velocity will change signs roughly as often.
 486         To avoid any issues we take the average of the previous and new force
 487         at steps when the reference value has been moved. E.g. if the ref. value
 488         is set every step to (coord dvalue +/- delta) would give zero force.
 489      */
 490     for (gmx::index d = 0; d < biasForce.ssize(); d++)
 491     {
 492         /* Average of the current and new force */
 493         biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
 494     }
 495
 496     return newPotential;
 497 }
 498
 499 namespace
 500 {
 501
 502 /*! \brief
 503  * Sets the histogram rescaling factors needed to control the histogram size.
 504  *
 505  * For sake of robustness, the reference weight histogram can grow at a rate
 506  * different from the actual sampling rate. Typically this happens for a limited
 507  * initial time, alternatively growth is scaled down by a constant factor for all
 508  * times. Since the size of the reference histogram sets the size of the free
 509  * energy update this should be reflected also in the PMF. Thus the PMF histogram
 510  * needs to be rescaled too.
 511  *
 512  * This function should only be called by the bias update function or wrapped by a function that
 513  * knows what scale factors should be applied when, e.g,
 514  * getSkippedUpdateHistogramScaleFactors().
 515  *
 516  * \param[in]  params             The bias parameters.
 517  * \param[in]  newHistogramSize   New reference weight histogram size.
 518  * \param[in]  oldHistogramSize   Previous reference weight histogram size (before adding new samples).
 519  * \param[out] weightHistScaling  Scaling factor for the reference weight histogram.
 520  * \param[out] logPmfSumScaling   Log of the scaling factor for the PMF histogram.
 521  */
 522 void setHistogramUpdateScaleFactors(const BiasParams& params,
 523                                     double            newHistogramSize,
 524                                     double            oldHistogramSize,
 525                                     double*           weightHistScaling,
 526                                     double*           logPmfSumScaling)
 527 {
 528
 529     /* The two scaling factors below are slightly different (ignoring the log factor) because the
 530        reference and the PMF histogram apply weight scaling differently. The weight histogram
 531        applies is  locally, i.e. each sample is scaled down meaning all samples get equal weight.
 532        It is done this way because that is what target type local Boltzmann (for which
 533        target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
 534        by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
 535        1) empirically this is necessary for converging the PMF; 2) since the extraction of
 536        the PMF is theoretically only valid for a constant bias, new samples should get more
 537        weight than old ones for which the bias is fluctuating more. */
 538     *weightHistScaling =
 539             newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
 540     *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
 541 }
 542
 543 } // namespace
 544
 545 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
 546                                                       double*           weightHistScaling,
 547                                                       double*           logPmfSumScaling) const
 548 {
 549     GMX_ASSERT(params.skipUpdates(),
 550                "Calling function for skipped updates when skipping updates is not allowed");
 551
 552     if (inInitialStage())
 553     {
 554         /* In between global updates the reference histogram size is kept constant so we trivially
 555            know what the histogram size was at the time of the skipped update. */
 556         double histogramSize = histogramSize_.histogramSize();
 557         setHistogramUpdateScaleFactors(params, histogramSize, histogramSize, weightHistScaling,
 558                                        logPmfSumScaling);
 559     }
 560     else
 561     {
 562         /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
 563         *weightHistScaling = 1;
 564         *logPmfSumScaling  = 0;
 565     }
 566 }
 567
 568 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
 569 {
 570     double weightHistScaling;
 571     double logPmfsumScaling;
 572
 573     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
 574
 575     for (auto& pointState : points_)
 576     {
 577         bool didUpdate = pointState.performPreviouslySkippedUpdates(
 578                 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
 579
 580         /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
 581         if (didUpdate)
 582         {
 583             pointState.updateBias();
 584         }
 585     }
 586 }
 587
 588 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const Grid& grid)
 589 {
 590     double weightHistScaling;
 591     double logPmfsumScaling;
 592
 593     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
 594
 595     /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
 596     const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
 597     for (auto& neighbor : neighbors)
 598     {
 599         bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
 600                 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
 601
 602         if (didUpdate)
 603         {
 604             points_[neighbor].updateBias();
 605         }
 606     }
 607 }
 608
 609 namespace
 610 {
 611
 612 /*! \brief
 613  * Merge update lists from multiple sharing simulations.
 614  *
 615  * \param[in,out] updateList    Update list for this simulation (assumed >= npoints long).
 616  * \param[in]     numPoints     Total number of points.
 617  * \param[in]     commRecord    Struct for intra-simulation communication.
 618  * \param[in]     multiSimComm  Struct for multi-simulation communication.
 619  */
 620 void mergeSharedUpdateLists(std::vector<int>*     updateList,
 621                             int                   numPoints,
 622                             const t_commrec*      commRecord,
 623                             const gmx_multisim_t* multiSimComm)
 624 {
 625     std::vector<int> numUpdatesOfPoint;
 626
 627     /* Flag the update points of this sim.
 628        TODO: we can probably avoid allocating this array and just use the input array. */
 629     numUpdatesOfPoint.resize(numPoints, 0);
 630     for (auto& pointIndex : *updateList)
 631     {
 632         numUpdatesOfPoint[pointIndex] = 1;
 633     }
 634
 635     /* Sum over the sims to get all the flagged points */
 636     sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
 637
 638     /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
 639     updateList->clear();
 640     for (int m = 0; m < numPoints; m++)
 641     {
 642         if (numUpdatesOfPoint[m] > 0)
 643         {
 644             updateList->push_back(m);
 645         }
 646     }
 647 }
 648
 649 /*! \brief
 650  * Generate an update list of points sampled since the last update.
 651  *
 652  * \param[in] grid              The AWH bias.
 653  * \param[in] points            The point state.
 654  * \param[in] originUpdatelist  The origin of the rectangular region that has been sampled since
 655  * last update. \param[in] endUpdatelist     The end of the rectangular that has been sampled since
 656  * last update. \param[in,out] updateList    Local update list to set (assumed >= npoints long).
 657  */
 658 void makeLocalUpdateList(const Grid&                    grid,
 659                          const std::vector<PointState>& points,
 660                          const awh_ivec                 originUpdatelist,
 661                          const awh_ivec                 endUpdatelist,
 662                          std::vector<int>*              updateList)
 663 {
 664     awh_ivec origin;
 665     awh_ivec numPoints;
 666
 667     /* Define the update search grid */
 668     for (int d = 0; d < grid.numDimensions(); d++)
 669     {
 670         origin[d]    = originUpdatelist[d];
 671         numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
 672
 673         /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
 674            This helps for calculating the distance/number of points but should be removed and fixed when the way of
 675            updating origin/end updatelist is changed (see sampleProbabilityWeights). */
 676         numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
 677     }
 678
 679     /* Make the update list */
 680     updateList->clear();
 681     int  pointIndex  = -1;
 682     bool pointExists = true;
 683     while (pointExists)
 684     {
 685         pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
 686
 687         if (pointExists && points[pointIndex].inTargetRegion())
 688         {
 689             updateList->push_back(pointIndex);
 690         }
 691     }
 692 }
 693
 694 } // namespace
 695
 696 void BiasState::resetLocalUpdateRange(const Grid& grid)
 697 {
 698     const int gridpointIndex = coordState_.gridpointIndex();
 699     for (int d = 0; d < grid.numDimensions(); d++)
 700     {
 701         /* This gives the  minimum range consisting only of the current closest point. */
 702         originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
 703         endUpdatelist_[d]    = grid.point(gridpointIndex).index[d];
 704     }
 705 }
 706
 707 namespace
 708 {
 709
 710 /*! \brief
 711  * Add partial histograms (accumulating between updates) to accumulating histograms.
 712  *
 713  * \param[in,out] pointState         The state of the points in the bias.
 714  * \param[in,out] weightSumCovering  The weights for checking covering.
 715  * \param[in]     numSharedUpdate    The number of biases sharing the histrogram.
 716  * \param[in]     commRecord         Struct for intra-simulation communication.
 717  * \param[in]     multiSimComm       Struct for multi-simulation communication.
 718  * \param[in]     localUpdateList    List of points with data.
 719  */
 720 void sumHistograms(gmx::ArrayRef<PointState> pointState,
 721                    gmx::ArrayRef<double>     weightSumCovering,
 722                    int                       numSharedUpdate,
 723                    const t_commrec*          commRecord,
 724                    const gmx_multisim_t*     multiSimComm,
 725                    const std::vector<int>&   localUpdateList)
 726 {
 727     /* The covering checking histograms are added before summing over simulations, so that the
 728        weights from different simulations are kept distinguishable. */
 729     for (int globalIndex : localUpdateList)
 730     {
 731         weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
 732     }
 733
 734     /* Sum histograms over multiple simulations if needed. */
 735     if (numSharedUpdate > 1)
 736     {
 737         GMX_ASSERT(numSharedUpdate == multiSimComm->nsim,
 738                    "Sharing within a simulation is not implemented (yet)");
 739
 740         /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
 741         std::vector<double> weightSum;
 742         std::vector<double> coordVisits;
 743
 744         weightSum.resize(localUpdateList.size());
 745         coordVisits.resize(localUpdateList.size());
 746
 747         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
 748         {
 749             const PointState& ps = pointState[localUpdateList[localIndex]];
 750
 751             weightSum[localIndex]   = ps.weightSumIteration();
 752             coordVisits[localIndex] = ps.numVisitsIteration();
 753         }
 754
 755         sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
 756         sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
 757
 758         /* Transfer back the result */
 759         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
 760         {
 761             PointState& ps = pointState[localUpdateList[localIndex]];
 762
 763             ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
 764         }
 765     }
 766
 767     /* Now add the partial counts and weights to the accumulating histograms.
 768        Note: we still need to use the weights for the update so we wait
 769        with resetting them until the end of the update. */
 770     for (int globalIndex : localUpdateList)
 771     {
 772         pointState[globalIndex].addPartialWeightAndCount();
 773     }
 774 }
 775
 776 /*! \brief
 777  * Label points along an axis as covered or not.
 778  *
 779  * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
 780  *
 781  * \param[in]     visited        Visited? For each point.
 782  * \param[in]     checkCovering  Check for covering? For each point.
 783  * \param[in]     numPoints      The number of grid points along this dimension.
 784  * \param[in]     period         Period in number of points.
 785  * \param[in]     coverRadius    Cover radius, in points, needed for defining a point as covered.
 786  * \param[in,out] covered        In this array elements are 1 for covered points and 0 for
 787  * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
 788  */
 789 void labelCoveredPoints(const std::vector<bool>& visited,
 790                         const std::vector<bool>& checkCovering,
 791                         int                      numPoints,
 792                         int                      period,
 793                         int                      coverRadius,
 794                         gmx::ArrayRef<int>       covered)
 795 {
 796     GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
 797
 798     bool haveFirstNotVisited = false;
 799     int  firstNotVisited     = -1;
 800     int  notVisitedLow       = -1;
 801     int  notVisitedHigh      = -1;
 802
 803     for (int n = 0; n < numPoints; n++)
 804     {
 805         if (checkCovering[n] && !visited[n])
 806         {
 807             if (!haveFirstNotVisited)
 808             {
 809                 notVisitedLow       = n;
 810                 firstNotVisited     = n;
 811                 haveFirstNotVisited = true;
 812             }
 813             else
 814             {
 815                 notVisitedHigh = n;
 816
 817                 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
 818                    by unvisited points. The unvisted end points affect the coveredness of the
 819                    visited with a reach equal to the cover radius. */
 820                 int notCoveredLow  = notVisitedLow + coverRadius;
 821                 int notCoveredHigh = notVisitedHigh - coverRadius;
 822                 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
 823                 {
 824                     covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
 825                 }
 826
 827                 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
 828                    the notVisitedLow of the next. */
 829                 notVisitedLow = notVisitedHigh;
 830             }
 831         }
 832     }
 833
 834     /* Have labelled all the internal points. Now take care of the boundary regions. */
 835     if (!haveFirstNotVisited)
 836     {
 837         /* No non-visited points <=> all points visited => all points covered. */
 838
 839         for (int n = 0; n < numPoints; n++)
 840         {
 841             covered[n] = 1;
 842         }
 843     }
 844     else
 845     {
 846         int lastNotVisited = notVisitedLow;
 847
 848         /* For periodic boundaries, non-visited points can influence points
 849            on the other side of the boundary so we need to wrap around. */
 850
 851         /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
 852            For non-periodic boundaries there is no lower end point so a dummy value is used. */
 853         int notVisitedHigh = firstNotVisited;
 854         int notVisitedLow  = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
 855
 856         int notCoveredLow  = notVisitedLow + coverRadius;
 857         int notCoveredHigh = notVisitedHigh - coverRadius;
 858
 859         for (int i = 0; i <= notVisitedHigh; i++)
 860         {
 861             /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
 862             covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
 863         }
 864
 865         /* Upper end. Same as for lower end but in the other direction. */
 866         notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
 867         notVisitedLow  = lastNotVisited;
 868
 869         notCoveredLow  = notVisitedLow + coverRadius;
 870         notCoveredHigh = notVisitedHigh - coverRadius;
 871
 872         for (int i = notVisitedLow; i <= numPoints - 1; i++)
 873         {
 874             /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
 875             covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
 876         }
 877     }
 878 }
 879
 880 } // namespace
 881
 882 bool BiasState::isSamplingRegionCovered(const BiasParams&             params,
 883                                         const std::vector<DimParams>& dimParams,
 884                                         const Grid&                   grid,
 885                                         const t_commrec*              commRecord,
 886                                         const gmx_multisim_t*         multiSimComm) const
 887 {
 888     /* Allocate and initialize arrays: one for checking visits along each dimension,
 889        one for keeping track of which points to check and one for the covered points.
 890        Possibly these could be kept as AWH variables to avoid these allocations. */
 891     struct CheckDim
 892     {
 893         std::vector<bool> visited;
 894         std::vector<bool> checkCovering;
 895         // We use int for the covering array since we might use gmx_sumi_sim.
 896         std::vector<int> covered;
 897     };
 898
 899     std::vector<CheckDim> checkDim;
 900     checkDim.resize(grid.numDimensions());
 901
 902     for (int d = 0; d < grid.numDimensions(); d++)
 903     {
 904         const size_t numPoints = grid.axis(d).numPoints();
 905         checkDim[d].visited.resize(numPoints, false);
 906         checkDim[d].checkCovering.resize(numPoints, false);
 907         checkDim[d].covered.resize(numPoints, 0);
 908     }
 909
 910     /* Set visited points along each dimension and which points should be checked for covering.
 911        Specifically, points above the free energy cutoff (if there is one) or points outside
 912        of the target region are ignored. */
 913
 914     /* Set the free energy cutoff */
 915     double maxFreeEnergy = GMX_FLOAT_MAX;
 916
 917     if (params.eTarget == eawhtargetCUTOFF)
 918     {
 919         maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
 920     }
 921
 922     /* Set the threshold weight for a point to be considered visited. */
 923     double weightThreshold = 1;
 924     for (int d = 0; d < grid.numDimensions(); d++)
 925     {
 926         weightThreshold *= grid.axis(d).spacing() * std::sqrt(dimParams[d].betak * 0.5 * M_1_PI);
 927     }
 928
 929     /* Project the sampling weights onto each dimension */
 930     for (size_t m = 0; m < grid.numPoints(); m++)
 931     {
 932         const PointState& pointState = points_[m];
 933
 934         for (int d = 0; d < grid.numDimensions(); d++)
 935         {
 936             int n = grid.point(m).index[d];
 937
 938             /* Is visited if it was already visited or if there is enough weight at the current point */
 939             checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
 940
 941             /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
 942             checkDim[d].checkCovering[n] =
 943                     checkDim[d].checkCovering[n]
 944                     || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
 945         }
 946     }
 947
 948     /* Label each point along each dimension as covered or not. */
 949     for (int d = 0; d < grid.numDimensions(); d++)
 950     {
 951         labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(),
 952                            grid.axis(d).numPointsInPeriod(), params.coverRadius()[d], checkDim[d].covered);
 953     }
 954
 955     /* Now check for global covering. Each dimension needs to be covered separately.
 956        A dimension is covered if each point is covered.  Multiple simulations collectively
 957        cover the points, i.e. a point is covered if any of the simulations covered it.
 958        However, visited points are not shared, i.e. if a point is covered or not is
 959        determined by the visits of a single simulation. In general the covering criterion is
 960        all points covered => all points are surrounded by visited points up to a radius = coverRadius.
 961        For 1 simulation, all points covered <=> all points visited. For multiple simulations
 962        however, all points visited collectively !=> all points covered, except for coverRadius = 0.
 963        In the limit of large coverRadius, all points covered => all points visited by at least one
 964        simulation (since no point will be covered until all points have been visited by a
 965        single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
 966        needs with surrounding points before sharing covering information with other simulations. */
 967
 968     /* Communicate the covered points between sharing simulations if needed. */
 969     if (params.numSharedUpdate > 1)
 970     {
 971         /* For multiple dimensions this may not be the best way to do it. */
 972         for (int d = 0; d < grid.numDimensions(); d++)
 973         {
 974             sumOverSimulations(
 975                     gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
 976                     commRecord, multiSimComm);
 977         }
 978     }
 979
 980     /* Now check if for each dimension all points are covered. Break if not true. */
 981     bool allPointsCovered = true;
 982     for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
 983     {
 984         for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
 985         {
 986             allPointsCovered = (checkDim[d].covered[n] != 0);
 987         }
 988     }
 989
 990     return allPointsCovered;
 991 }
 992
 993 /*! \brief
 994  * Normalizes the free energy and PMF sum.
 995  *
 996  * \param[in] pointState  The state of the points.
 997  */
 998 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
 999 {
1000     double minF = freeEnergyMinimumValue(*pointState);
1001
1002     for (PointState& ps : *pointState)
1003     {
1004         ps.normalizeFreeEnergyAndPmfSum(minF);
1005     }
1006 }
1007
1008 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams>& dimParams,
1009                                                          const Grid&                   grid,
1010                                                          const BiasParams&             params,
1011                                                          const t_commrec*              commRecord,
1012                                                          const gmx_multisim_t*         multiSimComm,
1013                                                          double                        t,
1014                                                          int64_t                       step,
1015                                                          FILE*                         fplog,
1016                                                          std::vector<int>*             updateList)
1017 {
1018     /* Note hat updateList is only used in this scope and is always
1019      * re-initialized. We do not use a local vector, because that would
1020      * cause reallocation every time this funtion is called and the vector
1021      * can be the size of the whole grid.
1022      */
1023
1024     /* Make a list of all local points, i.e. those that could have been touched since
1025        the last update. These are the points needed for summing histograms below
1026        (non-local points only add zeros). For local updates, this will also be the
1027        final update list. */
1028     makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1029     if (params.numSharedUpdate > 1)
1030     {
1031         mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1032     }
1033
1034     /* Reset the range for the next update */
1035     resetLocalUpdateRange(grid);
1036
1037     /* Add samples to histograms for all local points and sync simulations if needed */
1038     sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1039
1040     sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1041
1042     /* Renormalize the free energy if values are too large. */
1043     bool needToNormalizeFreeEnergy = false;
1044     for (int& globalIndex : *updateList)
1045     {
1046         /* We want to keep the absolute value of the free energies to be less
1047            c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1048            ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1049            in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1050            this requirement would be broken. */
1051         if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1052         {
1053             needToNormalizeFreeEnergy = true;
1054             break;
1055         }
1056     }
1057
1058     /* Update target distribution? */
1059     bool needToUpdateTargetDistribution =
1060             (params.eTarget != eawhtargetCONSTANT && params.isUpdateTargetStep(step));
1061
1062     /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1063     bool detectedCovering = false;
1064     if (inInitialStage())
1065     {
1066         detectedCovering =
1067                 (params.isCheckCoveringStep(step)
1068                  && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1069     }
1070
1071     /* The weighthistogram size after this update. */
1072     double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_,
1073                                                               weightSumCovering_, fplog);
1074
1075     /* Make the update list. Usually we try to only update local points,
1076      * but if the update has non-trivial or non-deterministic effects
1077      * on non-local points a global update is needed. This is the case when:
1078      * 1) a covering occurred in the initial stage, leading to non-trivial
1079      *    histogram rescaling factors; or
1080      * 2) the target distribution will be updated, since we don't make any
1081      *    assumption on its form; or
1082      * 3) the AWH parameters are such that we never attempt to skip non-local
1083      *    updates; or
1084      * 4) the free energy values have grown so large that a renormalization
1085      *    is needed.
1086      */
1087     if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1088     {
1089         /* Global update, just add all points. */
1090         updateList->clear();
1091         for (size_t m = 0; m < points_.size(); m++)
1092         {
1093             if (points_[m].inTargetRegion())
1094             {
1095                 updateList->push_back(m);
1096             }
1097         }
1098     }
1099
1100     /* Set histogram scale factors. */
1101     double weightHistScalingSkipped = 0;
1102     double logPmfsumScalingSkipped  = 0;
1103     if (params.skipUpdates())
1104     {
1105         getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1106     }
1107     double weightHistScalingNew;
1108     double logPmfsumScalingNew;
1109     setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1110                                    &weightHistScalingNew, &logPmfsumScalingNew);
1111
1112     /* Update free energy and reference weight histogram for points in the update list. */
1113     for (int pointIndex : *updateList)
1114     {
1115         PointState* pointStateToUpdate = &points_[pointIndex];
1116
1117         /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1118         if (params.skipUpdates())
1119         {
1120             pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(),
1121                                                                 weightHistScalingSkipped,
1122                                                                 logPmfsumScalingSkipped);
1123         }
1124
1125         /* Now do an update with new sampling data. */
1126         pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(),
1127                                                   weightHistScalingNew, logPmfsumScalingNew);
1128     }
1129
1130     /* Only update the histogram size after we are done with the local point updates */
1131     histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1132
1133     if (needToNormalizeFreeEnergy)
1134     {
1135         normalizeFreeEnergyAndPmfSum(&points_);
1136     }
1137
1138     if (needToUpdateTargetDistribution)
1139     {
1140         /* The target distribution is always updated for all points at once. */
1141         updateTargetDistribution(points_, params);
1142     }
1143
1144     /* Update the bias. The bias is updated separately and last since it simply a function of
1145        the free energy and the target distribution and we want to avoid doing extra work. */
1146     for (int pointIndex : *updateList)
1147     {
1148         points_[pointIndex].updateBias();
1149     }
1150
1151     /* Increase the update counter. */
1152     histogramSize_.incrementNumUpdates();
1153 }
1154
1155 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>& dimParams,
1156                                                            const Grid&                   grid,
1157                                                            std::vector<double, AlignedAllocator<double>>* weight) const
1158 {
1159     /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1160     const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1161
1162 #if GMX_SIMD_HAVE_DOUBLE
1163     typedef SimdDouble PackType;
1164     constexpr int      packSize = GMX_SIMD_DOUBLE_WIDTH;
1165 #else
1166     typedef double PackType;
1167     constexpr int  packSize = 1;
1168 #endif
1169     /* Round the size of the weight array up to packSize */
1170     const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1171     weight->resize(weightSize);
1172
1173     double* gmx_restrict weightData = weight->data();
1174     PackType             weightSumPack(0.0);
1175     for (size_t i = 0; i < neighbors.size(); i += packSize)
1176     {
1177         for (size_t n = i; n < i + packSize; n++)
1178         {
1179             if (n < neighbors.size())
1180             {
1181                 const int neighbor = neighbors[n];
1182                 (*weight)[n] =
1183                         biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1184                                                  points_[neighbor].bias(), coordState_.coordValue());
1185             }
1186             else
1187             {
1188                 /* Pad with values that don't affect the result */
1189                 (*weight)[n] = detail::c_largeNegativeExponent;
1190             }
1191         }
1192         PackType weightPack = load<PackType>(weightData + i);
1193         weightPack          = gmx::exp(weightPack);
1194         weightSumPack       = weightSumPack + weightPack;
1195         store(weightData + i, weightPack);
1196     }
1197     /* Sum of probability weights */
1198     double weightSum = reduce(weightSumPack);
1199     GMX_RELEASE_ASSERT(weightSum > 0,
1200                        "zero probability weight when updating AWH probability weights.");
1201
1202     /* Normalize probabilities to sum to 1 */
1203     double invWeightSum = 1 / weightSum;
1204     for (double& w : *weight)
1205     {
1206         w *= invWeightSum;
1207     }
1208
1209     /* Return the convolved bias */
1210     return std::log(weightSum);
1211 }
1212
1213 double BiasState::calcConvolvedBias(const std::vector<DimParams>& dimParams,
1214                                     const Grid&                   grid,
1215                                     const awh_dvec&               coordValue) const
1216 {
1217     int              point     = grid.nearestIndex(coordValue);
1218     const GridPoint& gridPoint = grid.point(point);
1219
1220     /* Sum the probability weights from the neighborhood of the given point */
1221     double weightSum = 0;
1222     for (int neighbor : gridPoint.neighbor)
1223     {
1224         double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1225                                                     points_[neighbor].bias(), coordValue);
1226         weightSum += std::exp(logWeight);
1227     }
1228
1229     /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1230     return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1231 }
1232
1233 void BiasState::sampleProbabilityWeights(const Grid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1234 {
1235     const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1236
1237     /* Save weights for next update */
1238     for (size_t n = 0; n < neighbor.size(); n++)
1239     {
1240         points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1241     }
1242
1243     /* Update the local update range. Two corner points define this rectangular
1244      * domain. We need to choose two new corner points such that the new domain
1245      * contains both the old update range and the current neighborhood.
1246      * In the simplest case when an update is performed every sample,
1247      * the update range would simply equal the current neighborhood.
1248      */
1249     int neighborStart = neighbor[0];
1250     int neighborLast  = neighbor[neighbor.size() - 1];
1251     for (int d = 0; d < grid.numDimensions(); d++)
1252     {
1253         int origin = grid.point(neighborStart).index[d];
1254         int last   = grid.point(neighborLast).index[d];
1255
1256         if (origin > last)
1257         {
1258             /* Unwrap if wrapped around the boundary (only happens for periodic
1259              * boundaries). This has been already for the stored index interval.
1260              */
1261             /* TODO: what we want to do is to find the smallest the update
1262              * interval that contains all points that need to be updated.
1263              * This amounts to combining two intervals, the current
1264              * [origin, end] update interval and the new touched neighborhood
1265              * into a new interval that contains all points from both the old
1266              * intervals.
1267              *
1268              * For periodic boundaries it becomes slightly more complicated
1269              * than for closed boundaries because then it needs not be
1270              * true that origin < end (so one can't simply relate the origin/end
1271              * in the min()/max() below). The strategy here is to choose the
1272              * origin closest to a reference point (index 0) and then unwrap
1273              * the end index if needed and choose the largest end index.
1274              * This ensures that both intervals are in the new interval
1275              * but it's not necessarily the smallest.
1276              * Currently we solve this by going through each possibility
1277              * and checking them.
1278              */
1279             last += grid.axis(d).numPointsInPeriod();
1280         }
1281
1282         originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1283         endUpdatelist_[d]    = std::max(endUpdatelist_[d], last);
1284     }
1285 }
1286
1287 void BiasState::sampleCoordAndPmf(const Grid& grid, gmx::ArrayRef<const double> probWeightNeighbor, double convolvedBias)
1288 {
1289     /* Sampling-based deconvolution extracting the PMF.
1290      * Update the PMF histogram with the current coordinate value.
1291      *
1292      * Because of the finite width of the harmonic potential, the free energy
1293      * defined for each coordinate point does not exactly equal that of the
1294      * actual coordinate, the PMF. However, the PMF can be estimated by applying
1295      * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1296      * reweighted histogram of the coordinate value. Strictly, this relies on
1297      * the unknown normalization constant Z being either constant or known. Here,
1298      * neither is true except in the long simulation time limit. Empirically however,
1299      * it works (mainly because how the PMF histogram is rescaled).
1300      */
1301
1302     /* Only save coordinate data that is in range (the given index is always
1303      * in range even if the coordinate value is not).
1304      */
1305     if (grid.covers(coordState_.coordValue()))
1306     {
1307         /* Save PMF sum and keep a histogram of the sampled coordinate values */
1308         points_[coordState_.gridpointIndex()].samplePmf(convolvedBias);
1309     }
1310
1311     /* Save probability weights for the update */
1312     sampleProbabilityWeights(grid, probWeightNeighbor);
1313 }
1314
1315 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1316 {
1317     biasHistory->pointState.resize(points_.size());
1318 }
1319
1320 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const Grid& grid) const
1321 {
1322     GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1323                        "The AWH history setup does not match the AWH state.");
1324
1325     AwhBiasStateHistory* stateHistory = &biasHistory->state;
1326     stateHistory->umbrellaGridpoint   = coordState_.umbrellaGridpoint();
1327
1328     for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1329     {
1330         AwhPointStateHistory* psh = &biasHistory->pointState[m];
1331
1332         points_[m].storeState(psh);
1333
1334         psh->weightsum_covering = weightSumCovering_[m];
1335     }
1336
1337     histogramSize_.storeState(stateHistory);
1338
1339     stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1340     stateHistory->end_index_updatelist    = multiDimGridIndexToLinear(grid, endUpdatelist_);
1341 }
1342
1343 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const Grid& grid)
1344 {
1345     const AwhBiasStateHistory& stateHistory = biasHistory.state;
1346
1347     coordState_.restoreFromHistory(stateHistory);
1348
1349     if (biasHistory.pointState.size() != points_.size())
1350     {
1351         GMX_THROW(
1352                 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1353                                   "Likely you provided a checkpoint from a different simulation."));
1354     }
1355     for (size_t m = 0; m < points_.size(); m++)
1356     {
1357         points_[m].setFromHistory(biasHistory.pointState[m]);
1358     }
1359
1360     for (size_t m = 0; m < weightSumCovering_.size(); m++)
1361     {
1362         weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1363     }
1364
1365     histogramSize_.restoreFromHistory(stateHistory);
1366
1367     linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1368     linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1369 }
1370
1371 void BiasState::broadcast(const t_commrec* commRecord)
1372 {
1373     gmx_bcast(sizeof(coordState_), &coordState_, commRecord);
1374
1375     gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord);
1376
1377     gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(), commRecord);
1378
1379     gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord);
1380 }
1381
1382 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>& dimParams, const Grid& grid)
1383 {
1384     std::vector<float> convolvedPmf;
1385
1386     calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1387
1388     for (size_t m = 0; m < points_.size(); m++)
1389     {
1390         points_[m].setFreeEnergy(convolvedPmf[m]);
1391     }
1392 }
1393
1394 /*! \brief
1395  * Count trailing data rows containing only zeros.
1396  *
1397  * \param[in] data        2D data array.
1398  * \param[in] numRows     Number of rows in array.
1399  * \param[in] numColumns  Number of cols in array.
1400  * \returns the number of trailing zero rows.
1401  */
1402 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1403 {
1404     int numZeroRows = 0;
1405     for (int m = numRows - 1; m >= 0; m--)
1406     {
1407         bool rowIsZero = true;
1408         for (int d = 0; d < numColumns; d++)
1409         {
1410             if (data[d][m] != 0)
1411             {
1412                 rowIsZero = false;
1413                 break;
1414             }
1415         }
1416
1417         if (!rowIsZero)
1418         {
1419             /* At a row with non-zero data */
1420             break;
1421         }
1422         else
1423         {
1424             /* Still at a zero data row, keep checking rows higher up. */
1425             numZeroRows++;
1426         }
1427     }
1428
1429     return numZeroRows;
1430 }
1431
1432 /*! \brief
1433  * Initializes the PMF and target with data read from an input table.
1434  *
1435  * \param[in]     dimParams   The dimension parameters.
1436  * \param[in]     grid        The grid.
1437  * \param[in]     filename    The filename to read PMF and target from.
1438  * \param[in]     numBias     Number of biases.
1439  * \param[in]     biasIndex   The index of the bias.
1440  * \param[in,out] pointState  The state of the points in this bias.
1441  */
1442 static void readUserPmfAndTargetDistribution(const std::vector<DimParams>& dimParams,
1443                                              const Grid&                   grid,
1444                                              const std::string&            filename,
1445                                              int                           numBias,
1446                                              int                           biasIndex,
1447                                              std::vector<PointState>*      pointState)
1448 {
1449     /* Read the PMF and target distribution.
1450        From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1451        base on the force constant. The free energy and target together determine the bias.
1452      */
1453     std::string filenameModified(filename);
1454     if (numBias > 1)
1455     {
1456         size_t n = filenameModified.rfind('.');
1457         GMX_RELEASE_ASSERT(n != std::string::npos,
1458                            "The filename should contain an extension starting with .");
1459         filenameModified.insert(n, formatString("%d", biasIndex));
1460     }
1461
1462     std::string correctFormatMessage = formatString(
1463             "%s is expected in the following format. "
1464             "The first ndim column(s) should contain the coordinate values for each point, "
1465             "each column containing values of one dimension (in ascending order). "
1466             "For a multidimensional coordinate, points should be listed "
1467             "in the order obtained by traversing lower dimensions first. "
1468             "E.g. for two-dimensional grid of size nxn: "
1469             "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1470             "Column ndim +  1 should contain the PMF value for each coordinate value. "
1471             "The target distribution values should be in column ndim + 2  or column ndim + 5. "
1472             "Make sure the input file ends with a new line but has no trailing new lines.",
1473             filename.c_str());
1474     gmx::TextLineWrapper wrapper;
1475     wrapper.settings().setLineLength(c_linewidth);
1476     correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1477
1478     double** data;
1479     int      numColumns;
1480     int      numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1481
1482     /* Check basic data properties here. Grid takes care of more complicated things. */
1483
1484     if (numRows <= 0)
1485     {
1486         std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(),
1487                                              correctFormatMessage.c_str());
1488         GMX_THROW(InvalidInputError(mesg));
1489     }
1490
1491     /* Less than 2 points is not useful for PMF or target. */
1492     if (numRows < 2)
1493     {
1494         std::string mesg = gmx::formatString(
1495                 "%s contains too few data points (%d)."
1496                 "The minimum number of points is 2.",
1497                 filename.c_str(), numRows);
1498         GMX_THROW(InvalidInputError(mesg));
1499     }
1500
1501     /* Make sure there are enough columns of data.
1502
1503        Two formats are allowed. Either with columns  {coords, PMF, target} or
1504        {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1505        is how AWH output is written (x, y, z being other AWH variables). For this format,
1506        trailing columns are ignored.
1507      */
1508     int columnIndexTarget;
1509     int numColumnsMin  = dimParams.size() + 2;
1510     int columnIndexPmf = dimParams.size();
1511     if (numColumns == numColumnsMin)
1512     {
1513         columnIndexTarget = columnIndexPmf + 1;
1514     }
1515     else
1516     {
1517         columnIndexTarget = columnIndexPmf + 4;
1518     }
1519
1520     if (numColumns < numColumnsMin)
1521     {
1522         std::string mesg = gmx::formatString(
1523                 "The number of columns in %s should be at least %d."
1524                 "\n\n%s",
1525                 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1526         GMX_THROW(InvalidInputError(mesg));
1527     }
1528
1529     /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1530        since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1531     int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1532     if (numZeroRows > 1)
1533     {
1534         std::string mesg = gmx::formatString(
1535                 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1536                 "try again.",
1537                 numZeroRows, filename.c_str());
1538         GMX_THROW(InvalidInputError(mesg));
1539     }
1540
1541     /* Convert from user units to internal units before sending the data of to grid. */
1542     for (size_t d = 0; d < dimParams.size(); d++)
1543     {
1544         double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1545         if (scalingFactor == 1)
1546         {
1547             continue;
1548         }
1549         for (size_t m = 0; m < pointState->size(); m++)
1550         {
1551             data[d][m] *= scalingFactor;
1552         }
1553     }
1554
1555     /* Get a data point for each AWH grid point so that they all get data. */
1556     std::vector<int> gridIndexToDataIndex(grid.numPoints());
1557     mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1558
1559     /* Extract the data for each grid point.
1560      * We check if the target distribution is zero for all points.
1561      */
1562     bool targetDistributionIsZero = true;
1563     for (size_t m = 0; m < pointState->size(); m++)
1564     {
1565         (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1566         double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1567
1568         /* Check if the values are allowed. */
1569         if (target < 0)
1570         {
1571             std::string mesg = gmx::formatString(
1572                     "Target distribution weight at point %zu (%g) in %s is negative.", m, target,
1573                     filename.c_str());
1574             GMX_THROW(InvalidInputError(mesg));
1575         }
1576         if (target > 0)
1577         {
1578             targetDistributionIsZero = false;
1579         }
1580         (*pointState)[m].setTargetConstantWeight(target);
1581     }
1582
1583     if (targetDistributionIsZero)
1584     {
1585         std::string mesg =
1586                 gmx::formatString("The target weights given in column %d in %s are all 0",
1587                                   columnIndexTarget, filename.c_str());
1588         GMX_THROW(InvalidInputError(mesg));
1589     }
1590
1591     /* Free the arrays. */
1592     for (int m = 0; m < numColumns; m++)
1593     {
1594         sfree(data[m]);
1595     }
1596     sfree(data);
1597 }
1598
1599 void BiasState::normalizePmf(int numSharingSims)
1600 {
1601     /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1602        Approximately (for large enough force constant) we should have:
1603        sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1604      */
1605
1606     /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1607     double expSumPmf = 0;
1608     double expSumF   = 0;
1609     for (const PointState& pointState : points_)
1610     {
1611         if (pointState.inTargetRegion())
1612         {
1613             expSumPmf += std::exp(pointState.logPmfSum());
1614             expSumF += std::exp(-pointState.freeEnergy());
1615         }
1616     }
1617     double numSamples = histogramSize_.histogramSize() / numSharingSims;
1618
1619     /* Renormalize */
1620     double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1621     for (PointState& pointState : points_)
1622     {
1623         if (pointState.inTargetRegion())
1624         {
1625             pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1626         }
1627     }
1628 }
1629
1630 void BiasState::initGridPointState(const AwhBiasParams&          awhBiasParams,
1631                                    const std::vector<DimParams>& dimParams,
1632                                    const Grid&                   grid,
1633                                    const BiasParams&             params,
1634                                    const std::string&            filename,
1635                                    int                           numBias)
1636 {
1637     /* Modify PMF, free energy and the constant target distribution factor
1638      * to user input values if there is data given.
1639      */
1640     if (awhBiasParams.bUserData)
1641     {
1642         readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1643         setFreeEnergyToConvolvedPmf(dimParams, grid);
1644     }
1645
1646     /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1647     GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN || points_[0].weightSumRef() != 0,
1648                        "AWH reference weight histogram not initialized properly with local "
1649                        "Boltzmann target distribution.");
1650
1651     updateTargetDistribution(points_, params);
1652
1653     for (PointState& pointState : points_)
1654     {
1655         if (pointState.inTargetRegion())
1656         {
1657             pointState.updateBias();
1658         }
1659         else
1660         {
1661             /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1662             pointState.setTargetToZero();
1663         }
1664     }
1665
1666     /* Set the initial reference weighthistogram. */
1667     const double histogramSize = histogramSize_.histogramSize();
1668     for (auto& pointState : points_)
1669     {
1670         pointState.setInitialReferenceWeightHistogram(histogramSize);
1671     }
1672
1673     /* Make sure the pmf is normalized consistently with the histogram size.
1674        Note: the target distribution and free energy need to be set here. */
1675     normalizePmf(params.numSharedUpdate);
1676 }
1677
1678 BiasState::BiasState(const AwhBiasParams&          awhBiasParams,
1679                      double                        histogramSizeInitial,
1680                      const std::vector<DimParams>& dimParams,
1681                      const Grid&                   grid) :
1682     coordState_(awhBiasParams, dimParams, grid),
1683     points_(grid.numPoints()),
1684     weightSumCovering_(grid.numPoints()),
1685     histogramSize_(awhBiasParams, histogramSizeInitial)
1686 {
1687     /* The minimum and maximum multidimensional point indices that are affected by the next update */
1688     for (size_t d = 0; d < dimParams.size(); d++)
1689     {
1690         int index            = grid.point(coordState_.gridpointIndex()).index[d];
1691         originUpdatelist_[d] = index;
1692         endUpdatelist_[d]    = index;
1693     }
1694 }
1695
1696 } // namespace gmx