2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, The GROMACS development team.
5 * Copyright (c) 2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements the BiasState class.
41 * \author Viveca Lindahl
42 * \author Berk Hess <hess@kth.se>
48 #include "biasstate.h"
59 #include "gromacs/fileio/gmxfio.h"
60 #include "gromacs/fileio/xvgr.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/math/units.h"
63 #include "gromacs/math/utilities.h"
64 #include "gromacs/mdrunutility/multisim.h"
65 #include "gromacs/mdtypes/awh_history.h"
66 #include "gromacs/mdtypes/awh_params.h"
67 #include "gromacs/mdtypes/commrec.h"
68 #include "gromacs/simd/simd.h"
69 #include "gromacs/simd/simd_math.h"
70 #include "gromacs/utility/arrayref.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/gmxassert.h"
73 #include "gromacs/utility/smalloc.h"
74 #include "gromacs/utility/stringutil.h"
77 #include "pointstate.h"
82 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
84 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
86 /* The PMF is just the negative of the log of the sampled PMF histogram.
87 * Points with zero target weight are ignored, they will mostly contain noise.
89 for (size_t i = 0; i < points_.size(); i++)
91 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
99 * Sum an array over all simulations on the master rank of each simulation.
101 * \param[in,out] arrayRef The data to sum.
102 * \param[in] multiSimComm Struct for multi-simulation communication.
104 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
106 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
110 * Sum an array over all simulations on the master rank of each simulation.
112 * \param[in,out] arrayRef The data to sum.
113 * \param[in] multiSimComm Struct for multi-simulation communication.
115 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
117 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
121 * Sum an array over all simulations on all ranks of each simulation.
123 * This assumes the data is identical on all ranks within each simulation.
125 * \param[in,out] arrayRef The data to sum.
126 * \param[in] commRecord Struct for intra-simulation communication.
127 * \param[in] multiSimComm Struct for multi-simulation communication.
130 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
132 if (MASTER(commRecord))
134 sumOverSimulations(arrayRef, multiSimComm);
136 if (commRecord->nnodes > 1)
138 gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord->mpi_comm_mygroup);
143 * Sum PMF over multiple simulations, when requested.
145 * \param[in,out] pointState The state of the points in the bias.
146 * \param[in] numSharedUpdate The number of biases sharing the histogram.
147 * \param[in] commRecord Struct for intra-simulation communication.
148 * \param[in] multiSimComm Struct for multi-simulation communication.
150 void sumPmf(gmx::ArrayRef<PointState> pointState,
152 const t_commrec* commRecord,
153 const gmx_multisim_t* multiSimComm)
155 if (numSharedUpdate == 1)
159 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->numSimulations_ == 0,
160 "numSharedUpdate should be a multiple of multiSimComm->numSimulations_");
161 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
162 "Sharing within a simulation is not implemented (yet)");
164 std::vector<double> buffer(pointState.size());
166 /* Need to temporarily exponentiate the log weights to sum over simulations */
167 for (size_t i = 0; i < buffer.size(); i++)
169 buffer[i] = pointState[i].inTargetRegion() ? std::exp(pointState[i].logPmfSum()) : 0;
172 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
174 /* Take log again to get (non-normalized) PMF */
175 double normFac = 1.0 / numSharedUpdate;
176 for (gmx::index i = 0; i < pointState.ssize(); i++)
178 if (pointState[i].inTargetRegion())
180 pointState[i].setLogPmfSum(std::log(buffer[i] * normFac));
186 * Find the minimum free energy value.
188 * \param[in] pointState The state of the points.
189 * \returns the minimum free energy value.
191 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
193 double fMin = GMX_FLOAT_MAX;
195 for (auto const& ps : pointState)
197 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
199 fMin = ps.freeEnergy();
207 * Find and return the log of the probability weight of a point given a coordinate value.
209 * The unnormalized weight is given by
210 * w(point|value) = exp(bias(point) - U(value,point)),
211 * where U is a harmonic umbrella potential.
213 * \param[in] dimParams The bias dimensions parameters
214 * \param[in] points The point state.
215 * \param[in] grid The grid.
216 * \param[in] pointIndex Point to evaluate probability weight for.
217 * \param[in] pointBias Bias for the point (as a log weight).
218 * \param[in] value Coordinate value.
219 * \param[in] neighborLambdaEnergies The energy of the system in neighboring lambdas states. Can be
220 * empty when there are no free energy lambda state dimensions.
221 * \param[in] gridpointIndex The index of the current grid point.
222 * \returns the log of the biased probability weight.
224 double biasedLogWeightFromPoint(ArrayRef<const DimParams> dimParams,
225 ArrayRef<const PointState> points,
226 const BiasGrid& grid,
229 const awh_dvec value,
230 ArrayRef<const double> neighborLambdaEnergies,
233 double logWeight = detail::c_largeNegativeExponent;
235 /* Only points in the target region have non-zero weight */
236 if (points[pointIndex].inTargetRegion())
238 logWeight = pointBias;
240 /* Add potential for all parameter dimensions */
241 for (size_t d = 0; d < dimParams.size(); d++)
243 if (dimParams[d].isFepLambdaDimension())
245 /* If this is not a sampling step or if this function is called from
246 * calcConvolvedBias(), when writing energy subblocks, neighborLambdaEnergies will
247 * be empty. No convolution is required along the lambda dimension. */
248 if (!neighborLambdaEnergies.empty())
250 const int pointLambdaIndex = grid.point(pointIndex).coordValue[d];
251 const int gridpointLambdaIndex = grid.point(gridpointIndex).coordValue[d];
252 logWeight -= dimParams[d].fepDimParams().beta
253 * (neighborLambdaEnergies[pointLambdaIndex]
254 - neighborLambdaEnergies[gridpointLambdaIndex]);
259 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
260 logWeight -= 0.5 * dimParams[d].pullDimParams().betak * dev * dev;
268 * Calculates the marginal distribution (marginal probability) for each value along
269 * a free energy lambda axis.
270 * The marginal distribution of one coordinate dimension value is the sum of the probability
271 * distribution of all values (herein all neighbor values) with the same value in the dimension
273 * \param[in] grid The bias grid.
274 * \param[in] neighbors The points to use for the calculation of the marginal distribution.
275 * \param[in] probWeightNeighbor Probability weights of the neighbors.
276 * \returns The calculated marginal distribution in a 1D array with
277 * as many elements as there are points along the axis of interest.
279 std::vector<double> calculateFELambdaMarginalDistribution(const BiasGrid& grid,
280 ArrayRef<const int> neighbors,
281 ArrayRef<const double> probWeightNeighbor)
283 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
284 GMX_RELEASE_ASSERT(lambdaAxisIndex,
285 "There must be a free energy lambda axis in order to calculate the free "
286 "energy lambda marginal distribution.");
287 const int numFepLambdaStates = grid.numFepLambdaStates();
288 std::vector<double> lambdaMarginalDistribution(numFepLambdaStates, 0);
290 for (size_t i = 0; i < neighbors.size(); i++)
292 const int neighbor = neighbors[i];
293 const int lambdaState = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
294 lambdaMarginalDistribution[lambdaState] += probWeightNeighbor[i];
296 return lambdaMarginalDistribution;
301 void BiasState::calcConvolvedPmf(ArrayRef<const DimParams> dimParams,
302 const BiasGrid& grid,
303 std::vector<float>* convolvedPmf) const
305 size_t numPoints = grid.numPoints();
307 convolvedPmf->resize(numPoints);
309 /* Get the PMF to convolve. */
310 std::vector<float> pmf(numPoints);
313 for (size_t m = 0; m < numPoints; m++)
315 double freeEnergyWeights = 0;
316 const GridPoint& point = grid.point(m);
317 for (auto& neighbor : point.neighbor)
319 /* Do not convolve the bias along a lambda axis - only use the pmf from the current point */
320 if (!pointsHaveDifferentLambda(grid, m, neighbor))
322 /* The negative PMF is a positive bias. */
323 double biasNeighbor = -pmf[neighbor];
325 /* Add the convolved PMF weights for the neighbors of this point.
326 Note that this function only adds point within the target > 0 region.
327 Sum weights, take the logarithm last to get the free energy. */
328 double logWeight = biasedLogWeightFromPoint(
329 dimParams, points_, grid, neighbor, biasNeighbor, point.coordValue, {}, m);
330 freeEnergyWeights += std::exp(logWeight);
334 GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
335 "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
336 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
344 * Updates the target distribution for all points.
346 * The target distribution is always updated for all points
349 * \param[in,out] pointState The state of all points.
350 * \param[in] params The bias parameters.
352 void updateTargetDistribution(ArrayRef<PointState> pointState, const BiasParams& params)
354 double freeEnergyCutoff = 0;
355 if (params.eTarget == AwhTargetType::Cutoff)
357 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
360 double sumTarget = 0;
361 for (PointState& ps : pointState)
363 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
365 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
368 double invSum = 1.0 / sumTarget;
369 for (PointState& ps : pointState)
371 ps.scaleTarget(invSum);
376 * Puts together a string describing a grid point.
378 * \param[in] grid The grid.
379 * \param[in] point BiasGrid point index.
380 * \returns a string for the point.
382 std::string gridPointValueString(const BiasGrid& grid, int point)
384 std::string pointString;
388 for (int d = 0; d < grid.numDimensions(); d++)
390 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
391 if (d < grid.numDimensions() - 1)
406 int BiasState::warnForHistogramAnomalies(const BiasGrid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
408 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
409 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
411 /* Sum up the histograms and get their normalization */
412 double sumVisits = 0;
413 double sumWeights = 0;
414 for (auto& pointState : points_)
416 if (pointState.inTargetRegion())
418 sumVisits += pointState.numVisitsTot();
419 sumWeights += pointState.weightSumTot();
422 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
423 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
424 double invNormVisits = 1.0 / sumVisits;
425 double invNormWeight = 1.0 / sumWeights;
427 /* Check all points for warnings */
429 size_t numPoints = grid.numPoints();
430 for (size_t m = 0; m < numPoints; m++)
432 /* Skip points close to boundary or non-target region */
433 const GridPoint& gridPoint = grid.point(m);
434 bool skipPoint = false;
435 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
437 int neighbor = gridPoint.neighbor[n];
438 skipPoint = !points_[neighbor].inTargetRegion();
439 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
441 const GridPoint& neighborPoint = grid.point(neighbor);
442 skipPoint = neighborPoint.index[d] == 0
443 || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
447 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
448 const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
449 const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
450 if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
452 std::string pointValueString = gridPointValueString(grid, m);
453 std::string warningMessage = gmx::formatString(
455 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
456 "is less than a fraction %g of the reference distribution at that point. "
457 "If you are not certain about your settings you might want to increase your "
458 "pull force constant or "
459 "modify your sampling region.\n",
462 pointValueString.c_str(),
464 gmx::TextLineWrapper wrapper;
465 wrapper.settings().setLineLength(c_linewidth);
466 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
470 if (numWarnings >= maxNumWarnings)
479 double BiasState::calcUmbrellaForceAndPotential(ArrayRef<const DimParams> dimParams,
480 const BiasGrid& grid,
482 ArrayRef<const double> neighborLambdaDhdl,
483 ArrayRef<double> force) const
485 double potential = 0;
486 for (size_t d = 0; d < dimParams.size(); d++)
488 if (dimParams[d].isFepLambdaDimension())
490 if (!neighborLambdaDhdl.empty())
492 const int coordpointLambdaIndex = grid.point(point).coordValue[d];
493 force[d] = neighborLambdaDhdl[coordpointLambdaIndex];
494 /* The potential should not be affected by the lambda dimension. */
500 getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
501 double k = dimParams[d].pullDimParams().k;
503 /* Force from harmonic potential 0.5*k*dev^2 */
504 force[d] = -k * deviation;
505 potential += 0.5 * k * deviation * deviation;
512 void BiasState::calcConvolvedForce(ArrayRef<const DimParams> dimParams,
513 const BiasGrid& grid,
514 ArrayRef<const double> probWeightNeighbor,
515 ArrayRef<const double> neighborLambdaDhdl,
516 ArrayRef<double> forceWorkBuffer,
517 ArrayRef<double> force) const
519 for (size_t d = 0; d < dimParams.size(); d++)
524 /* Only neighboring points have non-negligible contribution. */
525 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
526 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
527 for (size_t n = 0; n < neighbor.size(); n++)
529 double weightNeighbor = probWeightNeighbor[n];
530 int indexNeighbor = neighbor[n];
532 /* Get the umbrella force from this point. The returned potential is ignored here. */
533 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, neighborLambdaDhdl, forceFromNeighbor);
535 /* Add the weighted umbrella force to the convolved force. */
536 for (size_t d = 0; d < dimParams.size(); d++)
538 force[d] += forceFromNeighbor[d] * weightNeighbor;
543 double BiasState::moveUmbrella(ArrayRef<const DimParams> dimParams,
544 const BiasGrid& grid,
545 ArrayRef<const double> probWeightNeighbor,
546 ArrayRef<const double> neighborLambdaDhdl,
547 ArrayRef<double> biasForce,
551 bool onlySampleUmbrellaGridpoint)
553 /* Generate and set a new coordinate reference value */
554 coordState_.sampleUmbrellaGridpoint(
555 grid, coordState_.gridpointIndex(), probWeightNeighbor, step, seed, indexSeed);
557 if (onlySampleUmbrellaGridpoint)
562 std::vector<double> newForce(dimParams.size());
563 double newPotential = calcUmbrellaForceAndPotential(
564 dimParams, grid, coordState_.umbrellaGridpoint(), neighborLambdaDhdl, newForce);
566 /* A modification of the reference value at time t will lead to a different
567 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
568 this means the force and velocity will change signs roughly as often.
569 To avoid any issues we take the average of the previous and new force
570 at steps when the reference value has been moved. E.g. if the ref. value
571 is set every step to (coord dvalue +/- delta) would give zero force.
573 for (gmx::index d = 0; d < biasForce.ssize(); d++)
575 /* Average of the current and new force */
576 biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
586 * Sets the histogram rescaling factors needed to control the histogram size.
588 * For sake of robustness, the reference weight histogram can grow at a rate
589 * different from the actual sampling rate. Typically this happens for a limited
590 * initial time, alternatively growth is scaled down by a constant factor for all
591 * times. Since the size of the reference histogram sets the size of the free
592 * energy update this should be reflected also in the PMF. Thus the PMF histogram
593 * needs to be rescaled too.
595 * This function should only be called by the bias update function or wrapped by a function that
596 * knows what scale factors should be applied when, e.g,
597 * getSkippedUpdateHistogramScaleFactors().
599 * \param[in] params The bias parameters.
600 * \param[in] newHistogramSize New reference weight histogram size.
601 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
602 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
603 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
605 void setHistogramUpdateScaleFactors(const BiasParams& params,
606 double newHistogramSize,
607 double oldHistogramSize,
608 double* weightHistScaling,
609 double* logPmfSumScaling)
612 /* The two scaling factors below are slightly different (ignoring the log factor) because the
613 reference and the PMF histogram apply weight scaling differently. The weight histogram
614 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
615 It is done this way because that is what target type local Boltzmann (for which
616 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
617 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
618 1) empirically this is necessary for converging the PMF; 2) since the extraction of
619 the PMF is theoretically only valid for a constant bias, new samples should get more
620 weight than old ones for which the bias is fluctuating more. */
622 newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
623 *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
628 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
629 double* weightHistScaling,
630 double* logPmfSumScaling) const
632 GMX_ASSERT(params.skipUpdates(),
633 "Calling function for skipped updates when skipping updates is not allowed");
635 if (inInitialStage())
637 /* In between global updates the reference histogram size is kept constant so we trivially
638 know what the histogram size was at the time of the skipped update. */
639 double histogramSize = histogramSize_.histogramSize();
640 setHistogramUpdateScaleFactors(
641 params, histogramSize, histogramSize, weightHistScaling, logPmfSumScaling);
645 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
646 *weightHistScaling = 1;
647 *logPmfSumScaling = 0;
651 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
653 double weightHistScaling;
654 double logPmfsumScaling;
656 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
658 for (auto& pointState : points_)
660 bool didUpdate = pointState.performPreviouslySkippedUpdates(
661 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
663 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
666 pointState.updateBias();
671 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const BiasGrid& grid)
673 double weightHistScaling;
674 double logPmfsumScaling;
676 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
678 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
679 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
680 for (auto& neighbor : neighbors)
682 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
683 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
687 points_[neighbor].updateBias();
696 * Merge update lists from multiple sharing simulations.
698 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
699 * \param[in] numPoints Total number of points.
700 * \param[in] commRecord Struct for intra-simulation communication.
701 * \param[in] multiSimComm Struct for multi-simulation communication.
703 void mergeSharedUpdateLists(std::vector<int>* updateList,
705 const t_commrec* commRecord,
706 const gmx_multisim_t* multiSimComm)
708 std::vector<int> numUpdatesOfPoint;
710 /* Flag the update points of this sim.
711 TODO: we can probably avoid allocating this array and just use the input array. */
712 numUpdatesOfPoint.resize(numPoints, 0);
713 for (auto& pointIndex : *updateList)
715 numUpdatesOfPoint[pointIndex] = 1;
718 /* Sum over the sims to get all the flagged points */
719 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
721 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
723 for (int m = 0; m < numPoints; m++)
725 if (numUpdatesOfPoint[m] > 0)
727 updateList->push_back(m);
733 * Generate an update list of points sampled since the last update.
735 * \param[in] grid The AWH bias.
736 * \param[in] points The point state.
737 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since
738 * last update. \param[in] endUpdatelist The end of the rectangular that has been sampled since
739 * last update. \param[in,out] updateList Local update list to set (assumed >= npoints long).
741 void makeLocalUpdateList(const BiasGrid& grid,
742 ArrayRef<const PointState> points,
743 const awh_ivec originUpdatelist,
744 const awh_ivec endUpdatelist,
745 std::vector<int>* updateList)
750 /* Define the update search grid */
751 for (int d = 0; d < grid.numDimensions(); d++)
753 origin[d] = originUpdatelist[d];
754 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
756 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
757 This helps for calculating the distance/number of points but should be removed and fixed when the way of
758 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
759 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
762 /* Make the update list */
765 bool pointExists = true;
768 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
770 if (pointExists && points[pointIndex].inTargetRegion())
772 updateList->push_back(pointIndex);
779 void BiasState::resetLocalUpdateRange(const BiasGrid& grid)
781 const int gridpointIndex = coordState_.gridpointIndex();
782 for (int d = 0; d < grid.numDimensions(); d++)
784 /* This gives the minimum range consisting only of the current closest point. */
785 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
786 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
794 * Add partial histograms (accumulating between updates) to accumulating histograms.
796 * \param[in,out] pointState The state of the points in the bias.
797 * \param[in,out] weightSumCovering The weights for checking covering.
798 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
799 * \param[in] commRecord Struct for intra-simulation communication.
800 * \param[in] multiSimComm Struct for multi-simulation communication.
801 * \param[in] localUpdateList List of points with data.
803 void sumHistograms(gmx::ArrayRef<PointState> pointState,
804 gmx::ArrayRef<double> weightSumCovering,
806 const t_commrec* commRecord,
807 const gmx_multisim_t* multiSimComm,
808 const std::vector<int>& localUpdateList)
810 /* The covering checking histograms are added before summing over simulations, so that the
811 weights from different simulations are kept distinguishable. */
812 for (int globalIndex : localUpdateList)
814 weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
817 /* Sum histograms over multiple simulations if needed. */
818 if (numSharedUpdate > 1)
820 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
821 "Sharing within a simulation is not implemented (yet)");
823 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
824 std::vector<double> weightSum;
825 std::vector<double> coordVisits;
827 weightSum.resize(localUpdateList.size());
828 coordVisits.resize(localUpdateList.size());
830 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
832 const PointState& ps = pointState[localUpdateList[localIndex]];
834 weightSum[localIndex] = ps.weightSumIteration();
835 coordVisits[localIndex] = ps.numVisitsIteration();
838 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
839 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
841 /* Transfer back the result */
842 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
844 PointState& ps = pointState[localUpdateList[localIndex]];
846 ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
850 /* Now add the partial counts and weights to the accumulating histograms.
851 Note: we still need to use the weights for the update so we wait
852 with resetting them until the end of the update. */
853 for (int globalIndex : localUpdateList)
855 pointState[globalIndex].addPartialWeightAndCount();
860 * Label points along an axis as covered or not.
862 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
864 * \param[in] visited Visited? For each point.
865 * \param[in] checkCovering Check for covering? For each point.
866 * \param[in] numPoints The number of grid points along this dimension.
867 * \param[in] period Period in number of points.
868 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
869 * \param[in,out] covered In this array elements are 1 for covered points and 0 for
870 * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
872 void labelCoveredPoints(const std::vector<bool>& visited,
873 const std::vector<bool>& checkCovering,
877 gmx::ArrayRef<int> covered)
879 GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
881 bool haveFirstNotVisited = false;
882 int firstNotVisited = -1;
883 int notVisitedLow = -1;
884 int notVisitedHigh = -1;
886 for (int n = 0; n < numPoints; n++)
888 if (checkCovering[n] && !visited[n])
890 if (!haveFirstNotVisited)
894 haveFirstNotVisited = true;
900 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
901 by unvisited points. The unvisted end points affect the coveredness of the
902 visited with a reach equal to the cover radius. */
903 int notCoveredLow = notVisitedLow + coverRadius;
904 int notCoveredHigh = notVisitedHigh - coverRadius;
905 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
907 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
910 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
911 the notVisitedLow of the next. */
912 notVisitedLow = notVisitedHigh;
917 /* Have labelled all the internal points. Now take care of the boundary regions. */
918 if (!haveFirstNotVisited)
920 /* No non-visited points <=> all points visited => all points covered. */
922 for (int n = 0; n < numPoints; n++)
929 int lastNotVisited = notVisitedLow;
931 /* For periodic boundaries, non-visited points can influence points
932 on the other side of the boundary so we need to wrap around. */
934 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
935 For non-periodic boundaries there is no lower end point so a dummy value is used. */
936 int notVisitedHigh = firstNotVisited;
937 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
939 int notCoveredLow = notVisitedLow + coverRadius;
940 int notCoveredHigh = notVisitedHigh - coverRadius;
942 for (int i = 0; i <= notVisitedHigh; i++)
944 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
945 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
948 /* Upper end. Same as for lower end but in the other direction. */
949 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
950 notVisitedLow = lastNotVisited;
952 notCoveredLow = notVisitedLow + coverRadius;
953 notCoveredHigh = notVisitedHigh - coverRadius;
955 for (int i = notVisitedLow; i <= numPoints - 1; i++)
957 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
958 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
965 bool BiasState::isSamplingRegionCovered(const BiasParams& params,
966 ArrayRef<const DimParams> dimParams,
967 const BiasGrid& grid,
968 const t_commrec* commRecord,
969 const gmx_multisim_t* multiSimComm) const
971 /* Allocate and initialize arrays: one for checking visits along each dimension,
972 one for keeping track of which points to check and one for the covered points.
973 Possibly these could be kept as AWH variables to avoid these allocations. */
976 std::vector<bool> visited;
977 std::vector<bool> checkCovering;
978 // We use int for the covering array since we might use gmx_sumi_sim.
979 std::vector<int> covered;
982 std::vector<CheckDim> checkDim;
983 checkDim.resize(grid.numDimensions());
985 for (int d = 0; d < grid.numDimensions(); d++)
987 const size_t numPoints = grid.axis(d).numPoints();
988 checkDim[d].visited.resize(numPoints, false);
989 checkDim[d].checkCovering.resize(numPoints, false);
990 checkDim[d].covered.resize(numPoints, 0);
993 /* Set visited points along each dimension and which points should be checked for covering.
994 Specifically, points above the free energy cutoff (if there is one) or points outside
995 of the target region are ignored. */
997 /* Set the free energy cutoff */
998 double maxFreeEnergy = GMX_FLOAT_MAX;
1000 if (params.eTarget == AwhTargetType::Cutoff)
1002 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
1005 /* Set the threshold weight for a point to be considered visited. */
1006 double weightThreshold = 1;
1007 for (int d = 0; d < grid.numDimensions(); d++)
1009 if (grid.axis(d).isFepLambdaAxis())
1011 /* Do not modify the weight threshold based on a FEP lambda axis. The spread
1012 * of the sampling weights is not depending on a Gaussian distribution (like
1014 weightThreshold *= 1.0;
1018 /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
1019 * approximately (given that the spacing can be modified if the dimension is periodic)
1020 * proportional to sqrt(1/(2*pi)). */
1021 weightThreshold *= grid.axis(d).spacing()
1022 * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
1026 /* Project the sampling weights onto each dimension */
1027 for (size_t m = 0; m < grid.numPoints(); m++)
1029 const PointState& pointState = points_[m];
1031 for (int d = 0; d < grid.numDimensions(); d++)
1033 int n = grid.point(m).index[d];
1035 /* Is visited if it was already visited or if there is enough weight at the current point */
1036 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
1038 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
1039 checkDim[d].checkCovering[n] =
1040 checkDim[d].checkCovering[n]
1041 || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
1045 /* Label each point along each dimension as covered or not. */
1046 for (int d = 0; d < grid.numDimensions(); d++)
1048 labelCoveredPoints(checkDim[d].visited,
1049 checkDim[d].checkCovering,
1050 grid.axis(d).numPoints(),
1051 grid.axis(d).numPointsInPeriod(),
1052 params.coverRadius()[d],
1053 checkDim[d].covered);
1056 /* Now check for global covering. Each dimension needs to be covered separately.
1057 A dimension is covered if each point is covered. Multiple simulations collectively
1058 cover the points, i.e. a point is covered if any of the simulations covered it.
1059 However, visited points are not shared, i.e. if a point is covered or not is
1060 determined by the visits of a single simulation. In general the covering criterion is
1061 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
1062 For 1 simulation, all points covered <=> all points visited. For multiple simulations
1063 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
1064 In the limit of large coverRadius, all points covered => all points visited by at least one
1065 simulation (since no point will be covered until all points have been visited by a
1066 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
1067 needs with surrounding points before sharing covering information with other simulations. */
1069 /* Communicate the covered points between sharing simulations if needed. */
1070 if (params.numSharedUpdate > 1)
1072 /* For multiple dimensions this may not be the best way to do it. */
1073 for (int d = 0; d < grid.numDimensions(); d++)
1076 gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
1082 /* Now check if for each dimension all points are covered. Break if not true. */
1083 bool allPointsCovered = true;
1084 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
1086 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
1088 allPointsCovered = (checkDim[d].covered[n] != 0);
1092 return allPointsCovered;
1096 * Normalizes the free energy and PMF sum.
1098 * \param[in] pointState The state of the points.
1100 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1102 double minF = freeEnergyMinimumValue(*pointState);
1104 for (PointState& ps : *pointState)
1106 ps.normalizeFreeEnergyAndPmfSum(minF);
1110 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(ArrayRef<const DimParams> dimParams,
1111 const BiasGrid& grid,
1112 const BiasParams& params,
1113 const t_commrec* commRecord,
1114 const gmx_multisim_t* multiSimComm,
1118 std::vector<int>* updateList)
1120 /* Note hat updateList is only used in this scope and is always
1121 * re-initialized. We do not use a local vector, because that would
1122 * cause reallocation every time this funtion is called and the vector
1123 * can be the size of the whole grid.
1126 /* Make a list of all local points, i.e. those that could have been touched since
1127 the last update. These are the points needed for summing histograms below
1128 (non-local points only add zeros). For local updates, this will also be the
1129 final update list. */
1130 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1131 if (params.numSharedUpdate > 1)
1133 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1136 /* Reset the range for the next update */
1137 resetLocalUpdateRange(grid);
1139 /* Add samples to histograms for all local points and sync simulations if needed */
1140 sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1142 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1144 /* Renormalize the free energy if values are too large. */
1145 bool needToNormalizeFreeEnergy = false;
1146 for (int& globalIndex : *updateList)
1148 /* We want to keep the absolute value of the free energies to be less
1149 c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1150 ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1151 in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1152 this requirement would be broken. */
1153 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1155 needToNormalizeFreeEnergy = true;
1160 /* Update target distribution? */
1161 bool needToUpdateTargetDistribution =
1162 (params.eTarget != AwhTargetType::Constant && params.isUpdateTargetStep(step));
1164 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1165 bool detectedCovering = false;
1166 if (inInitialStage())
1169 (params.isCheckCoveringStep(step)
1170 && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1173 /* The weighthistogram size after this update. */
1174 double newHistogramSize = histogramSize_.newHistogramSize(
1175 params, t, detectedCovering, points_, weightSumCovering_, fplog);
1177 /* Make the update list. Usually we try to only update local points,
1178 * but if the update has non-trivial or non-deterministic effects
1179 * on non-local points a global update is needed. This is the case when:
1180 * 1) a covering occurred in the initial stage, leading to non-trivial
1181 * histogram rescaling factors; or
1182 * 2) the target distribution will be updated, since we don't make any
1183 * assumption on its form; or
1184 * 3) the AWH parameters are such that we never attempt to skip non-local
1186 * 4) the free energy values have grown so large that a renormalization
1189 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1191 /* Global update, just add all points. */
1192 updateList->clear();
1193 for (size_t m = 0; m < points_.size(); m++)
1195 if (points_[m].inTargetRegion())
1197 updateList->push_back(m);
1202 /* Set histogram scale factors. */
1203 double weightHistScalingSkipped = 0;
1204 double logPmfsumScalingSkipped = 0;
1205 if (params.skipUpdates())
1207 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1209 double weightHistScalingNew;
1210 double logPmfsumScalingNew;
1211 setHistogramUpdateScaleFactors(
1212 params, newHistogramSize, histogramSize_.histogramSize(), &weightHistScalingNew, &logPmfsumScalingNew);
1214 /* Update free energy and reference weight histogram for points in the update list. */
1215 for (int pointIndex : *updateList)
1217 PointState* pointStateToUpdate = &points_[pointIndex];
1219 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1220 if (params.skipUpdates())
1222 pointStateToUpdate->performPreviouslySkippedUpdates(
1223 params, histogramSize_.numUpdates(), weightHistScalingSkipped, logPmfsumScalingSkipped);
1226 /* Now do an update with new sampling data. */
1227 pointStateToUpdate->updateWithNewSampling(
1228 params, histogramSize_.numUpdates(), weightHistScalingNew, logPmfsumScalingNew);
1231 /* Only update the histogram size after we are done with the local point updates */
1232 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1234 if (needToNormalizeFreeEnergy)
1236 normalizeFreeEnergyAndPmfSum(&points_);
1239 if (needToUpdateTargetDistribution)
1241 /* The target distribution is always updated for all points at once. */
1242 updateTargetDistribution(points_, params);
1245 /* Update the bias. The bias is updated separately and last since it simply a function of
1246 the free energy and the target distribution and we want to avoid doing extra work. */
1247 for (int pointIndex : *updateList)
1249 points_[pointIndex].updateBias();
1252 /* Increase the update counter. */
1253 histogramSize_.incrementNumUpdates();
1256 double BiasState::updateProbabilityWeightsAndConvolvedBias(ArrayRef<const DimParams> dimParams,
1257 const BiasGrid& grid,
1258 ArrayRef<const double> neighborLambdaEnergies,
1259 std::vector<double, AlignedAllocator<double>>* weight) const
1261 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1262 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1264 #if GMX_SIMD_HAVE_DOUBLE
1265 typedef SimdDouble PackType;
1266 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1268 typedef double PackType;
1269 constexpr int packSize = 1;
1271 /* Round the size of the weight array up to packSize */
1272 const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1273 weight->resize(weightSize);
1275 double* gmx_restrict weightData = weight->data();
1276 PackType weightSumPack(0.0);
1277 for (size_t i = 0; i < neighbors.size(); i += packSize)
1279 for (size_t n = i; n < i + packSize; n++)
1281 if (n < neighbors.size())
1283 const int neighbor = neighbors[n];
1284 (*weight)[n] = biasedLogWeightFromPoint(dimParams,
1288 points_[neighbor].bias(),
1289 coordState_.coordValue(),
1290 neighborLambdaEnergies,
1291 coordState_.gridpointIndex());
1295 /* Pad with values that don't affect the result */
1296 (*weight)[n] = detail::c_largeNegativeExponent;
1299 PackType weightPack = load<PackType>(weightData + i);
1300 weightPack = gmx::exp(weightPack);
1301 weightSumPack = weightSumPack + weightPack;
1302 store(weightData + i, weightPack);
1304 /* Sum of probability weights */
1305 double weightSum = reduce(weightSumPack);
1306 GMX_RELEASE_ASSERT(weightSum > 0,
1307 "zero probability weight when updating AWH probability weights.");
1309 /* Normalize probabilities to sum to 1 */
1310 double invWeightSum = 1 / weightSum;
1312 /* When there is a free energy lambda state axis remove the convolved contributions along that
1313 * axis from the total bias. This must be done after calculating invWeightSum (since weightSum
1314 * will be modified), but before normalizing the weights (below). */
1315 if (grid.hasLambdaAxis())
1317 /* If there is only one axis the bias will not be convolved in any dimension. */
1318 if (grid.axis().size() == 1)
1320 weightSum = gmx::exp(points_[coordState_.gridpointIndex()].bias());
1324 for (size_t i = 0; i < neighbors.size(); i++)
1326 const int neighbor = neighbors[i];
1327 if (pointsHaveDifferentLambda(grid, coordState_.gridpointIndex(), neighbor))
1329 weightSum -= weightData[i];
1335 for (double& w : *weight)
1340 /* Return the convolved bias */
1341 return std::log(weightSum);
1344 double BiasState::calcConvolvedBias(ArrayRef<const DimParams> dimParams,
1345 const BiasGrid& grid,
1346 const awh_dvec& coordValue) const
1348 int point = grid.nearestIndex(coordValue);
1349 const GridPoint& gridPoint = grid.point(point);
1351 /* Sum the probability weights from the neighborhood of the given point */
1352 double weightSum = 0;
1353 for (int neighbor : gridPoint.neighbor)
1355 /* No convolution is required along the lambda dimension. */
1356 if (pointsHaveDifferentLambda(grid, point, neighbor))
1360 double logWeight = biasedLogWeightFromPoint(
1361 dimParams, points_, grid, neighbor, points_[neighbor].bias(), coordValue, {}, point);
1362 weightSum += std::exp(logWeight);
1365 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1366 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1369 void BiasState::sampleProbabilityWeights(const BiasGrid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1371 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1373 /* Save weights for next update */
1374 for (size_t n = 0; n < neighbor.size(); n++)
1376 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1379 /* Update the local update range. Two corner points define this rectangular
1380 * domain. We need to choose two new corner points such that the new domain
1381 * contains both the old update range and the current neighborhood.
1382 * In the simplest case when an update is performed every sample,
1383 * the update range would simply equal the current neighborhood.
1385 int neighborStart = neighbor[0];
1386 int neighborLast = neighbor[neighbor.size() - 1];
1387 for (int d = 0; d < grid.numDimensions(); d++)
1389 int origin = grid.point(neighborStart).index[d];
1390 int last = grid.point(neighborLast).index[d];
1394 /* Unwrap if wrapped around the boundary (only happens for periodic
1395 * boundaries). This has been already for the stored index interval.
1397 /* TODO: what we want to do is to find the smallest the update
1398 * interval that contains all points that need to be updated.
1399 * This amounts to combining two intervals, the current
1400 * [origin, end] update interval and the new touched neighborhood
1401 * into a new interval that contains all points from both the old
1404 * For periodic boundaries it becomes slightly more complicated
1405 * than for closed boundaries because then it needs not be
1406 * true that origin < end (so one can't simply relate the origin/end
1407 * in the min()/max() below). The strategy here is to choose the
1408 * origin closest to a reference point (index 0) and then unwrap
1409 * the end index if needed and choose the largest end index.
1410 * This ensures that both intervals are in the new interval
1411 * but it's not necessarily the smallest.
1412 * Currently we solve this by going through each possibility
1413 * and checking them.
1415 last += grid.axis(d).numPointsInPeriod();
1418 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1419 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1423 void BiasState::sampleCoordAndPmf(const std::vector<DimParams>& dimParams,
1424 const BiasGrid& grid,
1425 gmx::ArrayRef<const double> probWeightNeighbor,
1426 double convolvedBias)
1428 /* Sampling-based deconvolution extracting the PMF.
1429 * Update the PMF histogram with the current coordinate value.
1431 * Because of the finite width of the harmonic potential, the free energy
1432 * defined for each coordinate point does not exactly equal that of the
1433 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1434 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1435 * reweighted histogram of the coordinate value. Strictly, this relies on
1436 * the unknown normalization constant Z being either constant or known. Here,
1437 * neither is true except in the long simulation time limit. Empirically however,
1438 * it works (mainly because how the PMF histogram is rescaled).
1441 const int gridPointIndex = coordState_.gridpointIndex();
1442 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
1444 /* Update the PMF of points along a lambda axis with their bias. */
1445 if (lambdaAxisIndex)
1447 const std::vector<int>& neighbors = grid.point(gridPointIndex).neighbor;
1449 std::vector<double> lambdaMarginalDistribution =
1450 calculateFELambdaMarginalDistribution(grid, neighbors, probWeightNeighbor);
1452 awh_dvec coordValueAlongLambda = { coordState_.coordValue()[0],
1453 coordState_.coordValue()[1],
1454 coordState_.coordValue()[2],
1455 coordState_.coordValue()[3] };
1456 for (size_t i = 0; i < neighbors.size(); i++)
1458 const int neighbor = neighbors[i];
1460 if (pointsAlongLambdaAxis(grid, gridPointIndex, neighbor))
1462 const double neighborLambda = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
1463 if (neighbor == gridPointIndex)
1465 bias = convolvedBias;
1469 coordValueAlongLambda[lambdaAxisIndex.value()] = neighborLambda;
1470 bias = calcConvolvedBias(dimParams, grid, coordValueAlongLambda);
1473 const double probWeight = lambdaMarginalDistribution[neighborLambda];
1474 const double weightedBias = bias - std::log(std::max(probWeight, GMX_DOUBLE_MIN)); // avoid log(0)
1476 if (neighbor == gridPointIndex && grid.covers(coordState_.coordValue()))
1478 points_[neighbor].samplePmf(weightedBias);
1482 points_[neighbor].updatePmfUnvisited(weightedBias);
1489 /* Only save coordinate data that is in range (the given index is always
1490 * in range even if the coordinate value is not).
1492 if (grid.covers(coordState_.coordValue()))
1494 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1495 points_[gridPointIndex].samplePmf(convolvedBias);
1499 /* Save probability weights for the update */
1500 sampleProbabilityWeights(grid, probWeightNeighbor);
1503 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1505 biasHistory->pointState.resize(points_.size());
1508 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const BiasGrid& grid) const
1510 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1511 "The AWH history setup does not match the AWH state.");
1513 AwhBiasStateHistory* stateHistory = &biasHistory->state;
1514 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1516 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1518 AwhPointStateHistory* psh = &biasHistory->pointState[m];
1520 points_[m].storeState(psh);
1522 psh->weightsum_covering = weightSumCovering_[m];
1525 histogramSize_.storeState(stateHistory);
1527 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1528 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid, endUpdatelist_);
1531 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const BiasGrid& grid)
1533 const AwhBiasStateHistory& stateHistory = biasHistory.state;
1535 coordState_.restoreFromHistory(stateHistory);
1537 if (biasHistory.pointState.size() != points_.size())
1540 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1541 "Likely you provided a checkpoint from a different simulation."));
1543 for (size_t m = 0; m < points_.size(); m++)
1545 points_[m].setFromHistory(biasHistory.pointState[m]);
1548 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1550 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1553 histogramSize_.restoreFromHistory(stateHistory);
1555 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1556 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1559 void BiasState::broadcast(const t_commrec* commRecord)
1561 gmx_bcast(sizeof(coordState_), &coordState_, commRecord->mpi_comm_mygroup);
1563 gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord->mpi_comm_mygroup);
1565 gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(), commRecord->mpi_comm_mygroup);
1567 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord->mpi_comm_mygroup);
1570 void BiasState::setFreeEnergyToConvolvedPmf(ArrayRef<const DimParams> dimParams, const BiasGrid& grid)
1572 std::vector<float> convolvedPmf;
1574 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1576 for (size_t m = 0; m < points_.size(); m++)
1578 points_[m].setFreeEnergy(convolvedPmf[m]);
1583 * Count trailing data rows containing only zeros.
1585 * \param[in] data 2D data array.
1586 * \param[in] numRows Number of rows in array.
1587 * \param[in] numColumns Number of cols in array.
1588 * \returns the number of trailing zero rows.
1590 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1592 int numZeroRows = 0;
1593 for (int m = numRows - 1; m >= 0; m--)
1595 bool rowIsZero = true;
1596 for (int d = 0; d < numColumns; d++)
1598 if (data[d][m] != 0)
1607 /* At a row with non-zero data */
1612 /* Still at a zero data row, keep checking rows higher up. */
1621 * Initializes the PMF and target with data read from an input table.
1623 * \param[in] dimParams The dimension parameters.
1624 * \param[in] grid The grid.
1625 * \param[in] filename The filename to read PMF and target from.
1626 * \param[in] numBias Number of biases.
1627 * \param[in] biasIndex The index of the bias.
1628 * \param[in,out] pointState The state of the points in this bias.
1630 static void readUserPmfAndTargetDistribution(ArrayRef<const DimParams> dimParams,
1631 const BiasGrid& grid,
1632 const std::string& filename,
1635 std::vector<PointState>* pointState)
1637 /* Read the PMF and target distribution.
1638 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1639 base on the force constant. The free energy and target together determine the bias.
1641 std::string filenameModified(filename);
1644 size_t n = filenameModified.rfind('.');
1645 GMX_RELEASE_ASSERT(n != std::string::npos,
1646 "The filename should contain an extension starting with .");
1647 filenameModified.insert(n, formatString("%d", biasIndex));
1650 std::string correctFormatMessage = formatString(
1651 "%s is expected in the following format. "
1652 "The first ndim column(s) should contain the coordinate values for each point, "
1653 "each column containing values of one dimension (in ascending order). "
1654 "For a multidimensional coordinate, points should be listed "
1655 "in the order obtained by traversing lower dimensions first. "
1656 "E.g. for two-dimensional grid of size nxn: "
1657 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1658 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1659 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1660 "Make sure the input file ends with a new line but has no trailing new lines.",
1662 gmx::TextLineWrapper wrapper;
1663 wrapper.settings().setLineLength(c_linewidth);
1664 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1668 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1670 /* Check basic data properties here. BiasGrid takes care of more complicated things. */
1674 std::string mesg = gmx::formatString(
1675 "%s is empty!.\n\n%s", filename.c_str(), correctFormatMessage.c_str());
1676 GMX_THROW(InvalidInputError(mesg));
1679 /* Less than 2 points is not useful for PMF or target. */
1682 std::string mesg = gmx::formatString(
1683 "%s contains too few data points (%d)."
1684 "The minimum number of points is 2.",
1687 GMX_THROW(InvalidInputError(mesg));
1690 /* Make sure there are enough columns of data.
1692 Two formats are allowed. Either with columns {coords, PMF, target} or
1693 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1694 is how AWH output is written (x, y, z being other AWH variables). For this format,
1695 trailing columns are ignored.
1697 int columnIndexTarget;
1698 int numColumnsMin = dimParams.size() + 2;
1699 int columnIndexPmf = dimParams.size();
1700 if (numColumns == numColumnsMin)
1702 columnIndexTarget = columnIndexPmf + 1;
1706 columnIndexTarget = columnIndexPmf + 4;
1709 if (numColumns < numColumnsMin)
1711 std::string mesg = gmx::formatString(
1712 "The number of columns in %s should be at least %d."
1716 correctFormatMessage.c_str());
1717 GMX_THROW(InvalidInputError(mesg));
1720 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1721 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1722 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1723 if (numZeroRows > 1)
1725 std::string mesg = gmx::formatString(
1726 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1730 GMX_THROW(InvalidInputError(mesg));
1733 /* Convert from user units to internal units before sending the data of to grid. */
1734 for (size_t d = 0; d < dimParams.size(); d++)
1736 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1737 if (scalingFactor == 1)
1741 for (size_t m = 0; m < pointState->size(); m++)
1743 data[d][m] *= scalingFactor;
1747 /* Get a data point for each AWH grid point so that they all get data. */
1748 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1749 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1751 /* Extract the data for each grid point.
1752 * We check if the target distribution is zero for all points.
1754 bool targetDistributionIsZero = true;
1755 for (size_t m = 0; m < pointState->size(); m++)
1757 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1758 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1760 /* Check if the values are allowed. */
1763 std::string mesg = gmx::formatString(
1764 "Target distribution weight at point %zu (%g) in %s is negative.",
1768 GMX_THROW(InvalidInputError(mesg));
1772 targetDistributionIsZero = false;
1774 (*pointState)[m].setTargetConstantWeight(target);
1777 if (targetDistributionIsZero)
1780 gmx::formatString("The target weights given in column %d in %s are all 0",
1783 GMX_THROW(InvalidInputError(mesg));
1786 /* Free the arrays. */
1787 for (int m = 0; m < numColumns; m++)
1794 void BiasState::normalizePmf(int numSharingSims)
1796 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1797 Approximately (for large enough force constant) we should have:
1798 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1801 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1802 double expSumPmf = 0;
1804 for (const PointState& pointState : points_)
1806 if (pointState.inTargetRegion())
1808 expSumPmf += std::exp(pointState.logPmfSum());
1809 expSumF += std::exp(-pointState.freeEnergy());
1812 double numSamples = histogramSize_.histogramSize() / numSharingSims;
1815 double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1816 for (PointState& pointState : points_)
1818 if (pointState.inTargetRegion())
1820 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1825 void BiasState::initGridPointState(const AwhBiasParams& awhBiasParams,
1826 ArrayRef<const DimParams> dimParams,
1827 const BiasGrid& grid,
1828 const BiasParams& params,
1829 const std::string& filename,
1832 /* Modify PMF, free energy and the constant target distribution factor
1833 * to user input values if there is data given.
1835 if (awhBiasParams.userPMFEstimate())
1837 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1838 setFreeEnergyToConvolvedPmf(dimParams, grid);
1841 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1842 GMX_RELEASE_ASSERT(params.eTarget != AwhTargetType::LocalBoltzmann || points_[0].weightSumRef() != 0,
1843 "AWH reference weight histogram not initialized properly with local "
1844 "Boltzmann target distribution.");
1846 updateTargetDistribution(points_, params);
1848 for (PointState& pointState : points_)
1850 if (pointState.inTargetRegion())
1852 pointState.updateBias();
1856 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1857 pointState.setTargetToZero();
1861 /* Set the initial reference weighthistogram. */
1862 const double histogramSize = histogramSize_.histogramSize();
1863 for (auto& pointState : points_)
1865 pointState.setInitialReferenceWeightHistogram(histogramSize);
1868 /* Make sure the pmf is normalized consistently with the histogram size.
1869 Note: the target distribution and free energy need to be set here. */
1870 normalizePmf(params.numSharedUpdate);
1873 BiasState::BiasState(const AwhBiasParams& awhBiasParams,
1874 double histogramSizeInitial,
1875 ArrayRef<const DimParams> dimParams,
1876 const BiasGrid& grid) :
1877 coordState_(awhBiasParams, dimParams, grid),
1878 points_(grid.numPoints()),
1879 weightSumCovering_(grid.numPoints()),
1880 histogramSize_(awhBiasParams, histogramSizeInitial)
1882 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1883 for (size_t d = 0; d < dimParams.size(); d++)
1885 int index = grid.point(coordState_.gridpointIndex()).index[d];
1886 originUpdatelist_[d] = index;
1887 endUpdatelist_[d] = index;