2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * Implements the BiasState class.
40 * \author Viveca Lindahl
41 * \author Berk Hess <hess@kth.se>
47 #include "biasstate.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/math/utilities.h"
62 #include "gromacs/mdrunutility/multisim.h"
63 #include "gromacs/mdtypes/awh_history.h"
64 #include "gromacs/mdtypes/awh_params.h"
65 #include "gromacs/mdtypes/commrec.h"
66 #include "gromacs/simd/simd.h"
67 #include "gromacs/simd/simd_math.h"
68 #include "gromacs/utility/arrayref.h"
69 #include "gromacs/utility/exceptions.h"
70 #include "gromacs/utility/gmxassert.h"
71 #include "gromacs/utility/smalloc.h"
72 #include "gromacs/utility/stringutil.h"
75 #include "pointstate.h"
80 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
82 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
84 /* The PMF is just the negative of the log of the sampled PMF histogram.
85 * Points with zero target weight are ignored, they will mostly contain noise.
87 for (size_t i = 0; i < points_.size(); i++)
89 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
97 * Sum an array over all simulations on the master rank of each simulation.
99 * \param[in,out] arrayRef The data to sum.
100 * \param[in] multiSimComm Struct for multi-simulation communication.
102 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
104 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
108 * Sum an array over all simulations on the master rank of each simulation.
110 * \param[in,out] arrayRef The data to sum.
111 * \param[in] multiSimComm Struct for multi-simulation communication.
113 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
115 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
119 * Sum an array over all simulations on all ranks of each simulation.
121 * This assumes the data is identical on all ranks within each simulation.
123 * \param[in,out] arrayRef The data to sum.
124 * \param[in] commRecord Struct for intra-simulation communication.
125 * \param[in] multiSimComm Struct for multi-simulation communication.
128 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
130 if (MASTER(commRecord))
132 sumOverSimulations(arrayRef, multiSimComm);
134 if (commRecord->nnodes > 1)
136 gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord->mpi_comm_mygroup);
141 * Sum PMF over multiple simulations, when requested.
143 * \param[in,out] pointState The state of the points in the bias.
144 * \param[in] numSharedUpdate The number of biases sharing the histogram.
145 * \param[in] commRecord Struct for intra-simulation communication.
146 * \param[in] multiSimComm Struct for multi-simulation communication.
148 void sumPmf(gmx::ArrayRef<PointState> pointState,
150 const t_commrec* commRecord,
151 const gmx_multisim_t* multiSimComm)
153 if (numSharedUpdate == 1)
157 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->numSimulations_ == 0,
158 "numSharedUpdate should be a multiple of multiSimComm->numSimulations_");
159 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
160 "Sharing within a simulation is not implemented (yet)");
162 std::vector<double> buffer(pointState.size());
164 /* Need to temporarily exponentiate the log weights to sum over simulations */
165 for (size_t i = 0; i < buffer.size(); i++)
167 buffer[i] = pointState[i].inTargetRegion() ? std::exp(-pointState[i].logPmfSum()) : 0;
170 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
172 /* Take log again to get (non-normalized) PMF */
173 double normFac = 1.0 / numSharedUpdate;
174 for (gmx::index i = 0; i < pointState.ssize(); i++)
176 if (pointState[i].inTargetRegion())
178 pointState[i].setLogPmfSum(-std::log(buffer[i] * normFac));
184 * Find the minimum free energy value.
186 * \param[in] pointState The state of the points.
187 * \returns the minimum free energy value.
189 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
191 double fMin = GMX_FLOAT_MAX;
193 for (auto const& ps : pointState)
195 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
197 fMin = ps.freeEnergy();
205 * Find and return the log of the probability weight of a point given a coordinate value.
207 * The unnormalized weight is given by
208 * w(point|value) = exp(bias(point) - U(value,point)),
209 * where U is a harmonic umbrella potential.
211 * \param[in] dimParams The bias dimensions parameters
212 * \param[in] points The point state.
213 * \param[in] grid The grid.
214 * \param[in] pointIndex Point to evaluate probability weight for.
215 * \param[in] pointBias Bias for the point (as a log weight).
216 * \param[in] value Coordinate value.
217 * \param[in] neighborLambdaEnergies The energy of the system in neighboring lambdas states. Can be
218 * empty when there are no free energy lambda state dimensions.
219 * \param[in] gridpointIndex The index of the current grid point.
220 * \returns the log of the biased probability weight.
222 double biasedLogWeightFromPoint(const std::vector<DimParams>& dimParams,
223 const std::vector<PointState>& points,
224 const BiasGrid& grid,
227 const awh_dvec value,
228 gmx::ArrayRef<const double> neighborLambdaEnergies,
231 double logWeight = detail::c_largeNegativeExponent;
233 /* Only points in the target region have non-zero weight */
234 if (points[pointIndex].inTargetRegion())
236 logWeight = pointBias;
238 /* Add potential for all parameter dimensions */
239 for (size_t d = 0; d < dimParams.size(); d++)
241 if (dimParams[d].isFepLambdaDimension())
243 /* If this is not a sampling step or if this function is called from
244 * calcConvolvedBias(), when writing energy subblocks, neighborLambdaEnergies will
245 * be empty. No convolution is required along the lambda dimension. */
246 if (!neighborLambdaEnergies.empty())
248 const int pointLambdaIndex = grid.point(pointIndex).coordValue[d];
249 const int gridpointLambdaIndex = grid.point(gridpointIndex).coordValue[d];
250 logWeight -= dimParams[d].fepDimParams().beta
251 * (neighborLambdaEnergies[pointLambdaIndex]
252 - neighborLambdaEnergies[gridpointLambdaIndex]);
257 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
258 logWeight -= 0.5 * dimParams[d].pullDimParams().betak * dev * dev;
266 * Calculates the marginal distribution (marginal probability) for each value along
267 * a free energy lambda axis.
268 * The marginal distribution of one coordinate dimension value is the sum of the probability
269 * distribution of all values (herein all neighbor values) with the same value in the dimension
271 * \param[in] grid The bias grid.
272 * \param[in] neighbors The points to use for the calculation of the marginal distribution.
273 * \param[in] probWeightNeighbor Probability weights of the neighbors.
274 * \returns The calculated marginal distribution in a 1D array with
275 * as many elements as there are points along the axis of interest.
277 std::vector<double> calculateFELambdaMarginalDistribution(const BiasGrid& grid,
278 gmx::ArrayRef<const int> neighbors,
279 gmx::ArrayRef<const double> probWeightNeighbor)
281 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
282 GMX_RELEASE_ASSERT(lambdaAxisIndex,
283 "There must be a free energy lambda axis in order to calculate the free "
284 "energy lambda marginal distribution.");
285 const int numFepLambdaStates = grid.numFepLambdaStates();
286 std::vector<double> lambdaMarginalDistribution(numFepLambdaStates, 0);
288 for (size_t i = 0; i < neighbors.size(); i++)
290 const int neighbor = neighbors[i];
291 const int lambdaState = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
292 lambdaMarginalDistribution[lambdaState] += probWeightNeighbor[i];
294 return lambdaMarginalDistribution;
299 void BiasState::calcConvolvedPmf(const std::vector<DimParams>& dimParams,
300 const BiasGrid& grid,
301 std::vector<float>* convolvedPmf) const
303 size_t numPoints = grid.numPoints();
305 convolvedPmf->resize(numPoints);
307 /* Get the PMF to convolve. */
308 std::vector<float> pmf(numPoints);
311 for (size_t m = 0; m < numPoints; m++)
313 double freeEnergyWeights = 0;
314 const GridPoint& point = grid.point(m);
315 for (auto& neighbor : point.neighbor)
317 /* Do not convolve the bias along a lambda axis - only use the pmf from the current point */
318 if (!pointsHaveDifferentLambda(grid, m, neighbor))
320 /* The negative PMF is a positive bias. */
321 double biasNeighbor = -pmf[neighbor];
323 /* Add the convolved PMF weights for the neighbors of this point.
324 Note that this function only adds point within the target > 0 region.
325 Sum weights, take the logarithm last to get the free energy. */
326 double logWeight = biasedLogWeightFromPoint(
327 dimParams, points_, grid, neighbor, biasNeighbor, point.coordValue, {}, m);
328 freeEnergyWeights += std::exp(logWeight);
332 GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
333 "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
334 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
342 * Updates the target distribution for all points.
344 * The target distribution is always updated for all points
347 * \param[in,out] pointState The state of all points.
348 * \param[in] params The bias parameters.
350 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState, const BiasParams& params)
352 double freeEnergyCutoff = 0;
353 if (params.eTarget == eawhtargetCUTOFF)
355 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
358 double sumTarget = 0;
359 for (PointState& ps : pointState)
361 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
363 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
366 double invSum = 1.0 / sumTarget;
367 for (PointState& ps : pointState)
369 ps.scaleTarget(invSum);
374 * Puts together a string describing a grid point.
376 * \param[in] grid The grid.
377 * \param[in] point BiasGrid point index.
378 * \returns a string for the point.
380 std::string gridPointValueString(const BiasGrid& grid, int point)
382 std::string pointString;
386 for (int d = 0; d < grid.numDimensions(); d++)
388 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
389 if (d < grid.numDimensions() - 1)
404 int BiasState::warnForHistogramAnomalies(const BiasGrid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
406 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
407 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
409 /* Sum up the histograms and get their normalization */
410 double sumVisits = 0;
411 double sumWeights = 0;
412 for (auto& pointState : points_)
414 if (pointState.inTargetRegion())
416 sumVisits += pointState.numVisitsTot();
417 sumWeights += pointState.weightSumTot();
420 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
421 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
422 double invNormVisits = 1.0 / sumVisits;
423 double invNormWeight = 1.0 / sumWeights;
425 /* Check all points for warnings */
427 size_t numPoints = grid.numPoints();
428 for (size_t m = 0; m < numPoints; m++)
430 /* Skip points close to boundary or non-target region */
431 const GridPoint& gridPoint = grid.point(m);
432 bool skipPoint = false;
433 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
435 int neighbor = gridPoint.neighbor[n];
436 skipPoint = !points_[neighbor].inTargetRegion();
437 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
439 const GridPoint& neighborPoint = grid.point(neighbor);
440 skipPoint = neighborPoint.index[d] == 0
441 || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
445 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
446 const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
447 const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
448 if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
450 std::string pointValueString = gridPointValueString(grid, m);
451 std::string warningMessage = gmx::formatString(
453 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
454 "is less than a fraction %g of the reference distribution at that point. "
455 "If you are not certain about your settings you might want to increase your "
456 "pull force constant or "
457 "modify your sampling region.\n",
460 pointValueString.c_str(),
462 gmx::TextLineWrapper wrapper;
463 wrapper.settings().setLineLength(c_linewidth);
464 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
468 if (numWarnings >= maxNumWarnings)
477 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams>& dimParams,
478 const BiasGrid& grid,
480 ArrayRef<const double> neighborLambdaDhdl,
481 gmx::ArrayRef<double> force) const
483 double potential = 0;
484 for (size_t d = 0; d < dimParams.size(); d++)
486 if (dimParams[d].isFepLambdaDimension())
488 if (!neighborLambdaDhdl.empty())
490 const int coordpointLambdaIndex = grid.point(point).coordValue[d];
491 force[d] = neighborLambdaDhdl[coordpointLambdaIndex];
492 /* The potential should not be affected by the lambda dimension. */
498 getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
499 double k = dimParams[d].pullDimParams().k;
501 /* Force from harmonic potential 0.5*k*dev^2 */
502 force[d] = -k * deviation;
503 potential += 0.5 * k * deviation * deviation;
510 void BiasState::calcConvolvedForce(const std::vector<DimParams>& dimParams,
511 const BiasGrid& grid,
512 gmx::ArrayRef<const double> probWeightNeighbor,
513 ArrayRef<const double> neighborLambdaDhdl,
514 gmx::ArrayRef<double> forceWorkBuffer,
515 gmx::ArrayRef<double> force) const
517 for (size_t d = 0; d < dimParams.size(); d++)
522 /* Only neighboring points have non-negligible contribution. */
523 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
524 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
525 for (size_t n = 0; n < neighbor.size(); n++)
527 double weightNeighbor = probWeightNeighbor[n];
528 int indexNeighbor = neighbor[n];
530 /* Get the umbrella force from this point. The returned potential is ignored here. */
531 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, neighborLambdaDhdl, forceFromNeighbor);
533 /* Add the weighted umbrella force to the convolved force. */
534 for (size_t d = 0; d < dimParams.size(); d++)
536 force[d] += forceFromNeighbor[d] * weightNeighbor;
541 double BiasState::moveUmbrella(const std::vector<DimParams>& dimParams,
542 const BiasGrid& grid,
543 gmx::ArrayRef<const double> probWeightNeighbor,
544 ArrayRef<const double> neighborLambdaDhdl,
545 gmx::ArrayRef<double> biasForce,
549 bool onlySampleUmbrellaGridpoint)
551 /* Generate and set a new coordinate reference value */
552 coordState_.sampleUmbrellaGridpoint(
553 grid, coordState_.gridpointIndex(), probWeightNeighbor, step, seed, indexSeed);
555 if (onlySampleUmbrellaGridpoint)
560 std::vector<double> newForce(dimParams.size());
561 double newPotential = calcUmbrellaForceAndPotential(
562 dimParams, grid, coordState_.umbrellaGridpoint(), neighborLambdaDhdl, newForce);
564 /* A modification of the reference value at time t will lead to a different
565 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
566 this means the force and velocity will change signs roughly as often.
567 To avoid any issues we take the average of the previous and new force
568 at steps when the reference value has been moved. E.g. if the ref. value
569 is set every step to (coord dvalue +/- delta) would give zero force.
571 for (gmx::index d = 0; d < biasForce.ssize(); d++)
573 /* Average of the current and new force */
574 biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
584 * Sets the histogram rescaling factors needed to control the histogram size.
586 * For sake of robustness, the reference weight histogram can grow at a rate
587 * different from the actual sampling rate. Typically this happens for a limited
588 * initial time, alternatively growth is scaled down by a constant factor for all
589 * times. Since the size of the reference histogram sets the size of the free
590 * energy update this should be reflected also in the PMF. Thus the PMF histogram
591 * needs to be rescaled too.
593 * This function should only be called by the bias update function or wrapped by a function that
594 * knows what scale factors should be applied when, e.g,
595 * getSkippedUpdateHistogramScaleFactors().
597 * \param[in] params The bias parameters.
598 * \param[in] newHistogramSize New reference weight histogram size.
599 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
600 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
601 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
603 void setHistogramUpdateScaleFactors(const BiasParams& params,
604 double newHistogramSize,
605 double oldHistogramSize,
606 double* weightHistScaling,
607 double* logPmfSumScaling)
610 /* The two scaling factors below are slightly different (ignoring the log factor) because the
611 reference and the PMF histogram apply weight scaling differently. The weight histogram
612 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
613 It is done this way because that is what target type local Boltzmann (for which
614 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
615 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
616 1) empirically this is necessary for converging the PMF; 2) since the extraction of
617 the PMF is theoretically only valid for a constant bias, new samples should get more
618 weight than old ones for which the bias is fluctuating more. */
620 newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
621 *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
626 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
627 double* weightHistScaling,
628 double* logPmfSumScaling) const
630 GMX_ASSERT(params.skipUpdates(),
631 "Calling function for skipped updates when skipping updates is not allowed");
633 if (inInitialStage())
635 /* In between global updates the reference histogram size is kept constant so we trivially
636 know what the histogram size was at the time of the skipped update. */
637 double histogramSize = histogramSize_.histogramSize();
638 setHistogramUpdateScaleFactors(
639 params, histogramSize, histogramSize, weightHistScaling, logPmfSumScaling);
643 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
644 *weightHistScaling = 1;
645 *logPmfSumScaling = 0;
649 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
651 double weightHistScaling;
652 double logPmfsumScaling;
654 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
656 for (auto& pointState : points_)
658 bool didUpdate = pointState.performPreviouslySkippedUpdates(
659 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
661 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
664 pointState.updateBias();
669 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const BiasGrid& grid)
671 double weightHistScaling;
672 double logPmfsumScaling;
674 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
676 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
677 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
678 for (auto& neighbor : neighbors)
680 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
681 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
685 points_[neighbor].updateBias();
694 * Merge update lists from multiple sharing simulations.
696 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
697 * \param[in] numPoints Total number of points.
698 * \param[in] commRecord Struct for intra-simulation communication.
699 * \param[in] multiSimComm Struct for multi-simulation communication.
701 void mergeSharedUpdateLists(std::vector<int>* updateList,
703 const t_commrec* commRecord,
704 const gmx_multisim_t* multiSimComm)
706 std::vector<int> numUpdatesOfPoint;
708 /* Flag the update points of this sim.
709 TODO: we can probably avoid allocating this array and just use the input array. */
710 numUpdatesOfPoint.resize(numPoints, 0);
711 for (auto& pointIndex : *updateList)
713 numUpdatesOfPoint[pointIndex] = 1;
716 /* Sum over the sims to get all the flagged points */
717 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
719 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
721 for (int m = 0; m < numPoints; m++)
723 if (numUpdatesOfPoint[m] > 0)
725 updateList->push_back(m);
731 * Generate an update list of points sampled since the last update.
733 * \param[in] grid The AWH bias.
734 * \param[in] points The point state.
735 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since
736 * last update. \param[in] endUpdatelist The end of the rectangular that has been sampled since
737 * last update. \param[in,out] updateList Local update list to set (assumed >= npoints long).
739 void makeLocalUpdateList(const BiasGrid& grid,
740 const std::vector<PointState>& points,
741 const awh_ivec originUpdatelist,
742 const awh_ivec endUpdatelist,
743 std::vector<int>* updateList)
748 /* Define the update search grid */
749 for (int d = 0; d < grid.numDimensions(); d++)
751 origin[d] = originUpdatelist[d];
752 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
754 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
755 This helps for calculating the distance/number of points but should be removed and fixed when the way of
756 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
757 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
760 /* Make the update list */
763 bool pointExists = true;
766 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
768 if (pointExists && points[pointIndex].inTargetRegion())
770 updateList->push_back(pointIndex);
777 void BiasState::resetLocalUpdateRange(const BiasGrid& grid)
779 const int gridpointIndex = coordState_.gridpointIndex();
780 for (int d = 0; d < grid.numDimensions(); d++)
782 /* This gives the minimum range consisting only of the current closest point. */
783 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
784 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
792 * Add partial histograms (accumulating between updates) to accumulating histograms.
794 * \param[in,out] pointState The state of the points in the bias.
795 * \param[in,out] weightSumCovering The weights for checking covering.
796 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
797 * \param[in] commRecord Struct for intra-simulation communication.
798 * \param[in] multiSimComm Struct for multi-simulation communication.
799 * \param[in] localUpdateList List of points with data.
801 void sumHistograms(gmx::ArrayRef<PointState> pointState,
802 gmx::ArrayRef<double> weightSumCovering,
804 const t_commrec* commRecord,
805 const gmx_multisim_t* multiSimComm,
806 const std::vector<int>& localUpdateList)
808 /* The covering checking histograms are added before summing over simulations, so that the
809 weights from different simulations are kept distinguishable. */
810 for (int globalIndex : localUpdateList)
812 weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
815 /* Sum histograms over multiple simulations if needed. */
816 if (numSharedUpdate > 1)
818 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
819 "Sharing within a simulation is not implemented (yet)");
821 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
822 std::vector<double> weightSum;
823 std::vector<double> coordVisits;
825 weightSum.resize(localUpdateList.size());
826 coordVisits.resize(localUpdateList.size());
828 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
830 const PointState& ps = pointState[localUpdateList[localIndex]];
832 weightSum[localIndex] = ps.weightSumIteration();
833 coordVisits[localIndex] = ps.numVisitsIteration();
836 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
837 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
839 /* Transfer back the result */
840 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
842 PointState& ps = pointState[localUpdateList[localIndex]];
844 ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
848 /* Now add the partial counts and weights to the accumulating histograms.
849 Note: we still need to use the weights for the update so we wait
850 with resetting them until the end of the update. */
851 for (int globalIndex : localUpdateList)
853 pointState[globalIndex].addPartialWeightAndCount();
858 * Label points along an axis as covered or not.
860 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
862 * \param[in] visited Visited? For each point.
863 * \param[in] checkCovering Check for covering? For each point.
864 * \param[in] numPoints The number of grid points along this dimension.
865 * \param[in] period Period in number of points.
866 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
867 * \param[in,out] covered In this array elements are 1 for covered points and 0 for
868 * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
870 void labelCoveredPoints(const std::vector<bool>& visited,
871 const std::vector<bool>& checkCovering,
875 gmx::ArrayRef<int> covered)
877 GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
879 bool haveFirstNotVisited = false;
880 int firstNotVisited = -1;
881 int notVisitedLow = -1;
882 int notVisitedHigh = -1;
884 for (int n = 0; n < numPoints; n++)
886 if (checkCovering[n] && !visited[n])
888 if (!haveFirstNotVisited)
892 haveFirstNotVisited = true;
898 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
899 by unvisited points. The unvisted end points affect the coveredness of the
900 visited with a reach equal to the cover radius. */
901 int notCoveredLow = notVisitedLow + coverRadius;
902 int notCoveredHigh = notVisitedHigh - coverRadius;
903 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
905 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
908 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
909 the notVisitedLow of the next. */
910 notVisitedLow = notVisitedHigh;
915 /* Have labelled all the internal points. Now take care of the boundary regions. */
916 if (!haveFirstNotVisited)
918 /* No non-visited points <=> all points visited => all points covered. */
920 for (int n = 0; n < numPoints; n++)
927 int lastNotVisited = notVisitedLow;
929 /* For periodic boundaries, non-visited points can influence points
930 on the other side of the boundary so we need to wrap around. */
932 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
933 For non-periodic boundaries there is no lower end point so a dummy value is used. */
934 int notVisitedHigh = firstNotVisited;
935 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
937 int notCoveredLow = notVisitedLow + coverRadius;
938 int notCoveredHigh = notVisitedHigh - coverRadius;
940 for (int i = 0; i <= notVisitedHigh; i++)
942 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
943 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
946 /* Upper end. Same as for lower end but in the other direction. */
947 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
948 notVisitedLow = lastNotVisited;
950 notCoveredLow = notVisitedLow + coverRadius;
951 notCoveredHigh = notVisitedHigh - coverRadius;
953 for (int i = notVisitedLow; i <= numPoints - 1; i++)
955 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
956 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
963 bool BiasState::isSamplingRegionCovered(const BiasParams& params,
964 const std::vector<DimParams>& dimParams,
965 const BiasGrid& grid,
966 const t_commrec* commRecord,
967 const gmx_multisim_t* multiSimComm) const
969 /* Allocate and initialize arrays: one for checking visits along each dimension,
970 one for keeping track of which points to check and one for the covered points.
971 Possibly these could be kept as AWH variables to avoid these allocations. */
974 std::vector<bool> visited;
975 std::vector<bool> checkCovering;
976 // We use int for the covering array since we might use gmx_sumi_sim.
977 std::vector<int> covered;
980 std::vector<CheckDim> checkDim;
981 checkDim.resize(grid.numDimensions());
983 for (int d = 0; d < grid.numDimensions(); d++)
985 const size_t numPoints = grid.axis(d).numPoints();
986 checkDim[d].visited.resize(numPoints, false);
987 checkDim[d].checkCovering.resize(numPoints, false);
988 checkDim[d].covered.resize(numPoints, 0);
991 /* Set visited points along each dimension and which points should be checked for covering.
992 Specifically, points above the free energy cutoff (if there is one) or points outside
993 of the target region are ignored. */
995 /* Set the free energy cutoff */
996 double maxFreeEnergy = GMX_FLOAT_MAX;
998 if (params.eTarget == eawhtargetCUTOFF)
1000 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
1003 /* Set the threshold weight for a point to be considered visited. */
1004 double weightThreshold = 1;
1005 for (int d = 0; d < grid.numDimensions(); d++)
1007 if (grid.axis(d).isFepLambdaAxis())
1009 /* Do not modify the weight threshold based on a FEP lambda axis. The spread
1010 * of the sampling weights is not depending on a Gaussian distribution (like
1012 weightThreshold *= 1.0;
1016 /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
1017 * approximately (given that the spacing can be modified if the dimension is periodic)
1018 * proportional to sqrt(1/(2*pi)). */
1019 weightThreshold *= grid.axis(d).spacing()
1020 * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
1024 /* Project the sampling weights onto each dimension */
1025 for (size_t m = 0; m < grid.numPoints(); m++)
1027 const PointState& pointState = points_[m];
1029 for (int d = 0; d < grid.numDimensions(); d++)
1031 int n = grid.point(m).index[d];
1033 /* Is visited if it was already visited or if there is enough weight at the current point */
1034 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
1036 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
1037 checkDim[d].checkCovering[n] =
1038 checkDim[d].checkCovering[n]
1039 || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
1043 /* Label each point along each dimension as covered or not. */
1044 for (int d = 0; d < grid.numDimensions(); d++)
1046 labelCoveredPoints(checkDim[d].visited,
1047 checkDim[d].checkCovering,
1048 grid.axis(d).numPoints(),
1049 grid.axis(d).numPointsInPeriod(),
1050 params.coverRadius()[d],
1051 checkDim[d].covered);
1054 /* Now check for global covering. Each dimension needs to be covered separately.
1055 A dimension is covered if each point is covered. Multiple simulations collectively
1056 cover the points, i.e. a point is covered if any of the simulations covered it.
1057 However, visited points are not shared, i.e. if a point is covered or not is
1058 determined by the visits of a single simulation. In general the covering criterion is
1059 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
1060 For 1 simulation, all points covered <=> all points visited. For multiple simulations
1061 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
1062 In the limit of large coverRadius, all points covered => all points visited by at least one
1063 simulation (since no point will be covered until all points have been visited by a
1064 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
1065 needs with surrounding points before sharing covering information with other simulations. */
1067 /* Communicate the covered points between sharing simulations if needed. */
1068 if (params.numSharedUpdate > 1)
1070 /* For multiple dimensions this may not be the best way to do it. */
1071 for (int d = 0; d < grid.numDimensions(); d++)
1074 gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
1080 /* Now check if for each dimension all points are covered. Break if not true. */
1081 bool allPointsCovered = true;
1082 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
1084 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
1086 allPointsCovered = (checkDim[d].covered[n] != 0);
1090 return allPointsCovered;
1094 * Normalizes the free energy and PMF sum.
1096 * \param[in] pointState The state of the points.
1098 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1100 double minF = freeEnergyMinimumValue(*pointState);
1102 for (PointState& ps : *pointState)
1104 ps.normalizeFreeEnergyAndPmfSum(minF);
1108 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams>& dimParams,
1109 const BiasGrid& grid,
1110 const BiasParams& params,
1111 const t_commrec* commRecord,
1112 const gmx_multisim_t* multiSimComm,
1116 std::vector<int>* updateList)
1118 /* Note hat updateList is only used in this scope and is always
1119 * re-initialized. We do not use a local vector, because that would
1120 * cause reallocation every time this funtion is called and the vector
1121 * can be the size of the whole grid.
1124 /* Make a list of all local points, i.e. those that could have been touched since
1125 the last update. These are the points needed for summing histograms below
1126 (non-local points only add zeros). For local updates, this will also be the
1127 final update list. */
1128 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1129 if (params.numSharedUpdate > 1)
1131 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1134 /* Reset the range for the next update */
1135 resetLocalUpdateRange(grid);
1137 /* Add samples to histograms for all local points and sync simulations if needed */
1138 sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1140 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1142 /* Renormalize the free energy if values are too large. */
1143 bool needToNormalizeFreeEnergy = false;
1144 for (int& globalIndex : *updateList)
1146 /* We want to keep the absolute value of the free energies to be less
1147 c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1148 ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1149 in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1150 this requirement would be broken. */
1151 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1153 needToNormalizeFreeEnergy = true;
1158 /* Update target distribution? */
1159 bool needToUpdateTargetDistribution =
1160 (params.eTarget != eawhtargetCONSTANT && params.isUpdateTargetStep(step));
1162 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1163 bool detectedCovering = false;
1164 if (inInitialStage())
1167 (params.isCheckCoveringStep(step)
1168 && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1171 /* The weighthistogram size after this update. */
1172 double newHistogramSize = histogramSize_.newHistogramSize(
1173 params, t, detectedCovering, points_, weightSumCovering_, fplog);
1175 /* Make the update list. Usually we try to only update local points,
1176 * but if the update has non-trivial or non-deterministic effects
1177 * on non-local points a global update is needed. This is the case when:
1178 * 1) a covering occurred in the initial stage, leading to non-trivial
1179 * histogram rescaling factors; or
1180 * 2) the target distribution will be updated, since we don't make any
1181 * assumption on its form; or
1182 * 3) the AWH parameters are such that we never attempt to skip non-local
1184 * 4) the free energy values have grown so large that a renormalization
1187 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1189 /* Global update, just add all points. */
1190 updateList->clear();
1191 for (size_t m = 0; m < points_.size(); m++)
1193 if (points_[m].inTargetRegion())
1195 updateList->push_back(m);
1200 /* Set histogram scale factors. */
1201 double weightHistScalingSkipped = 0;
1202 double logPmfsumScalingSkipped = 0;
1203 if (params.skipUpdates())
1205 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1207 double weightHistScalingNew;
1208 double logPmfsumScalingNew;
1209 setHistogramUpdateScaleFactors(
1210 params, newHistogramSize, histogramSize_.histogramSize(), &weightHistScalingNew, &logPmfsumScalingNew);
1212 /* Update free energy and reference weight histogram for points in the update list. */
1213 for (int pointIndex : *updateList)
1215 PointState* pointStateToUpdate = &points_[pointIndex];
1217 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1218 if (params.skipUpdates())
1220 pointStateToUpdate->performPreviouslySkippedUpdates(
1221 params, histogramSize_.numUpdates(), weightHistScalingSkipped, logPmfsumScalingSkipped);
1224 /* Now do an update with new sampling data. */
1225 pointStateToUpdate->updateWithNewSampling(
1226 params, histogramSize_.numUpdates(), weightHistScalingNew, logPmfsumScalingNew);
1229 /* Only update the histogram size after we are done with the local point updates */
1230 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1232 if (needToNormalizeFreeEnergy)
1234 normalizeFreeEnergyAndPmfSum(&points_);
1237 if (needToUpdateTargetDistribution)
1239 /* The target distribution is always updated for all points at once. */
1240 updateTargetDistribution(points_, params);
1243 /* Update the bias. The bias is updated separately and last since it simply a function of
1244 the free energy and the target distribution and we want to avoid doing extra work. */
1245 for (int pointIndex : *updateList)
1247 points_[pointIndex].updateBias();
1250 /* Increase the update counter. */
1251 histogramSize_.incrementNumUpdates();
1254 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>& dimParams,
1255 const BiasGrid& grid,
1256 gmx::ArrayRef<const double> neighborLambdaEnergies,
1257 std::vector<double, AlignedAllocator<double>>* weight) const
1259 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1260 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1262 #if GMX_SIMD_HAVE_DOUBLE
1263 typedef SimdDouble PackType;
1264 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1266 typedef double PackType;
1267 constexpr int packSize = 1;
1269 /* Round the size of the weight array up to packSize */
1270 const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1271 weight->resize(weightSize);
1273 double* gmx_restrict weightData = weight->data();
1274 PackType weightSumPack(0.0);
1275 for (size_t i = 0; i < neighbors.size(); i += packSize)
1277 for (size_t n = i; n < i + packSize; n++)
1279 if (n < neighbors.size())
1281 const int neighbor = neighbors[n];
1282 (*weight)[n] = biasedLogWeightFromPoint(dimParams,
1286 points_[neighbor].bias(),
1287 coordState_.coordValue(),
1288 neighborLambdaEnergies,
1289 coordState_.gridpointIndex());
1293 /* Pad with values that don't affect the result */
1294 (*weight)[n] = detail::c_largeNegativeExponent;
1297 PackType weightPack = load<PackType>(weightData + i);
1298 weightPack = gmx::exp(weightPack);
1299 weightSumPack = weightSumPack + weightPack;
1300 store(weightData + i, weightPack);
1302 /* Sum of probability weights */
1303 double weightSum = reduce(weightSumPack);
1304 GMX_RELEASE_ASSERT(weightSum > 0,
1305 "zero probability weight when updating AWH probability weights.");
1307 /* Normalize probabilities to sum to 1 */
1308 double invWeightSum = 1 / weightSum;
1310 /* When there is a free energy lambda state axis remove the convolved contributions along that
1311 * axis from the total bias. This must be done after calculating invWeightSum (since weightSum
1312 * will be modified), but before normalizing the weights (below). */
1313 if (grid.hasLambdaAxis())
1315 /* If there is only one axis the bias will not be convolved in any dimension. */
1316 if (grid.axis().size() == 1)
1318 weightSum = gmx::exp(points_[coordState_.gridpointIndex()].bias());
1322 for (size_t i = 0; i < neighbors.size(); i++)
1324 const int neighbor = neighbors[i];
1325 if (pointsHaveDifferentLambda(grid, coordState_.gridpointIndex(), neighbor))
1327 weightSum -= weightData[i];
1333 for (double& w : *weight)
1338 /* Return the convolved bias */
1339 return std::log(weightSum);
1342 double BiasState::calcConvolvedBias(const std::vector<DimParams>& dimParams,
1343 const BiasGrid& grid,
1344 const awh_dvec& coordValue) const
1346 int point = grid.nearestIndex(coordValue);
1347 const GridPoint& gridPoint = grid.point(point);
1349 /* Sum the probability weights from the neighborhood of the given point */
1350 double weightSum = 0;
1351 for (int neighbor : gridPoint.neighbor)
1353 /* No convolution is required along the lambda dimension. */
1354 if (pointsHaveDifferentLambda(grid, point, neighbor))
1358 double logWeight = biasedLogWeightFromPoint(
1359 dimParams, points_, grid, neighbor, points_[neighbor].bias(), coordValue, {}, point);
1360 weightSum += std::exp(logWeight);
1363 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1364 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1367 void BiasState::sampleProbabilityWeights(const BiasGrid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1369 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1371 /* Save weights for next update */
1372 for (size_t n = 0; n < neighbor.size(); n++)
1374 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1377 /* Update the local update range. Two corner points define this rectangular
1378 * domain. We need to choose two new corner points such that the new domain
1379 * contains both the old update range and the current neighborhood.
1380 * In the simplest case when an update is performed every sample,
1381 * the update range would simply equal the current neighborhood.
1383 int neighborStart = neighbor[0];
1384 int neighborLast = neighbor[neighbor.size() - 1];
1385 for (int d = 0; d < grid.numDimensions(); d++)
1387 int origin = grid.point(neighborStart).index[d];
1388 int last = grid.point(neighborLast).index[d];
1392 /* Unwrap if wrapped around the boundary (only happens for periodic
1393 * boundaries). This has been already for the stored index interval.
1395 /* TODO: what we want to do is to find the smallest the update
1396 * interval that contains all points that need to be updated.
1397 * This amounts to combining two intervals, the current
1398 * [origin, end] update interval and the new touched neighborhood
1399 * into a new interval that contains all points from both the old
1402 * For periodic boundaries it becomes slightly more complicated
1403 * than for closed boundaries because then it needs not be
1404 * true that origin < end (so one can't simply relate the origin/end
1405 * in the min()/max() below). The strategy here is to choose the
1406 * origin closest to a reference point (index 0) and then unwrap
1407 * the end index if needed and choose the largest end index.
1408 * This ensures that both intervals are in the new interval
1409 * but it's not necessarily the smallest.
1410 * Currently we solve this by going through each possibility
1411 * and checking them.
1413 last += grid.axis(d).numPointsInPeriod();
1416 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1417 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1421 void BiasState::sampleCoordAndPmf(const std::vector<DimParams>& dimParams,
1422 const BiasGrid& grid,
1423 gmx::ArrayRef<const double> probWeightNeighbor,
1424 double convolvedBias)
1426 /* Sampling-based deconvolution extracting the PMF.
1427 * Update the PMF histogram with the current coordinate value.
1429 * Because of the finite width of the harmonic potential, the free energy
1430 * defined for each coordinate point does not exactly equal that of the
1431 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1432 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1433 * reweighted histogram of the coordinate value. Strictly, this relies on
1434 * the unknown normalization constant Z being either constant or known. Here,
1435 * neither is true except in the long simulation time limit. Empirically however,
1436 * it works (mainly because how the PMF histogram is rescaled).
1439 const int gridPointIndex = coordState_.gridpointIndex();
1440 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
1442 /* Update the PMF of points along a lambda axis with their bias. */
1443 if (lambdaAxisIndex)
1445 const std::vector<int>& neighbors = grid.point(gridPointIndex).neighbor;
1447 std::vector<double> lambdaMarginalDistribution =
1448 calculateFELambdaMarginalDistribution(grid, neighbors, probWeightNeighbor);
1450 awh_dvec coordValueAlongLambda = { coordState_.coordValue()[0],
1451 coordState_.coordValue()[1],
1452 coordState_.coordValue()[2],
1453 coordState_.coordValue()[3] };
1454 for (size_t i = 0; i < neighbors.size(); i++)
1456 const int neighbor = neighbors[i];
1458 if (pointsAlongLambdaAxis(grid, gridPointIndex, neighbor))
1460 const double neighborLambda = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
1461 if (neighbor == gridPointIndex)
1463 bias = convolvedBias;
1467 coordValueAlongLambda[lambdaAxisIndex.value()] = neighborLambda;
1468 bias = calcConvolvedBias(dimParams, grid, coordValueAlongLambda);
1471 const double probWeight = lambdaMarginalDistribution[neighborLambda];
1472 const double weightedBias = bias - std::log(std::max(probWeight, GMX_DOUBLE_MIN)); // avoid log(0)
1474 if (neighbor == gridPointIndex && grid.covers(coordState_.coordValue()))
1476 points_[neighbor].samplePmf(weightedBias);
1480 points_[neighbor].updatePmfUnvisited(weightedBias);
1487 /* Only save coordinate data that is in range (the given index is always
1488 * in range even if the coordinate value is not).
1490 if (grid.covers(coordState_.coordValue()))
1492 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1493 points_[gridPointIndex].samplePmf(convolvedBias);
1497 /* Save probability weights for the update */
1498 sampleProbabilityWeights(grid, probWeightNeighbor);
1501 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1503 biasHistory->pointState.resize(points_.size());
1506 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const BiasGrid& grid) const
1508 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1509 "The AWH history setup does not match the AWH state.");
1511 AwhBiasStateHistory* stateHistory = &biasHistory->state;
1512 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1514 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1516 AwhPointStateHistory* psh = &biasHistory->pointState[m];
1518 points_[m].storeState(psh);
1520 psh->weightsum_covering = weightSumCovering_[m];
1523 histogramSize_.storeState(stateHistory);
1525 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1526 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid, endUpdatelist_);
1529 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const BiasGrid& grid)
1531 const AwhBiasStateHistory& stateHistory = biasHistory.state;
1533 coordState_.restoreFromHistory(stateHistory);
1535 if (biasHistory.pointState.size() != points_.size())
1538 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1539 "Likely you provided a checkpoint from a different simulation."));
1541 for (size_t m = 0; m < points_.size(); m++)
1543 points_[m].setFromHistory(biasHistory.pointState[m]);
1546 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1548 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1551 histogramSize_.restoreFromHistory(stateHistory);
1553 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1554 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1557 void BiasState::broadcast(const t_commrec* commRecord)
1559 gmx_bcast(sizeof(coordState_), &coordState_, commRecord->mpi_comm_mygroup);
1561 gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord->mpi_comm_mygroup);
1563 gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(), commRecord->mpi_comm_mygroup);
1565 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord->mpi_comm_mygroup);
1568 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>& dimParams, const BiasGrid& grid)
1570 std::vector<float> convolvedPmf;
1572 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1574 for (size_t m = 0; m < points_.size(); m++)
1576 points_[m].setFreeEnergy(convolvedPmf[m]);
1581 * Count trailing data rows containing only zeros.
1583 * \param[in] data 2D data array.
1584 * \param[in] numRows Number of rows in array.
1585 * \param[in] numColumns Number of cols in array.
1586 * \returns the number of trailing zero rows.
1588 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1590 int numZeroRows = 0;
1591 for (int m = numRows - 1; m >= 0; m--)
1593 bool rowIsZero = true;
1594 for (int d = 0; d < numColumns; d++)
1596 if (data[d][m] != 0)
1605 /* At a row with non-zero data */
1610 /* Still at a zero data row, keep checking rows higher up. */
1619 * Initializes the PMF and target with data read from an input table.
1621 * \param[in] dimParams The dimension parameters.
1622 * \param[in] grid The grid.
1623 * \param[in] filename The filename to read PMF and target from.
1624 * \param[in] numBias Number of biases.
1625 * \param[in] biasIndex The index of the bias.
1626 * \param[in,out] pointState The state of the points in this bias.
1628 static void readUserPmfAndTargetDistribution(const std::vector<DimParams>& dimParams,
1629 const BiasGrid& grid,
1630 const std::string& filename,
1633 std::vector<PointState>* pointState)
1635 /* Read the PMF and target distribution.
1636 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1637 base on the force constant. The free energy and target together determine the bias.
1639 std::string filenameModified(filename);
1642 size_t n = filenameModified.rfind('.');
1643 GMX_RELEASE_ASSERT(n != std::string::npos,
1644 "The filename should contain an extension starting with .");
1645 filenameModified.insert(n, formatString("%d", biasIndex));
1648 std::string correctFormatMessage = formatString(
1649 "%s is expected in the following format. "
1650 "The first ndim column(s) should contain the coordinate values for each point, "
1651 "each column containing values of one dimension (in ascending order). "
1652 "For a multidimensional coordinate, points should be listed "
1653 "in the order obtained by traversing lower dimensions first. "
1654 "E.g. for two-dimensional grid of size nxn: "
1655 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1656 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1657 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1658 "Make sure the input file ends with a new line but has no trailing new lines.",
1660 gmx::TextLineWrapper wrapper;
1661 wrapper.settings().setLineLength(c_linewidth);
1662 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1666 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1668 /* Check basic data properties here. BiasGrid takes care of more complicated things. */
1672 std::string mesg = gmx::formatString(
1673 "%s is empty!.\n\n%s", filename.c_str(), correctFormatMessage.c_str());
1674 GMX_THROW(InvalidInputError(mesg));
1677 /* Less than 2 points is not useful for PMF or target. */
1680 std::string mesg = gmx::formatString(
1681 "%s contains too few data points (%d)."
1682 "The minimum number of points is 2.",
1685 GMX_THROW(InvalidInputError(mesg));
1688 /* Make sure there are enough columns of data.
1690 Two formats are allowed. Either with columns {coords, PMF, target} or
1691 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1692 is how AWH output is written (x, y, z being other AWH variables). For this format,
1693 trailing columns are ignored.
1695 int columnIndexTarget;
1696 int numColumnsMin = dimParams.size() + 2;
1697 int columnIndexPmf = dimParams.size();
1698 if (numColumns == numColumnsMin)
1700 columnIndexTarget = columnIndexPmf + 1;
1704 columnIndexTarget = columnIndexPmf + 4;
1707 if (numColumns < numColumnsMin)
1709 std::string mesg = gmx::formatString(
1710 "The number of columns in %s should be at least %d."
1714 correctFormatMessage.c_str());
1715 GMX_THROW(InvalidInputError(mesg));
1718 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1719 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1720 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1721 if (numZeroRows > 1)
1723 std::string mesg = gmx::formatString(
1724 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1728 GMX_THROW(InvalidInputError(mesg));
1731 /* Convert from user units to internal units before sending the data of to grid. */
1732 for (size_t d = 0; d < dimParams.size(); d++)
1734 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1735 if (scalingFactor == 1)
1739 for (size_t m = 0; m < pointState->size(); m++)
1741 data[d][m] *= scalingFactor;
1745 /* Get a data point for each AWH grid point so that they all get data. */
1746 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1747 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1749 /* Extract the data for each grid point.
1750 * We check if the target distribution is zero for all points.
1752 bool targetDistributionIsZero = true;
1753 for (size_t m = 0; m < pointState->size(); m++)
1755 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1756 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1758 /* Check if the values are allowed. */
1761 std::string mesg = gmx::formatString(
1762 "Target distribution weight at point %zu (%g) in %s is negative.",
1766 GMX_THROW(InvalidInputError(mesg));
1770 targetDistributionIsZero = false;
1772 (*pointState)[m].setTargetConstantWeight(target);
1775 if (targetDistributionIsZero)
1778 gmx::formatString("The target weights given in column %d in %s are all 0",
1781 GMX_THROW(InvalidInputError(mesg));
1784 /* Free the arrays. */
1785 for (int m = 0; m < numColumns; m++)
1792 void BiasState::normalizePmf(int numSharingSims)
1794 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1795 Approximately (for large enough force constant) we should have:
1796 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1799 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1800 double expSumPmf = 0;
1802 for (const PointState& pointState : points_)
1804 if (pointState.inTargetRegion())
1806 expSumPmf += std::exp(pointState.logPmfSum());
1807 expSumF += std::exp(-pointState.freeEnergy());
1810 double numSamples = histogramSize_.histogramSize() / numSharingSims;
1813 double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1814 for (PointState& pointState : points_)
1816 if (pointState.inTargetRegion())
1818 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1823 void BiasState::initGridPointState(const AwhBiasParams& awhBiasParams,
1824 const std::vector<DimParams>& dimParams,
1825 const BiasGrid& grid,
1826 const BiasParams& params,
1827 const std::string& filename,
1830 /* Modify PMF, free energy and the constant target distribution factor
1831 * to user input values if there is data given.
1833 if (awhBiasParams.bUserData)
1835 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1836 setFreeEnergyToConvolvedPmf(dimParams, grid);
1839 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1840 GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN || points_[0].weightSumRef() != 0,
1841 "AWH reference weight histogram not initialized properly with local "
1842 "Boltzmann target distribution.");
1844 updateTargetDistribution(points_, params);
1846 for (PointState& pointState : points_)
1848 if (pointState.inTargetRegion())
1850 pointState.updateBias();
1854 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1855 pointState.setTargetToZero();
1859 /* Set the initial reference weighthistogram. */
1860 const double histogramSize = histogramSize_.histogramSize();
1861 for (auto& pointState : points_)
1863 pointState.setInitialReferenceWeightHistogram(histogramSize);
1866 /* Make sure the pmf is normalized consistently with the histogram size.
1867 Note: the target distribution and free energy need to be set here. */
1868 normalizePmf(params.numSharedUpdate);
1871 BiasState::BiasState(const AwhBiasParams& awhBiasParams,
1872 double histogramSizeInitial,
1873 const std::vector<DimParams>& dimParams,
1874 const BiasGrid& grid) :
1875 coordState_(awhBiasParams, dimParams, grid),
1876 points_(grid.numPoints()),
1877 weightSumCovering_(grid.numPoints()),
1878 histogramSize_(awhBiasParams, histogramSizeInitial)
1880 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1881 for (size_t d = 0; d < dimParams.size(); d++)
1883 int index = grid.point(coordState_.gridpointIndex()).index[d];
1884 originUpdatelist_[d] = index;
1885 endUpdatelist_[d] = index;