2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * Implements the BiasState class.
40 * \author Viveca Lindahl
41 * \author Berk Hess <hess@kth.se>
47 #include "biasstate.h"
57 #include "gromacs/fileio/gmxfio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/gmxlib/network.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/mdrunutility/multisim.h"
62 #include "gromacs/mdtypes/awh_history.h"
63 #include "gromacs/mdtypes/awh_params.h"
64 #include "gromacs/mdtypes/commrec.h"
65 #include "gromacs/simd/simd.h"
66 #include "gromacs/simd/simd_math.h"
67 #include "gromacs/utility/arrayref.h"
68 #include "gromacs/utility/exceptions.h"
69 #include "gromacs/utility/gmxassert.h"
70 #include "gromacs/utility/smalloc.h"
71 #include "gromacs/utility/stringutil.h"
74 #include "pointstate.h"
79 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
81 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
83 /* The PMF is just the negative of the log of the sampled PMF histogram.
84 * Points with zero target weight are ignored, they will mostly contain noise.
86 for (size_t i = 0; i < points_.size(); i++)
88 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
96 * Sum an array over all simulations on the master rank of each simulation.
98 * \param[in,out] arrayRef The data to sum.
99 * \param[in] multiSimComm Struct for multi-simulation communication.
101 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
103 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
107 * Sum an array over all simulations on the master rank of each simulation.
109 * \param[in,out] arrayRef The data to sum.
110 * \param[in] multiSimComm Struct for multi-simulation communication.
112 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
114 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
118 * Sum an array over all simulations on all ranks of each simulation.
120 * This assumes the data is identical on all ranks within each simulation.
122 * \param[in,out] arrayRef The data to sum.
123 * \param[in] commRecord Struct for intra-simulation communication.
124 * \param[in] multiSimComm Struct for multi-simulation communication.
127 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
129 if (MASTER(commRecord))
131 sumOverSimulations(arrayRef, multiSimComm);
133 if (commRecord->nnodes > 1)
135 gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord);
140 * Sum PMF over multiple simulations, when requested.
142 * \param[in,out] pointState The state of the points in the bias.
143 * \param[in] numSharedUpdate The number of biases sharing the histogram.
144 * \param[in] commRecord Struct for intra-simulation communication.
145 * \param[in] multiSimComm Struct for multi-simulation communication.
147 void sumPmf(gmx::ArrayRef<PointState> pointState,
149 const t_commrec* commRecord,
150 const gmx_multisim_t* multiSimComm)
152 if (numSharedUpdate == 1)
156 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->nsim == 0,
157 "numSharedUpdate should be a multiple of multiSimComm->nsim");
158 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim,
159 "Sharing within a simulation is not implemented (yet)");
161 std::vector<double> buffer(pointState.size());
163 /* Need to temporarily exponentiate the log weights to sum over simulations */
164 for (size_t i = 0; i < buffer.size(); i++)
166 buffer[i] = pointState[i].inTargetRegion() ? std::exp(pointState[i].logPmfSum()) : 0;
169 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
171 /* Take log again to get (non-normalized) PMF */
172 double normFac = 1.0 / numSharedUpdate;
173 for (gmx::index i = 0; i < pointState.ssize(); i++)
175 if (pointState[i].inTargetRegion())
177 pointState[i].setLogPmfSum(std::log(buffer[i] * normFac));
183 * Find the minimum free energy value.
185 * \param[in] pointState The state of the points.
186 * \returns the minimum free energy value.
188 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
190 double fMin = GMX_FLOAT_MAX;
192 for (auto const& ps : pointState)
194 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
196 fMin = ps.freeEnergy();
204 * Find and return the log of the probability weight of a point given a coordinate value.
206 * The unnormalized weight is given by
207 * w(point|value) = exp(bias(point) - U(value,point)),
208 * where U is a harmonic umbrella potential.
210 * \param[in] dimParams The bias dimensions parameters
211 * \param[in] points The point state.
212 * \param[in] grid The grid.
213 * \param[in] pointIndex Point to evaluate probability weight for.
214 * \param[in] pointBias Bias for the point (as a log weight).
215 * \param[in] value Coordinate value.
216 * \returns the log of the biased probability weight.
218 double biasedLogWeightFromPoint(const std::vector<DimParams>& dimParams,
219 const std::vector<PointState>& points,
223 const awh_dvec value)
225 double logWeight = detail::c_largeNegativeExponent;
227 /* Only points in the target reigon have non-zero weight */
228 if (points[pointIndex].inTargetRegion())
230 logWeight = pointBias;
232 /* Add potential for all parameter dimensions */
233 for (size_t d = 0; d < dimParams.size(); d++)
235 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
236 logWeight -= 0.5 * dimParams[d].betak * dev * dev;
245 void BiasState::calcConvolvedPmf(const std::vector<DimParams>& dimParams,
247 std::vector<float>* convolvedPmf) const
249 size_t numPoints = grid.numPoints();
251 convolvedPmf->resize(numPoints);
253 /* Get the PMF to convolve. */
254 std::vector<float> pmf(numPoints);
257 for (size_t m = 0; m < numPoints; m++)
259 double freeEnergyWeights = 0;
260 const GridPoint& point = grid.point(m);
261 for (auto& neighbor : point.neighbor)
263 /* The negative PMF is a positive bias. */
264 double biasNeighbor = -pmf[neighbor];
266 /* Add the convolved PMF weights for the neighbors of this point.
267 Note that this function only adds point within the target > 0 region.
268 Sum weights, take the logarithm last to get the free energy. */
269 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
270 biasNeighbor, point.coordValue);
271 freeEnergyWeights += std::exp(logWeight);
274 GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
275 "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
276 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
284 * Updates the target distribution for all points.
286 * The target distribution is always updated for all points
289 * \param[in,out] pointState The state of all points.
290 * \param[in] params The bias parameters.
292 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState, const BiasParams& params)
294 double freeEnergyCutoff = 0;
295 if (params.eTarget == eawhtargetCUTOFF)
297 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
300 double sumTarget = 0;
301 for (PointState& ps : pointState)
303 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
305 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
308 double invSum = 1.0 / sumTarget;
309 for (PointState& ps : pointState)
311 ps.scaleTarget(invSum);
316 * Puts together a string describing a grid point.
318 * \param[in] grid The grid.
319 * \param[in] point Grid point index.
320 * \returns a string for the point.
322 std::string gridPointValueString(const Grid& grid, int point)
324 std::string pointString;
328 for (int d = 0; d < grid.numDimensions(); d++)
330 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
331 if (d < grid.numDimensions() - 1)
346 int BiasState::warnForHistogramAnomalies(const Grid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
348 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
349 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
351 /* Sum up the histograms and get their normalization */
352 double sumVisits = 0;
353 double sumWeights = 0;
354 for (auto& pointState : points_)
356 if (pointState.inTargetRegion())
358 sumVisits += pointState.numVisitsTot();
359 sumWeights += pointState.weightSumTot();
362 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
363 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
364 double invNormVisits = 1.0 / sumVisits;
365 double invNormWeight = 1.0 / sumWeights;
367 /* Check all points for warnings */
369 size_t numPoints = grid.numPoints();
370 for (size_t m = 0; m < numPoints; m++)
372 /* Skip points close to boundary or non-target region */
373 const GridPoint& gridPoint = grid.point(m);
374 bool skipPoint = false;
375 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
377 int neighbor = gridPoint.neighbor[n];
378 skipPoint = !points_[neighbor].inTargetRegion();
379 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
381 const GridPoint& neighborPoint = grid.point(neighbor);
382 skipPoint = neighborPoint.index[d] == 0
383 || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
387 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
388 const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
389 const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
390 if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
392 std::string pointValueString = gridPointValueString(grid, m);
393 std::string warningMessage = gmx::formatString(
395 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
396 "is less than a fraction %g of the reference distribution at that point. "
397 "If you are not certain about your settings you might want to increase your "
398 "pull force constant or "
399 "modify your sampling region.\n",
400 biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
401 gmx::TextLineWrapper wrapper;
402 wrapper.settings().setLineLength(c_linewidth);
403 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
407 if (numWarnings >= maxNumWarnings)
416 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams>& dimParams,
419 gmx::ArrayRef<double> force) const
421 double potential = 0;
422 for (size_t d = 0; d < dimParams.size(); d++)
425 getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
427 double k = dimParams[d].k;
429 /* Force from harmonic potential 0.5*k*dev^2 */
430 force[d] = -k * deviation;
431 potential += 0.5 * k * deviation * deviation;
437 void BiasState::calcConvolvedForce(const std::vector<DimParams>& dimParams,
439 gmx::ArrayRef<const double> probWeightNeighbor,
440 gmx::ArrayRef<double> forceWorkBuffer,
441 gmx::ArrayRef<double> force) const
443 for (size_t d = 0; d < dimParams.size(); d++)
448 /* Only neighboring points have non-negligible contribution. */
449 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
450 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
451 for (size_t n = 0; n < neighbor.size(); n++)
453 double weightNeighbor = probWeightNeighbor[n];
454 int indexNeighbor = neighbor[n];
456 /* Get the umbrella force from this point. The returned potential is ignored here. */
457 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, forceFromNeighbor);
459 /* Add the weighted umbrella force to the convolved force. */
460 for (size_t d = 0; d < dimParams.size(); d++)
462 force[d] += forceFromNeighbor[d] * weightNeighbor;
467 double BiasState::moveUmbrella(const std::vector<DimParams>& dimParams,
469 gmx::ArrayRef<const double> probWeightNeighbor,
470 gmx::ArrayRef<double> biasForce,
475 /* Generate and set a new coordinate reference value */
476 coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor,
477 step, seed, indexSeed);
479 std::vector<double> newForce(dimParams.size());
480 double newPotential =
481 calcUmbrellaForceAndPotential(dimParams, grid, coordState_.umbrellaGridpoint(), newForce);
483 /* A modification of the reference value at time t will lead to a different
484 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
485 this means the force and velocity will change signs roughly as often.
486 To avoid any issues we take the average of the previous and new force
487 at steps when the reference value has been moved. E.g. if the ref. value
488 is set every step to (coord dvalue +/- delta) would give zero force.
490 for (gmx::index d = 0; d < biasForce.ssize(); d++)
492 /* Average of the current and new force */
493 biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
503 * Sets the histogram rescaling factors needed to control the histogram size.
505 * For sake of robustness, the reference weight histogram can grow at a rate
506 * different from the actual sampling rate. Typically this happens for a limited
507 * initial time, alternatively growth is scaled down by a constant factor for all
508 * times. Since the size of the reference histogram sets the size of the free
509 * energy update this should be reflected also in the PMF. Thus the PMF histogram
510 * needs to be rescaled too.
512 * This function should only be called by the bias update function or wrapped by a function that
513 * knows what scale factors should be applied when, e.g,
514 * getSkippedUpdateHistogramScaleFactors().
516 * \param[in] params The bias parameters.
517 * \param[in] newHistogramSize New reference weight histogram size.
518 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
519 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
520 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
522 void setHistogramUpdateScaleFactors(const BiasParams& params,
523 double newHistogramSize,
524 double oldHistogramSize,
525 double* weightHistScaling,
526 double* logPmfSumScaling)
529 /* The two scaling factors below are slightly different (ignoring the log factor) because the
530 reference and the PMF histogram apply weight scaling differently. The weight histogram
531 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
532 It is done this way because that is what target type local Boltzmann (for which
533 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
534 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
535 1) empirically this is necessary for converging the PMF; 2) since the extraction of
536 the PMF is theoretically only valid for a constant bias, new samples should get more
537 weight than old ones for which the bias is fluctuating more. */
539 newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
540 *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
545 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
546 double* weightHistScaling,
547 double* logPmfSumScaling) const
549 GMX_ASSERT(params.skipUpdates(),
550 "Calling function for skipped updates when skipping updates is not allowed");
552 if (inInitialStage())
554 /* In between global updates the reference histogram size is kept constant so we trivially
555 know what the histogram size was at the time of the skipped update. */
556 double histogramSize = histogramSize_.histogramSize();
557 setHistogramUpdateScaleFactors(params, histogramSize, histogramSize, weightHistScaling,
562 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
563 *weightHistScaling = 1;
564 *logPmfSumScaling = 0;
568 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
570 double weightHistScaling;
571 double logPmfsumScaling;
573 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
575 for (auto& pointState : points_)
577 bool didUpdate = pointState.performPreviouslySkippedUpdates(
578 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
580 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
583 pointState.updateBias();
588 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const Grid& grid)
590 double weightHistScaling;
591 double logPmfsumScaling;
593 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
595 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
596 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
597 for (auto& neighbor : neighbors)
599 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
600 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
604 points_[neighbor].updateBias();
613 * Merge update lists from multiple sharing simulations.
615 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
616 * \param[in] numPoints Total number of points.
617 * \param[in] commRecord Struct for intra-simulation communication.
618 * \param[in] multiSimComm Struct for multi-simulation communication.
620 void mergeSharedUpdateLists(std::vector<int>* updateList,
622 const t_commrec* commRecord,
623 const gmx_multisim_t* multiSimComm)
625 std::vector<int> numUpdatesOfPoint;
627 /* Flag the update points of this sim.
628 TODO: we can probably avoid allocating this array and just use the input array. */
629 numUpdatesOfPoint.resize(numPoints, 0);
630 for (auto& pointIndex : *updateList)
632 numUpdatesOfPoint[pointIndex] = 1;
635 /* Sum over the sims to get all the flagged points */
636 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
638 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
640 for (int m = 0; m < numPoints; m++)
642 if (numUpdatesOfPoint[m] > 0)
644 updateList->push_back(m);
650 * Generate an update list of points sampled since the last update.
652 * \param[in] grid The AWH bias.
653 * \param[in] points The point state.
654 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since
655 * last update. \param[in] endUpdatelist The end of the rectangular that has been sampled since
656 * last update. \param[in,out] updateList Local update list to set (assumed >= npoints long).
658 void makeLocalUpdateList(const Grid& grid,
659 const std::vector<PointState>& points,
660 const awh_ivec originUpdatelist,
661 const awh_ivec endUpdatelist,
662 std::vector<int>* updateList)
667 /* Define the update search grid */
668 for (int d = 0; d < grid.numDimensions(); d++)
670 origin[d] = originUpdatelist[d];
671 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
673 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
674 This helps for calculating the distance/number of points but should be removed and fixed when the way of
675 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
676 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
679 /* Make the update list */
682 bool pointExists = true;
685 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
687 if (pointExists && points[pointIndex].inTargetRegion())
689 updateList->push_back(pointIndex);
696 void BiasState::resetLocalUpdateRange(const Grid& grid)
698 const int gridpointIndex = coordState_.gridpointIndex();
699 for (int d = 0; d < grid.numDimensions(); d++)
701 /* This gives the minimum range consisting only of the current closest point. */
702 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
703 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
711 * Add partial histograms (accumulating between updates) to accumulating histograms.
713 * \param[in,out] pointState The state of the points in the bias.
714 * \param[in,out] weightSumCovering The weights for checking covering.
715 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
716 * \param[in] commRecord Struct for intra-simulation communication.
717 * \param[in] multiSimComm Struct for multi-simulation communication.
718 * \param[in] localUpdateList List of points with data.
720 void sumHistograms(gmx::ArrayRef<PointState> pointState,
721 gmx::ArrayRef<double> weightSumCovering,
723 const t_commrec* commRecord,
724 const gmx_multisim_t* multiSimComm,
725 const std::vector<int>& localUpdateList)
727 /* The covering checking histograms are added before summing over simulations, so that the
728 weights from different simulations are kept distinguishable. */
729 for (int globalIndex : localUpdateList)
731 weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
734 /* Sum histograms over multiple simulations if needed. */
735 if (numSharedUpdate > 1)
737 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim,
738 "Sharing within a simulation is not implemented (yet)");
740 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
741 std::vector<double> weightSum;
742 std::vector<double> coordVisits;
744 weightSum.resize(localUpdateList.size());
745 coordVisits.resize(localUpdateList.size());
747 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
749 const PointState& ps = pointState[localUpdateList[localIndex]];
751 weightSum[localIndex] = ps.weightSumIteration();
752 coordVisits[localIndex] = ps.numVisitsIteration();
755 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
756 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
758 /* Transfer back the result */
759 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
761 PointState& ps = pointState[localUpdateList[localIndex]];
763 ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
767 /* Now add the partial counts and weights to the accumulating histograms.
768 Note: we still need to use the weights for the update so we wait
769 with resetting them until the end of the update. */
770 for (int globalIndex : localUpdateList)
772 pointState[globalIndex].addPartialWeightAndCount();
777 * Label points along an axis as covered or not.
779 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
781 * \param[in] visited Visited? For each point.
782 * \param[in] checkCovering Check for covering? For each point.
783 * \param[in] numPoints The number of grid points along this dimension.
784 * \param[in] period Period in number of points.
785 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
786 * \param[in,out] covered In this array elements are 1 for covered points and 0 for
787 * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
789 void labelCoveredPoints(const std::vector<bool>& visited,
790 const std::vector<bool>& checkCovering,
794 gmx::ArrayRef<int> covered)
796 GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
798 bool haveFirstNotVisited = false;
799 int firstNotVisited = -1;
800 int notVisitedLow = -1;
801 int notVisitedHigh = -1;
803 for (int n = 0; n < numPoints; n++)
805 if (checkCovering[n] && !visited[n])
807 if (!haveFirstNotVisited)
811 haveFirstNotVisited = true;
817 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
818 by unvisited points. The unvisted end points affect the coveredness of the
819 visited with a reach equal to the cover radius. */
820 int notCoveredLow = notVisitedLow + coverRadius;
821 int notCoveredHigh = notVisitedHigh - coverRadius;
822 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
824 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
827 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
828 the notVisitedLow of the next. */
829 notVisitedLow = notVisitedHigh;
834 /* Have labelled all the internal points. Now take care of the boundary regions. */
835 if (!haveFirstNotVisited)
837 /* No non-visited points <=> all points visited => all points covered. */
839 for (int n = 0; n < numPoints; n++)
846 int lastNotVisited = notVisitedLow;
848 /* For periodic boundaries, non-visited points can influence points
849 on the other side of the boundary so we need to wrap around. */
851 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
852 For non-periodic boundaries there is no lower end point so a dummy value is used. */
853 int notVisitedHigh = firstNotVisited;
854 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
856 int notCoveredLow = notVisitedLow + coverRadius;
857 int notCoveredHigh = notVisitedHigh - coverRadius;
859 for (int i = 0; i <= notVisitedHigh; i++)
861 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
862 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
865 /* Upper end. Same as for lower end but in the other direction. */
866 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
867 notVisitedLow = lastNotVisited;
869 notCoveredLow = notVisitedLow + coverRadius;
870 notCoveredHigh = notVisitedHigh - coverRadius;
872 for (int i = notVisitedLow; i <= numPoints - 1; i++)
874 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
875 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
882 bool BiasState::isSamplingRegionCovered(const BiasParams& params,
883 const std::vector<DimParams>& dimParams,
885 const t_commrec* commRecord,
886 const gmx_multisim_t* multiSimComm) const
888 /* Allocate and initialize arrays: one for checking visits along each dimension,
889 one for keeping track of which points to check and one for the covered points.
890 Possibly these could be kept as AWH variables to avoid these allocations. */
893 std::vector<bool> visited;
894 std::vector<bool> checkCovering;
895 // We use int for the covering array since we might use gmx_sumi_sim.
896 std::vector<int> covered;
899 std::vector<CheckDim> checkDim;
900 checkDim.resize(grid.numDimensions());
902 for (int d = 0; d < grid.numDimensions(); d++)
904 const size_t numPoints = grid.axis(d).numPoints();
905 checkDim[d].visited.resize(numPoints, false);
906 checkDim[d].checkCovering.resize(numPoints, false);
907 checkDim[d].covered.resize(numPoints, 0);
910 /* Set visited points along each dimension and which points should be checked for covering.
911 Specifically, points above the free energy cutoff (if there is one) or points outside
912 of the target region are ignored. */
914 /* Set the free energy cutoff */
915 double maxFreeEnergy = GMX_FLOAT_MAX;
917 if (params.eTarget == eawhtargetCUTOFF)
919 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
922 /* Set the threshold weight for a point to be considered visited. */
923 double weightThreshold = 1;
924 for (int d = 0; d < grid.numDimensions(); d++)
926 weightThreshold *= grid.axis(d).spacing() * std::sqrt(dimParams[d].betak * 0.5 * M_1_PI);
929 /* Project the sampling weights onto each dimension */
930 for (size_t m = 0; m < grid.numPoints(); m++)
932 const PointState& pointState = points_[m];
934 for (int d = 0; d < grid.numDimensions(); d++)
936 int n = grid.point(m).index[d];
938 /* Is visited if it was already visited or if there is enough weight at the current point */
939 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
941 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
942 checkDim[d].checkCovering[n] =
943 checkDim[d].checkCovering[n]
944 || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
948 /* Label each point along each dimension as covered or not. */
949 for (int d = 0; d < grid.numDimensions(); d++)
951 labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(),
952 grid.axis(d).numPointsInPeriod(), params.coverRadius()[d], checkDim[d].covered);
955 /* Now check for global covering. Each dimension needs to be covered separately.
956 A dimension is covered if each point is covered. Multiple simulations collectively
957 cover the points, i.e. a point is covered if any of the simulations covered it.
958 However, visited points are not shared, i.e. if a point is covered or not is
959 determined by the visits of a single simulation. In general the covering criterion is
960 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
961 For 1 simulation, all points covered <=> all points visited. For multiple simulations
962 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
963 In the limit of large coverRadius, all points covered => all points visited by at least one
964 simulation (since no point will be covered until all points have been visited by a
965 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
966 needs with surrounding points before sharing covering information with other simulations. */
968 /* Communicate the covered points between sharing simulations if needed. */
969 if (params.numSharedUpdate > 1)
971 /* For multiple dimensions this may not be the best way to do it. */
972 for (int d = 0; d < grid.numDimensions(); d++)
975 gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
976 commRecord, multiSimComm);
980 /* Now check if for each dimension all points are covered. Break if not true. */
981 bool allPointsCovered = true;
982 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
984 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
986 allPointsCovered = (checkDim[d].covered[n] != 0);
990 return allPointsCovered;
994 * Normalizes the free energy and PMF sum.
996 * \param[in] pointState The state of the points.
998 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1000 double minF = freeEnergyMinimumValue(*pointState);
1002 for (PointState& ps : *pointState)
1004 ps.normalizeFreeEnergyAndPmfSum(minF);
1008 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams>& dimParams,
1010 const BiasParams& params,
1011 const t_commrec* commRecord,
1012 const gmx_multisim_t* multiSimComm,
1016 std::vector<int>* updateList)
1018 /* Note hat updateList is only used in this scope and is always
1019 * re-initialized. We do not use a local vector, because that would
1020 * cause reallocation every time this funtion is called and the vector
1021 * can be the size of the whole grid.
1024 /* Make a list of all local points, i.e. those that could have been touched since
1025 the last update. These are the points needed for summing histograms below
1026 (non-local points only add zeros). For local updates, this will also be the
1027 final update list. */
1028 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1029 if (params.numSharedUpdate > 1)
1031 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1034 /* Reset the range for the next update */
1035 resetLocalUpdateRange(grid);
1037 /* Add samples to histograms for all local points and sync simulations if needed */
1038 sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1040 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1042 /* Renormalize the free energy if values are too large. */
1043 bool needToNormalizeFreeEnergy = false;
1044 for (int& globalIndex : *updateList)
1046 /* We want to keep the absolute value of the free energies to be less
1047 c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1048 ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1049 in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1050 this requirement would be broken. */
1051 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1053 needToNormalizeFreeEnergy = true;
1058 /* Update target distribution? */
1059 bool needToUpdateTargetDistribution =
1060 (params.eTarget != eawhtargetCONSTANT && params.isUpdateTargetStep(step));
1062 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1063 bool detectedCovering = false;
1064 if (inInitialStage())
1067 (params.isCheckCoveringStep(step)
1068 && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1071 /* The weighthistogram size after this update. */
1072 double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_,
1073 weightSumCovering_, fplog);
1075 /* Make the update list. Usually we try to only update local points,
1076 * but if the update has non-trivial or non-deterministic effects
1077 * on non-local points a global update is needed. This is the case when:
1078 * 1) a covering occurred in the initial stage, leading to non-trivial
1079 * histogram rescaling factors; or
1080 * 2) the target distribution will be updated, since we don't make any
1081 * assumption on its form; or
1082 * 3) the AWH parameters are such that we never attempt to skip non-local
1084 * 4) the free energy values have grown so large that a renormalization
1087 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1089 /* Global update, just add all points. */
1090 updateList->clear();
1091 for (size_t m = 0; m < points_.size(); m++)
1093 if (points_[m].inTargetRegion())
1095 updateList->push_back(m);
1100 /* Set histogram scale factors. */
1101 double weightHistScalingSkipped = 0;
1102 double logPmfsumScalingSkipped = 0;
1103 if (params.skipUpdates())
1105 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1107 double weightHistScalingNew;
1108 double logPmfsumScalingNew;
1109 setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1110 &weightHistScalingNew, &logPmfsumScalingNew);
1112 /* Update free energy and reference weight histogram for points in the update list. */
1113 for (int pointIndex : *updateList)
1115 PointState* pointStateToUpdate = &points_[pointIndex];
1117 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1118 if (params.skipUpdates())
1120 pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(),
1121 weightHistScalingSkipped,
1122 logPmfsumScalingSkipped);
1125 /* Now do an update with new sampling data. */
1126 pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(),
1127 weightHistScalingNew, logPmfsumScalingNew);
1130 /* Only update the histogram size after we are done with the local point updates */
1131 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1133 if (needToNormalizeFreeEnergy)
1135 normalizeFreeEnergyAndPmfSum(&points_);
1138 if (needToUpdateTargetDistribution)
1140 /* The target distribution is always updated for all points at once. */
1141 updateTargetDistribution(points_, params);
1144 /* Update the bias. The bias is updated separately and last since it simply a function of
1145 the free energy and the target distribution and we want to avoid doing extra work. */
1146 for (int pointIndex : *updateList)
1148 points_[pointIndex].updateBias();
1151 /* Increase the update counter. */
1152 histogramSize_.incrementNumUpdates();
1155 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>& dimParams,
1157 std::vector<double, AlignedAllocator<double>>* weight) const
1159 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1160 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1162 #if GMX_SIMD_HAVE_DOUBLE
1163 typedef SimdDouble PackType;
1164 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1166 typedef double PackType;
1167 constexpr int packSize = 1;
1169 /* Round the size of the weight array up to packSize */
1170 const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1171 weight->resize(weightSize);
1173 double* gmx_restrict weightData = weight->data();
1174 PackType weightSumPack(0.0);
1175 for (size_t i = 0; i < neighbors.size(); i += packSize)
1177 for (size_t n = i; n < i + packSize; n++)
1179 if (n < neighbors.size())
1181 const int neighbor = neighbors[n];
1183 biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1184 points_[neighbor].bias(), coordState_.coordValue());
1188 /* Pad with values that don't affect the result */
1189 (*weight)[n] = detail::c_largeNegativeExponent;
1192 PackType weightPack = load<PackType>(weightData + i);
1193 weightPack = gmx::exp(weightPack);
1194 weightSumPack = weightSumPack + weightPack;
1195 store(weightData + i, weightPack);
1197 /* Sum of probability weights */
1198 double weightSum = reduce(weightSumPack);
1199 GMX_RELEASE_ASSERT(weightSum > 0,
1200 "zero probability weight when updating AWH probability weights.");
1202 /* Normalize probabilities to sum to 1 */
1203 double invWeightSum = 1 / weightSum;
1204 for (double& w : *weight)
1209 /* Return the convolved bias */
1210 return std::log(weightSum);
1213 double BiasState::calcConvolvedBias(const std::vector<DimParams>& dimParams,
1215 const awh_dvec& coordValue) const
1217 int point = grid.nearestIndex(coordValue);
1218 const GridPoint& gridPoint = grid.point(point);
1220 /* Sum the probability weights from the neighborhood of the given point */
1221 double weightSum = 0;
1222 for (int neighbor : gridPoint.neighbor)
1224 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1225 points_[neighbor].bias(), coordValue);
1226 weightSum += std::exp(logWeight);
1229 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1230 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1233 void BiasState::sampleProbabilityWeights(const Grid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1235 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1237 /* Save weights for next update */
1238 for (size_t n = 0; n < neighbor.size(); n++)
1240 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1243 /* Update the local update range. Two corner points define this rectangular
1244 * domain. We need to choose two new corner points such that the new domain
1245 * contains both the old update range and the current neighborhood.
1246 * In the simplest case when an update is performed every sample,
1247 * the update range would simply equal the current neighborhood.
1249 int neighborStart = neighbor[0];
1250 int neighborLast = neighbor[neighbor.size() - 1];
1251 for (int d = 0; d < grid.numDimensions(); d++)
1253 int origin = grid.point(neighborStart).index[d];
1254 int last = grid.point(neighborLast).index[d];
1258 /* Unwrap if wrapped around the boundary (only happens for periodic
1259 * boundaries). This has been already for the stored index interval.
1261 /* TODO: what we want to do is to find the smallest the update
1262 * interval that contains all points that need to be updated.
1263 * This amounts to combining two intervals, the current
1264 * [origin, end] update interval and the new touched neighborhood
1265 * into a new interval that contains all points from both the old
1268 * For periodic boundaries it becomes slightly more complicated
1269 * than for closed boundaries because then it needs not be
1270 * true that origin < end (so one can't simply relate the origin/end
1271 * in the min()/max() below). The strategy here is to choose the
1272 * origin closest to a reference point (index 0) and then unwrap
1273 * the end index if needed and choose the largest end index.
1274 * This ensures that both intervals are in the new interval
1275 * but it's not necessarily the smallest.
1276 * Currently we solve this by going through each possibility
1277 * and checking them.
1279 last += grid.axis(d).numPointsInPeriod();
1282 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1283 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1287 void BiasState::sampleCoordAndPmf(const Grid& grid, gmx::ArrayRef<const double> probWeightNeighbor, double convolvedBias)
1289 /* Sampling-based deconvolution extracting the PMF.
1290 * Update the PMF histogram with the current coordinate value.
1292 * Because of the finite width of the harmonic potential, the free energy
1293 * defined for each coordinate point does not exactly equal that of the
1294 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1295 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1296 * reweighted histogram of the coordinate value. Strictly, this relies on
1297 * the unknown normalization constant Z being either constant or known. Here,
1298 * neither is true except in the long simulation time limit. Empirically however,
1299 * it works (mainly because how the PMF histogram is rescaled).
1302 /* Only save coordinate data that is in range (the given index is always
1303 * in range even if the coordinate value is not).
1305 if (grid.covers(coordState_.coordValue()))
1307 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1308 points_[coordState_.gridpointIndex()].samplePmf(convolvedBias);
1311 /* Save probability weights for the update */
1312 sampleProbabilityWeights(grid, probWeightNeighbor);
1315 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1317 biasHistory->pointState.resize(points_.size());
1320 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const Grid& grid) const
1322 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1323 "The AWH history setup does not match the AWH state.");
1325 AwhBiasStateHistory* stateHistory = &biasHistory->state;
1326 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1328 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1330 AwhPointStateHistory* psh = &biasHistory->pointState[m];
1332 points_[m].storeState(psh);
1334 psh->weightsum_covering = weightSumCovering_[m];
1337 histogramSize_.storeState(stateHistory);
1339 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1340 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid, endUpdatelist_);
1343 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const Grid& grid)
1345 const AwhBiasStateHistory& stateHistory = biasHistory.state;
1347 coordState_.restoreFromHistory(stateHistory);
1349 if (biasHistory.pointState.size() != points_.size())
1352 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1353 "Likely you provided a checkpoint from a different simulation."));
1355 for (size_t m = 0; m < points_.size(); m++)
1357 points_[m].setFromHistory(biasHistory.pointState[m]);
1360 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1362 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1365 histogramSize_.restoreFromHistory(stateHistory);
1367 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1368 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1371 void BiasState::broadcast(const t_commrec* commRecord)
1373 gmx_bcast(sizeof(coordState_), &coordState_, commRecord);
1375 gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord);
1377 gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(), commRecord);
1379 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord);
1382 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>& dimParams, const Grid& grid)
1384 std::vector<float> convolvedPmf;
1386 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1388 for (size_t m = 0; m < points_.size(); m++)
1390 points_[m].setFreeEnergy(convolvedPmf[m]);
1395 * Count trailing data rows containing only zeros.
1397 * \param[in] data 2D data array.
1398 * \param[in] numRows Number of rows in array.
1399 * \param[in] numColumns Number of cols in array.
1400 * \returns the number of trailing zero rows.
1402 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1404 int numZeroRows = 0;
1405 for (int m = numRows - 1; m >= 0; m--)
1407 bool rowIsZero = true;
1408 for (int d = 0; d < numColumns; d++)
1410 if (data[d][m] != 0)
1419 /* At a row with non-zero data */
1424 /* Still at a zero data row, keep checking rows higher up. */
1433 * Initializes the PMF and target with data read from an input table.
1435 * \param[in] dimParams The dimension parameters.
1436 * \param[in] grid The grid.
1437 * \param[in] filename The filename to read PMF and target from.
1438 * \param[in] numBias Number of biases.
1439 * \param[in] biasIndex The index of the bias.
1440 * \param[in,out] pointState The state of the points in this bias.
1442 static void readUserPmfAndTargetDistribution(const std::vector<DimParams>& dimParams,
1444 const std::string& filename,
1447 std::vector<PointState>* pointState)
1449 /* Read the PMF and target distribution.
1450 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1451 base on the force constant. The free energy and target together determine the bias.
1453 std::string filenameModified(filename);
1456 size_t n = filenameModified.rfind('.');
1457 GMX_RELEASE_ASSERT(n != std::string::npos,
1458 "The filename should contain an extension starting with .");
1459 filenameModified.insert(n, formatString("%d", biasIndex));
1462 std::string correctFormatMessage = formatString(
1463 "%s is expected in the following format. "
1464 "The first ndim column(s) should contain the coordinate values for each point, "
1465 "each column containing values of one dimension (in ascending order). "
1466 "For a multidimensional coordinate, points should be listed "
1467 "in the order obtained by traversing lower dimensions first. "
1468 "E.g. for two-dimensional grid of size nxn: "
1469 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1470 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1471 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1472 "Make sure the input file ends with a new line but has no trailing new lines.",
1474 gmx::TextLineWrapper wrapper;
1475 wrapper.settings().setLineLength(c_linewidth);
1476 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1480 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1482 /* Check basic data properties here. Grid takes care of more complicated things. */
1486 std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(),
1487 correctFormatMessage.c_str());
1488 GMX_THROW(InvalidInputError(mesg));
1491 /* Less than 2 points is not useful for PMF or target. */
1494 std::string mesg = gmx::formatString(
1495 "%s contains too few data points (%d)."
1496 "The minimum number of points is 2.",
1497 filename.c_str(), numRows);
1498 GMX_THROW(InvalidInputError(mesg));
1501 /* Make sure there are enough columns of data.
1503 Two formats are allowed. Either with columns {coords, PMF, target} or
1504 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1505 is how AWH output is written (x, y, z being other AWH variables). For this format,
1506 trailing columns are ignored.
1508 int columnIndexTarget;
1509 int numColumnsMin = dimParams.size() + 2;
1510 int columnIndexPmf = dimParams.size();
1511 if (numColumns == numColumnsMin)
1513 columnIndexTarget = columnIndexPmf + 1;
1517 columnIndexTarget = columnIndexPmf + 4;
1520 if (numColumns < numColumnsMin)
1522 std::string mesg = gmx::formatString(
1523 "The number of columns in %s should be at least %d."
1525 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1526 GMX_THROW(InvalidInputError(mesg));
1529 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1530 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1531 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1532 if (numZeroRows > 1)
1534 std::string mesg = gmx::formatString(
1535 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1537 numZeroRows, filename.c_str());
1538 GMX_THROW(InvalidInputError(mesg));
1541 /* Convert from user units to internal units before sending the data of to grid. */
1542 for (size_t d = 0; d < dimParams.size(); d++)
1544 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1545 if (scalingFactor == 1)
1549 for (size_t m = 0; m < pointState->size(); m++)
1551 data[d][m] *= scalingFactor;
1555 /* Get a data point for each AWH grid point so that they all get data. */
1556 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1557 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1559 /* Extract the data for each grid point.
1560 * We check if the target distribution is zero for all points.
1562 bool targetDistributionIsZero = true;
1563 for (size_t m = 0; m < pointState->size(); m++)
1565 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1566 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1568 /* Check if the values are allowed. */
1571 std::string mesg = gmx::formatString(
1572 "Target distribution weight at point %zu (%g) in %s is negative.", m, target,
1574 GMX_THROW(InvalidInputError(mesg));
1578 targetDistributionIsZero = false;
1580 (*pointState)[m].setTargetConstantWeight(target);
1583 if (targetDistributionIsZero)
1586 gmx::formatString("The target weights given in column %d in %s are all 0",
1587 columnIndexTarget, filename.c_str());
1588 GMX_THROW(InvalidInputError(mesg));
1591 /* Free the arrays. */
1592 for (int m = 0; m < numColumns; m++)
1599 void BiasState::normalizePmf(int numSharingSims)
1601 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1602 Approximately (for large enough force constant) we should have:
1603 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1606 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1607 double expSumPmf = 0;
1609 for (const PointState& pointState : points_)
1611 if (pointState.inTargetRegion())
1613 expSumPmf += std::exp(pointState.logPmfSum());
1614 expSumF += std::exp(-pointState.freeEnergy());
1617 double numSamples = histogramSize_.histogramSize() / numSharingSims;
1620 double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1621 for (PointState& pointState : points_)
1623 if (pointState.inTargetRegion())
1625 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1630 void BiasState::initGridPointState(const AwhBiasParams& awhBiasParams,
1631 const std::vector<DimParams>& dimParams,
1633 const BiasParams& params,
1634 const std::string& filename,
1637 /* Modify PMF, free energy and the constant target distribution factor
1638 * to user input values if there is data given.
1640 if (awhBiasParams.bUserData)
1642 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1643 setFreeEnergyToConvolvedPmf(dimParams, grid);
1646 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1647 GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN || points_[0].weightSumRef() != 0,
1648 "AWH reference weight histogram not initialized properly with local "
1649 "Boltzmann target distribution.");
1651 updateTargetDistribution(points_, params);
1653 for (PointState& pointState : points_)
1655 if (pointState.inTargetRegion())
1657 pointState.updateBias();
1661 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1662 pointState.setTargetToZero();
1666 /* Set the initial reference weighthistogram. */
1667 const double histogramSize = histogramSize_.histogramSize();
1668 for (auto& pointState : points_)
1670 pointState.setInitialReferenceWeightHistogram(histogramSize);
1673 /* Make sure the pmf is normalized consistently with the histogram size.
1674 Note: the target distribution and free energy need to be set here. */
1675 normalizePmf(params.numSharedUpdate);
1678 BiasState::BiasState(const AwhBiasParams& awhBiasParams,
1679 double histogramSizeInitial,
1680 const std::vector<DimParams>& dimParams,
1682 coordState_(awhBiasParams, dimParams, grid),
1683 points_(grid.numPoints()),
1684 weightSumCovering_(grid.numPoints()),
1685 histogramSize_(awhBiasParams, histogramSizeInitial)
1687 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1688 for (size_t d = 0; d < dimParams.size(); d++)
1690 int index = grid.point(coordState_.gridpointIndex()).index[d];
1691 originUpdatelist_[d] = index;
1692 endUpdatelist_[d] = index;