2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * Implements the BiasState class.
40 * \author Viveca Lindahl
41 * \author Berk Hess <hess@kth.se>
47 #include "biasstate.h"
57 #include "gromacs/fileio/gmxfio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/gmxlib/network.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/mdtypes/awh_history.h"
62 #include "gromacs/mdtypes/awh_params.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/simd/simd.h"
65 #include "gromacs/simd/simd_math.h"
66 #include "gromacs/utility/arrayref.h"
67 #include "gromacs/utility/exceptions.h"
68 #include "gromacs/utility/gmxassert.h"
69 #include "gromacs/utility/smalloc.h"
70 #include "gromacs/utility/stringutil.h"
73 #include "pointstate.h"
78 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
80 GMX_ASSERT(pmf.size() == index(points_.size()), "pmf should have the size of the bias grid");
82 /* The PMF is just the negative of the log of the sampled PMF histogram.
83 * Points with zero target weight are ignored, they will mostly contain noise.
85 for (size_t i = 0; i < points_.size(); i++)
87 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
95 * Sum an array over all simulations on the master rank of each simulation.
97 * \param[in,out] arrayRef The data to sum.
98 * \param[in] multiSimComm Struct for multi-simulation communication.
100 void sumOverSimulations(gmx::ArrayRef<int> arrayRef,
101 const gmx_multisim_t *multiSimComm)
103 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
107 * Sum an array over all simulations on the master rank of each simulation.
109 * \param[in,out] arrayRef The data to sum.
110 * \param[in] multiSimComm Struct for multi-simulation communication.
112 void sumOverSimulations(gmx::ArrayRef<double> arrayRef,
113 const gmx_multisim_t *multiSimComm)
115 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
119 * Sum an array over all simulations on all ranks of each simulation.
121 * This assumes the data is identical on all ranks within each simulation.
123 * \param[in,out] arrayRef The data to sum.
124 * \param[in] commRecord Struct for intra-simulation communication.
125 * \param[in] multiSimComm Struct for multi-simulation communication.
128 void sumOverSimulations(gmx::ArrayRef<T> arrayRef,
129 const t_commrec *commRecord,
130 const gmx_multisim_t *multiSimComm)
132 if (MASTER(commRecord))
134 sumOverSimulations(arrayRef, multiSimComm);
136 if (commRecord->nnodes > 1)
138 gmx_bcast(arrayRef.size()*sizeof(T), arrayRef.data(), commRecord);
143 * Sum PMF over multiple simulations, when requested.
145 * \param[in,out] pointState The state of the points in the bias.
146 * \param[in] numSharedUpdate The number of biases sharing the histogram.
147 * \param[in] commRecord Struct for intra-simulation communication.
148 * \param[in] multiSimComm Struct for multi-simulation communication.
150 void sumPmf(gmx::ArrayRef<PointState> pointState,
152 const t_commrec *commRecord,
153 const gmx_multisim_t *multiSimComm)
155 if (numSharedUpdate == 1)
159 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->nsim == 0, "numSharedUpdate should be a multiple of multiSimComm->nsim");
160 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
162 std::vector<double> buffer(pointState.size());
164 /* Need to temporarily exponentiate the log weights to sum over simulations */
165 for (size_t i = 0; i < buffer.size(); i++)
167 buffer[i] = pointState[i].inTargetRegion() ? std::exp(-pointState[i].logPmfSum()) : 0;
170 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
172 /* Take log again to get (non-normalized) PMF */
173 double normFac = 1.0/numSharedUpdate;
174 for (gmx::index i = 0; i < pointState.size(); i++)
176 if (pointState[i].inTargetRegion())
178 pointState[i].setLogPmfSum(-std::log(buffer[i]*normFac));
184 * Find the minimum free energy value.
186 * \param[in] pointState The state of the points.
187 * \returns the minimum free energy value.
189 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
191 double fMin = GMX_FLOAT_MAX;
193 for (auto const &ps : pointState)
195 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
197 fMin = ps.freeEnergy();
205 * Find and return the log of the probability weight of a point given a coordinate value.
207 * The unnormalized weight is given by
208 * w(point|value) = exp(bias(point) - U(value,point)),
209 * where U is a harmonic umbrella potential.
211 * \param[in] dimParams The bias dimensions parameters
212 * \param[in] points The point state.
213 * \param[in] grid The grid.
214 * \param[in] pointIndex Point to evaluate probability weight for.
215 * \param[in] pointBias Bias for the point (as a log weight).
216 * \param[in] value Coordinate value.
217 * \returns the log of the biased probability weight.
219 double biasedLogWeightFromPoint(const std::vector<DimParams> &dimParams,
220 const std::vector<PointState> &points,
224 const awh_dvec value)
226 double logWeight = detail::c_largeNegativeExponent;
228 /* Only points in the target reigon have non-zero weight */
229 if (points[pointIndex].inTargetRegion())
231 logWeight = pointBias;
233 /* Add potential for all parameter dimensions */
234 for (size_t d = 0; d < dimParams.size(); d++)
236 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
237 logWeight -= 0.5*dimParams[d].betak*dev*dev;
246 void BiasState::calcConvolvedPmf(const std::vector<DimParams> &dimParams,
248 std::vector<float> *convolvedPmf) const
250 size_t numPoints = grid.numPoints();
252 convolvedPmf->resize(numPoints);
254 /* Get the PMF to convolve. */
255 std::vector<float> pmf(numPoints);
258 for (size_t m = 0; m < numPoints; m++)
260 double freeEnergyWeights = 0;
261 const GridPoint &point = grid.point(m);
262 for (auto &neighbor : point.neighbor)
264 /* The negative PMF is a positive bias. */
265 double biasNeighbor = -pmf[neighbor];
267 /* Add the convolved PMF weights for the neighbors of this point.
268 Note that this function only adds point within the target > 0 region.
269 Sum weights, take the logarithm last to get the free energy. */
270 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid,
271 neighbor, biasNeighbor,
273 freeEnergyWeights += std::exp(logWeight);
276 GMX_RELEASE_ASSERT(freeEnergyWeights > 0, "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
277 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
285 * Updates the target distribution for all points.
287 * The target distribution is always updated for all points
290 * \param[in,out] pointState The state of all points.
291 * \param[in] params The bias parameters.
293 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState,
294 const BiasParams ¶ms)
296 double freeEnergyCutoff = 0;
297 if (params.eTarget == eawhtargetCUTOFF)
299 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
302 double sumTarget = 0;
303 for (PointState &ps : pointState)
305 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
307 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
310 double invSum = 1.0/sumTarget;
311 for (PointState &ps : pointState)
313 ps.scaleTarget(invSum);
318 * Puts together a string describing a grid point.
320 * \param[in] grid The grid.
321 * \param[in] point Grid point index.
322 * \returns a string for the point.
324 std::string gridPointValueString(const Grid &grid, int point)
326 std::string pointString;
330 for (int d = 0; d < grid.numDimensions(); d++)
332 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
333 if (d < grid.numDimensions() - 1)
348 int BiasState::warnForHistogramAnomalies(const Grid &grid,
352 int maxNumWarnings) const
354 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
355 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
357 /* Sum up the histograms and get their normalization */
358 double sumVisits = 0;
359 double sumWeights = 0;
360 for (auto &pointState : points_)
362 if (pointState.inTargetRegion())
364 sumVisits += pointState.numVisitsTot();
365 sumWeights += pointState.weightSumTot();
368 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
369 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
370 double invNormVisits = 1.0/sumVisits;
371 double invNormWeight = 1.0/sumWeights;
373 /* Check all points for warnings */
375 size_t numPoints = grid.numPoints();
376 for (size_t m = 0; m < numPoints; m++)
378 /* Skip points close to boundary or non-target region */
379 const GridPoint &gridPoint = grid.point(m);
380 bool skipPoint = false;
381 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
383 int neighbor = gridPoint.neighbor[n];
384 skipPoint = !points_[neighbor].inTargetRegion();
385 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
387 const GridPoint &neighborPoint = grid.point(neighbor);
389 neighborPoint.index[d] == 0 ||
390 neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
394 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
395 const double relativeWeight = points_[m].weightSumTot()*invNormWeight;
396 const double relativeVisits = points_[m].numVisitsTot()*invNormVisits;
398 relativeVisits < relativeWeight*maxHistogramRatio)
400 std::string pointValueString = gridPointValueString(grid, m);
401 std::string warningMessage =
402 gmx::formatString("\nawh%d warning: "
403 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
404 "is less than a fraction %g of the reference distribution at that point. "
405 "If you are not certain about your settings you might want to increase your pull force constant or "
406 "modify your sampling region.\n",
407 biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
408 gmx::TextLineWrapper wrapper;
409 wrapper.settings().setLineLength(c_linewidth);
410 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
414 if (numWarnings >= maxNumWarnings)
423 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams> &dimParams,
426 gmx::ArrayRef<double> force) const
428 double potential = 0;
429 for (size_t d = 0; d < dimParams.size(); d++)
431 double deviation = getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
433 double k = dimParams[d].k;
435 /* Force from harmonic potential 0.5*k*dev^2 */
436 force[d] = -k*deviation;
437 potential += 0.5*k*deviation*deviation;
443 void BiasState::calcConvolvedForce(const std::vector<DimParams> &dimParams,
445 gmx::ArrayRef<const double> probWeightNeighbor,
446 gmx::ArrayRef<double> forceWorkBuffer,
447 gmx::ArrayRef<double> force) const
449 for (size_t d = 0; d < dimParams.size(); d++)
454 /* Only neighboring points have non-negligible contribution. */
455 const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
456 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
457 for (size_t n = 0; n < neighbor.size(); n++)
459 double weightNeighbor = probWeightNeighbor[n];
460 int indexNeighbor = neighbor[n];
462 /* Get the umbrella force from this point. The returned potential is ignored here. */
463 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor,
466 /* Add the weighted umbrella force to the convolved force. */
467 for (size_t d = 0; d < dimParams.size(); d++)
469 force[d] += forceFromNeighbor[d]*weightNeighbor;
474 double BiasState::moveUmbrella(const std::vector<DimParams> &dimParams,
476 gmx::ArrayRef<const double> probWeightNeighbor,
477 gmx::ArrayRef<double> biasForce,
482 /* Generate and set a new coordinate reference value */
483 coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor, step, seed, indexSeed);
485 std::vector<double> newForce(dimParams.size());
486 double newPotential =
487 calcUmbrellaForceAndPotential(dimParams, grid, coordState_.umbrellaGridpoint(), newForce);
489 /* A modification of the reference value at time t will lead to a different
490 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
491 this means the force and velocity will change signs roughly as often.
492 To avoid any issues we take the average of the previous and new force
493 at steps when the reference value has been moved. E.g. if the ref. value
494 is set every step to (coord dvalue +/- delta) would give zero force.
496 for (gmx::index d = 0; d < biasForce.size(); d++)
498 /* Average of the current and new force */
499 biasForce[d] = 0.5*(biasForce[d] + newForce[d]);
509 * Sets the histogram rescaling factors needed to control the histogram size.
511 * For sake of robustness, the reference weight histogram can grow at a rate
512 * different from the actual sampling rate. Typically this happens for a limited
513 * initial time, alternatively growth is scaled down by a constant factor for all
514 * times. Since the size of the reference histogram sets the size of the free
515 * energy update this should be reflected also in the PMF. Thus the PMF histogram
516 * needs to be rescaled too.
518 * This function should only be called by the bias update function or wrapped by a function that
519 * knows what scale factors should be applied when, e.g,
520 * getSkippedUpdateHistogramScaleFactors().
522 * \param[in] params The bias parameters.
523 * \param[in] newHistogramSize New reference weight histogram size.
524 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
525 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
526 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
528 void setHistogramUpdateScaleFactors(const BiasParams ¶ms,
529 double newHistogramSize,
530 double oldHistogramSize,
531 double *weightHistScaling,
532 double *logPmfSumScaling)
535 /* The two scaling factors below are slightly different (ignoring the log factor) because the
536 reference and the PMF histogram apply weight scaling differently. The weight histogram
537 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
538 It is done this way because that is what target type local Boltzmann (for which
539 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
540 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
541 1) empirically this is necessary for converging the PMF; 2) since the extraction of
542 the PMF is theoretically only valid for a constant bias, new samples should get more
543 weight than old ones for which the bias is fluctuating more. */
544 *weightHistScaling = newHistogramSize/(oldHistogramSize + params.updateWeight*params.localWeightScaling);
545 *logPmfSumScaling = std::log(newHistogramSize/(oldHistogramSize + params.updateWeight));
550 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams ¶ms,
551 double *weightHistScaling,
552 double *logPmfSumScaling) const
554 GMX_ASSERT(params.skipUpdates(), "Calling function for skipped updates when skipping updates is not allowed");
556 if (inInitialStage())
558 /* In between global updates the reference histogram size is kept constant so we trivially know what the
559 histogram size was at the time of the skipped update. */
560 double histogramSize = histogramSize_.histogramSize();
561 setHistogramUpdateScaleFactors(params, histogramSize, histogramSize,
562 weightHistScaling, logPmfSumScaling);
566 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
567 *weightHistScaling = 1;
568 *logPmfSumScaling = 0;
572 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams ¶ms)
574 double weightHistScaling;
575 double logPmfsumScaling;
577 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
579 for (auto &pointState : points_)
581 bool didUpdate = pointState.performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
583 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
586 pointState.updateBias();
591 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams ¶ms,
594 double weightHistScaling;
595 double logPmfsumScaling;
597 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
599 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
600 const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
601 for (auto &neighbor : neighbors)
603 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
607 points_[neighbor].updateBias();
616 * Merge update lists from multiple sharing simulations.
618 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
619 * \param[in] numPoints Total number of points.
620 * \param[in] commRecord Struct for intra-simulation communication.
621 * \param[in] multiSimComm Struct for multi-simulation communication.
623 void mergeSharedUpdateLists(std::vector<int> *updateList,
625 const t_commrec *commRecord,
626 const gmx_multisim_t *multiSimComm)
628 std::vector<int> numUpdatesOfPoint;
630 /* Flag the update points of this sim.
631 TODO: we can probably avoid allocating this array and just use the input array. */
632 numUpdatesOfPoint.resize(numPoints, 0);
633 for (auto &pointIndex : *updateList)
635 numUpdatesOfPoint[pointIndex] = 1;
638 /* Sum over the sims to get all the flagged points */
639 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
641 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
643 for (int m = 0; m < numPoints; m++)
645 if (numUpdatesOfPoint[m] > 0)
647 updateList->push_back(m);
653 * Generate an update list of points sampled since the last update.
655 * \param[in] grid The AWH bias.
656 * \param[in] points The point state.
657 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since last update.
658 * \param[in] endUpdatelist The end of the rectangular that has been sampled since last update.
659 * \param[in,out] updateList Local update list to set (assumed >= npoints long).
661 void makeLocalUpdateList(const Grid &grid,
662 const std::vector<PointState> &points,
663 const awh_ivec originUpdatelist,
664 const awh_ivec endUpdatelist,
665 std::vector<int> *updateList)
670 /* Define the update search grid */
671 for (int d = 0; d < grid.numDimensions(); d++)
673 origin[d] = originUpdatelist[d];
674 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
676 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
677 This helps for calculating the distance/number of points but should be removed and fixed when the way of
678 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
679 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
682 /* Make the update list */
685 bool pointExists = true;
688 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
690 if (pointExists && points[pointIndex].inTargetRegion())
692 updateList->push_back(pointIndex);
699 void BiasState::resetLocalUpdateRange(const Grid &grid)
701 const int gridpointIndex = coordState_.gridpointIndex();
702 for (int d = 0; d < grid.numDimensions(); d++)
704 /* This gives the minimum range consisting only of the current closest point. */
705 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
706 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
714 * Add partial histograms (accumulating between updates) to accumulating histograms.
716 * \param[in,out] pointState The state of the points in the bias.
717 * \param[in,out] weightSumCovering The weights for checking covering.
718 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
719 * \param[in] commRecord Struct for intra-simulation communication.
720 * \param[in] multiSimComm Struct for multi-simulation communication.
721 * \param[in] localUpdateList List of points with data.
723 void sumHistograms(gmx::ArrayRef<PointState> pointState,
724 gmx::ArrayRef<double> weightSumCovering,
726 const t_commrec *commRecord,
727 const gmx_multisim_t *multiSimComm,
728 const std::vector<int> &localUpdateList)
730 /* The covering checking histograms are added before summing over simulations, so that the weights from different
731 simulations are kept distinguishable. */
732 for (int globalIndex : localUpdateList)
734 weightSumCovering[globalIndex] +=
735 pointState[globalIndex].weightSumIteration();
738 /* Sum histograms over multiple simulations if needed. */
739 if (numSharedUpdate > 1)
741 GMX_ASSERT(numSharedUpdate == multiSimComm->nsim, "Sharing within a simulation is not implemented (yet)");
743 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
744 std::vector<double> weightSum;
745 std::vector<double> coordVisits;
747 weightSum.resize(localUpdateList.size());
748 coordVisits.resize(localUpdateList.size());
750 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
752 const PointState &ps = pointState[localUpdateList[localIndex]];
754 weightSum[localIndex] = ps.weightSumIteration();
755 coordVisits[localIndex] = ps.numVisitsIteration();
758 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
759 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
761 /* Transfer back the result */
762 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
764 PointState &ps = pointState[localUpdateList[localIndex]];
766 ps.setPartialWeightAndCount(weightSum[localIndex],
767 coordVisits[localIndex]);
771 /* Now add the partial counts and weights to the accumulating histograms.
772 Note: we still need to use the weights for the update so we wait
773 with resetting them until the end of the update. */
774 for (int globalIndex : localUpdateList)
776 pointState[globalIndex].addPartialWeightAndCount();
781 * Label points along an axis as covered or not.
783 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
785 * \param[in] visited Visited? For each point.
786 * \param[in] checkCovering Check for covering? For each point.
787 * \param[in] numPoints The number of grid points along this dimension.
788 * \param[in] period Period in number of points.
789 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
790 * \param[in,out] covered In this array elements are 1 for covered points and 0 for non-covered points, this routine assumes that \p covered has at least size \p numPoints.
792 void labelCoveredPoints(const std::vector<bool> &visited,
793 const std::vector<bool> &checkCovering,
797 gmx::ArrayRef<int> covered)
799 GMX_ASSERT(covered.size() >= numPoints, "covered should be at least as large as the grid");
801 bool haveFirstNotVisited = false;
802 int firstNotVisited = -1;
803 int notVisitedLow = -1;
804 int notVisitedHigh = -1;
806 for (int n = 0; n < numPoints; n++)
808 if (checkCovering[n] && !visited[n])
810 if (!haveFirstNotVisited)
814 haveFirstNotVisited = true;
820 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
821 by unvisited points. The unvisted end points affect the coveredness of the visited
822 with a reach equal to the cover radius. */
823 int notCoveredLow = notVisitedLow + coverRadius;
824 int notCoveredHigh = notVisitedHigh - coverRadius;
825 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
827 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
830 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval the
831 notVisitedLow of the next. */
832 notVisitedLow = notVisitedHigh;
837 /* Have labelled all the internal points. Now take care of the boundary regions. */
838 if (!haveFirstNotVisited)
840 /* No non-visited points <=> all points visited => all points covered. */
842 for (int n = 0; n < numPoints; n++)
849 int lastNotVisited = notVisitedLow;
851 /* For periodic boundaries, non-visited points can influence points
852 on the other side of the boundary so we need to wrap around. */
854 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
855 For non-periodic boundaries there is no lower end point so a dummy value is used. */
856 int notVisitedHigh = firstNotVisited;
857 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
859 int notCoveredLow = notVisitedLow + coverRadius;
860 int notCoveredHigh = notVisitedHigh - coverRadius;
862 for (int i = 0; i <= notVisitedHigh; i++)
864 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
865 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
868 /* Upper end. Same as for lower end but in the other direction. */
869 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
870 notVisitedLow = lastNotVisited;
872 notCoveredLow = notVisitedLow + coverRadius;
873 notCoveredHigh = notVisitedHigh - coverRadius;
875 for (int i = notVisitedLow; i <= numPoints - 1; i++)
877 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
878 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
885 bool BiasState::isSamplingRegionCovered(const BiasParams ¶ms,
886 const std::vector<DimParams> &dimParams,
888 const t_commrec *commRecord,
889 const gmx_multisim_t *multiSimComm) const
891 /* Allocate and initialize arrays: one for checking visits along each dimension,
892 one for keeping track of which points to check and one for the covered points.
893 Possibly these could be kept as AWH variables to avoid these allocations. */
896 std::vector<bool> visited;
897 std::vector<bool> checkCovering;
898 // We use int for the covering array since we might use gmx_sumi_sim.
899 std::vector<int> covered;
902 std::vector<CheckDim> checkDim;
903 checkDim.resize(grid.numDimensions());
905 for (int d = 0; d < grid.numDimensions(); d++)
907 const size_t numPoints = grid.axis(d).numPoints();
908 checkDim[d].visited.resize(numPoints, false);
909 checkDim[d].checkCovering.resize(numPoints, false);
910 checkDim[d].covered.resize(numPoints, 0);
913 /* Set visited points along each dimension and which points should be checked for covering.
914 Specifically, points above the free energy cutoff (if there is one) or points outside
915 of the target region are ignored. */
917 /* Set the free energy cutoff */
918 double maxFreeEnergy = GMX_FLOAT_MAX;
920 if (params.eTarget == eawhtargetCUTOFF)
922 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
925 /* Set the threshold weight for a point to be considered visited. */
926 double weightThreshold = 1;
927 for (int d = 0; d < grid.numDimensions(); d++)
929 weightThreshold *= grid.axis(d).spacing()*std::sqrt(dimParams[d].betak*0.5*M_1_PI);
932 /* Project the sampling weights onto each dimension */
933 for (size_t m = 0; m < grid.numPoints(); m++)
935 const PointState &pointState = points_[m];
937 for (int d = 0; d < grid.numDimensions(); d++)
939 int n = grid.point(m).index[d];
941 /* Is visited if it was already visited or if there is enough weight at the current point */
942 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
944 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
945 checkDim[d].checkCovering[n] = checkDim[d].checkCovering[n] || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
949 /* Label each point along each dimension as covered or not. */
950 for (int d = 0; d < grid.numDimensions(); d++)
952 labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(), grid.axis(d).numPointsInPeriod(),
953 params.coverRadius()[d], checkDim[d].covered);
956 /* Now check for global covering. Each dimension needs to be covered separately.
957 A dimension is covered if each point is covered. Multiple simulations collectively
958 cover the points, i.e. a point is covered if any of the simulations covered it.
959 However, visited points are not shared, i.e. if a point is covered or not is
960 determined by the visits of a single simulation. In general the covering criterion is
961 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
962 For 1 simulation, all points covered <=> all points visited. For multiple simulations
963 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
964 In the limit of large coverRadius, all points covered => all points visited by at least one
965 simulation (since no point will be covered until all points have been visited by a
966 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
967 needs with surrounding points before sharing covering information with other simulations. */
969 /* Communicate the covered points between sharing simulations if needed. */
970 if (params.numSharedUpdate > 1)
972 /* For multiple dimensions this may not be the best way to do it. */
973 for (int d = 0; d < grid.numDimensions(); d++)
975 sumOverSimulations(gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()), commRecord, multiSimComm);
979 /* Now check if for each dimension all points are covered. Break if not true. */
980 bool allPointsCovered = true;
981 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
983 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
985 allPointsCovered = (checkDim[d].covered[n] != 0);
989 return allPointsCovered;
993 * Normalizes the free energy and PMF sum.
995 * \param[in] pointState The state of the points.
997 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState> *pointState)
999 double minF = freeEnergyMinimumValue(*pointState);
1001 for (PointState &ps : *pointState)
1003 ps.normalizeFreeEnergyAndPmfSum(minF);
1007 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams> &dimParams,
1009 const BiasParams ¶ms,
1010 const t_commrec *commRecord,
1011 const gmx_multisim_t *multiSimComm,
1015 std::vector<int> *updateList)
1017 /* Note hat updateList is only used in this scope and is always
1018 * re-initialized. We do not use a local vector, because that would
1019 * cause reallocation every time this funtion is called and the vector
1020 * can be the size of the whole grid.
1023 /* Make a list of all local points, i.e. those that could have been touched since
1024 the last update. These are the points needed for summing histograms below
1025 (non-local points only add zeros). For local updates, this will also be the
1026 final update list. */
1027 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_,
1029 if (params.numSharedUpdate > 1)
1031 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1034 /* Reset the range for the next update */
1035 resetLocalUpdateRange(grid);
1037 /* Add samples to histograms for all local points and sync simulations if needed */
1038 sumHistograms(points_, weightSumCovering_,
1039 params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1041 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1043 /* Renormalize the free energy if values are too large. */
1044 bool needToNormalizeFreeEnergy = false;
1045 for (int &globalIndex : *updateList)
1047 /* We want to keep the absolute value of the free energies to be less c_largePositiveExponent
1048 to be able to safely pass these values to exp(). The check below ensures this as long as
1049 the free energy values grow less than 0.5*c_largePositiveExponent in a return time to this
1050 neighborhood. For reasonable update sizes it's unlikely that this requirement would be
1052 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5*detail::c_largePositiveExponent)
1054 needToNormalizeFreeEnergy = true;
1059 /* Update target distribution? */
1060 bool needToUpdateTargetDistribution =
1061 (params.eTarget != eawhtargetCONSTANT &&
1062 params.isUpdateTargetStep(step));
1064 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1065 bool detectedCovering = false;
1066 if (inInitialStage())
1068 detectedCovering = (params.isCheckCoveringStep(step) &&
1069 isSamplingRegionCovered(params, dimParams, grid,
1070 commRecord, multiSimComm));
1073 /* The weighthistogram size after this update. */
1074 double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_, weightSumCovering_, fplog);
1076 /* Make the update list. Usually we try to only update local points,
1077 * but if the update has non-trivial or non-deterministic effects
1078 * on non-local points a global update is needed. This is the case when:
1079 * 1) a covering occurred in the initial stage, leading to non-trivial
1080 * histogram rescaling factors; or
1081 * 2) the target distribution will be updated, since we don't make any
1082 * assumption on its form; or
1083 * 3) the AWH parameters are such that we never attempt to skip non-local
1085 * 4) the free energy values have grown so large that a renormalization
1088 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1090 /* Global update, just add all points. */
1091 updateList->clear();
1092 for (size_t m = 0; m < points_.size(); m++)
1094 if (points_[m].inTargetRegion())
1096 updateList->push_back(m);
1101 /* Set histogram scale factors. */
1102 double weightHistScalingSkipped = 0;
1103 double logPmfsumScalingSkipped = 0;
1104 if (params.skipUpdates())
1106 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1108 double weightHistScalingNew;
1109 double logPmfsumScalingNew;
1110 setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1111 &weightHistScalingNew, &logPmfsumScalingNew);
1113 /* Update free energy and reference weight histogram for points in the update list. */
1114 for (int pointIndex : *updateList)
1116 PointState *pointStateToUpdate = &points_[pointIndex];
1118 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1119 if (params.skipUpdates())
1121 pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(), weightHistScalingSkipped, logPmfsumScalingSkipped);
1124 /* Now do an update with new sampling data. */
1125 pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(), weightHistScalingNew, logPmfsumScalingNew);
1128 /* Only update the histogram size after we are done with the local point updates */
1129 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1131 if (needToNormalizeFreeEnergy)
1133 normalizeFreeEnergyAndPmfSum(&points_);
1136 if (needToUpdateTargetDistribution)
1138 /* The target distribution is always updated for all points at once. */
1139 updateTargetDistribution(points_, params);
1142 /* Update the bias. The bias is updated separately and last since it simply a function of
1143 the free energy and the target distribution and we want to avoid doing extra work. */
1144 for (int pointIndex : *updateList)
1146 points_[pointIndex].updateBias();
1149 /* Increase the update counter. */
1150 histogramSize_.incrementNumUpdates();
1153 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams> &dimParams,
1155 std::vector < double, AlignedAllocator < double>> *weight) const
1157 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1158 const std::vector<int> &neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1160 #if GMX_SIMD_HAVE_DOUBLE
1161 typedef SimdDouble PackType;
1162 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1164 typedef double PackType;
1165 constexpr int packSize = 1;
1167 /* Round the size of the weight array up to packSize */
1168 const int weightSize = ((neighbors.size() + packSize - 1)/packSize)*packSize;
1169 weight->resize(weightSize);
1171 double * gmx_restrict weightData = weight->data();
1172 PackType weightSumPack(0.0);
1173 for (size_t i = 0; i < neighbors.size(); i += packSize)
1175 for (size_t n = i; n < i + packSize; n++)
1177 if (n < neighbors.size())
1179 const int neighbor = neighbors[n];
1180 (*weight)[n] = biasedLogWeightFromPoint(dimParams, points_, grid,
1181 neighbor, points_[neighbor].bias(),
1182 coordState_.coordValue());
1186 /* Pad with values that don't affect the result */
1187 (*weight)[n] = detail::c_largeNegativeExponent;
1190 PackType weightPack = load<PackType>(weightData + i);
1191 weightPack = gmx::exp(weightPack);
1192 weightSumPack = weightSumPack + weightPack;
1193 store(weightData + i, weightPack);
1195 /* Sum of probability weights */
1196 double weightSum = reduce(weightSumPack);
1197 GMX_RELEASE_ASSERT(weightSum > 0, "zero probability weight when updating AWH probability weights.");
1199 /* Normalize probabilities to sum to 1 */
1200 double invWeightSum = 1/weightSum;
1201 for (double &w : *weight)
1206 /* Return the convolved bias */
1207 return std::log(weightSum);
1210 double BiasState::calcConvolvedBias(const std::vector<DimParams> &dimParams,
1212 const awh_dvec &coordValue) const
1214 int point = grid.nearestIndex(coordValue);
1215 const GridPoint &gridPoint = grid.point(point);
1217 /* Sum the probability weights from the neighborhood of the given point */
1218 double weightSum = 0;
1219 for (int neighbor : gridPoint.neighbor)
1221 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid,
1222 neighbor, points_[neighbor].bias(),
1224 weightSum += std::exp(logWeight);
1227 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1228 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1231 void BiasState::sampleProbabilityWeights(const Grid &grid,
1232 gmx::ArrayRef<const double> probWeightNeighbor)
1234 const std::vector<int> &neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1236 /* Save weights for next update */
1237 for (size_t n = 0; n < neighbor.size(); n++)
1239 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1242 /* Update the local update range. Two corner points define this rectangular
1243 * domain. We need to choose two new corner points such that the new domain
1244 * contains both the old update range and the current neighborhood.
1245 * In the simplest case when an update is performed every sample,
1246 * the update range would simply equal the current neighborhood.
1248 int neighborStart = neighbor[0];
1249 int neighborLast = neighbor[neighbor.size() - 1];
1250 for (int d = 0; d < grid.numDimensions(); d++)
1252 int origin = grid.point(neighborStart).index[d];
1253 int last = grid.point(neighborLast).index[d];
1257 /* Unwrap if wrapped around the boundary (only happens for periodic
1258 * boundaries). This has been already for the stored index interval.
1260 /* TODO: what we want to do is to find the smallest the update
1261 * interval that contains all points that need to be updated.
1262 * This amounts to combining two intervals, the current
1263 * [origin, end] update interval and the new touched neighborhood
1264 * into a new interval that contains all points from both the old
1267 * For periodic boundaries it becomes slightly more complicated
1268 * than for closed boundaries because then it needs not be
1269 * true that origin < end (so one can't simply relate the origin/end
1270 * in the min()/max() below). The strategy here is to choose the
1271 * origin closest to a reference point (index 0) and then unwrap
1272 * the end index if needed and choose the largest end index.
1273 * This ensures that both intervals are in the new interval
1274 * but it's not necessarily the smallest.
1275 * Currently we solve this by going through each possibility
1276 * and checking them.
1278 last += grid.axis(d).numPointsInPeriod();
1281 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1282 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1286 void BiasState::sampleCoordAndPmf(const Grid &grid,
1287 gmx::ArrayRef<const double> probWeightNeighbor,
1288 double convolvedBias)
1290 /* Sampling-based deconvolution extracting the PMF.
1291 * Update the PMF histogram with the current coordinate value.
1293 * Because of the finite width of the harmonic potential, the free energy
1294 * defined for each coordinate point does not exactly equal that of the
1295 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1296 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1297 * reweighted histogram of the coordinate value. Strictly, this relies on
1298 * the unknown normalization constant Z being either constant or known. Here,
1299 * neither is true except in the long simulation time limit. Empirically however,
1300 * it works (mainly because how the PMF histogram is rescaled).
1303 /* Only save coordinate data that is in range (the given index is always
1304 * in range even if the coordinate value is not).
1306 if (grid.covers(coordState_.coordValue()))
1308 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1309 points_[coordState_.gridpointIndex()].samplePmf(convolvedBias);
1312 /* Save probability weights for the update */
1313 sampleProbabilityWeights(grid, probWeightNeighbor);
1316 void BiasState::initHistoryFromState(AwhBiasHistory *biasHistory) const
1318 biasHistory->pointState.resize(points_.size());
1321 void BiasState::updateHistory(AwhBiasHistory *biasHistory,
1322 const Grid &grid) const
1324 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(), "The AWH history setup does not match the AWH state.");
1326 AwhBiasStateHistory *stateHistory = &biasHistory->state;
1327 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1329 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1331 AwhPointStateHistory *psh = &biasHistory->pointState[m];
1333 points_[m].storeState(psh);
1335 psh->weightsum_covering = weightSumCovering_[m];
1338 histogramSize_.storeState(stateHistory);
1340 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid,
1342 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid,
1346 void BiasState::restoreFromHistory(const AwhBiasHistory &biasHistory,
1349 const AwhBiasStateHistory &stateHistory = biasHistory.state;
1351 coordState_.restoreFromHistory(stateHistory);
1353 if (biasHistory.pointState.size() != points_.size())
1355 GMX_THROW(InvalidInputError("Bias grid size in checkpoint and simulation do not match. Likely you provided a checkpoint from a different simulation."));
1357 for (size_t m = 0; m < points_.size(); m++)
1359 points_[m].setFromHistory(biasHistory.pointState[m]);
1362 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1364 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1367 histogramSize_.restoreFromHistory(stateHistory);
1369 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1370 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1373 void BiasState::broadcast(const t_commrec *commRecord)
1375 gmx_bcast(sizeof(coordState_), &coordState_, commRecord);
1377 gmx_bcast(points_.size()*sizeof(PointState), points_.data(), commRecord);
1379 gmx_bcast(weightSumCovering_.size()*sizeof(double), weightSumCovering_.data(), commRecord);
1381 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord);
1384 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams> &dimParams,
1387 std::vector<float> convolvedPmf;
1389 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1391 for (size_t m = 0; m < points_.size(); m++)
1393 points_[m].setFreeEnergy(convolvedPmf[m]);
1398 * Count trailing data rows containing only zeros.
1400 * \param[in] data 2D data array.
1401 * \param[in] numRows Number of rows in array.
1402 * \param[in] numColumns Number of cols in array.
1403 * \returns the number of trailing zero rows.
1405 static int countTrailingZeroRows(const double* const *data,
1409 int numZeroRows = 0;
1410 for (int m = numRows - 1; m >= 0; m--)
1412 bool rowIsZero = true;
1413 for (int d = 0; d < numColumns; d++)
1415 if (data[d][m] != 0)
1424 /* At a row with non-zero data */
1429 /* Still at a zero data row, keep checking rows higher up. */
1438 * Initializes the PMF and target with data read from an input table.
1440 * \param[in] dimParams The dimension parameters.
1441 * \param[in] grid The grid.
1442 * \param[in] filename The filename to read PMF and target from.
1443 * \param[in] numBias Number of biases.
1444 * \param[in] biasIndex The index of the bias.
1445 * \param[in,out] pointState The state of the points in this bias.
1447 static void readUserPmfAndTargetDistribution(const std::vector<DimParams> &dimParams,
1449 const std::string &filename,
1452 std::vector<PointState> *pointState)
1454 /* Read the PMF and target distribution.
1455 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1456 base on the force constant. The free energy and target together determine the bias.
1458 std::string filenameModified(filename);
1461 size_t n = filenameModified.rfind('.');
1462 GMX_RELEASE_ASSERT(n != std::string::npos, "The filename should contain an extension starting with .");
1463 filenameModified.insert(n, formatString("%d", biasIndex));
1466 std::string correctFormatMessage =
1467 formatString("%s is expected in the following format. "
1468 "The first ndim column(s) should contain the coordinate values for each point, "
1469 "each column containing values of one dimension (in ascending order). "
1470 "For a multidimensional coordinate, points should be listed "
1471 "in the order obtained by traversing lower dimensions first. "
1472 "E.g. for two-dimensional grid of size nxn: "
1473 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1474 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1475 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1476 "Make sure the input file ends with a new line but has no trailing new lines.",
1478 gmx::TextLineWrapper wrapper;
1479 wrapper.settings().setLineLength(c_linewidth);
1480 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1484 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1486 /* Check basic data properties here. Grid takes care of more complicated things. */
1490 std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(), correctFormatMessage.c_str());
1491 GMX_THROW(InvalidInputError(mesg));
1494 /* Less than 2 points is not useful for PMF or target. */
1498 gmx::formatString("%s contains too few data points (%d)."
1499 "The minimum number of points is 2.",
1500 filename.c_str(), numRows);
1501 GMX_THROW(InvalidInputError(mesg));
1504 /* Make sure there are enough columns of data.
1506 Two formats are allowed. Either with columns {coords, PMF, target} or
1507 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1508 is how AWH output is written (x, y, z being other AWH variables). For this format,
1509 trailing columns are ignored.
1511 int columnIndexTarget;
1512 int numColumnsMin = dimParams.size() + 2;
1513 int columnIndexPmf = dimParams.size();
1514 if (numColumns == numColumnsMin)
1516 columnIndexTarget = columnIndexPmf + 1;
1520 columnIndexTarget = columnIndexPmf + 4;
1523 if (numColumns < numColumnsMin)
1526 gmx::formatString("The number of columns in %s should be at least %d."
1528 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1529 GMX_THROW(InvalidInputError(mesg));
1532 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1533 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1534 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1535 if (numZeroRows > 1)
1537 std::string mesg = gmx::formatString("Found %d trailing zero data rows in %s. Please remove trailing empty lines and try again.",
1538 numZeroRows, filename.c_str());
1539 GMX_THROW(InvalidInputError(mesg));
1542 /* Convert from user units to internal units before sending the data of to grid. */
1543 for (size_t d = 0; d < dimParams.size(); d++)
1545 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1546 if (scalingFactor == 1)
1550 for (size_t m = 0; m < pointState->size(); m++)
1552 data[d][m] *= scalingFactor;
1556 /* Get a data point for each AWH grid point so that they all get data. */
1557 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1558 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1560 /* Extract the data for each grid point.
1561 * We check if the target distribution is zero for all points.
1563 bool targetDistributionIsZero = true;
1564 for (size_t m = 0; m < pointState->size(); m++)
1566 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1567 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1569 /* Check if the values are allowed. */
1572 std::string mesg = gmx::formatString("Target distribution weight at point %zu (%g) in %s is negative.",
1573 m, target, filename.c_str());
1574 GMX_THROW(InvalidInputError(mesg));
1578 targetDistributionIsZero = false;
1580 (*pointState)[m].setTargetConstantWeight(target);
1583 if (targetDistributionIsZero)
1585 std::string mesg = gmx::formatString("The target weights given in column %d in %s are all 0",
1586 columnIndexTarget, filename.c_str());
1587 GMX_THROW(InvalidInputError(mesg));
1590 /* Free the arrays. */
1591 for (int m = 0; m < numColumns; m++)
1598 void BiasState::normalizePmf(int numSharingSims)
1600 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1601 Approximately (for large enough force constant) we should have:
1602 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1605 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1606 double expSumPmf = 0;
1608 for (const PointState &pointState : points_)
1610 if (pointState.inTargetRegion())
1612 expSumPmf += std::exp( pointState.logPmfSum());
1613 expSumF += std::exp(-pointState.freeEnergy());
1616 double numSamples = histogramSize_.histogramSize()/numSharingSims;
1619 double logRenorm = std::log(numSamples*expSumF/expSumPmf);
1620 for (PointState &pointState : points_)
1622 if (pointState.inTargetRegion())
1624 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1629 void BiasState::initGridPointState(const AwhBiasParams &awhBiasParams,
1630 const std::vector<DimParams> &dimParams,
1632 const BiasParams ¶ms,
1633 const std::string &filename,
1636 /* Modify PMF, free energy and the constant target distribution factor
1637 * to user input values if there is data given.
1639 if (awhBiasParams.bUserData)
1641 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1642 setFreeEnergyToConvolvedPmf(dimParams, grid);
1645 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1646 GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN ||
1647 points_[0].weightSumRef() != 0,
1648 "AWH reference weight histogram not initialized properly with local Boltzmann target distribution.");
1650 updateTargetDistribution(points_, params);
1652 for (PointState &pointState : points_)
1654 if (pointState.inTargetRegion())
1656 pointState.updateBias();
1660 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1661 pointState.setTargetToZero();
1665 /* Set the initial reference weighthistogram. */
1666 const double histogramSize = histogramSize_.histogramSize();
1667 for (auto &pointState : points_)
1669 pointState.setInitialReferenceWeightHistogram(histogramSize);
1672 /* Make sure the pmf is normalized consistently with the histogram size.
1673 Note: the target distribution and free energy need to be set here. */
1674 normalizePmf(params.numSharedUpdate);
1677 BiasState::BiasState(const AwhBiasParams &awhBiasParams,
1678 double histogramSizeInitial,
1679 const std::vector<DimParams> &dimParams,
1681 coordState_(awhBiasParams, dimParams, grid),
1682 points_(grid.numPoints()),
1683 weightSumCovering_(grid.numPoints()),
1684 histogramSize_(awhBiasParams, histogramSizeInitial)
1686 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1687 for (size_t d = 0; d < dimParams.size(); d++)
1689 int index = grid.point(coordState_.gridpointIndex()).index[d];
1690 originUpdatelist_[d] = index;
1691 endUpdatelist_[d] = index;