2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * Implements the BiasState class.
40 * \author Viveca Lindahl
41 * \author Berk Hess <hess@kth.se>
47 #include "biasstate.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/math/utilities.h"
62 #include "gromacs/mdrunutility/multisim.h"
63 #include "gromacs/mdtypes/awh_history.h"
64 #include "gromacs/mdtypes/awh_params.h"
65 #include "gromacs/mdtypes/commrec.h"
66 #include "gromacs/simd/simd.h"
67 #include "gromacs/simd/simd_math.h"
68 #include "gromacs/utility/arrayref.h"
69 #include "gromacs/utility/exceptions.h"
70 #include "gromacs/utility/gmxassert.h"
71 #include "gromacs/utility/smalloc.h"
72 #include "gromacs/utility/stringutil.h"
75 #include "pointstate.h"
80 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
82 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
84 /* The PMF is just the negative of the log of the sampled PMF histogram.
85 * Points with zero target weight are ignored, they will mostly contain noise.
87 for (size_t i = 0; i < points_.size(); i++)
89 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
97 * Sum an array over all simulations on the master rank of each simulation.
99 * \param[in,out] arrayRef The data to sum.
100 * \param[in] multiSimComm Struct for multi-simulation communication.
102 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
104 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
108 * Sum an array over all simulations on the master rank of each simulation.
110 * \param[in,out] arrayRef The data to sum.
111 * \param[in] multiSimComm Struct for multi-simulation communication.
113 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
115 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
119 * Sum an array over all simulations on all ranks of each simulation.
121 * This assumes the data is identical on all ranks within each simulation.
123 * \param[in,out] arrayRef The data to sum.
124 * \param[in] commRecord Struct for intra-simulation communication.
125 * \param[in] multiSimComm Struct for multi-simulation communication.
128 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
130 if (MASTER(commRecord))
132 sumOverSimulations(arrayRef, multiSimComm);
134 if (commRecord->nnodes > 1)
136 gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord->mpi_comm_mygroup);
141 * Sum PMF over multiple simulations, when requested.
143 * \param[in,out] pointState The state of the points in the bias.
144 * \param[in] numSharedUpdate The number of biases sharing the histogram.
145 * \param[in] commRecord Struct for intra-simulation communication.
146 * \param[in] multiSimComm Struct for multi-simulation communication.
148 void sumPmf(gmx::ArrayRef<PointState> pointState,
150 const t_commrec* commRecord,
151 const gmx_multisim_t* multiSimComm)
153 if (numSharedUpdate == 1)
157 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->numSimulations_ == 0,
158 "numSharedUpdate should be a multiple of multiSimComm->numSimulations_");
159 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
160 "Sharing within a simulation is not implemented (yet)");
162 std::vector<double> buffer(pointState.size());
164 /* Need to temporarily exponentiate the log weights to sum over simulations */
165 for (size_t i = 0; i < buffer.size(); i++)
167 buffer[i] = pointState[i].inTargetRegion() ? std::exp(-pointState[i].logPmfSum()) : 0;
170 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
172 /* Take log again to get (non-normalized) PMF */
173 double normFac = 1.0 / numSharedUpdate;
174 for (gmx::index i = 0; i < pointState.ssize(); i++)
176 if (pointState[i].inTargetRegion())
178 pointState[i].setLogPmfSum(-std::log(buffer[i] * normFac));
184 * Find the minimum free energy value.
186 * \param[in] pointState The state of the points.
187 * \returns the minimum free energy value.
189 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
191 double fMin = GMX_FLOAT_MAX;
193 for (auto const& ps : pointState)
195 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
197 fMin = ps.freeEnergy();
205 * Find and return the log of the probability weight of a point given a coordinate value.
207 * The unnormalized weight is given by
208 * w(point|value) = exp(bias(point) - U(value,point)),
209 * where U is a harmonic umbrella potential.
211 * \param[in] dimParams The bias dimensions parameters
212 * \param[in] points The point state.
213 * \param[in] grid The grid.
214 * \param[in] pointIndex Point to evaluate probability weight for.
215 * \param[in] pointBias Bias for the point (as a log weight).
216 * \param[in] value Coordinate value.
217 * \param[in] neighborLambdaEnergies The energy of the system in neighboring lambdas states. Can be
218 * empty when there are no free energy lambda state dimensions.
219 * \param[in] gridpointIndex The index of the current grid point.
220 * \returns the log of the biased probability weight.
222 double biasedLogWeightFromPoint(const std::vector<DimParams>& dimParams,
223 const std::vector<PointState>& points,
224 const BiasGrid& grid,
227 const awh_dvec value,
228 gmx::ArrayRef<const double> neighborLambdaEnergies,
231 double logWeight = detail::c_largeNegativeExponent;
233 /* Only points in the target region have non-zero weight */
234 if (points[pointIndex].inTargetRegion())
236 logWeight = pointBias;
238 /* Add potential for all parameter dimensions */
239 for (size_t d = 0; d < dimParams.size(); d++)
241 if (dimParams[d].isFepLambdaDimension())
243 /* If this is not a sampling step or if this function is called from
244 * calcConvolvedBias(), when writing energy subblocks, neighborLambdaEnergies will
245 * be empty. No convolution is required along the lambda dimension. */
246 if (!neighborLambdaEnergies.empty())
248 const int pointLambdaIndex = grid.point(pointIndex).coordValue[d];
249 const int gridpointLambdaIndex = grid.point(gridpointIndex).coordValue[d];
250 logWeight -= dimParams[d].fepDimParams().beta
251 * (neighborLambdaEnergies[pointLambdaIndex]
252 - neighborLambdaEnergies[gridpointLambdaIndex]);
257 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
258 logWeight -= 0.5 * dimParams[d].pullDimParams().betak * dev * dev;
266 * Calculates the marginal distribution (marginal probability) for each value along
267 * a free energy lambda axis.
268 * The marginal distribution of one coordinate dimension value is the sum of the probability
269 * distribution of all values (herein all neighbor values) with the same value in the dimension
271 * \param[in] grid The bias grid.
272 * \param[in] neighbors The points to use for the calculation of the marginal distribution.
273 * \param[in] probWeightNeighbor Probability weights of the neighbors.
274 * \returns The calculated marginal distribution in a 1D array with
275 * as many elements as there are points along the axis of interest.
277 std::vector<double> calculateFELambdaMarginalDistribution(const BiasGrid& grid,
278 gmx::ArrayRef<const int> neighbors,
279 gmx::ArrayRef<const double> probWeightNeighbor)
281 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
282 GMX_RELEASE_ASSERT(lambdaAxisIndex,
283 "There must be a free energy lambda axis in order to calculate the free "
284 "energy lambda marginal distribution.");
285 const int numFepLambdaStates = grid.numFepLambdaStates();
286 std::vector<double> lambdaMarginalDistribution(numFepLambdaStates, 0);
288 for (size_t i = 0; i < neighbors.size(); i++)
290 const int neighbor = neighbors[i];
291 const int lambdaState = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
292 lambdaMarginalDistribution[lambdaState] += probWeightNeighbor[i];
294 return lambdaMarginalDistribution;
299 void BiasState::calcConvolvedPmf(const std::vector<DimParams>& dimParams,
300 const BiasGrid& grid,
301 std::vector<float>* convolvedPmf) const
303 size_t numPoints = grid.numPoints();
305 convolvedPmf->resize(numPoints);
307 /* Get the PMF to convolve. */
308 std::vector<float> pmf(numPoints);
311 for (size_t m = 0; m < numPoints; m++)
313 double freeEnergyWeights = 0;
314 const GridPoint& point = grid.point(m);
315 for (auto& neighbor : point.neighbor)
317 /* The negative PMF is a positive bias. */
318 double biasNeighbor = -pmf[neighbor];
320 /* Add the convolved PMF weights for the neighbors of this point.
321 Note that this function only adds point within the target > 0 region.
322 Sum weights, take the logarithm last to get the free energy. */
323 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
324 biasNeighbor, point.coordValue, {}, m);
325 freeEnergyWeights += std::exp(logWeight);
328 GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
329 "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
330 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
338 * Updates the target distribution for all points.
340 * The target distribution is always updated for all points
343 * \param[in,out] pointState The state of all points.
344 * \param[in] params The bias parameters.
346 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState, const BiasParams& params)
348 double freeEnergyCutoff = 0;
349 if (params.eTarget == eawhtargetCUTOFF)
351 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
354 double sumTarget = 0;
355 for (PointState& ps : pointState)
357 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
359 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
362 double invSum = 1.0 / sumTarget;
363 for (PointState& ps : pointState)
365 ps.scaleTarget(invSum);
370 * Puts together a string describing a grid point.
372 * \param[in] grid The grid.
373 * \param[in] point BiasGrid point index.
374 * \returns a string for the point.
376 std::string gridPointValueString(const BiasGrid& grid, int point)
378 std::string pointString;
382 for (int d = 0; d < grid.numDimensions(); d++)
384 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
385 if (d < grid.numDimensions() - 1)
400 int BiasState::warnForHistogramAnomalies(const BiasGrid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
402 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
403 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
405 /* Sum up the histograms and get their normalization */
406 double sumVisits = 0;
407 double sumWeights = 0;
408 for (auto& pointState : points_)
410 if (pointState.inTargetRegion())
412 sumVisits += pointState.numVisitsTot();
413 sumWeights += pointState.weightSumTot();
416 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
417 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
418 double invNormVisits = 1.0 / sumVisits;
419 double invNormWeight = 1.0 / sumWeights;
421 /* Check all points for warnings */
423 size_t numPoints = grid.numPoints();
424 for (size_t m = 0; m < numPoints; m++)
426 /* Skip points close to boundary or non-target region */
427 const GridPoint& gridPoint = grid.point(m);
428 bool skipPoint = false;
429 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
431 int neighbor = gridPoint.neighbor[n];
432 skipPoint = !points_[neighbor].inTargetRegion();
433 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
435 const GridPoint& neighborPoint = grid.point(neighbor);
436 skipPoint = neighborPoint.index[d] == 0
437 || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
441 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
442 const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
443 const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
444 if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
446 std::string pointValueString = gridPointValueString(grid, m);
447 std::string warningMessage = gmx::formatString(
449 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
450 "is less than a fraction %g of the reference distribution at that point. "
451 "If you are not certain about your settings you might want to increase your "
452 "pull force constant or "
453 "modify your sampling region.\n",
454 biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
455 gmx::TextLineWrapper wrapper;
456 wrapper.settings().setLineLength(c_linewidth);
457 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
461 if (numWarnings >= maxNumWarnings)
470 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams>& dimParams,
471 const BiasGrid& grid,
473 ArrayRef<const double> neighborLambdaDhdl,
474 gmx::ArrayRef<double> force) const
476 double potential = 0;
477 for (size_t d = 0; d < dimParams.size(); d++)
479 if (dimParams[d].isFepLambdaDimension())
481 if (!neighborLambdaDhdl.empty())
483 const int coordpointLambdaIndex = grid.point(point).coordValue[d];
484 force[d] = neighborLambdaDhdl[coordpointLambdaIndex];
485 /* The potential should not be affected by the lambda dimension. */
491 getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
492 double k = dimParams[d].pullDimParams().k;
494 /* Force from harmonic potential 0.5*k*dev^2 */
495 force[d] = -k * deviation;
496 potential += 0.5 * k * deviation * deviation;
503 void BiasState::calcConvolvedForce(const std::vector<DimParams>& dimParams,
504 const BiasGrid& grid,
505 gmx::ArrayRef<const double> probWeightNeighbor,
506 ArrayRef<const double> neighborLambdaDhdl,
507 gmx::ArrayRef<double> forceWorkBuffer,
508 gmx::ArrayRef<double> force) const
510 for (size_t d = 0; d < dimParams.size(); d++)
515 /* Only neighboring points have non-negligible contribution. */
516 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
517 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
518 for (size_t n = 0; n < neighbor.size(); n++)
520 double weightNeighbor = probWeightNeighbor[n];
521 int indexNeighbor = neighbor[n];
523 /* Get the umbrella force from this point. The returned potential is ignored here. */
524 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, neighborLambdaDhdl, forceFromNeighbor);
526 /* Add the weighted umbrella force to the convolved force. */
527 for (size_t d = 0; d < dimParams.size(); d++)
529 force[d] += forceFromNeighbor[d] * weightNeighbor;
534 double BiasState::moveUmbrella(const std::vector<DimParams>& dimParams,
535 const BiasGrid& grid,
536 gmx::ArrayRef<const double> probWeightNeighbor,
537 ArrayRef<const double> neighborLambdaDhdl,
538 gmx::ArrayRef<double> biasForce,
542 bool onlySampleUmbrellaGridpoint)
544 /* Generate and set a new coordinate reference value */
545 coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor,
546 step, seed, indexSeed);
548 if (onlySampleUmbrellaGridpoint)
553 std::vector<double> newForce(dimParams.size());
554 double newPotential = calcUmbrellaForceAndPotential(
555 dimParams, grid, coordState_.umbrellaGridpoint(), neighborLambdaDhdl, newForce);
557 /* A modification of the reference value at time t will lead to a different
558 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
559 this means the force and velocity will change signs roughly as often.
560 To avoid any issues we take the average of the previous and new force
561 at steps when the reference value has been moved. E.g. if the ref. value
562 is set every step to (coord dvalue +/- delta) would give zero force.
564 for (gmx::index d = 0; d < biasForce.ssize(); d++)
566 /* Average of the current and new force */
567 biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
577 * Sets the histogram rescaling factors needed to control the histogram size.
579 * For sake of robustness, the reference weight histogram can grow at a rate
580 * different from the actual sampling rate. Typically this happens for a limited
581 * initial time, alternatively growth is scaled down by a constant factor for all
582 * times. Since the size of the reference histogram sets the size of the free
583 * energy update this should be reflected also in the PMF. Thus the PMF histogram
584 * needs to be rescaled too.
586 * This function should only be called by the bias update function or wrapped by a function that
587 * knows what scale factors should be applied when, e.g,
588 * getSkippedUpdateHistogramScaleFactors().
590 * \param[in] params The bias parameters.
591 * \param[in] newHistogramSize New reference weight histogram size.
592 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
593 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
594 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
596 void setHistogramUpdateScaleFactors(const BiasParams& params,
597 double newHistogramSize,
598 double oldHistogramSize,
599 double* weightHistScaling,
600 double* logPmfSumScaling)
603 /* The two scaling factors below are slightly different (ignoring the log factor) because the
604 reference and the PMF histogram apply weight scaling differently. The weight histogram
605 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
606 It is done this way because that is what target type local Boltzmann (for which
607 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
608 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
609 1) empirically this is necessary for converging the PMF; 2) since the extraction of
610 the PMF is theoretically only valid for a constant bias, new samples should get more
611 weight than old ones for which the bias is fluctuating more. */
613 newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
614 *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
619 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
620 double* weightHistScaling,
621 double* logPmfSumScaling) const
623 GMX_ASSERT(params.skipUpdates(),
624 "Calling function for skipped updates when skipping updates is not allowed");
626 if (inInitialStage())
628 /* In between global updates the reference histogram size is kept constant so we trivially
629 know what the histogram size was at the time of the skipped update. */
630 double histogramSize = histogramSize_.histogramSize();
631 setHistogramUpdateScaleFactors(params, histogramSize, histogramSize, weightHistScaling,
636 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
637 *weightHistScaling = 1;
638 *logPmfSumScaling = 0;
642 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
644 double weightHistScaling;
645 double logPmfsumScaling;
647 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
649 for (auto& pointState : points_)
651 bool didUpdate = pointState.performPreviouslySkippedUpdates(
652 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
654 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
657 pointState.updateBias();
662 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const BiasGrid& grid)
664 double weightHistScaling;
665 double logPmfsumScaling;
667 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
669 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
670 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
671 for (auto& neighbor : neighbors)
673 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
674 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
678 points_[neighbor].updateBias();
687 * Merge update lists from multiple sharing simulations.
689 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
690 * \param[in] numPoints Total number of points.
691 * \param[in] commRecord Struct for intra-simulation communication.
692 * \param[in] multiSimComm Struct for multi-simulation communication.
694 void mergeSharedUpdateLists(std::vector<int>* updateList,
696 const t_commrec* commRecord,
697 const gmx_multisim_t* multiSimComm)
699 std::vector<int> numUpdatesOfPoint;
701 /* Flag the update points of this sim.
702 TODO: we can probably avoid allocating this array and just use the input array. */
703 numUpdatesOfPoint.resize(numPoints, 0);
704 for (auto& pointIndex : *updateList)
706 numUpdatesOfPoint[pointIndex] = 1;
709 /* Sum over the sims to get all the flagged points */
710 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
712 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
714 for (int m = 0; m < numPoints; m++)
716 if (numUpdatesOfPoint[m] > 0)
718 updateList->push_back(m);
724 * Generate an update list of points sampled since the last update.
726 * \param[in] grid The AWH bias.
727 * \param[in] points The point state.
728 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since
729 * last update. \param[in] endUpdatelist The end of the rectangular that has been sampled since
730 * last update. \param[in,out] updateList Local update list to set (assumed >= npoints long).
732 void makeLocalUpdateList(const BiasGrid& grid,
733 const std::vector<PointState>& points,
734 const awh_ivec originUpdatelist,
735 const awh_ivec endUpdatelist,
736 std::vector<int>* updateList)
741 /* Define the update search grid */
742 for (int d = 0; d < grid.numDimensions(); d++)
744 origin[d] = originUpdatelist[d];
745 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
747 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
748 This helps for calculating the distance/number of points but should be removed and fixed when the way of
749 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
750 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
753 /* Make the update list */
756 bool pointExists = true;
759 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
761 if (pointExists && points[pointIndex].inTargetRegion())
763 updateList->push_back(pointIndex);
770 void BiasState::resetLocalUpdateRange(const BiasGrid& grid)
772 const int gridpointIndex = coordState_.gridpointIndex();
773 for (int d = 0; d < grid.numDimensions(); d++)
775 /* This gives the minimum range consisting only of the current closest point. */
776 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
777 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
785 * Add partial histograms (accumulating between updates) to accumulating histograms.
787 * \param[in,out] pointState The state of the points in the bias.
788 * \param[in,out] weightSumCovering The weights for checking covering.
789 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
790 * \param[in] commRecord Struct for intra-simulation communication.
791 * \param[in] multiSimComm Struct for multi-simulation communication.
792 * \param[in] localUpdateList List of points with data.
794 void sumHistograms(gmx::ArrayRef<PointState> pointState,
795 gmx::ArrayRef<double> weightSumCovering,
797 const t_commrec* commRecord,
798 const gmx_multisim_t* multiSimComm,
799 const std::vector<int>& localUpdateList)
801 /* The covering checking histograms are added before summing over simulations, so that the
802 weights from different simulations are kept distinguishable. */
803 for (int globalIndex : localUpdateList)
805 weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
808 /* Sum histograms over multiple simulations if needed. */
809 if (numSharedUpdate > 1)
811 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
812 "Sharing within a simulation is not implemented (yet)");
814 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
815 std::vector<double> weightSum;
816 std::vector<double> coordVisits;
818 weightSum.resize(localUpdateList.size());
819 coordVisits.resize(localUpdateList.size());
821 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
823 const PointState& ps = pointState[localUpdateList[localIndex]];
825 weightSum[localIndex] = ps.weightSumIteration();
826 coordVisits[localIndex] = ps.numVisitsIteration();
829 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
830 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
832 /* Transfer back the result */
833 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
835 PointState& ps = pointState[localUpdateList[localIndex]];
837 ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
841 /* Now add the partial counts and weights to the accumulating histograms.
842 Note: we still need to use the weights for the update so we wait
843 with resetting them until the end of the update. */
844 for (int globalIndex : localUpdateList)
846 pointState[globalIndex].addPartialWeightAndCount();
851 * Label points along an axis as covered or not.
853 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
855 * \param[in] visited Visited? For each point.
856 * \param[in] checkCovering Check for covering? For each point.
857 * \param[in] numPoints The number of grid points along this dimension.
858 * \param[in] period Period in number of points.
859 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
860 * \param[in,out] covered In this array elements are 1 for covered points and 0 for
861 * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
863 void labelCoveredPoints(const std::vector<bool>& visited,
864 const std::vector<bool>& checkCovering,
868 gmx::ArrayRef<int> covered)
870 GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
872 bool haveFirstNotVisited = false;
873 int firstNotVisited = -1;
874 int notVisitedLow = -1;
875 int notVisitedHigh = -1;
877 for (int n = 0; n < numPoints; n++)
879 if (checkCovering[n] && !visited[n])
881 if (!haveFirstNotVisited)
885 haveFirstNotVisited = true;
891 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
892 by unvisited points. The unvisted end points affect the coveredness of the
893 visited with a reach equal to the cover radius. */
894 int notCoveredLow = notVisitedLow + coverRadius;
895 int notCoveredHigh = notVisitedHigh - coverRadius;
896 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
898 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
901 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
902 the notVisitedLow of the next. */
903 notVisitedLow = notVisitedHigh;
908 /* Have labelled all the internal points. Now take care of the boundary regions. */
909 if (!haveFirstNotVisited)
911 /* No non-visited points <=> all points visited => all points covered. */
913 for (int n = 0; n < numPoints; n++)
920 int lastNotVisited = notVisitedLow;
922 /* For periodic boundaries, non-visited points can influence points
923 on the other side of the boundary so we need to wrap around. */
925 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
926 For non-periodic boundaries there is no lower end point so a dummy value is used. */
927 int notVisitedHigh = firstNotVisited;
928 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
930 int notCoveredLow = notVisitedLow + coverRadius;
931 int notCoveredHigh = notVisitedHigh - coverRadius;
933 for (int i = 0; i <= notVisitedHigh; i++)
935 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
936 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
939 /* Upper end. Same as for lower end but in the other direction. */
940 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
941 notVisitedLow = lastNotVisited;
943 notCoveredLow = notVisitedLow + coverRadius;
944 notCoveredHigh = notVisitedHigh - coverRadius;
946 for (int i = notVisitedLow; i <= numPoints - 1; i++)
948 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
949 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
956 bool BiasState::isSamplingRegionCovered(const BiasParams& params,
957 const std::vector<DimParams>& dimParams,
958 const BiasGrid& grid,
959 const t_commrec* commRecord,
960 const gmx_multisim_t* multiSimComm) const
962 /* Allocate and initialize arrays: one for checking visits along each dimension,
963 one for keeping track of which points to check and one for the covered points.
964 Possibly these could be kept as AWH variables to avoid these allocations. */
967 std::vector<bool> visited;
968 std::vector<bool> checkCovering;
969 // We use int for the covering array since we might use gmx_sumi_sim.
970 std::vector<int> covered;
973 std::vector<CheckDim> checkDim;
974 checkDim.resize(grid.numDimensions());
976 for (int d = 0; d < grid.numDimensions(); d++)
978 const size_t numPoints = grid.axis(d).numPoints();
979 checkDim[d].visited.resize(numPoints, false);
980 checkDim[d].checkCovering.resize(numPoints, false);
981 checkDim[d].covered.resize(numPoints, 0);
984 /* Set visited points along each dimension and which points should be checked for covering.
985 Specifically, points above the free energy cutoff (if there is one) or points outside
986 of the target region are ignored. */
988 /* Set the free energy cutoff */
989 double maxFreeEnergy = GMX_FLOAT_MAX;
991 if (params.eTarget == eawhtargetCUTOFF)
993 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
996 /* Set the threshold weight for a point to be considered visited. */
997 double weightThreshold = 1;
998 for (int d = 0; d < grid.numDimensions(); d++)
1000 if (grid.axis(d).isFepLambdaAxis())
1002 /* TODO: Verify that a threshold of 1.0 is OK. With a very high sample weight 1.0 can be
1003 * reached quickly even in regions with low probability. Should the sample weight be
1004 * taken into account here? */
1005 weightThreshold *= 1.0;
1009 weightThreshold *= grid.axis(d).spacing()
1010 * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
1014 /* Project the sampling weights onto each dimension */
1015 for (size_t m = 0; m < grid.numPoints(); m++)
1017 const PointState& pointState = points_[m];
1019 for (int d = 0; d < grid.numDimensions(); d++)
1021 int n = grid.point(m).index[d];
1023 /* Is visited if it was already visited or if there is enough weight at the current point */
1024 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
1026 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
1027 checkDim[d].checkCovering[n] =
1028 checkDim[d].checkCovering[n]
1029 || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
1033 /* Label each point along each dimension as covered or not. */
1034 for (int d = 0; d < grid.numDimensions(); d++)
1036 labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(),
1037 grid.axis(d).numPointsInPeriod(), params.coverRadius()[d], checkDim[d].covered);
1040 /* Now check for global covering. Each dimension needs to be covered separately.
1041 A dimension is covered if each point is covered. Multiple simulations collectively
1042 cover the points, i.e. a point is covered if any of the simulations covered it.
1043 However, visited points are not shared, i.e. if a point is covered or not is
1044 determined by the visits of a single simulation. In general the covering criterion is
1045 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
1046 For 1 simulation, all points covered <=> all points visited. For multiple simulations
1047 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
1048 In the limit of large coverRadius, all points covered => all points visited by at least one
1049 simulation (since no point will be covered until all points have been visited by a
1050 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
1051 needs with surrounding points before sharing covering information with other simulations. */
1053 /* Communicate the covered points between sharing simulations if needed. */
1054 if (params.numSharedUpdate > 1)
1056 /* For multiple dimensions this may not be the best way to do it. */
1057 for (int d = 0; d < grid.numDimensions(); d++)
1060 gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
1061 commRecord, multiSimComm);
1065 /* Now check if for each dimension all points are covered. Break if not true. */
1066 bool allPointsCovered = true;
1067 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
1069 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
1071 allPointsCovered = (checkDim[d].covered[n] != 0);
1075 return allPointsCovered;
1079 * Normalizes the free energy and PMF sum.
1081 * \param[in] pointState The state of the points.
1083 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1085 double minF = freeEnergyMinimumValue(*pointState);
1087 for (PointState& ps : *pointState)
1089 ps.normalizeFreeEnergyAndPmfSum(minF);
1093 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams>& dimParams,
1094 const BiasGrid& grid,
1095 const BiasParams& params,
1096 const t_commrec* commRecord,
1097 const gmx_multisim_t* multiSimComm,
1101 std::vector<int>* updateList)
1103 /* Note hat updateList is only used in this scope and is always
1104 * re-initialized. We do not use a local vector, because that would
1105 * cause reallocation every time this funtion is called and the vector
1106 * can be the size of the whole grid.
1109 /* Make a list of all local points, i.e. those that could have been touched since
1110 the last update. These are the points needed for summing histograms below
1111 (non-local points only add zeros). For local updates, this will also be the
1112 final update list. */
1113 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1114 if (params.numSharedUpdate > 1)
1116 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1119 /* Reset the range for the next update */
1120 resetLocalUpdateRange(grid);
1122 /* Add samples to histograms for all local points and sync simulations if needed */
1123 sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1125 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1127 /* Renormalize the free energy if values are too large. */
1128 bool needToNormalizeFreeEnergy = false;
1129 for (int& globalIndex : *updateList)
1131 /* We want to keep the absolute value of the free energies to be less
1132 c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1133 ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1134 in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1135 this requirement would be broken. */
1136 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1138 needToNormalizeFreeEnergy = true;
1143 /* Update target distribution? */
1144 bool needToUpdateTargetDistribution =
1145 (params.eTarget != eawhtargetCONSTANT && params.isUpdateTargetStep(step));
1147 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1148 bool detectedCovering = false;
1149 if (inInitialStage())
1152 (params.isCheckCoveringStep(step)
1153 && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1156 /* The weighthistogram size after this update. */
1157 double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_,
1158 weightSumCovering_, fplog);
1160 /* Make the update list. Usually we try to only update local points,
1161 * but if the update has non-trivial or non-deterministic effects
1162 * on non-local points a global update is needed. This is the case when:
1163 * 1) a covering occurred in the initial stage, leading to non-trivial
1164 * histogram rescaling factors; or
1165 * 2) the target distribution will be updated, since we don't make any
1166 * assumption on its form; or
1167 * 3) the AWH parameters are such that we never attempt to skip non-local
1169 * 4) the free energy values have grown so large that a renormalization
1172 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1174 /* Global update, just add all points. */
1175 updateList->clear();
1176 for (size_t m = 0; m < points_.size(); m++)
1178 if (points_[m].inTargetRegion())
1180 updateList->push_back(m);
1185 /* Set histogram scale factors. */
1186 double weightHistScalingSkipped = 0;
1187 double logPmfsumScalingSkipped = 0;
1188 if (params.skipUpdates())
1190 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1192 double weightHistScalingNew;
1193 double logPmfsumScalingNew;
1194 setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1195 &weightHistScalingNew, &logPmfsumScalingNew);
1197 /* Update free energy and reference weight histogram for points in the update list. */
1198 for (int pointIndex : *updateList)
1200 PointState* pointStateToUpdate = &points_[pointIndex];
1202 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1203 if (params.skipUpdates())
1205 pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(),
1206 weightHistScalingSkipped,
1207 logPmfsumScalingSkipped);
1210 /* Now do an update with new sampling data. */
1211 pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(),
1212 weightHistScalingNew, logPmfsumScalingNew);
1215 /* Only update the histogram size after we are done with the local point updates */
1216 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1218 if (needToNormalizeFreeEnergy)
1220 normalizeFreeEnergyAndPmfSum(&points_);
1223 if (needToUpdateTargetDistribution)
1225 /* The target distribution is always updated for all points at once. */
1226 updateTargetDistribution(points_, params);
1229 /* Update the bias. The bias is updated separately and last since it simply a function of
1230 the free energy and the target distribution and we want to avoid doing extra work. */
1231 for (int pointIndex : *updateList)
1233 points_[pointIndex].updateBias();
1236 /* Increase the update counter. */
1237 histogramSize_.incrementNumUpdates();
1240 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>& dimParams,
1241 const BiasGrid& grid,
1242 gmx::ArrayRef<const double> neighborLambdaEnergies,
1243 std::vector<double, AlignedAllocator<double>>* weight) const
1245 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1246 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1248 #if GMX_SIMD_HAVE_DOUBLE
1249 typedef SimdDouble PackType;
1250 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1252 typedef double PackType;
1253 constexpr int packSize = 1;
1255 /* Round the size of the weight array up to packSize */
1256 const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1257 weight->resize(weightSize);
1259 double* gmx_restrict weightData = weight->data();
1260 PackType weightSumPack(0.0);
1261 for (size_t i = 0; i < neighbors.size(); i += packSize)
1263 for (size_t n = i; n < i + packSize; n++)
1265 if (n < neighbors.size())
1267 const int neighbor = neighbors[n];
1268 (*weight)[n] = biasedLogWeightFromPoint(
1269 dimParams, points_, grid, neighbor, points_[neighbor].bias(),
1270 coordState_.coordValue(), neighborLambdaEnergies, coordState_.gridpointIndex());
1274 /* Pad with values that don't affect the result */
1275 (*weight)[n] = detail::c_largeNegativeExponent;
1278 PackType weightPack = load<PackType>(weightData + i);
1279 weightPack = gmx::exp(weightPack);
1280 weightSumPack = weightSumPack + weightPack;
1281 store(weightData + i, weightPack);
1283 /* Sum of probability weights */
1284 double weightSum = reduce(weightSumPack);
1285 GMX_RELEASE_ASSERT(weightSum > 0,
1286 "zero probability weight when updating AWH probability weights.");
1288 /* Normalize probabilities to sum to 1 */
1289 double invWeightSum = 1 / weightSum;
1291 /* When there is a free energy lambda state axis remove the convolved contributions along that
1292 * axis from the total bias. This must be done after calculating invWeightSum (since weightSum
1293 * will be modified), but before normalizing the weights (below). */
1294 if (grid.hasLambdaAxis())
1296 /* If there is only one axis the bias will not be convolved in any dimension. */
1297 if (grid.axis().size() == 1)
1299 weightSum = gmx::exp(points_[coordState_.gridpointIndex()].bias());
1303 for (size_t i = 0; i < neighbors.size(); i++)
1305 const int neighbor = neighbors[i];
1306 if (pointsHaveDifferentLambda(grid, coordState_.gridpointIndex(), neighbor))
1308 weightSum -= weightData[i];
1314 for (double& w : *weight)
1319 /* Return the convolved bias */
1320 return std::log(weightSum);
1323 double BiasState::calcConvolvedBias(const std::vector<DimParams>& dimParams,
1324 const BiasGrid& grid,
1325 const awh_dvec& coordValue) const
1327 int point = grid.nearestIndex(coordValue);
1328 const GridPoint& gridPoint = grid.point(point);
1330 /* Sum the probability weights from the neighborhood of the given point */
1331 double weightSum = 0;
1332 for (int neighbor : gridPoint.neighbor)
1334 /* No convolution is required along the lambda dimension. */
1335 if (pointsHaveDifferentLambda(grid, point, neighbor))
1339 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1340 points_[neighbor].bias(), coordValue, {}, point);
1341 weightSum += std::exp(logWeight);
1344 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1345 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1348 void BiasState::sampleProbabilityWeights(const BiasGrid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1350 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1352 /* Save weights for next update */
1353 for (size_t n = 0; n < neighbor.size(); n++)
1355 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1358 /* Update the local update range. Two corner points define this rectangular
1359 * domain. We need to choose two new corner points such that the new domain
1360 * contains both the old update range and the current neighborhood.
1361 * In the simplest case when an update is performed every sample,
1362 * the update range would simply equal the current neighborhood.
1364 int neighborStart = neighbor[0];
1365 int neighborLast = neighbor[neighbor.size() - 1];
1366 for (int d = 0; d < grid.numDimensions(); d++)
1368 int origin = grid.point(neighborStart).index[d];
1369 int last = grid.point(neighborLast).index[d];
1373 /* Unwrap if wrapped around the boundary (only happens for periodic
1374 * boundaries). This has been already for the stored index interval.
1376 /* TODO: what we want to do is to find the smallest the update
1377 * interval that contains all points that need to be updated.
1378 * This amounts to combining two intervals, the current
1379 * [origin, end] update interval and the new touched neighborhood
1380 * into a new interval that contains all points from both the old
1383 * For periodic boundaries it becomes slightly more complicated
1384 * than for closed boundaries because then it needs not be
1385 * true that origin < end (so one can't simply relate the origin/end
1386 * in the min()/max() below). The strategy here is to choose the
1387 * origin closest to a reference point (index 0) and then unwrap
1388 * the end index if needed and choose the largest end index.
1389 * This ensures that both intervals are in the new interval
1390 * but it's not necessarily the smallest.
1391 * Currently we solve this by going through each possibility
1392 * and checking them.
1394 last += grid.axis(d).numPointsInPeriod();
1397 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1398 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1402 void BiasState::sampleCoordAndPmf(const std::vector<DimParams>& dimParams,
1403 const BiasGrid& grid,
1404 gmx::ArrayRef<const double> probWeightNeighbor,
1405 double convolvedBias)
1407 /* Sampling-based deconvolution extracting the PMF.
1408 * Update the PMF histogram with the current coordinate value.
1410 * Because of the finite width of the harmonic potential, the free energy
1411 * defined for each coordinate point does not exactly equal that of the
1412 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1413 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1414 * reweighted histogram of the coordinate value. Strictly, this relies on
1415 * the unknown normalization constant Z being either constant or known. Here,
1416 * neither is true except in the long simulation time limit. Empirically however,
1417 * it works (mainly because how the PMF histogram is rescaled).
1420 const int gridPointIndex = coordState_.gridpointIndex();
1421 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
1423 /* Update the PMF of points along a lambda axis with their bias. */
1424 if (lambdaAxisIndex)
1426 const std::vector<int>& neighbors = grid.point(gridPointIndex).neighbor;
1428 std::vector<double> lambdaMarginalDistribution =
1429 calculateFELambdaMarginalDistribution(grid, neighbors, probWeightNeighbor);
1431 awh_dvec coordValueAlongLambda = { coordState_.coordValue()[0], coordState_.coordValue()[1],
1432 coordState_.coordValue()[2], coordState_.coordValue()[3] };
1433 for (size_t i = 0; i < neighbors.size(); i++)
1435 const int neighbor = neighbors[i];
1437 if (pointsAlongLambdaAxis(grid, gridPointIndex, neighbor))
1439 const double neighborLambda = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
1440 if (neighbor == gridPointIndex)
1442 bias = convolvedBias;
1446 coordValueAlongLambda[lambdaAxisIndex.value()] = neighborLambda;
1447 bias = calcConvolvedBias(dimParams, grid, coordValueAlongLambda);
1450 const double probWeight = lambdaMarginalDistribution[neighborLambda];
1451 const double weightedBias = bias - std::log(std::max(probWeight, GMX_DOUBLE_MIN)); // avoid log(0)
1453 if (neighbor == gridPointIndex && grid.covers(coordState_.coordValue()))
1455 points_[neighbor].samplePmf(weightedBias);
1459 points_[neighbor].updatePmfUnvisited(weightedBias);
1466 /* Only save coordinate data that is in range (the given index is always
1467 * in range even if the coordinate value is not).
1469 if (grid.covers(coordState_.coordValue()))
1471 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1472 points_[gridPointIndex].samplePmf(convolvedBias);
1476 /* Save probability weights for the update */
1477 sampleProbabilityWeights(grid, probWeightNeighbor);
1480 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1482 biasHistory->pointState.resize(points_.size());
1485 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const BiasGrid& grid) const
1487 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1488 "The AWH history setup does not match the AWH state.");
1490 AwhBiasStateHistory* stateHistory = &biasHistory->state;
1491 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1493 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1495 AwhPointStateHistory* psh = &biasHistory->pointState[m];
1497 points_[m].storeState(psh);
1499 psh->weightsum_covering = weightSumCovering_[m];
1502 histogramSize_.storeState(stateHistory);
1504 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1505 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid, endUpdatelist_);
1508 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const BiasGrid& grid)
1510 const AwhBiasStateHistory& stateHistory = biasHistory.state;
1512 coordState_.restoreFromHistory(stateHistory);
1514 if (biasHistory.pointState.size() != points_.size())
1517 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1518 "Likely you provided a checkpoint from a different simulation."));
1520 for (size_t m = 0; m < points_.size(); m++)
1522 points_[m].setFromHistory(biasHistory.pointState[m]);
1525 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1527 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1530 histogramSize_.restoreFromHistory(stateHistory);
1532 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1533 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1536 void BiasState::broadcast(const t_commrec* commRecord)
1538 gmx_bcast(sizeof(coordState_), &coordState_, commRecord->mpi_comm_mygroup);
1540 gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord->mpi_comm_mygroup);
1542 gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(),
1543 commRecord->mpi_comm_mygroup);
1545 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord->mpi_comm_mygroup);
1548 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>& dimParams, const BiasGrid& grid)
1550 std::vector<float> convolvedPmf;
1552 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1554 for (size_t m = 0; m < points_.size(); m++)
1556 points_[m].setFreeEnergy(convolvedPmf[m]);
1561 * Count trailing data rows containing only zeros.
1563 * \param[in] data 2D data array.
1564 * \param[in] numRows Number of rows in array.
1565 * \param[in] numColumns Number of cols in array.
1566 * \returns the number of trailing zero rows.
1568 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1570 int numZeroRows = 0;
1571 for (int m = numRows - 1; m >= 0; m--)
1573 bool rowIsZero = true;
1574 for (int d = 0; d < numColumns; d++)
1576 if (data[d][m] != 0)
1585 /* At a row with non-zero data */
1590 /* Still at a zero data row, keep checking rows higher up. */
1599 * Initializes the PMF and target with data read from an input table.
1601 * \param[in] dimParams The dimension parameters.
1602 * \param[in] grid The grid.
1603 * \param[in] filename The filename to read PMF and target from.
1604 * \param[in] numBias Number of biases.
1605 * \param[in] biasIndex The index of the bias.
1606 * \param[in,out] pointState The state of the points in this bias.
1608 static void readUserPmfAndTargetDistribution(const std::vector<DimParams>& dimParams,
1609 const BiasGrid& grid,
1610 const std::string& filename,
1613 std::vector<PointState>* pointState)
1615 /* Read the PMF and target distribution.
1616 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1617 base on the force constant. The free energy and target together determine the bias.
1619 std::string filenameModified(filename);
1622 size_t n = filenameModified.rfind('.');
1623 GMX_RELEASE_ASSERT(n != std::string::npos,
1624 "The filename should contain an extension starting with .");
1625 filenameModified.insert(n, formatString("%d", biasIndex));
1628 std::string correctFormatMessage = formatString(
1629 "%s is expected in the following format. "
1630 "The first ndim column(s) should contain the coordinate values for each point, "
1631 "each column containing values of one dimension (in ascending order). "
1632 "For a multidimensional coordinate, points should be listed "
1633 "in the order obtained by traversing lower dimensions first. "
1634 "E.g. for two-dimensional grid of size nxn: "
1635 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1636 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1637 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1638 "Make sure the input file ends with a new line but has no trailing new lines.",
1640 gmx::TextLineWrapper wrapper;
1641 wrapper.settings().setLineLength(c_linewidth);
1642 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1646 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1648 /* Check basic data properties here. BiasGrid takes care of more complicated things. */
1652 std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(),
1653 correctFormatMessage.c_str());
1654 GMX_THROW(InvalidInputError(mesg));
1657 /* Less than 2 points is not useful for PMF or target. */
1660 std::string mesg = gmx::formatString(
1661 "%s contains too few data points (%d)."
1662 "The minimum number of points is 2.",
1663 filename.c_str(), numRows);
1664 GMX_THROW(InvalidInputError(mesg));
1667 /* Make sure there are enough columns of data.
1669 Two formats are allowed. Either with columns {coords, PMF, target} or
1670 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1671 is how AWH output is written (x, y, z being other AWH variables). For this format,
1672 trailing columns are ignored.
1674 int columnIndexTarget;
1675 int numColumnsMin = dimParams.size() + 2;
1676 int columnIndexPmf = dimParams.size();
1677 if (numColumns == numColumnsMin)
1679 columnIndexTarget = columnIndexPmf + 1;
1683 columnIndexTarget = columnIndexPmf + 4;
1686 if (numColumns < numColumnsMin)
1688 std::string mesg = gmx::formatString(
1689 "The number of columns in %s should be at least %d."
1691 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1692 GMX_THROW(InvalidInputError(mesg));
1695 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1696 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1697 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1698 if (numZeroRows > 1)
1700 std::string mesg = gmx::formatString(
1701 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1703 numZeroRows, filename.c_str());
1704 GMX_THROW(InvalidInputError(mesg));
1707 /* Convert from user units to internal units before sending the data of to grid. */
1708 for (size_t d = 0; d < dimParams.size(); d++)
1710 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1711 if (scalingFactor == 1)
1715 for (size_t m = 0; m < pointState->size(); m++)
1717 data[d][m] *= scalingFactor;
1721 /* Get a data point for each AWH grid point so that they all get data. */
1722 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1723 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1725 /* Extract the data for each grid point.
1726 * We check if the target distribution is zero for all points.
1728 bool targetDistributionIsZero = true;
1729 for (size_t m = 0; m < pointState->size(); m++)
1731 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1732 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1734 /* Check if the values are allowed. */
1737 std::string mesg = gmx::formatString(
1738 "Target distribution weight at point %zu (%g) in %s is negative.", m, target,
1740 GMX_THROW(InvalidInputError(mesg));
1744 targetDistributionIsZero = false;
1746 (*pointState)[m].setTargetConstantWeight(target);
1749 if (targetDistributionIsZero)
1752 gmx::formatString("The target weights given in column %d in %s are all 0",
1753 columnIndexTarget, filename.c_str());
1754 GMX_THROW(InvalidInputError(mesg));
1757 /* Free the arrays. */
1758 for (int m = 0; m < numColumns; m++)
1765 void BiasState::normalizePmf(int numSharingSims)
1767 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1768 Approximately (for large enough force constant) we should have:
1769 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1772 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1773 double expSumPmf = 0;
1775 for (const PointState& pointState : points_)
1777 if (pointState.inTargetRegion())
1779 expSumPmf += std::exp(pointState.logPmfSum());
1780 expSumF += std::exp(-pointState.freeEnergy());
1783 double numSamples = histogramSize_.histogramSize() / numSharingSims;
1786 double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1787 for (PointState& pointState : points_)
1789 if (pointState.inTargetRegion())
1791 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1796 void BiasState::initGridPointState(const AwhBiasParams& awhBiasParams,
1797 const std::vector<DimParams>& dimParams,
1798 const BiasGrid& grid,
1799 const BiasParams& params,
1800 const std::string& filename,
1803 /* Modify PMF, free energy and the constant target distribution factor
1804 * to user input values if there is data given.
1806 if (awhBiasParams.bUserData)
1808 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1809 setFreeEnergyToConvolvedPmf(dimParams, grid);
1812 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1813 GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN || points_[0].weightSumRef() != 0,
1814 "AWH reference weight histogram not initialized properly with local "
1815 "Boltzmann target distribution.");
1817 updateTargetDistribution(points_, params);
1819 for (PointState& pointState : points_)
1821 if (pointState.inTargetRegion())
1823 pointState.updateBias();
1827 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1828 pointState.setTargetToZero();
1832 /* Set the initial reference weighthistogram. */
1833 const double histogramSize = histogramSize_.histogramSize();
1834 for (auto& pointState : points_)
1836 pointState.setInitialReferenceWeightHistogram(histogramSize);
1839 /* Make sure the pmf is normalized consistently with the histogram size.
1840 Note: the target distribution and free energy need to be set here. */
1841 normalizePmf(params.numSharedUpdate);
1844 BiasState::BiasState(const AwhBiasParams& awhBiasParams,
1845 double histogramSizeInitial,
1846 const std::vector<DimParams>& dimParams,
1847 const BiasGrid& grid) :
1848 coordState_(awhBiasParams, dimParams, grid),
1849 points_(grid.numPoints()),
1850 weightSumCovering_(grid.numPoints()),
1851 histogramSize_(awhBiasParams, histogramSizeInitial)
1853 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1854 for (size_t d = 0; d < dimParams.size(); d++)
1856 int index = grid.point(coordState_.gridpointIndex()).index[d];
1857 originUpdatelist_[d] = index;
1858 endUpdatelist_[d] = index;