2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, The GROMACS development team.
5 * Copyright (c) 2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements the BiasState class.
41 * \author Viveca Lindahl
42 * \author Berk Hess <hess@kth.se>
48 #include "biasstate.h"
59 #include "gromacs/fileio/gmxfio.h"
60 #include "gromacs/fileio/xvgr.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/math/utilities.h"
63 #include "gromacs/mdrunutility/multisim.h"
64 #include "gromacs/mdtypes/awh_history.h"
65 #include "gromacs/mdtypes/awh_params.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/simd/simd.h"
68 #include "gromacs/simd/simd_math.h"
69 #include "gromacs/utility/arrayref.h"
70 #include "gromacs/utility/exceptions.h"
71 #include "gromacs/utility/gmxassert.h"
72 #include "gromacs/utility/smalloc.h"
73 #include "gromacs/utility/stringutil.h"
76 #include "pointstate.h"
81 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
83 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
85 /* The PMF is just the negative of the log of the sampled PMF histogram.
86 * Points with zero target weight are ignored, they will mostly contain noise.
88 for (size_t i = 0; i < points_.size(); i++)
90 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
98 * Sum an array over all simulations on the master rank of each simulation.
100 * \param[in,out] arrayRef The data to sum.
101 * \param[in] multiSimComm Struct for multi-simulation communication.
103 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
105 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
109 * Sum an array over all simulations on the master rank of each simulation.
111 * \param[in,out] arrayRef The data to sum.
112 * \param[in] multiSimComm Struct for multi-simulation communication.
114 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
116 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
120 * Sum an array over all simulations on all ranks of each simulation.
122 * This assumes the data is identical on all ranks within each simulation.
124 * \param[in,out] arrayRef The data to sum.
125 * \param[in] commRecord Struct for intra-simulation communication.
126 * \param[in] multiSimComm Struct for multi-simulation communication.
129 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
131 if (MASTER(commRecord))
133 sumOverSimulations(arrayRef, multiSimComm);
135 if (commRecord->nnodes > 1)
137 gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord->mpi_comm_mygroup);
142 * Sum PMF over multiple simulations, when requested.
144 * \param[in,out] pointState The state of the points in the bias.
145 * \param[in] numSharedUpdate The number of biases sharing the histogram.
146 * \param[in] commRecord Struct for intra-simulation communication.
147 * \param[in] multiSimComm Struct for multi-simulation communication.
149 void sumPmf(gmx::ArrayRef<PointState> pointState,
151 const t_commrec* commRecord,
152 const gmx_multisim_t* multiSimComm)
154 if (numSharedUpdate == 1)
158 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->numSimulations_ == 0,
159 "numSharedUpdate should be a multiple of multiSimComm->numSimulations_");
160 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
161 "Sharing within a simulation is not implemented (yet)");
163 std::vector<double> buffer(pointState.size());
165 /* Need to temporarily exponentiate the log weights to sum over simulations */
166 for (size_t i = 0; i < buffer.size(); i++)
168 buffer[i] = pointState[i].inTargetRegion() ? std::exp(pointState[i].logPmfSum()) : 0;
171 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
173 /* Take log again to get (non-normalized) PMF */
174 double normFac = 1.0 / numSharedUpdate;
175 for (gmx::index i = 0; i < pointState.ssize(); i++)
177 if (pointState[i].inTargetRegion())
179 pointState[i].setLogPmfSum(std::log(buffer[i] * normFac));
185 * Find the minimum free energy value.
187 * \param[in] pointState The state of the points.
188 * \returns the minimum free energy value.
190 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
192 double fMin = GMX_FLOAT_MAX;
194 for (auto const& ps : pointState)
196 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
198 fMin = ps.freeEnergy();
206 * Find and return the log of the probability weight of a point given a coordinate value.
208 * The unnormalized weight is given by
209 * w(point|value) = exp(bias(point) - U(value,point)),
210 * where U is a harmonic umbrella potential.
212 * \param[in] dimParams The bias dimensions parameters
213 * \param[in] points The point state.
214 * \param[in] grid The grid.
215 * \param[in] pointIndex Point to evaluate probability weight for.
216 * \param[in] pointBias Bias for the point (as a log weight).
217 * \param[in] value Coordinate value.
218 * \param[in] neighborLambdaEnergies The energy of the system in neighboring lambdas states. Can be
219 * empty when there are no free energy lambda state dimensions.
220 * \param[in] gridpointIndex The index of the current grid point.
221 * \returns the log of the biased probability weight.
223 double biasedLogWeightFromPoint(ArrayRef<const DimParams> dimParams,
224 ArrayRef<const PointState> points,
225 const BiasGrid& grid,
228 const awh_dvec value,
229 ArrayRef<const double> neighborLambdaEnergies,
232 double logWeight = detail::c_largeNegativeExponent;
234 /* Only points in the target region have non-zero weight */
235 if (points[pointIndex].inTargetRegion())
237 logWeight = pointBias;
239 /* Add potential for all parameter dimensions */
240 for (size_t d = 0; d < dimParams.size(); d++)
242 if (dimParams[d].isFepLambdaDimension())
244 /* If this is not a sampling step or if this function is called from
245 * calcConvolvedBias(), when writing energy subblocks, neighborLambdaEnergies will
246 * be empty. No convolution is required along the lambda dimension. */
247 if (!neighborLambdaEnergies.empty())
249 const int pointLambdaIndex = grid.point(pointIndex).coordValue[d];
250 const int gridpointLambdaIndex = grid.point(gridpointIndex).coordValue[d];
251 logWeight -= dimParams[d].fepDimParams().beta
252 * (neighborLambdaEnergies[pointLambdaIndex]
253 - neighborLambdaEnergies[gridpointLambdaIndex]);
258 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
259 logWeight -= 0.5 * dimParams[d].pullDimParams().betak * dev * dev;
267 * Calculates the marginal distribution (marginal probability) for each value along
268 * a free energy lambda axis.
269 * The marginal distribution of one coordinate dimension value is the sum of the probability
270 * distribution of all values (herein all neighbor values) with the same value in the dimension
272 * \param[in] grid The bias grid.
273 * \param[in] neighbors The points to use for the calculation of the marginal distribution.
274 * \param[in] probWeightNeighbor Probability weights of the neighbors.
275 * \returns The calculated marginal distribution in a 1D array with
276 * as many elements as there are points along the axis of interest.
278 std::vector<double> calculateFELambdaMarginalDistribution(const BiasGrid& grid,
279 ArrayRef<const int> neighbors,
280 ArrayRef<const double> probWeightNeighbor)
282 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
283 GMX_RELEASE_ASSERT(lambdaAxisIndex,
284 "There must be a free energy lambda axis in order to calculate the free "
285 "energy lambda marginal distribution.");
286 const int numFepLambdaStates = grid.numFepLambdaStates();
287 std::vector<double> lambdaMarginalDistribution(numFepLambdaStates, 0);
289 for (size_t i = 0; i < neighbors.size(); i++)
291 const int neighbor = neighbors[i];
292 const int lambdaState = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
293 lambdaMarginalDistribution[lambdaState] += probWeightNeighbor[i];
295 return lambdaMarginalDistribution;
300 void BiasState::calcConvolvedPmf(ArrayRef<const DimParams> dimParams,
301 const BiasGrid& grid,
302 std::vector<float>* convolvedPmf) const
304 size_t numPoints = grid.numPoints();
306 convolvedPmf->resize(numPoints);
308 /* Get the PMF to convolve. */
309 std::vector<float> pmf(numPoints);
312 for (size_t m = 0; m < numPoints; m++)
314 double freeEnergyWeights = 0;
315 const GridPoint& point = grid.point(m);
316 for (auto& neighbor : point.neighbor)
318 /* Do not convolve the bias along a lambda axis - only use the pmf from the current point */
319 if (!pointsHaveDifferentLambda(grid, m, neighbor))
321 /* The negative PMF is a positive bias. */
322 double biasNeighbor = -pmf[neighbor];
324 /* Add the convolved PMF weights for the neighbors of this point.
325 Note that this function only adds point within the target > 0 region.
326 Sum weights, take the logarithm last to get the free energy. */
327 double logWeight = biasedLogWeightFromPoint(
328 dimParams, points_, grid, neighbor, biasNeighbor, point.coordValue, {}, m);
329 freeEnergyWeights += std::exp(logWeight);
333 GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
334 "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
335 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
343 * Updates the target distribution for all points.
345 * The target distribution is always updated for all points
348 * \param[in,out] pointState The state of all points.
349 * \param[in] params The bias parameters.
351 void updateTargetDistribution(ArrayRef<PointState> pointState, const BiasParams& params)
353 double freeEnergyCutoff = 0;
354 if (params.eTarget == AwhTargetType::Cutoff)
356 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
359 double sumTarget = 0;
360 for (PointState& ps : pointState)
362 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
364 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
367 double invSum = 1.0 / sumTarget;
368 for (PointState& ps : pointState)
370 ps.scaleTarget(invSum);
375 * Puts together a string describing a grid point.
377 * \param[in] grid The grid.
378 * \param[in] point BiasGrid point index.
379 * \returns a string for the point.
381 std::string gridPointValueString(const BiasGrid& grid, int point)
383 std::string pointString;
387 for (int d = 0; d < grid.numDimensions(); d++)
389 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
390 if (d < grid.numDimensions() - 1)
405 int BiasState::warnForHistogramAnomalies(const BiasGrid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
407 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
408 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
410 /* Sum up the histograms and get their normalization */
411 double sumVisits = 0;
412 double sumWeights = 0;
413 for (auto& pointState : points_)
415 if (pointState.inTargetRegion())
417 sumVisits += pointState.numVisitsTot();
418 sumWeights += pointState.weightSumTot();
421 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
422 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
423 double invNormVisits = 1.0 / sumVisits;
424 double invNormWeight = 1.0 / sumWeights;
426 /* Check all points for warnings */
428 size_t numPoints = grid.numPoints();
429 for (size_t m = 0; m < numPoints; m++)
431 /* Skip points close to boundary or non-target region */
432 const GridPoint& gridPoint = grid.point(m);
433 bool skipPoint = false;
434 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
436 int neighbor = gridPoint.neighbor[n];
437 skipPoint = !points_[neighbor].inTargetRegion();
438 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
440 const GridPoint& neighborPoint = grid.point(neighbor);
441 skipPoint = neighborPoint.index[d] == 0
442 || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
446 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
447 const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
448 const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
449 if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
451 std::string pointValueString = gridPointValueString(grid, m);
452 std::string warningMessage = gmx::formatString(
454 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
455 "is less than a fraction %g of the reference distribution at that point. "
456 "If you are not certain about your settings you might want to increase your "
457 "pull force constant or "
458 "modify your sampling region.\n",
461 pointValueString.c_str(),
463 gmx::TextLineWrapper wrapper;
464 wrapper.settings().setLineLength(c_linewidth);
465 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
469 if (numWarnings >= maxNumWarnings)
478 double BiasState::calcUmbrellaForceAndPotential(ArrayRef<const DimParams> dimParams,
479 const BiasGrid& grid,
481 ArrayRef<const double> neighborLambdaDhdl,
482 ArrayRef<double> force) const
484 double potential = 0;
485 for (size_t d = 0; d < dimParams.size(); d++)
487 if (dimParams[d].isFepLambdaDimension())
489 if (!neighborLambdaDhdl.empty())
491 const int coordpointLambdaIndex = grid.point(point).coordValue[d];
492 force[d] = neighborLambdaDhdl[coordpointLambdaIndex];
493 /* The potential should not be affected by the lambda dimension. */
499 getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
500 double k = dimParams[d].pullDimParams().k;
502 /* Force from harmonic potential 0.5*k*dev^2 */
503 force[d] = -k * deviation;
504 potential += 0.5 * k * deviation * deviation;
511 void BiasState::calcConvolvedForce(ArrayRef<const DimParams> dimParams,
512 const BiasGrid& grid,
513 ArrayRef<const double> probWeightNeighbor,
514 ArrayRef<const double> neighborLambdaDhdl,
515 ArrayRef<double> forceWorkBuffer,
516 ArrayRef<double> force) const
518 for (size_t d = 0; d < dimParams.size(); d++)
523 /* Only neighboring points have non-negligible contribution. */
524 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
525 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
526 for (size_t n = 0; n < neighbor.size(); n++)
528 double weightNeighbor = probWeightNeighbor[n];
529 int indexNeighbor = neighbor[n];
531 /* Get the umbrella force from this point. The returned potential is ignored here. */
532 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, neighborLambdaDhdl, forceFromNeighbor);
534 /* Add the weighted umbrella force to the convolved force. */
535 for (size_t d = 0; d < dimParams.size(); d++)
537 force[d] += forceFromNeighbor[d] * weightNeighbor;
542 double BiasState::moveUmbrella(ArrayRef<const DimParams> dimParams,
543 const BiasGrid& grid,
544 ArrayRef<const double> probWeightNeighbor,
545 ArrayRef<const double> neighborLambdaDhdl,
546 ArrayRef<double> biasForce,
550 bool onlySampleUmbrellaGridpoint)
552 /* Generate and set a new coordinate reference value */
553 coordState_.sampleUmbrellaGridpoint(
554 grid, coordState_.gridpointIndex(), probWeightNeighbor, step, seed, indexSeed);
556 if (onlySampleUmbrellaGridpoint)
561 std::vector<double> newForce(dimParams.size());
562 double newPotential = calcUmbrellaForceAndPotential(
563 dimParams, grid, coordState_.umbrellaGridpoint(), neighborLambdaDhdl, newForce);
565 /* A modification of the reference value at time t will lead to a different
566 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
567 this means the force and velocity will change signs roughly as often.
568 To avoid any issues we take the average of the previous and new force
569 at steps when the reference value has been moved. E.g. if the ref. value
570 is set every step to (coord dvalue +/- delta) would give zero force.
572 for (gmx::index d = 0; d < biasForce.ssize(); d++)
574 /* Average of the current and new force */
575 biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
585 * Sets the histogram rescaling factors needed to control the histogram size.
587 * For sake of robustness, the reference weight histogram can grow at a rate
588 * different from the actual sampling rate. Typically this happens for a limited
589 * initial time, alternatively growth is scaled down by a constant factor for all
590 * times. Since the size of the reference histogram sets the size of the free
591 * energy update this should be reflected also in the PMF. Thus the PMF histogram
592 * needs to be rescaled too.
594 * This function should only be called by the bias update function or wrapped by a function that
595 * knows what scale factors should be applied when, e.g,
596 * getSkippedUpdateHistogramScaleFactors().
598 * \param[in] params The bias parameters.
599 * \param[in] newHistogramSize New reference weight histogram size.
600 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
601 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
602 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
604 void setHistogramUpdateScaleFactors(const BiasParams& params,
605 double newHistogramSize,
606 double oldHistogramSize,
607 double* weightHistScaling,
608 double* logPmfSumScaling)
611 /* The two scaling factors below are slightly different (ignoring the log factor) because the
612 reference and the PMF histogram apply weight scaling differently. The weight histogram
613 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
614 It is done this way because that is what target type local Boltzmann (for which
615 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
616 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
617 1) empirically this is necessary for converging the PMF; 2) since the extraction of
618 the PMF is theoretically only valid for a constant bias, new samples should get more
619 weight than old ones for which the bias is fluctuating more. */
621 newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
622 *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
627 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
628 double* weightHistScaling,
629 double* logPmfSumScaling) const
631 GMX_ASSERT(params.skipUpdates(),
632 "Calling function for skipped updates when skipping updates is not allowed");
634 if (inInitialStage())
636 /* In between global updates the reference histogram size is kept constant so we trivially
637 know what the histogram size was at the time of the skipped update. */
638 double histogramSize = histogramSize_.histogramSize();
639 setHistogramUpdateScaleFactors(
640 params, histogramSize, histogramSize, weightHistScaling, logPmfSumScaling);
644 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
645 *weightHistScaling = 1;
646 *logPmfSumScaling = 0;
650 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
652 double weightHistScaling;
653 double logPmfsumScaling;
655 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
657 for (auto& pointState : points_)
659 bool didUpdate = pointState.performPreviouslySkippedUpdates(
660 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
662 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
665 pointState.updateBias();
670 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const BiasGrid& grid)
672 double weightHistScaling;
673 double logPmfsumScaling;
675 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
677 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
678 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
679 for (auto& neighbor : neighbors)
681 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
682 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
686 points_[neighbor].updateBias();
695 * Merge update lists from multiple sharing simulations.
697 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
698 * \param[in] numPoints Total number of points.
699 * \param[in] commRecord Struct for intra-simulation communication.
700 * \param[in] multiSimComm Struct for multi-simulation communication.
702 void mergeSharedUpdateLists(std::vector<int>* updateList,
704 const t_commrec* commRecord,
705 const gmx_multisim_t* multiSimComm)
707 std::vector<int> numUpdatesOfPoint;
709 /* Flag the update points of this sim.
710 TODO: we can probably avoid allocating this array and just use the input array. */
711 numUpdatesOfPoint.resize(numPoints, 0);
712 for (auto& pointIndex : *updateList)
714 numUpdatesOfPoint[pointIndex] = 1;
717 /* Sum over the sims to get all the flagged points */
718 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
720 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
722 for (int m = 0; m < numPoints; m++)
724 if (numUpdatesOfPoint[m] > 0)
726 updateList->push_back(m);
732 * Generate an update list of points sampled since the last update.
734 * \param[in] grid The AWH bias.
735 * \param[in] points The point state.
736 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since
737 * last update. \param[in] endUpdatelist The end of the rectangular that has been sampled since
738 * last update. \param[in,out] updateList Local update list to set (assumed >= npoints long).
740 void makeLocalUpdateList(const BiasGrid& grid,
741 ArrayRef<const PointState> points,
742 const awh_ivec originUpdatelist,
743 const awh_ivec endUpdatelist,
744 std::vector<int>* updateList)
749 /* Define the update search grid */
750 for (int d = 0; d < grid.numDimensions(); d++)
752 origin[d] = originUpdatelist[d];
753 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
755 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
756 This helps for calculating the distance/number of points but should be removed and fixed when the way of
757 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
758 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
761 /* Make the update list */
764 bool pointExists = true;
767 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
769 if (pointExists && points[pointIndex].inTargetRegion())
771 updateList->push_back(pointIndex);
778 void BiasState::resetLocalUpdateRange(const BiasGrid& grid)
780 const int gridpointIndex = coordState_.gridpointIndex();
781 for (int d = 0; d < grid.numDimensions(); d++)
783 /* This gives the minimum range consisting only of the current closest point. */
784 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
785 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
793 * Add partial histograms (accumulating between updates) to accumulating histograms.
795 * \param[in,out] pointState The state of the points in the bias.
796 * \param[in,out] weightSumCovering The weights for checking covering.
797 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
798 * \param[in] commRecord Struct for intra-simulation communication.
799 * \param[in] multiSimComm Struct for multi-simulation communication.
800 * \param[in] localUpdateList List of points with data.
802 void sumHistograms(gmx::ArrayRef<PointState> pointState,
803 gmx::ArrayRef<double> weightSumCovering,
805 const t_commrec* commRecord,
806 const gmx_multisim_t* multiSimComm,
807 const std::vector<int>& localUpdateList)
809 /* The covering checking histograms are added before summing over simulations, so that the
810 weights from different simulations are kept distinguishable. */
811 for (int globalIndex : localUpdateList)
813 weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
816 /* Sum histograms over multiple simulations if needed. */
817 if (numSharedUpdate > 1)
819 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
820 "Sharing within a simulation is not implemented (yet)");
822 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
823 std::vector<double> weightSum;
824 std::vector<double> coordVisits;
826 weightSum.resize(localUpdateList.size());
827 coordVisits.resize(localUpdateList.size());
829 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
831 const PointState& ps = pointState[localUpdateList[localIndex]];
833 weightSum[localIndex] = ps.weightSumIteration();
834 coordVisits[localIndex] = ps.numVisitsIteration();
837 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
838 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
840 /* Transfer back the result */
841 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
843 PointState& ps = pointState[localUpdateList[localIndex]];
845 ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
849 /* Now add the partial counts and weights to the accumulating histograms.
850 Note: we still need to use the weights for the update so we wait
851 with resetting them until the end of the update. */
852 for (int globalIndex : localUpdateList)
854 pointState[globalIndex].addPartialWeightAndCount();
859 * Label points along an axis as covered or not.
861 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
863 * \param[in] visited Visited? For each point.
864 * \param[in] checkCovering Check for covering? For each point.
865 * \param[in] numPoints The number of grid points along this dimension.
866 * \param[in] period Period in number of points.
867 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
868 * \param[in,out] covered In this array elements are 1 for covered points and 0 for
869 * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
871 void labelCoveredPoints(const std::vector<bool>& visited,
872 const std::vector<bool>& checkCovering,
876 gmx::ArrayRef<int> covered)
878 GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
880 bool haveFirstNotVisited = false;
881 int firstNotVisited = -1;
882 int notVisitedLow = -1;
883 int notVisitedHigh = -1;
885 for (int n = 0; n < numPoints; n++)
887 if (checkCovering[n] && !visited[n])
889 if (!haveFirstNotVisited)
893 haveFirstNotVisited = true;
899 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
900 by unvisited points. The unvisted end points affect the coveredness of the
901 visited with a reach equal to the cover radius. */
902 int notCoveredLow = notVisitedLow + coverRadius;
903 int notCoveredHigh = notVisitedHigh - coverRadius;
904 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
906 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
909 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
910 the notVisitedLow of the next. */
911 notVisitedLow = notVisitedHigh;
916 /* Have labelled all the internal points. Now take care of the boundary regions. */
917 if (!haveFirstNotVisited)
919 /* No non-visited points <=> all points visited => all points covered. */
921 for (int n = 0; n < numPoints; n++)
928 int lastNotVisited = notVisitedLow;
930 /* For periodic boundaries, non-visited points can influence points
931 on the other side of the boundary so we need to wrap around. */
933 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
934 For non-periodic boundaries there is no lower end point so a dummy value is used. */
935 int notVisitedHigh = firstNotVisited;
936 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
938 int notCoveredLow = notVisitedLow + coverRadius;
939 int notCoveredHigh = notVisitedHigh - coverRadius;
941 for (int i = 0; i <= notVisitedHigh; i++)
943 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
944 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
947 /* Upper end. Same as for lower end but in the other direction. */
948 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
949 notVisitedLow = lastNotVisited;
951 notCoveredLow = notVisitedLow + coverRadius;
952 notCoveredHigh = notVisitedHigh - coverRadius;
954 for (int i = notVisitedLow; i <= numPoints - 1; i++)
956 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
957 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
964 bool BiasState::isSamplingRegionCovered(const BiasParams& params,
965 ArrayRef<const DimParams> dimParams,
966 const BiasGrid& grid,
967 const t_commrec* commRecord,
968 const gmx_multisim_t* multiSimComm) const
970 /* Allocate and initialize arrays: one for checking visits along each dimension,
971 one for keeping track of which points to check and one for the covered points.
972 Possibly these could be kept as AWH variables to avoid these allocations. */
975 std::vector<bool> visited;
976 std::vector<bool> checkCovering;
977 // We use int for the covering array since we might use gmx_sumi_sim.
978 std::vector<int> covered;
981 std::vector<CheckDim> checkDim;
982 checkDim.resize(grid.numDimensions());
984 for (int d = 0; d < grid.numDimensions(); d++)
986 const size_t numPoints = grid.axis(d).numPoints();
987 checkDim[d].visited.resize(numPoints, false);
988 checkDim[d].checkCovering.resize(numPoints, false);
989 checkDim[d].covered.resize(numPoints, 0);
992 /* Set visited points along each dimension and which points should be checked for covering.
993 Specifically, points above the free energy cutoff (if there is one) or points outside
994 of the target region are ignored. */
996 /* Set the free energy cutoff */
997 double maxFreeEnergy = GMX_FLOAT_MAX;
999 if (params.eTarget == AwhTargetType::Cutoff)
1001 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
1004 /* Set the threshold weight for a point to be considered visited. */
1005 double weightThreshold = 1;
1006 for (int d = 0; d < grid.numDimensions(); d++)
1008 if (grid.axis(d).isFepLambdaAxis())
1010 /* Do not modify the weight threshold based on a FEP lambda axis. The spread
1011 * of the sampling weights is not depending on a Gaussian distribution (like
1013 weightThreshold *= 1.0;
1017 /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
1018 * approximately (given that the spacing can be modified if the dimension is periodic)
1019 * proportional to sqrt(1/(2*pi)). */
1020 weightThreshold *= grid.axis(d).spacing()
1021 * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
1025 /* Project the sampling weights onto each dimension */
1026 for (size_t m = 0; m < grid.numPoints(); m++)
1028 const PointState& pointState = points_[m];
1030 for (int d = 0; d < grid.numDimensions(); d++)
1032 int n = grid.point(m).index[d];
1034 /* Is visited if it was already visited or if there is enough weight at the current point */
1035 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
1037 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
1038 checkDim[d].checkCovering[n] =
1039 checkDim[d].checkCovering[n]
1040 || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
1044 /* Label each point along each dimension as covered or not. */
1045 for (int d = 0; d < grid.numDimensions(); d++)
1047 labelCoveredPoints(checkDim[d].visited,
1048 checkDim[d].checkCovering,
1049 grid.axis(d).numPoints(),
1050 grid.axis(d).numPointsInPeriod(),
1051 params.coverRadius()[d],
1052 checkDim[d].covered);
1055 /* Now check for global covering. Each dimension needs to be covered separately.
1056 A dimension is covered if each point is covered. Multiple simulations collectively
1057 cover the points, i.e. a point is covered if any of the simulations covered it.
1058 However, visited points are not shared, i.e. if a point is covered or not is
1059 determined by the visits of a single simulation. In general the covering criterion is
1060 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
1061 For 1 simulation, all points covered <=> all points visited. For multiple simulations
1062 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
1063 In the limit of large coverRadius, all points covered => all points visited by at least one
1064 simulation (since no point will be covered until all points have been visited by a
1065 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
1066 needs with surrounding points before sharing covering information with other simulations. */
1068 /* Communicate the covered points between sharing simulations if needed. */
1069 if (params.numSharedUpdate > 1)
1071 /* For multiple dimensions this may not be the best way to do it. */
1072 for (int d = 0; d < grid.numDimensions(); d++)
1075 gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
1081 /* Now check if for each dimension all points are covered. Break if not true. */
1082 bool allPointsCovered = true;
1083 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
1085 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
1087 allPointsCovered = (checkDim[d].covered[n] != 0);
1091 return allPointsCovered;
1095 * Normalizes the free energy and PMF sum.
1097 * \param[in] pointState The state of the points.
1099 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1101 double minF = freeEnergyMinimumValue(*pointState);
1103 for (PointState& ps : *pointState)
1105 ps.normalizeFreeEnergyAndPmfSum(minF);
1109 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(ArrayRef<const DimParams> dimParams,
1110 const BiasGrid& grid,
1111 const BiasParams& params,
1112 const t_commrec* commRecord,
1113 const gmx_multisim_t* multiSimComm,
1117 std::vector<int>* updateList)
1119 /* Note hat updateList is only used in this scope and is always
1120 * re-initialized. We do not use a local vector, because that would
1121 * cause reallocation every time this funtion is called and the vector
1122 * can be the size of the whole grid.
1125 /* Make a list of all local points, i.e. those that could have been touched since
1126 the last update. These are the points needed for summing histograms below
1127 (non-local points only add zeros). For local updates, this will also be the
1128 final update list. */
1129 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1130 if (params.numSharedUpdate > 1)
1132 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1135 /* Reset the range for the next update */
1136 resetLocalUpdateRange(grid);
1138 /* Add samples to histograms for all local points and sync simulations if needed */
1139 sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1141 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1143 /* Renormalize the free energy if values are too large. */
1144 bool needToNormalizeFreeEnergy = false;
1145 for (int& globalIndex : *updateList)
1147 /* We want to keep the absolute value of the free energies to be less
1148 c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1149 ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1150 in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1151 this requirement would be broken. */
1152 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1154 needToNormalizeFreeEnergy = true;
1159 /* Update target distribution? */
1160 bool needToUpdateTargetDistribution =
1161 (params.eTarget != AwhTargetType::Constant && params.isUpdateTargetStep(step));
1163 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1164 bool detectedCovering = false;
1165 if (inInitialStage())
1168 (params.isCheckCoveringStep(step)
1169 && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1172 /* The weighthistogram size after this update. */
1173 double newHistogramSize = histogramSize_.newHistogramSize(
1174 params, t, detectedCovering, points_, weightSumCovering_, fplog);
1176 /* Make the update list. Usually we try to only update local points,
1177 * but if the update has non-trivial or non-deterministic effects
1178 * on non-local points a global update is needed. This is the case when:
1179 * 1) a covering occurred in the initial stage, leading to non-trivial
1180 * histogram rescaling factors; or
1181 * 2) the target distribution will be updated, since we don't make any
1182 * assumption on its form; or
1183 * 3) the AWH parameters are such that we never attempt to skip non-local
1185 * 4) the free energy values have grown so large that a renormalization
1188 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1190 /* Global update, just add all points. */
1191 updateList->clear();
1192 for (size_t m = 0; m < points_.size(); m++)
1194 if (points_[m].inTargetRegion())
1196 updateList->push_back(m);
1201 /* Set histogram scale factors. */
1202 double weightHistScalingSkipped = 0;
1203 double logPmfsumScalingSkipped = 0;
1204 if (params.skipUpdates())
1206 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1208 double weightHistScalingNew;
1209 double logPmfsumScalingNew;
1210 setHistogramUpdateScaleFactors(
1211 params, newHistogramSize, histogramSize_.histogramSize(), &weightHistScalingNew, &logPmfsumScalingNew);
1213 /* Update free energy and reference weight histogram for points in the update list. */
1214 for (int pointIndex : *updateList)
1216 PointState* pointStateToUpdate = &points_[pointIndex];
1218 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1219 if (params.skipUpdates())
1221 pointStateToUpdate->performPreviouslySkippedUpdates(
1222 params, histogramSize_.numUpdates(), weightHistScalingSkipped, logPmfsumScalingSkipped);
1225 /* Now do an update with new sampling data. */
1226 pointStateToUpdate->updateWithNewSampling(
1227 params, histogramSize_.numUpdates(), weightHistScalingNew, logPmfsumScalingNew);
1230 /* Only update the histogram size after we are done with the local point updates */
1231 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1233 if (needToNormalizeFreeEnergy)
1235 normalizeFreeEnergyAndPmfSum(&points_);
1238 if (needToUpdateTargetDistribution)
1240 /* The target distribution is always updated for all points at once. */
1241 updateTargetDistribution(points_, params);
1244 /* Update the bias. The bias is updated separately and last since it simply a function of
1245 the free energy and the target distribution and we want to avoid doing extra work. */
1246 for (int pointIndex : *updateList)
1248 points_[pointIndex].updateBias();
1251 /* Increase the update counter. */
1252 histogramSize_.incrementNumUpdates();
1255 double BiasState::updateProbabilityWeightsAndConvolvedBias(ArrayRef<const DimParams> dimParams,
1256 const BiasGrid& grid,
1257 ArrayRef<const double> neighborLambdaEnergies,
1258 std::vector<double, AlignedAllocator<double>>* weight) const
1260 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1261 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1263 #if GMX_SIMD_HAVE_DOUBLE
1264 typedef SimdDouble PackType;
1265 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1267 typedef double PackType;
1268 constexpr int packSize = 1;
1270 /* Round the size of the weight array up to packSize */
1271 const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1272 weight->resize(weightSize);
1274 double* gmx_restrict weightData = weight->data();
1275 PackType weightSumPack(0.0);
1276 for (size_t i = 0; i < neighbors.size(); i += packSize)
1278 for (size_t n = i; n < i + packSize; n++)
1280 if (n < neighbors.size())
1282 const int neighbor = neighbors[n];
1283 (*weight)[n] = biasedLogWeightFromPoint(dimParams,
1287 points_[neighbor].bias(),
1288 coordState_.coordValue(),
1289 neighborLambdaEnergies,
1290 coordState_.gridpointIndex());
1294 /* Pad with values that don't affect the result */
1295 (*weight)[n] = detail::c_largeNegativeExponent;
1298 PackType weightPack = load<PackType>(weightData + i);
1299 weightPack = gmx::exp(weightPack);
1300 weightSumPack = weightSumPack + weightPack;
1301 store(weightData + i, weightPack);
1303 /* Sum of probability weights */
1304 double weightSum = reduce(weightSumPack);
1305 GMX_RELEASE_ASSERT(weightSum > 0,
1306 "zero probability weight when updating AWH probability weights.");
1308 /* Normalize probabilities to sum to 1 */
1309 double invWeightSum = 1 / weightSum;
1311 /* When there is a free energy lambda state axis remove the convolved contributions along that
1312 * axis from the total bias. This must be done after calculating invWeightSum (since weightSum
1313 * will be modified), but before normalizing the weights (below). */
1314 if (grid.hasLambdaAxis())
1316 /* If there is only one axis the bias will not be convolved in any dimension. */
1317 if (grid.axis().size() == 1)
1319 weightSum = gmx::exp(points_[coordState_.gridpointIndex()].bias());
1323 for (size_t i = 0; i < neighbors.size(); i++)
1325 const int neighbor = neighbors[i];
1326 if (pointsHaveDifferentLambda(grid, coordState_.gridpointIndex(), neighbor))
1328 weightSum -= weightData[i];
1334 for (double& w : *weight)
1339 /* Return the convolved bias */
1340 return std::log(weightSum);
1343 double BiasState::calcConvolvedBias(ArrayRef<const DimParams> dimParams,
1344 const BiasGrid& grid,
1345 const awh_dvec& coordValue) const
1347 int point = grid.nearestIndex(coordValue);
1348 const GridPoint& gridPoint = grid.point(point);
1350 /* Sum the probability weights from the neighborhood of the given point */
1351 double weightSum = 0;
1352 for (int neighbor : gridPoint.neighbor)
1354 /* No convolution is required along the lambda dimension. */
1355 if (pointsHaveDifferentLambda(grid, point, neighbor))
1359 double logWeight = biasedLogWeightFromPoint(
1360 dimParams, points_, grid, neighbor, points_[neighbor].bias(), coordValue, {}, point);
1361 weightSum += std::exp(logWeight);
1364 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1365 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1368 void BiasState::sampleProbabilityWeights(const BiasGrid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1370 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1372 /* Save weights for next update */
1373 for (size_t n = 0; n < neighbor.size(); n++)
1375 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1378 /* Update the local update range. Two corner points define this rectangular
1379 * domain. We need to choose two new corner points such that the new domain
1380 * contains both the old update range and the current neighborhood.
1381 * In the simplest case when an update is performed every sample,
1382 * the update range would simply equal the current neighborhood.
1384 int neighborStart = neighbor[0];
1385 int neighborLast = neighbor[neighbor.size() - 1];
1386 for (int d = 0; d < grid.numDimensions(); d++)
1388 int origin = grid.point(neighborStart).index[d];
1389 int last = grid.point(neighborLast).index[d];
1393 /* Unwrap if wrapped around the boundary (only happens for periodic
1394 * boundaries). This has been already for the stored index interval.
1396 /* TODO: what we want to do is to find the smallest the update
1397 * interval that contains all points that need to be updated.
1398 * This amounts to combining two intervals, the current
1399 * [origin, end] update interval and the new touched neighborhood
1400 * into a new interval that contains all points from both the old
1403 * For periodic boundaries it becomes slightly more complicated
1404 * than for closed boundaries because then it needs not be
1405 * true that origin < end (so one can't simply relate the origin/end
1406 * in the min()/max() below). The strategy here is to choose the
1407 * origin closest to a reference point (index 0) and then unwrap
1408 * the end index if needed and choose the largest end index.
1409 * This ensures that both intervals are in the new interval
1410 * but it's not necessarily the smallest.
1411 * Currently we solve this by going through each possibility
1412 * and checking them.
1414 last += grid.axis(d).numPointsInPeriod();
1417 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1418 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1422 void BiasState::sampleCoordAndPmf(const std::vector<DimParams>& dimParams,
1423 const BiasGrid& grid,
1424 gmx::ArrayRef<const double> probWeightNeighbor,
1425 double convolvedBias)
1427 /* Sampling-based deconvolution extracting the PMF.
1428 * Update the PMF histogram with the current coordinate value.
1430 * Because of the finite width of the harmonic potential, the free energy
1431 * defined for each coordinate point does not exactly equal that of the
1432 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1433 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1434 * reweighted histogram of the coordinate value. Strictly, this relies on
1435 * the unknown normalization constant Z being either constant or known. Here,
1436 * neither is true except in the long simulation time limit. Empirically however,
1437 * it works (mainly because how the PMF histogram is rescaled).
1440 const int gridPointIndex = coordState_.gridpointIndex();
1441 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
1443 /* Update the PMF of points along a lambda axis with their bias. */
1444 if (lambdaAxisIndex)
1446 const std::vector<int>& neighbors = grid.point(gridPointIndex).neighbor;
1448 std::vector<double> lambdaMarginalDistribution =
1449 calculateFELambdaMarginalDistribution(grid, neighbors, probWeightNeighbor);
1451 awh_dvec coordValueAlongLambda = { coordState_.coordValue()[0],
1452 coordState_.coordValue()[1],
1453 coordState_.coordValue()[2],
1454 coordState_.coordValue()[3] };
1455 for (size_t i = 0; i < neighbors.size(); i++)
1457 const int neighbor = neighbors[i];
1459 if (pointsAlongLambdaAxis(grid, gridPointIndex, neighbor))
1461 const double neighborLambda = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
1462 if (neighbor == gridPointIndex)
1464 bias = convolvedBias;
1468 coordValueAlongLambda[lambdaAxisIndex.value()] = neighborLambda;
1469 bias = calcConvolvedBias(dimParams, grid, coordValueAlongLambda);
1472 const double probWeight = lambdaMarginalDistribution[neighborLambda];
1473 const double weightedBias = bias - std::log(std::max(probWeight, GMX_DOUBLE_MIN)); // avoid log(0)
1475 if (neighbor == gridPointIndex && grid.covers(coordState_.coordValue()))
1477 points_[neighbor].samplePmf(weightedBias);
1481 points_[neighbor].updatePmfUnvisited(weightedBias);
1488 /* Only save coordinate data that is in range (the given index is always
1489 * in range even if the coordinate value is not).
1491 if (grid.covers(coordState_.coordValue()))
1493 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1494 points_[gridPointIndex].samplePmf(convolvedBias);
1498 /* Save probability weights for the update */
1499 sampleProbabilityWeights(grid, probWeightNeighbor);
1502 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1504 biasHistory->pointState.resize(points_.size());
1507 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const BiasGrid& grid) const
1509 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1510 "The AWH history setup does not match the AWH state.");
1512 AwhBiasStateHistory* stateHistory = &biasHistory->state;
1513 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1515 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1517 AwhPointStateHistory* psh = &biasHistory->pointState[m];
1519 points_[m].storeState(psh);
1521 psh->weightsum_covering = weightSumCovering_[m];
1524 histogramSize_.storeState(stateHistory);
1526 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1527 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid, endUpdatelist_);
1530 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const BiasGrid& grid)
1532 const AwhBiasStateHistory& stateHistory = biasHistory.state;
1534 coordState_.restoreFromHistory(stateHistory);
1536 if (biasHistory.pointState.size() != points_.size())
1539 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1540 "Likely you provided a checkpoint from a different simulation."));
1542 for (size_t m = 0; m < points_.size(); m++)
1544 points_[m].setFromHistory(biasHistory.pointState[m]);
1547 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1549 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1552 histogramSize_.restoreFromHistory(stateHistory);
1554 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1555 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1558 void BiasState::broadcast(const t_commrec* commRecord)
1560 gmx_bcast(sizeof(coordState_), &coordState_, commRecord->mpi_comm_mygroup);
1562 gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord->mpi_comm_mygroup);
1564 gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(), commRecord->mpi_comm_mygroup);
1566 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord->mpi_comm_mygroup);
1569 void BiasState::setFreeEnergyToConvolvedPmf(ArrayRef<const DimParams> dimParams, const BiasGrid& grid)
1571 std::vector<float> convolvedPmf;
1573 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1575 for (size_t m = 0; m < points_.size(); m++)
1577 points_[m].setFreeEnergy(convolvedPmf[m]);
1582 * Count trailing data rows containing only zeros.
1584 * \param[in] data 2D data array.
1585 * \param[in] numRows Number of rows in array.
1586 * \param[in] numColumns Number of cols in array.
1587 * \returns the number of trailing zero rows.
1589 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1591 int numZeroRows = 0;
1592 for (int m = numRows - 1; m >= 0; m--)
1594 bool rowIsZero = true;
1595 for (int d = 0; d < numColumns; d++)
1597 if (data[d][m] != 0)
1606 /* At a row with non-zero data */
1611 /* Still at a zero data row, keep checking rows higher up. */
1620 * Initializes the PMF and target with data read from an input table.
1622 * \param[in] dimParams The dimension parameters.
1623 * \param[in] grid The grid.
1624 * \param[in] filename The filename to read PMF and target from.
1625 * \param[in] numBias Number of biases.
1626 * \param[in] biasIndex The index of the bias.
1627 * \param[in,out] pointState The state of the points in this bias.
1629 static void readUserPmfAndTargetDistribution(ArrayRef<const DimParams> dimParams,
1630 const BiasGrid& grid,
1631 const std::string& filename,
1634 std::vector<PointState>* pointState)
1636 /* Read the PMF and target distribution.
1637 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1638 base on the force constant. The free energy and target together determine the bias.
1640 std::string filenameModified(filename);
1643 size_t n = filenameModified.rfind('.');
1644 GMX_RELEASE_ASSERT(n != std::string::npos,
1645 "The filename should contain an extension starting with .");
1646 filenameModified.insert(n, formatString("%d", biasIndex));
1649 std::string correctFormatMessage = formatString(
1650 "%s is expected in the following format. "
1651 "The first ndim column(s) should contain the coordinate values for each point, "
1652 "each column containing values of one dimension (in ascending order). "
1653 "For a multidimensional coordinate, points should be listed "
1654 "in the order obtained by traversing lower dimensions first. "
1655 "E.g. for two-dimensional grid of size nxn: "
1656 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1657 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1658 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1659 "Make sure the input file ends with a new line but has no trailing new lines.",
1661 gmx::TextLineWrapper wrapper;
1662 wrapper.settings().setLineLength(c_linewidth);
1663 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1667 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1669 /* Check basic data properties here. BiasGrid takes care of more complicated things. */
1673 std::string mesg = gmx::formatString(
1674 "%s is empty!.\n\n%s", filename.c_str(), correctFormatMessage.c_str());
1675 GMX_THROW(InvalidInputError(mesg));
1678 /* Less than 2 points is not useful for PMF or target. */
1681 std::string mesg = gmx::formatString(
1682 "%s contains too few data points (%d)."
1683 "The minimum number of points is 2.",
1686 GMX_THROW(InvalidInputError(mesg));
1689 /* Make sure there are enough columns of data.
1691 Two formats are allowed. Either with columns {coords, PMF, target} or
1692 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1693 is how AWH output is written (x, y, z being other AWH variables). For this format,
1694 trailing columns are ignored.
1696 int columnIndexTarget;
1697 int numColumnsMin = dimParams.size() + 2;
1698 int columnIndexPmf = dimParams.size();
1699 if (numColumns == numColumnsMin)
1701 columnIndexTarget = columnIndexPmf + 1;
1705 columnIndexTarget = columnIndexPmf + 4;
1708 if (numColumns < numColumnsMin)
1710 std::string mesg = gmx::formatString(
1711 "The number of columns in %s should be at least %d."
1715 correctFormatMessage.c_str());
1716 GMX_THROW(InvalidInputError(mesg));
1719 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1720 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1721 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1722 if (numZeroRows > 1)
1724 std::string mesg = gmx::formatString(
1725 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1729 GMX_THROW(InvalidInputError(mesg));
1732 /* Convert from user units to internal units before sending the data of to grid. */
1733 for (size_t d = 0; d < dimParams.size(); d++)
1735 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1736 if (scalingFactor == 1)
1740 for (size_t m = 0; m < pointState->size(); m++)
1742 data[d][m] *= scalingFactor;
1746 /* Get a data point for each AWH grid point so that they all get data. */
1747 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1748 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1750 /* Extract the data for each grid point.
1751 * We check if the target distribution is zero for all points.
1753 bool targetDistributionIsZero = true;
1754 for (size_t m = 0; m < pointState->size(); m++)
1756 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1757 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1759 /* Check if the values are allowed. */
1762 std::string mesg = gmx::formatString(
1763 "Target distribution weight at point %zu (%g) in %s is negative.",
1767 GMX_THROW(InvalidInputError(mesg));
1771 targetDistributionIsZero = false;
1773 (*pointState)[m].setTargetConstantWeight(target);
1776 if (targetDistributionIsZero)
1779 gmx::formatString("The target weights given in column %d in %s are all 0",
1782 GMX_THROW(InvalidInputError(mesg));
1785 /* Free the arrays. */
1786 for (int m = 0; m < numColumns; m++)
1793 void BiasState::normalizePmf(int numSharingSims)
1795 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1796 Approximately (for large enough force constant) we should have:
1797 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1800 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1801 double expSumPmf = 0;
1803 for (const PointState& pointState : points_)
1805 if (pointState.inTargetRegion())
1807 expSumPmf += std::exp(pointState.logPmfSum());
1808 expSumF += std::exp(-pointState.freeEnergy());
1811 double numSamples = histogramSize_.histogramSize() / numSharingSims;
1814 double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1815 for (PointState& pointState : points_)
1817 if (pointState.inTargetRegion())
1819 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1824 void BiasState::initGridPointState(const AwhBiasParams& awhBiasParams,
1825 ArrayRef<const DimParams> dimParams,
1826 const BiasGrid& grid,
1827 const BiasParams& params,
1828 const std::string& filename,
1831 /* Modify PMF, free energy and the constant target distribution factor
1832 * to user input values if there is data given.
1834 if (awhBiasParams.bUserData)
1836 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1837 setFreeEnergyToConvolvedPmf(dimParams, grid);
1840 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1841 GMX_RELEASE_ASSERT(params.eTarget != AwhTargetType::LocalBoltzmann || points_[0].weightSumRef() != 0,
1842 "AWH reference weight histogram not initialized properly with local "
1843 "Boltzmann target distribution.");
1845 updateTargetDistribution(points_, params);
1847 for (PointState& pointState : points_)
1849 if (pointState.inTargetRegion())
1851 pointState.updateBias();
1855 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1856 pointState.setTargetToZero();
1860 /* Set the initial reference weighthistogram. */
1861 const double histogramSize = histogramSize_.histogramSize();
1862 for (auto& pointState : points_)
1864 pointState.setInitialReferenceWeightHistogram(histogramSize);
1867 /* Make sure the pmf is normalized consistently with the histogram size.
1868 Note: the target distribution and free energy need to be set here. */
1869 normalizePmf(params.numSharedUpdate);
1872 BiasState::BiasState(const AwhBiasParams& awhBiasParams,
1873 double histogramSizeInitial,
1874 ArrayRef<const DimParams> dimParams,
1875 const BiasGrid& grid) :
1876 coordState_(awhBiasParams, dimParams, grid),
1877 points_(grid.numPoints()),
1878 weightSumCovering_(grid.numPoints()),
1879 histogramSize_(awhBiasParams, histogramSizeInitial)
1881 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1882 for (size_t d = 0; d < dimParams.size(); d++)
1884 int index = grid.point(coordState_.gridpointIndex()).index[d];
1885 originUpdatelist_[d] = index;
1886 endUpdatelist_[d] = index;