/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */

#include "gromacs/domdec/dlbtiming.h"
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/ewald.h"
#include "gromacs/ewald/long_range_correction.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/math/vecdump.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/enerdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
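
/*! \brief Set all energy, dV/dlambda and virial accumulators of one thread to zero */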
static void clearEwaldThreadOutput(ewald_corr_thread_t* ewc_t)
{
    ewc_t->Vcorr_q                                        = 0;
    ewc_t->Vcorr_lj                                       = 0;
    ewc_t->dvdl[FreeEnergyPerturbationCouplingType::Coul] = 0;
    ewc_t->dvdl[FreeEnergyPerturbationCouplingType::Vdw]  = 0;
    clear_mat(ewc_t->vir_q);
    clear_mat(ewc_t->vir_lj);
}
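
/*! \brief Reduce the Ewald correction contributions of all threads into element 0 of \p ewc_t */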
static void reduceEwaldThreadOutput(int nthreads, gmx::ArrayRef<ewald_corr_thread_t> ewc_t)
{
    ewald_corr_thread_t& dest = ewc_t[0];

    for (int t = 1; t < nthreads; t++)
    {
        dest.Vcorr_q += ewc_t[t].Vcorr_q;
        dest.Vcorr_lj += ewc_t[t].Vcorr_lj;
        dest.dvdl[FreeEnergyPerturbationCouplingType::Coul] +=
                ewc_t[t].dvdl[FreeEnergyPerturbationCouplingType::Coul];
        dest.dvdl[FreeEnergyPerturbationCouplingType::Vdw] +=
                ewc_t[t].dvdl[FreeEnergyPerturbationCouplingType::Vdw];
        m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q);
        m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
    }
}
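
/* Illustrative usage sketch (not part of GROMACS; all caller-side variables
 * are assumed to exist with the obvious types):
 *
 *   CpuPpLongRangeNonbondeds longRange(
 *           0, ewaldCoeffQ, epsilonR, chargeC6Sum, eeltype, vdwtype, inputrec, nrnb, wcycle, fplog);
 *   longRange.updateAfterPartition(mdatoms);  // repeat after every DD repartitioning
 *   longRange.calculate(pmedata, commrec, coordinates, &forceWithVirial, enerd,
 *                       box, lambda, muTotal, stepWork, ddBalanceRegionHandler);
 *
 * updateAfterPartition() must run before calculate() whenever the local atom
 * set changes, because the per-atom views cached there alias t_mdatoms storage.
 */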
CpuPpLongRangeNonbondeds::CpuPpLongRangeNonbondeds(int numberOfTestParticles,
                                                   real ewaldCoeffQ,
                                                   real epsilonR,
                                                   gmx::ArrayRef<const double> chargeC6Sum,
                                                   CoulombInteractionType eeltype,
                                                   VanDerWaalsType vdwtype,
                                                   const t_inputrec& inputrec,
                                                   t_nrnb* nrnb,
                                                   gmx_wallcycle* wcycle,
                                                   FILE* fplog) :
    numTpiAtoms_(numberOfTestParticles),
    ewaldCoeffQ_(ewaldCoeffQ),
    chargeC6Sum_(chargeC6Sum),
    coulombInteractionType_(eeltype),
    vanDerWaalsType_(vdwtype),
    ewaldGeometry_(inputrec.ewald_geometry),
    epsilonSurface_(inputrec.epsilon_surface),
    haveEwaldSurfaceTerm_(haveEwaldSurfaceContribution(inputrec)),
    wallEwaldZfac_(inputrec.wall_ewald_zfac),
    havePbcXY2Walls_(inputrecPbcXY2Walls(&inputrec)),
    freeEnergyPerturbationType_(inputrec.efep),
    nrnb_(nrnb),
    wcycle_(wcycle)
{
    GMX_ASSERT(epsilonR == inputrec.epsilon_r,
               "Forcerec and inputrec relative dielectrics don't match");

    // Use the thread count for the bonded module since reducing CPU-side
    // non-bonded contributions does not currently have its own thread count.
    outputPerThread_.resize(gmx_omp_nthreads_get(ModuleMultiThread::Bonded));

    if (inputrec.coulombtype == CoulombInteractionType::Ewald)
    {
        ewaldTable_ = std::make_unique<gmx_ewald_tab_t>(inputrec, fplog);
    }
}

CpuPpLongRangeNonbondeds::~CpuPpLongRangeNonbondeds() = default;
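
/*! \brief Cache fresh per-atom parameter views from \p md.
 *
 * The stored ArrayRefs alias the t_mdatoms arrays, so this must be called
 * again after every repartitioning, before calculate() is used.
 */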
void CpuPpLongRangeNonbondeds::updateAfterPartition(const t_mdatoms& md)
{
    homenr_ = md.homenr;

    havePerturbed_ = md.nChargePerturbed != 0;
    chargeA_ = md.chargeA ? gmx::constArrayRefFromArray(md.chargeA, md.nr) : gmx::ArrayRef<const real>{};
    chargeB_ = md.chargeB ? gmx::constArrayRefFromArray(md.chargeB, md.nr) : gmx::ArrayRef<const real>{};
    sqrt_c6A_ = md.sqrt_c6A ? gmx::constArrayRefFromArray(md.sqrt_c6A, md.nr)
                            : gmx::ArrayRef<const real>{};
    sqrt_c6B_ = md.sqrt_c6B ? gmx::constArrayRefFromArray(md.sqrt_c6B, md.nr)
                            : gmx::ArrayRef<const real>{};
    sigmaA_ = md.sigmaA ? gmx::constArrayRefFromArray(md.sigmaA, md.nr) : gmx::ArrayRef<const real>{};
    sigmaB_ = md.sigmaB ? gmx::constArrayRefFromArray(md.sigmaB, md.nr) : gmx::ArrayRef<const real>{};
}
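
/*! \brief Compute the long-range Coulomb and LJ contributions on the CPU.
 *
 * Depending on the settings, this computes Ewald surface terms, the Ewald
 * net-charge correction, plain Ewald summation or CPU PME, and accumulates
 * the resulting energies, virials and dV/dlambda into \p enerd and
 * \p forceWithVirial.
 */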
void CpuPpLongRangeNonbondeds::calculate(gmx_pme_t* pmedata,
                                         const t_commrec* commrec,
                                         gmx::ArrayRef<const RVec> coordinates,
                                         gmx::ForceWithVirial* forceWithVirial,
                                         gmx_enerdata_t* enerd,
                                         const matrix box,
                                         gmx::ArrayRef<const real> lambda,
                                         gmx::ArrayRef<const gmx::RVec> mu_tot,
                                         const gmx::StepWorkload& stepWork,
                                         const DDBalanceRegionHandler& ddBalanceRegionHandler)
{
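    /* Reciprocal-space work is done on this rank only when it has the PME duty
     * and PME is configured to run on the CPU; otherwise separate PME ranks or
     * a GPU handle it. */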
    const bool computePmeOnCpu = (EEL_PME(coulombInteractionType_) || EVDW_PME(vanDerWaalsType_))
                                 && thisRankHasDuty(commrec, DUTY_PME)
                                 && (pme_run_mode(pmedata) == PmeRunMode::CPU);

    /* Do long-range electrostatics and/or LJ-PME
     * and compute PME surface terms when necessary.
     */
    if ((computePmeOnCpu || coulombInteractionType_ == CoulombInteractionType::Ewald || haveEwaldSurfaceTerm_)
        && stepWork.computeNonbondedForces)
    {
        real Vlr_q = 0, Vlr_lj = 0;
        /* We reduce all virial, dV/dlambda and energy contributions, except
         * for the reciprocal energies (Vlr_q, Vlr_lj), into the same struct.
         */
        ewald_corr_thread_t& ewaldOutput = outputPerThread_[0];
        clearEwaldThreadOutput(&ewaldOutput);

        if (EEL_PME_EWALD(coulombInteractionType_) || EVDW_PME(vanDerWaalsType_))
        {
            /* Calculate the Ewald surface force and energy contributions, when necessary */
            if (haveEwaldSurfaceTerm_)
            {
                wallcycle_sub_start(wcycle_, WallCycleSubCounter::EwaldCorrection);

                int nthreads = gmx::ssize(outputPerThread_);
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (int t = 0; t < nthreads; t++)
                {
                    try
                    {
                        ewald_corr_thread_t& ewc_t = outputPerThread_[t];
                        clearEwaldThreadOutput(&ewc_t);

                        /* Threading is only supported with the Verlet cut-off
                         * scheme and then only single-particle forces (no
                         * exclusion forces) are calculated, so we can store
                         * the forces in the normal, single forceWithVirial->force_ array.
                         */
                        forceWithVirial->force_,
                        lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
                        &ewc_t.dvdl[FreeEnergyPerturbationCouplingType::Coul]);
                    }
                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
                }

                reduceEwaldThreadOutput(nthreads, outputPerThread_);

                wallcycle_sub_stop(wcycle_, WallCycleSubCounter::EwaldCorrection);
            }

            if (EEL_PME_EWALD(coulombInteractionType_) && numTpiAtoms_ == 0)
            {
                /* This is not in a subcounter because it takes a
                 * negligible and constant-sized amount of time */
                ewaldOutput.Vcorr_q += ewald_charge_correction(
                        lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
                        &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul],
            }
        }

        if (computePmeOnCpu)
        {
            /* Do reciprocal PME for Coulomb and/or LJ. */
            assert(numTpiAtoms_ >= 0);
            if (numTpiAtoms_ == 0 || stepWork.stateChanged)
            {
                /* With domain decomposition we close the CPU-side load
                 * balancing region here, because PME does global
                 * communication that acts as a global barrier.
                 */
                ddBalanceRegionHandler.closeAfterForceComputationCpu();

                wallcycle_start(wcycle_, WallCycleCounter::PmeMesh);
                int status = gmx_pme_do(
                        gmx::constArrayRefFromArray(coordinates.data(), homenr_ - numTpiAtoms_),
                        forceWithVirial->force_,
                        haveDDAtomOrdering(*commrec) ? dd_pme_maxshift_x(*commrec->dd) : 0,
                        haveDDAtomOrdering(*commrec) ? dd_pme_maxshift_y(*commrec->dd) : 0,
                        lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
                        lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)],
                        &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul],
                        &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Vdw],
                wallcycle_stop(wcycle_, WallCycleCounter::PmeMesh);
                if (status != 0)
                {
                    gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
                }

                /* We should try to do as little computation after
                 * this as possible, because parallel PME synchronizes
                 * the nodes, so we want all load imbalance of the
                 * rest of the force calculation to be before the PME
                 * call. DD load balancing is done on the whole time
                 * of the force call (without PME).
                 */
            }

            if (numTpiAtoms_ > 0)
            {
                /* Determine the PME grid energy of the test molecule
                 * with the PME grid potential of the other charges.
                 */
                Vlr_q = gmx_pme_calc_energy(
                        coordinates.subArray(homenr_ - numTpiAtoms_, numTpiAtoms_),
                        chargeA_.subArray(homenr_ - numTpiAtoms_, numTpiAtoms_));
            }
        }

        if (coulombInteractionType_ == CoulombInteractionType::Ewald)
        {
            Vlr_q = do_ewald(havePbcXY2Walls_,
                             freeEnergyPerturbationType_,
                             forceWithVirial->force_,
                             lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
                             &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul],
        }

        /* Note that with separate PME nodes we get the real energies later */
        // TODO it would be simpler if we just accumulated a single
        // long-range virial contribution.
        forceWithVirial->addVirialContribution(ewaldOutput.vir_q);
        forceWithVirial->addVirialContribution(ewaldOutput.vir_lj);
        enerd->dvdl_lin[FreeEnergyPerturbationCouplingType::Coul] +=
                ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul];
        enerd->dvdl_lin[FreeEnergyPerturbationCouplingType::Vdw] +=
                ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Vdw];
        enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q;
        enerd->term[F_LJ_RECIP] = Vlr_lj + ewaldOutput.Vcorr_lj;
353 "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
356 enerd->term[F_COUL_RECIP]);
357 pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
359 "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
361 ewaldOutput.Vcorr_lj,
362 enerd->term[F_LJ_RECIP]);
363 pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);

    if (debug)
    {
        print_nrnb(debug, nrnb_);
    }
}