2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/domdec/dlbtiming.h"
47 #include "gromacs/domdec/domdec.h"
48 #include "gromacs/domdec/domdec_struct.h"
49 #include "gromacs/ewald/ewald.h"
50 #include "gromacs/ewald/long_range_correction.h"
51 #include "gromacs/ewald/pme.h"
52 #include "gromacs/gmxlib/network.h"
53 #include "gromacs/gmxlib/nrnb.h"
54 #include "gromacs/listed_forces/listed_forces.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/math/vecdump.h"
57 #include "gromacs/mdlib/forcerec_threading.h"
58 #include "gromacs/mdlib/qmmm.h"
59 #include "gromacs/mdlib/rf_util.h"
60 #include "gromacs/mdlib/wall.h"
61 #include "gromacs/mdtypes/commrec.h"
62 #include "gromacs/mdtypes/enerdata.h"
63 #include "gromacs/mdtypes/forceoutput.h"
64 #include "gromacs/mdtypes/forcerec.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/interaction_const.h"
67 #include "gromacs/mdtypes/md_enums.h"
68 #include "gromacs/mdtypes/mdatom.h"
69 #include "gromacs/mdtypes/simulation_workload.h"
70 #include "gromacs/pbcutil/ishift.h"
71 #include "gromacs/pbcutil/mshift.h"
72 #include "gromacs/pbcutil/pbc.h"
73 #include "gromacs/timing/wallcycle.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/smalloc.h"
78 static void clearEwaldThreadOutput(ewald_corr_thread_t* ewc_t)
82 ewc_t->dvdl[efptCOUL] = 0;
83 ewc_t->dvdl[efptVDW] = 0;
84 clear_mat(ewc_t->vir_q);
85 clear_mat(ewc_t->vir_lj);
88 static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t* ewc_t)
90 ewald_corr_thread_t& dest = ewc_t[0];
92 for (int t = 1; t < nthreads; t++)
94 dest.Vcorr_q += ewc_t[t].Vcorr_q;
95 dest.Vcorr_lj += ewc_t[t].Vcorr_lj;
96 dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
97 dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW];
98 m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q);
99 m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
// Evaluates the force and energy contributions that are handled directly in
// this routine: the QM/MM energy, wall interactions, listed interactions, and
// the long-range Ewald and PME terms (surface and charge corrections plus the
// reciprocal-space part). Forces are written through forceOutputs; energies,
// dV/dlambda values and per-lambda derivatives are stored in enerd.
//
// NOTE(review): not every parameter and guarding condition of this routine is
// visible in this listing; the notes below describe only the statements shown.
103 void do_force_lowlevel(t_forcerec* fr,
104 const t_inputrec* ir,
105 const InteractionDefinitions& idef,
107 const gmx_multisim_t* ms,
109 gmx_wallcycle_t wcycle,
111 gmx::ArrayRefWithPadding<gmx::RVec> coordinates,
113 gmx::ForceOutputs* forceOutputs,
114 gmx_enerdata_t* enerd,
118 const t_graph* graph,
120 const gmx::StepWorkload& stepWork,
121 const DDBalanceRegionHandler& ddBalanceRegionHandler)
123 // TODO: Replace all uses of x by const coordinates
124 rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
// Output object that receives the wall, Ewald-correction and reciprocal-space
// forces below, as well as the explicit virial contributions added near the end.
126 auto& forceWithVirial = forceOutputs->forceWithVirial();
128 /* do QMMM first if requested */
131 enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr->qr);
134 /* Call the short range functions all in one go. */
138 /* foreign lambda component for walls */
// do_walls returns a dV/dlambda value, which is added to the linear Van der Waals term.
139 real dvdl_walls = do_walls(*ir, *fr, box, *md, x, &forceWithVirial, lambda[efptVDW],
140 enerd->grpp.ener[egLJSR].data(), nrnb);
141 enerd->dvdl_lin[efptVDW] += dvdl_walls;
143 for (auto& dhdl : enerd->dhdlLambda)
149 /* Shift the coordinates. Must be done before listed forces and PPPM,
150 * but is also necessary for SHAKE and update, therefore it can NOT
151 * go when no listed forces have to be evaluated.
153 * The shifting and PBC code is deliberately not timed, since with
154 * the Verlet scheme it only takes non-zero time with triclinic
155 * boxes, and even then the time is around a factor of 100 less
156 * than the next smallest counter.
160 /* Here sometimes we would not need to shift with NBFonly,
161 * but we do so anyhow for consistency of the returned coordinates.
165 shift_self(graph, box, x);
168 inc_nrnb(nrnb, eNR_SHIFTX, 2 * graph->nnodes);
172 inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
179 /* Check whether we need to take into account PBC in listed interactions. */
180 const auto needPbcForListedForces =
181 fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, idef, *fcd);
182 if (needPbcForListedForces)
184 /* Since all atoms are in the rectangular or triclinic unit-cell,
185 * only single box vector shifts (2 in x) are required.
187 set_pbc_dd(&pbc, fr->pbcType, DOMAINDECOMP(cr) ? cr->dd->numCells : nullptr, TRUE, box);
// Listed interactions. With domain decomposition the global atom indices are
// passed along; otherwise nullptr is passed in their place.
190 do_force_listed(wcycle, box, ir->fepvals, cr, ms, idef, x, hist, forceOutputs, fr, &pbc,
191 graph, enerd, nrnb, lambda, md, fcd,
192 DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, stepWork);
// True only when Coulomb or LJ uses PME, this rank has the PME duty, and the
// PME run mode is CPU.
195 const bool computePmeOnCpu = (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
196 && thisRankHasDuty(cr, DUTY_PME)
197 && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);
199 const bool haveEwaldSurfaceTerm = haveEwaldSurfaceContribution(*ir);
201 /* Do long-range electrostatics and/or LJ-PME
202 * and compute PME surface terms when necessary.
204 if (computePmeOnCpu || fr->ic->eeltype == eelEWALD || haveEwaldSurfaceTerm)
// Reciprocal-space energies; these are kept in locals and combined with the
// correction energies when the energy terms are stored below.
207 real Vlr_q = 0, Vlr_lj = 0;
209 /* We reduce all virial, dV/dlambda and energy contributions, except
210 * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
// Entry 0 of fr->ewc_t doubles as the reduction target, so it is reset before use.
212 ewald_corr_thread_t& ewaldOutput = fr->ewc_t[0];
213 clearEwaldThreadOutput(&ewaldOutput);
215 if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
217 /* Calculate the Ewald surface force and energy contributions, when necessary */
218 if (haveEwaldSurfaceTerm)
220 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
// One loop iteration per Ewald-correction thread; each iteration writes its
// energy and dV/dlambda into its own fr->ewc_t[t] entry.
222 int nthreads = fr->nthread_ewc;
223 #pragma omp parallel for num_threads(nthreads) schedule(static)
224 for (int t = 0; t < nthreads; t++)
228 ewald_corr_thread_t& ewc_t = fr->ewc_t[t];
231 clearEwaldThreadOutput(&ewc_t);
234 /* Threading is only supported with the Verlet cut-off
235 * scheme and then only single particle forces (no
236 * exclusion forces) are calculated, so we can store
237 * the forces in the normal, single forceWithVirial->force_ array.
239 ewald_LRcorrection(md->homenr, cr, nthreads, t, *fr, *ir, md->chargeA,
240 md->chargeB, (md->nChargePerturbed != 0), x, box, mu_tot,
241 as_rvec_array(forceWithVirial.force_.data()),
242 &ewc_t.Vcorr_q, lambda[efptCOUL], &ewc_t.dvdl[efptCOUL]);
244 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
// Sum the per-thread results into fr->ewc_t[0], which ewaldOutput refers to.
248 reduceEwaldThreadOuput(nthreads, fr->ewc_t);
250 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
253 if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
255 /* This is not in a subcounter because it takes a
256 negligible and constant-sized amount of time */
257 ewaldOutput.Vcorr_q += ewald_charge_correction(
258 cr, fr, lambda[efptCOUL], box, &ewaldOutput.dvdl[efptCOUL], ewaldOutput.vir_q);
263 /* Do reciprocal PME for Coulomb and/or LJ. */
// The block below runs when fr->n_tpi is zero or the state changed on this step.
264 assert(fr->n_tpi >= 0);
265 if (fr->n_tpi == 0 || stepWork.stateChanged)
267 /* With domain decomposition we close the CPU side load
268 * balancing region here, because PME does global
269 * communication that acts as a global barrier.
271 ddBalanceRegionHandler.closeAfterForceComputationCpu();
// Timed under the ewcPMEMESH counter. The coordinate view passed below covers
// the first md->homenr - fr->n_tpi atoms; virials, energies and dV/dlambda are
// returned through ewaldOutput, Vlr_q and Vlr_lj.
273 wallcycle_start(wcycle, ewcPMEMESH);
276 gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(),
277 md->homenr - fr->n_tpi),
278 forceWithVirial.force_, md->chargeA, md->chargeB, md->sqrt_c6A,
279 md->sqrt_c6B, md->sigmaA, md->sigmaB, box, cr,
280 DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
281 DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle,
282 ewaldOutput.vir_q, ewaldOutput.vir_lj, &Vlr_q, &Vlr_lj,
283 lambda[efptCOUL], lambda[efptVDW], &ewaldOutput.dvdl[efptCOUL],
284 &ewaldOutput.dvdl[efptVDW], stepWork);
285 wallcycle_stop(wcycle, ewcPMEMESH);
288 gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
291 /* We should try to do as little computation after
292 * this as possible, because parallel PME synchronizes
293 * the nodes, so we want all load imbalance of the
294 * rest of the force calculation to be before the PME
295 * call. DD load balancing is done on the whole time
296 * of the force call (without PME).
// The coordinate and charge views passed below are restricted to the last
// fr->n_tpi atoms (starting at index md->homenr - fr->n_tpi).
301 /* Determine the PME grid energy of the test molecule
302 * with the PME grid potential of the other charges.
306 coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
307 gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
313 if (fr->ic->eeltype == eelEWALD)
// Plain Ewald: the value returned by do_ewald becomes the reciprocal Coulomb
// energy Vlr_q; its virial and dV/dlambda go into ewaldOutput.
315 Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial.force_.data()), md->chargeA,
316 md->chargeB, box, cr, md->homenr, ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
317 lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], fr->ewald_table);
320 /* Note that with separate PME nodes we get the real energies later */
321 // TODO it would be simpler if we just accumulated a single
322 // long-range virial contribution.
// Publish the accumulated results: both virials go to forceWithVirial, the
// dV/dlambda values are added to the linear terms, and each reciprocal energy
// term is the reciprocal-space energy plus the matching correction energy.
323 forceWithVirial.addVirialContribution(ewaldOutput.vir_q);
324 forceWithVirial.addVirialContribution(ewaldOutput.vir_lj);
325 enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
326 enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW];
327 enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q;
328 enerd->term[F_LJ_RECIP] = Vlr_lj + ewaldOutput.Vcorr_lj;
// Every entry of dhdlLambda receives the sum of the Van der Waals and Coulomb
// dV/dlambda values gathered in this section.
330 for (auto& dhdl : enerd->dhdlLambda)
332 dhdl += ewaldOutput.dvdl[efptVDW] + ewaldOutput.dvdl[efptCOUL];
// Diagnostics written to the debug stream: energies, virials and shift forces
// after the long-range corrections.
337 fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q,
338 ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
339 pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
340 rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
341 pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS);
342 fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj,
343 ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
344 pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
350 print_nrnb(debug, nrnb);
355 rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
356 pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS);