2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief This file defines high-level functions for mdrun to compute
38 * energies and forces for listed interactions.
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_listed-forces
46 #include "listed-forces.h"
52 #include "gromacs/gmxlib/network.h"
53 #include "gromacs/gmxlib/nrnb.h"
54 #include "gromacs/listed-forces/bonded.h"
55 #include "gromacs/listed-forces/disre.h"
56 #include "gromacs/listed-forces/orires.h"
57 #include "gromacs/listed-forces/pairs.h"
58 #include "gromacs/listed-forces/position-restraints.h"
59 #include "gromacs/math/vec.h"
60 #include "gromacs/mdlib/force.h"
61 #include "gromacs/mdlib/force_flags.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/fcdata.h"
64 #include "gromacs/mdtypes/forcerec.h"
65 #include "gromacs/mdtypes/inputrec.h"
66 #include "gromacs/mdtypes/md_enums.h"
67 #include "gromacs/pbcutil/ishift.h"
68 #include "gromacs/pbcutil/pbc.h"
69 #include "gromacs/simd/simd.h"
70 #include "gromacs/timing/wallcycle.h"
71 #include "gromacs/topology/topology.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/smalloc.h"
76 #include "listed-internal.h"
81 /*! \brief Return true if ftype is an explicit pair-listed LJ or
82 * COULOMB interaction type: bonded LJ (usually 1-4), or special
83 * listed non-bonded for FEP. */
85 isPairInteraction(int ftype)
87 return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
90 /*! \brief Zero thread-local output buffers */
92 zero_thread_output(struct bonded_threading_t *bt, int thread)
99 f_thread_t *f_t = &bt->f_t[thread];
100 const int nelem_fa = sizeof(*f_t->f)/sizeof(real);
102 for (int i = 0; i < f_t->nblock_used; i++)
104 int a0 = f_t->block_index[i]*reduction_block_size;
105 int a1 = a0 + reduction_block_size;
106 for (int a = a0; a < a1; a++)
108 for (int d = 0; d < nelem_fa; d++)
115 for (int i = 0; i < SHIFTS; i++)
117 clear_rvec(f_t->fshift[i]);
119 for (int i = 0; i < F_NRE; i++)
123 for (int i = 0; i < egNR; i++)
125 for (int j = 0; j < f_t->grpp.nener; j++)
127 f_t->grpp.ener[i][j] = 0;
130 for (int i = 0; i < efptNR; i++)
136 /*! \brief The max thread number is arbitrary, we used a fixed number
137 * to avoid memory management. Using more than 16 threads is probably
138 * never useful performance wise. */
139 #define MAX_BONDED_THREADS 256
141 /*! \brief Reduce thread-local force buffers */
143 reduce_thread_forces(int n, rvec *f,
144 struct bonded_threading_t *bt,
147 if (nthreads > MAX_BONDED_THREADS)
149 gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
153 /* This reduction can run on any number of threads,
154 * independently of bt->nthreads.
155 * But if nthreads matches bt->nthreads (which it currently does)
156 * the uniform distribution of the touched blocks over nthreads will
157 * match the distribution of bonded over threads well in most cases,
158 * which means that threads mostly reduce their own data which increases
159 * the number of cache hits.
161 #pragma omp parallel for num_threads(nthreads) schedule(static)
162 for (int b = 0; b < bt->nblock_used; b++)
166 int ind = bt->block_index[b];
167 rvec4 *fp[MAX_BONDED_THREADS];
169 /* Determine which threads contribute to this block */
171 for (int ft = 0; ft < bt->nthreads; ft++)
173 if (bitmask_is_set(bt->mask[ind], ft))
175 fp[nfb++] = bt->f_t[ft].f;
180 /* Reduce force buffers for threads that contribute */
181 int a0 = ind *reduction_block_size;
182 int a1 = (ind + 1)*reduction_block_size;
183 /* It would be nice if we could pad f to avoid this min */
184 a1 = std::min(a1, n);
185 for (int a = a0; a < a1; a++)
187 for (int fb = 0; fb < nfb; fb++)
189 rvec_inc(f[a], fp[fb][a]);
194 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
198 /*! \brief Reduce thread-local forces, shift forces and energies */
200 reduce_thread_output(int n, rvec *f, rvec *fshift,
201 real *ener, gmx_grppairener_t *grpp, real *dvdl,
202 struct bonded_threading_t *bt,
203 gmx_bool bCalcEnerVir,
206 assert(bt->haveBondeds);
208 if (bt->nblock_used > 0)
210 /* Reduce the bonded force buffer */
211 reduce_thread_forces(n, f, bt, bt->nthreads);
214 /* When necessary, reduce energy and virial using one thread only */
215 if (bCalcEnerVir && bt->nthreads > 1)
217 f_thread_t *f_t = bt->f_t;
219 for (int i = 0; i < SHIFTS; i++)
221 for (int t = 1; t < bt->nthreads; t++)
223 rvec_inc(fshift[i], f_t[t].fshift[i]);
226 for (int i = 0; i < F_NRE; i++)
228 for (int t = 1; t < bt->nthreads; t++)
230 ener[i] += f_t[t].ener[i];
233 for (int i = 0; i < egNR; i++)
235 for (int j = 0; j < f_t[1].grpp.nener; j++)
237 for (int t = 1; t < bt->nthreads; t++)
239 grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
245 for (int i = 0; i < efptNR; i++)
248 for (int t = 1; t < bt->nthreads; t++)
250 dvdl[i] += f_t[t].dvdl[i];
257 /*! \brief Calculate one element of the list of bonded interactions
260 calc_one_bond(int thread,
261 int ftype, const t_idef *idef,
262 const rvec x[], rvec4 f[], rvec fshift[],
263 const t_forcerec *fr,
264 const t_pbc *pbc, const t_graph *g,
265 gmx_grppairener_t *grpp,
267 const real *lambda, real *dvdl,
268 const t_mdatoms *md, t_fcdata *fcd,
269 gmx_bool bCalcEnerVir,
270 int *global_atom_index)
272 #if GMX_SIMD_HAVE_REAL
273 bool bUseSIMD = fr->use_simd_kernels;
276 int nat1, nbonds, efptFTYPE;
281 if (IS_RESTRAINT_TYPE(ftype))
283 efptFTYPE = efptRESTRAINT;
287 efptFTYPE = efptBONDED;
290 nat1 = interaction_function[ftype].nratoms + 1;
291 nbonds = idef->il[ftype].nr/nat1;
292 iatoms = idef->il[ftype].iatoms;
294 nb0 = idef->il_thread_division[ftype*(idef->nthreads+1)+thread];
295 nbn = idef->il_thread_division[ftype*(idef->nthreads+1)+thread+1] - nb0;
297 if (!isPairInteraction(ftype))
301 /* TODO The execution time for CMAP dihedrals might be
302 nice to account to its own subtimer, but first
303 wallcycle needs to be extended to support calling from
305 v = cmap_dihs(nbn, iatoms+nb0,
306 idef->iparams, &idef->cmap_grid,
308 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
309 md, fcd, global_atom_index);
311 #if GMX_SIMD_HAVE_REAL
312 else if (ftype == F_ANGLES && bUseSIMD &&
313 !bCalcEnerVir && fr->efep == efepNO)
315 /* No energies, shift forces, dvdl */
316 angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
319 pbc, g, lambda[efptFTYPE], md, fcd,
324 else if (ftype == F_UREY_BRADLEY && bUseSIMD &&
325 !bCalcEnerVir && fr->efep == efepNO)
327 /* No energies, shift forces, dvdl */
328 urey_bradley_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
331 pbc, g, lambda[efptFTYPE], md, fcd,
336 else if (ftype == F_PDIHS &&
337 !bCalcEnerVir && fr->efep == efepNO)
339 /* No energies, shift forces, dvdl */
340 #if GMX_SIMD_HAVE_REAL
343 pdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
346 pbc, g, lambda[efptFTYPE], md, fcd,
352 pdihs_noener(nbn, idef->il[ftype].iatoms+nb0,
355 pbc, g, lambda[efptFTYPE], md, fcd,
360 #if GMX_SIMD_HAVE_REAL
361 else if (ftype == F_RBDIHS && bUseSIMD &&
362 !bCalcEnerVir && fr->efep == efepNO)
364 /* No energies, shift forces, dvdl */
365 rbdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
368 pbc, g, lambda[efptFTYPE], md, fcd,
375 v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
378 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
379 md, fcd, global_atom_index);
384 /* TODO The execution time for pairs might be nice to account
385 to its own subtimer, but first wallcycle needs to be
386 extended to support calling from multiple threads. */
387 do_pairs(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift,
388 pbc, g, lambda, dvdl, md, fr,
389 bCalcEnerVir, grpp, global_atom_index);
395 inc_nrnb(nrnb, interaction_function[ftype].nrnb_ind, nbonds);
404 ftype_is_bonded_potential(int ftype)
407 (interaction_function[ftype].flags & IF_BOND) &&
408 !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES);
411 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
414 calcBondedForces(const t_idef *idef,
416 const t_forcerec *fr,
417 const t_pbc *pbc_null,
419 gmx_enerdata_t *enerd,
425 gmx_bool bCalcEnerVir,
426 int *global_atom_index)
428 struct bonded_threading_t *bt = fr->bonded_threading;
430 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
431 for (int thread = 0; thread < bt->nthreads; thread++)
441 gmx_grppairener_t *grpp;
443 zero_thread_output(bt, thread);
445 ft = bt->f_t[thread].f;
456 fshift = bt->f_t[thread].fshift;
457 epot = bt->f_t[thread].ener;
458 grpp = &bt->f_t[thread].grpp;
459 dvdlt = bt->f_t[thread].dvdl;
461 /* Loop over all bonded force types to calculate the bonded forces */
462 for (ftype = 0; (ftype < F_NRE); ftype++)
464 if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
466 v = calc_one_bond(thread, ftype, idef, x,
467 ft, fshift, fr, pbc_null, g, grpp,
469 md, fcd, bCalcEnerVir,
475 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
479 void calc_listed(const t_commrec *cr,
480 struct gmx_wallcycle *wcycle,
482 const rvec x[], history_t *hist,
484 gmx::ForceWithVirial *forceWithVirial,
485 const t_forcerec *fr,
486 const struct t_pbc *pbc,
487 const struct t_pbc *pbc_full,
488 const struct t_graph *g,
489 gmx_enerdata_t *enerd, t_nrnb *nrnb,
492 t_fcdata *fcd, int *global_atom_index,
495 struct bonded_threading_t *bt;
496 gmx_bool bCalcEnerVir;
497 const t_pbc *pbc_null;
499 bt = fr->bonded_threading;
501 assert(bt->nthreads == idef->nthreads);
503 bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
514 if ((idef->il[F_POSRES].nr > 0) ||
515 (idef->il[F_FBPOSRES].nr > 0) ||
516 fcd->orires.nr > 0 ||
517 fcd->disres.nres > 0)
519 /* TODO Use of restraints triggers further function calls
520 inside the loop over calc_one_bond(), but those are too
521 awkward to account to this subtimer properly in the present
522 code. We don't test / care much about performance with
523 restraints, anyway. */
524 wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
526 if (idef->il[F_POSRES].nr > 0)
528 posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr,
532 if (idef->il[F_FBPOSRES].nr > 0)
534 fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr,
538 /* Do pre force calculation stuff which might require communication */
539 if (fcd->orires.nr > 0)
541 /* This assertion is to ensure we have whole molecules.
542 * Unfortunately we do not have an mdrun state variable that tells
543 * us if molecules in x are not broken over PBC, so we have to make
544 * do with checking graph!=nullptr, which should tell us if we made
545 * molecules whole before calling the current function.
547 GMX_RELEASE_ASSERT(fr->ePBC == epbcNONE || g != nullptr, "With orientation restraints molecules should be whole");
548 enerd->term[F_ORIRESDEV] =
549 calc_orires_dev(cr->ms, idef->il[F_ORIRES].nr,
550 idef->il[F_ORIRES].iatoms,
551 idef->iparams, md, x,
552 pbc_null, fcd, hist);
554 if (fcd->disres.nres > 0)
557 idef->il[F_DISRES].nr,
558 idef->il[F_DISRES].iatoms,
563 wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
568 wallcycle_sub_start(wcycle, ewcsLISTED);
569 /* The dummy array is to have a place to store the dhdl at other values
570 of lambda, which will be thrown away in the end */
571 real dvdl[efptNR] = {0};
572 calcBondedForces(idef, x, fr, pbc_null, g, enerd, nrnb, lambda, dvdl, md,
573 fcd, bCalcEnerVir, global_atom_index);
574 wallcycle_sub_stop(wcycle, ewcsLISTED);
576 wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
577 reduce_thread_output(fr->natoms_force, f, fr->fshift,
578 enerd->term, &enerd->grpp, dvdl,
581 force_flags & GMX_FORCE_DHDL);
583 if (force_flags & GMX_FORCE_DHDL)
585 for (int i = 0; i < efptNR; i++)
587 enerd->dvdl_nonlin[i] += dvdl[i];
590 wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
593 /* Copy the sum of violations for the distance restraints from fcd */
596 enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
600 void calc_listed_lambda(const t_idef *idef,
602 const t_forcerec *fr,
603 const struct t_pbc *pbc, const struct t_graph *g,
604 gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
608 int *global_atom_index)
610 int ftype, nr_nonperturbed, nr;
612 real dvdl_dum[efptNR] = {0};
615 const t_pbc *pbc_null;
627 /* Copy the whole idef, so we can modify the contents locally */
629 idef_fe.nthreads = 1;
630 snew(idef_fe.il_thread_division, F_NRE*(idef_fe.nthreads+1));
632 /* We already have the forces, so we use temp buffers here */
633 snew(f, fr->natoms_force);
634 snew(fshift, SHIFTS);
636 /* Loop over all bonded force types to calculate the bonded energies */
637 for (ftype = 0; (ftype < F_NRE); ftype++)
639 if (ftype_is_bonded_potential(ftype))
641 /* Set the work range of thread 0 to the perturbed bondeds only */
642 nr_nonperturbed = idef->il[ftype].nr_nonperturbed;
643 nr = idef->il[ftype].nr;
644 idef_fe.il_thread_division[ftype*2+0] = nr_nonperturbed;
645 idef_fe.il_thread_division[ftype*2+1] = nr;
647 /* This is only to get the flop count correct */
648 idef_fe.il[ftype].nr = nr - nr_nonperturbed;
650 if (nr - nr_nonperturbed > 0)
652 v = calc_one_bond(0, ftype, &idef_fe,
653 x, f, fshift, fr, pbc_null, g,
654 grpp, nrnb, lambda, dvdl_dum,
665 sfree(idef_fe.il_thread_division);
669 do_force_listed(struct gmx_wallcycle *wcycle,
671 const t_lambda *fepvals,
676 rvec *forceForUseWithShiftForces,
677 gmx::ForceWithVirial *forceWithVirial,
678 const t_forcerec *fr,
679 const struct t_pbc *pbc,
680 const struct t_graph *graph,
681 gmx_enerdata_t *enerd,
686 int *global_atom_index,
689 t_pbc pbc_full; /* Full PBC is needed for position restraints */
691 if (!(flags & GMX_FORCE_LISTED))
696 if ((idef->il[F_POSRES].nr > 0) ||
697 (idef->il[F_FBPOSRES].nr > 0))
699 /* Not enough flops to bother counting */
700 set_pbc(&pbc_full, fr->ePBC, box);
702 calc_listed(cr, wcycle, idef, x, hist,
703 forceForUseWithShiftForces, forceWithVirial,
705 graph, enerd, nrnb, lambda, md, fcd,
706 global_atom_index, flags);
708 /* Check if we have to determine energy differences
709 * at foreign lambda's.
711 if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL))
713 posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
715 if (idef->ilsort != ilsortNO_FE)
717 wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
718 if (idef->ilsort != ilsortFE_SORTED)
720 gmx_incons("The bonded interactions are not sorted for free energy");
722 for (int i = 0; i < enerd->n_lambda; i++)
726 reset_foreign_enerdata(enerd);
727 for (int j = 0; j < efptNR; j++)
729 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
731 calc_listed_lambda(idef, x, fr, pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
732 fcd, global_atom_index);
733 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
734 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
736 wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);