2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief This file defines high-level functions for mdrun to compute
38 * energies and forces for listed interactions.
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_listed-forces
46 #include "listed-forces.h"
54 #include "gromacs/legacyheaders/disre.h"
55 #include "gromacs/legacyheaders/force.h"
56 #include "gromacs/legacyheaders/network.h"
57 #include "gromacs/legacyheaders/nrnb.h"
58 #include "gromacs/legacyheaders/orires.h"
59 #include "gromacs/legacyheaders/types/force_flags.h"
60 #include "gromacs/listed-forces/bonded.h"
61 #include "gromacs/listed-forces/position-restraints.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/pbcutil/ishift.h"
64 #include "gromacs/pbcutil/pbc.h"
65 #include "gromacs/simd/simd.h"
66 #include "gromacs/timing/wallcycle.h"
67 #include "gromacs/utility/smalloc.h"
69 #include "listed-internal.h"
75 /*! \brief Return true if ftype is an explicit pair-listed LJ or
76 * COULOMB interaction type: bonded LJ (usually 1-4), or special
77 * listed non-bonded for FEP. */
79 isPairInteraction(int ftype)
81 return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
84 /*! \brief Zero thread-local force-output buffers */
86 zero_thread_forces(f_thread_t *f_t, int n,
87 int nblock, int blocksize)
89 int b, a0, a1, a, i, j;
91 if (n > f_t->f_nalloc)
93 f_t->f_nalloc = over_alloc_large(n);
94 srenew(f_t->f, f_t->f_nalloc);
97 if (!bitmask_is_zero(f_t->red_mask))
99 for (b = 0; b < nblock; b++)
101 if (bitmask_is_set(f_t->red_mask, b))
104 a1 = std::min((b+1)*blocksize, n);
105 for (a = a0; a < a1; a++)
107 clear_rvec(f_t->f[a]);
112 for (i = 0; i < SHIFTS; i++)
114 clear_rvec(f_t->fshift[i]);
116 for (i = 0; i < F_NRE; i++)
120 for (i = 0; i < egNR; i++)
122 for (j = 0; j < f_t->grpp.nener; j++)
124 f_t->grpp.ener[i][j] = 0;
127 for (i = 0; i < efptNR; i++)
133 /*! \brief The max thread number is arbitrary, we used a fixed number
134 * to avoid memory management. Using more than 16 threads is probably
135 * never useful performance wise. */
136 #define MAX_BONDED_THREADS 256
138 /*! \brief Reduce thread-local force buffers */
140 reduce_thread_force_buffer(int n, rvec *f,
141 int nthreads, f_thread_t *f_t,
142 int nblock, int block_size)
146 if (nthreads > MAX_BONDED_THREADS)
148 gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
152 /* This reduction can run on any number of threads,
153 * independently of nthreads.
155 #pragma omp parallel for num_threads(nthreads) schedule(static)
156 for (b = 0; b < nblock; b++)
158 rvec *fp[MAX_BONDED_THREADS];
162 /* Determine which threads contribute to this block */
164 for (ft = 1; ft < nthreads; ft++)
166 if (bitmask_is_set(f_t[ft].red_mask, b))
168 fp[nfb++] = f_t[ft].f;
173 /* Reduce force buffers for threads that contribute */
175 a1 = (b+1)*block_size;
176 a1 = std::min(a1, n);
177 for (a = a0; a < a1; a++)
179 for (fb = 0; fb < nfb; fb++)
181 rvec_inc(f[a], fp[fb][a]);
188 /*! \brief Reduce thread-local forces */
190 reduce_thread_forces(int n, rvec *f, rvec *fshift,
191 real *ener, gmx_grppairener_t *grpp, real *dvdl,
192 int nthreads, f_thread_t *f_t,
193 int nblock, int block_size,
194 gmx_bool bCalcEnerVir,
199 /* Reduce the bonded force buffer */
200 reduce_thread_force_buffer(n, f, nthreads, f_t, nblock, block_size);
203 /* When necessary, reduce energy and virial using one thread only */
208 for (i = 0; i < SHIFTS; i++)
210 for (t = 1; t < nthreads; t++)
212 rvec_inc(fshift[i], f_t[t].fshift[i]);
215 for (i = 0; i < F_NRE; i++)
217 for (t = 1; t < nthreads; t++)
219 ener[i] += f_t[t].ener[i];
222 for (i = 0; i < egNR; i++)
224 for (j = 0; j < f_t[1].grpp.nener; j++)
226 for (t = 1; t < nthreads; t++)
229 grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
235 for (i = 0; i < efptNR; i++)
238 for (t = 1; t < nthreads; t++)
240 dvdl[i] += f_t[t].dvdl[i];
247 /*! \brief Calculate one element of the list of bonded interactions
250 calc_one_bond(int thread,
251 int ftype, const t_idef *idef,
252 const rvec x[], rvec f[], rvec fshift[],
254 const t_pbc *pbc, const t_graph *g,
255 gmx_grppairener_t *grpp,
257 real *lambda, real *dvdl,
258 const t_mdatoms *md, t_fcdata *fcd,
259 gmx_bool bCalcEnerVir,
260 int *global_atom_index)
262 #ifdef GMX_SIMD_HAVE_REAL
264 /* MSVC 2010 produces buggy SIMD PBC code, disable SIMD for MSVC <= 2010 */
265 #if defined _MSC_VER && _MSC_VER < 1700 && !defined(__ICL)
268 bUseSIMD = fr->use_simd_kernels;
272 int nat1, nbonds, efptFTYPE;
277 if (IS_RESTRAINT_TYPE(ftype))
279 efptFTYPE = efptRESTRAINT;
283 efptFTYPE = efptBONDED;
286 nat1 = interaction_function[ftype].nratoms + 1;
287 nbonds = idef->il[ftype].nr/nat1;
288 iatoms = idef->il[ftype].iatoms;
290 nb0 = idef->il_thread_division[ftype*(idef->nthreads+1)+thread];
291 nbn = idef->il_thread_division[ftype*(idef->nthreads+1)+thread+1] - nb0;
293 if (!isPairInteraction(ftype))
297 /* TODO The execution time for CMAP dihedrals might be
298 nice to account to its own subtimer, but first
299 wallcycle needs to be extended to support calling from
301 v = cmap_dihs(nbn, iatoms+nb0,
302 idef->iparams, &idef->cmap_grid,
304 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
305 md, fcd, global_atom_index);
307 #ifdef GMX_SIMD_HAVE_REAL
308 else if (ftype == F_ANGLES && bUseSIMD &&
309 !bCalcEnerVir && fr->efep == efepNO)
311 /* No energies, shift forces, dvdl */
312 angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
315 pbc, g, lambda[efptFTYPE], md, fcd,
320 else if (ftype == F_PDIHS &&
321 !bCalcEnerVir && fr->efep == efepNO)
323 /* No energies, shift forces, dvdl */
324 #ifdef GMX_SIMD_HAVE_REAL
327 pdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
330 pbc, g, lambda[efptFTYPE], md, fcd,
336 pdihs_noener(nbn, idef->il[ftype].iatoms+nb0,
339 pbc, g, lambda[efptFTYPE], md, fcd,
344 #ifdef GMX_SIMD_HAVE_REAL
345 else if (ftype == F_RBDIHS && bUseSIMD &&
346 !bCalcEnerVir && fr->efep == efepNO)
348 /* No energies, shift forces, dvdl */
349 rbdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
352 pbc, g, lambda[efptFTYPE], md, fcd,
359 v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
362 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
363 md, fcd, global_atom_index);
368 /* TODO The execution time for pairs might be nice to account
369 to its own subtimer, but first wallcycle needs to be
370 extended to support calling from multiple threads. */
371 v = do_pairs(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift,
372 pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
377 inc_nrnb(nrnb, interaction_function[ftype].nrnb_ind, nbonds);
386 ftype_is_bonded_potential(int ftype)
389 (interaction_function[ftype].flags & IF_BOND) &&
390 !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES) &&
391 (ftype < F_GB12 || ftype > F_GB14);
394 void calc_listed(const gmx_multisim_t *ms,
395 gmx_wallcycle *wcycle,
397 const rvec x[], history_t *hist,
398 rvec f[], t_forcerec *fr,
399 const struct t_pbc *pbc,
400 const struct t_pbc *pbc_full,
401 const struct t_graph *g,
402 gmx_enerdata_t *enerd, t_nrnb *nrnb,
405 t_fcdata *fcd, int *global_atom_index,
408 struct bonded_threading_t *bt;
409 gmx_bool bCalcEnerVir;
411 /* The dummy array is to have a place to store the dhdl at other values
412 of lambda, which will be thrown away in the end */
414 const t_pbc *pbc_null;
417 bt = fr->bonded_threading;
419 assert(bt->nthreads == idef->nthreads);
421 bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
423 for (i = 0; i < efptNR; i++)
439 p_graph(debug, "Bondage is fun", g);
443 if ((idef->il[F_POSRES].nr > 0) ||
444 (idef->il[F_FBPOSRES].nr > 0) ||
445 (idef->il[F_ORIRES].nr > 0) ||
446 (idef->il[F_DISRES].nr > 0))
448 /* TODO Use of restraints triggers further function calls
449 inside the loop over calc_one_bond(), but those are too
450 awkward to account to this subtimer properly in the present
451 code. We don't test / care much about performance with
452 restraints, anyway. */
453 wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
455 if (idef->il[F_POSRES].nr > 0)
457 posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr);
460 if (idef->il[F_FBPOSRES].nr > 0)
462 fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr);
465 /* Do pre force calculation stuff which might require communication */
466 if (idef->il[F_ORIRES].nr > 0)
468 enerd->term[F_ORIRESDEV] =
469 calc_orires_dev(ms, idef->il[F_ORIRES].nr,
470 idef->il[F_ORIRES].iatoms,
471 idef->iparams, md, x,
472 pbc_null, fcd, hist);
474 if (idef->il[F_DISRES].nr)
476 calc_disres_R_6(idef->il[F_DISRES].nr,
477 idef->il[F_DISRES].iatoms,
478 idef->iparams, x, pbc_null,
481 if (fcd->disres.nsystems > 1)
483 gmx_sum_sim(2*fcd->disres.nres, fcd->disres.Rt_6, ms);
488 wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
491 wallcycle_sub_start(wcycle, ewcsLISTED);
492 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
493 for (thread = 0; thread < bt->nthreads; thread++)
500 gmx_grppairener_t *grpp;
512 zero_thread_forces(&bt->f_t[thread], fr->natoms_force,
513 bt->red_nblock, 1<<bt->red_ashift);
515 ft = bt->f_t[thread].f;
516 fshift = bt->f_t[thread].fshift;
517 epot = bt->f_t[thread].ener;
518 grpp = &bt->f_t[thread].grpp;
519 dvdlt = bt->f_t[thread].dvdl;
521 /* Loop over all bonded force types to calculate the bonded forces */
522 for (ftype = 0; (ftype < F_NRE); ftype++)
524 if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
526 v = calc_one_bond(thread, ftype, idef, x,
527 ft, fshift, fr, pbc_null, g, grpp,
529 md, fcd, bCalcEnerVir,
535 wallcycle_sub_stop(wcycle, ewcsLISTED);
537 if (bt->nthreads > 1)
539 wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
540 reduce_thread_forces(fr->natoms_force, f, fr->fshift,
541 enerd->term, &enerd->grpp, dvdl,
542 bt->nthreads, bt->f_t,
543 bt->red_nblock, 1<<bt->red_ashift,
545 force_flags & GMX_FORCE_DHDL);
546 wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
549 /* Remaining code does not have enough flops to bother counting */
550 if (force_flags & GMX_FORCE_DHDL)
552 for (i = 0; i < efptNR; i++)
554 enerd->dvdl_nonlin[i] += dvdl[i];
558 /* Copy the sum of violations for the distance restraints from fcd */
561 enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
566 void calc_listed_lambda(const t_idef *idef,
569 const struct t_pbc *pbc, const struct t_graph *g,
570 gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
574 int *global_atom_index)
576 int ftype, nr_nonperturbed, nr;
578 real dvdl_dum[efptNR] = {0};
580 const t_pbc *pbc_null;
592 /* Copy the whole idef, so we can modify the contents locally */
594 idef_fe.nthreads = 1;
595 snew(idef_fe.il_thread_division, F_NRE*(idef_fe.nthreads+1));
597 /* We already have the forces, so we use temp buffers here */
598 snew(f, fr->natoms_force);
599 snew(fshift, SHIFTS);
601 /* Loop over all bonded force types to calculate the bonded energies */
602 for (ftype = 0; (ftype < F_NRE); ftype++)
604 if (ftype_is_bonded_potential(ftype))
606 /* Set the work range of thread 0 to the perturbed bondeds only */
607 nr_nonperturbed = idef->il[ftype].nr_nonperturbed;
608 nr = idef->il[ftype].nr;
609 idef_fe.il_thread_division[ftype*2+0] = nr_nonperturbed;
610 idef_fe.il_thread_division[ftype*2+1] = nr;
612 /* This is only to get the flop count correct */
613 idef_fe.il[ftype].nr = nr - nr_nonperturbed;
615 if (nr - nr_nonperturbed > 0)
617 v = calc_one_bond(0, ftype, &idef_fe,
618 x, f, fshift, fr, pbc_null, g,
619 grpp, nrnb, lambda, dvdl_dum,
630 sfree(idef_fe.il_thread_division);
634 do_force_listed(gmx_wallcycle *wcycle,
636 const t_lambda *fepvals,
637 const gmx_multisim_t *ms,
643 const struct t_pbc *pbc,
644 const struct t_graph *graph,
645 gmx_enerdata_t *enerd,
650 int *global_atom_index,
653 t_pbc pbc_full; /* Full PBC is needed for position restraints */
655 if (!(flags & GMX_FORCE_LISTED))
660 if ((idef->il[F_POSRES].nr > 0) ||
661 (idef->il[F_FBPOSRES].nr > 0))
663 /* Not enough flops to bother counting */
664 set_pbc(&pbc_full, fr->ePBC, box);
666 calc_listed(ms, wcycle, idef, x, hist, f, fr, pbc, &pbc_full,
667 graph, enerd, nrnb, lambda, md, fcd,
668 global_atom_index, flags);
670 /* Check if we have to determine energy differences
671 * at foreign lambda's.
673 if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL))
675 posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
677 if (idef->ilsort != ilsortNO_FE)
679 wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
680 if (idef->ilsort != ilsortFE_SORTED)
682 gmx_incons("The bonded interactions are not sorted for free energy");
684 for (int i = 0; i < enerd->n_lambda; i++)
688 reset_foreign_enerdata(enerd);
689 for (int j = 0; j < efptNR; j++)
691 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
693 calc_listed_lambda(idef, x, fr, pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
694 fcd, global_atom_index);
695 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
696 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
698 wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);