#endif
#include <math.h>
+#include <assert.h>
#include "physics.h"
#include "vec.h"
#include "maths.h"
return vtot;
}
+/* Return if this is a potential calculated in bondfree.c,
+ * i.e. an interaction that actually calculates a potential and
+ * works on multiple atoms (not e.g. a connection or a position restraint).
+ */
+static gmx_inline gmx_bool ftype_is_bonded_potential(int ftype)
+{
+ return
+ (interaction_function[ftype].flags & IF_BOND) &&
+ !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
+ (ftype < F_GB12 || ftype > F_GB14);
+}
+
+static void divide_bondeds_over_threads(t_idef *idef, int nthreads)
+{
+ int ftype;
+ int nat1;
+ int t;
+ int il_nr_thread;
+
+ idef->nthreads = nthreads;
+
+ if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc)
+ {
+ idef->il_thread_division_nalloc = F_NRE*(nthreads+1);
+ snew(idef->il_thread_division, idef->il_thread_division_nalloc);
+ }
+
+ for (ftype = 0; ftype < F_NRE; ftype++)
+ {
+ if (ftype_is_bonded_potential(ftype))
+ {
+ nat1 = interaction_function[ftype].nratoms + 1;
+
+ for (t = 0; t <= nthreads; t++)
+ {
+ /* Divide the interactions equally over the threads.
+ * When the different types of bonded interactions
+ * are distributed roughly equally over the threads,
+ * this should lead to well localized output into
+ * the force buffer on each thread.
+ * If this is not the case, a more advanced scheme
+ * (not implemented yet) will do better.
+ */
+ il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1;
+
+ /* Ensure that distance restraint pairs with the same label
+ * end up on the same thread.
+ * This is slighlty tricky code, since the next for iteration
+ * may have an initial il_nr_thread lower than the final value
+ * in the previous iteration, but this will anyhow be increased
+ * to the approriate value again by this while loop.
+ */
+ while (ftype == F_DISRES &&
+ il_nr_thread > 0 &&
+ il_nr_thread < idef->il[ftype].nr &&
+ idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label ==
+ idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label)
+ {
+ il_nr_thread += nat1;
+ }
+
+ idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread;
+ }
+ }
+ }
+}
+
static unsigned
calc_bonded_reduction_mask(const t_idef *idef,
int shift,
for (ftype = 0; ftype < F_NRE; ftype++)
{
- if (interaction_function[ftype].flags & IF_BOND &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
- (ftype<F_GB12 || ftype>F_GB14))
+ if (ftype_is_bonded_potential(ftype))
{
nb = idef->il[ftype].nr;
if (nb > 0)
/* Divide this interaction equally over the threads.
* This is not stored: should match division in calc_bonds.
*/
- nb0 = (((nb/nat1)* t )/nt)*nat1;
- nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
+ nb0 = idef->il_thread_division[ftype*(nt+1)+t];
+ nb1 = idef->il_thread_division[ftype*(nt+1)+t+1];
for (i = nb0; i < nb1; i += nat1)
{
return mask;
}
-void init_bonded_thread_force_reduction(t_forcerec *fr,
- const t_idef *idef)
+void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
{
#define MAX_BLOCK_BITS 32
int t;
int ctot, c, b;
- if (fr->nthreads <= 1)
+#ifndef NDEBUG
+ assert(fr->nthreads >= 1);
+#endif
+
+ /* Divide the bonded interaction over the threads */
+ divide_bondeds_over_threads(idef, fr->nthreads);
+
+ if (fr->nthreads == 1)
{
fr->red_nblock = 0;
rvec x[], rvec f[], rvec fshift[],
t_forcerec *fr,
const t_pbc *pbc, const t_graph *g,
- gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
+ gmx_grppairener_t *grpp,
t_nrnb *nrnb,
real *lambda, real *dvdl,
const t_mdatoms *md, t_fcdata *fcd,
gmx_bool bCalcEnerVir,
int *global_atom_index, gmx_bool bPrintSepPot)
{
- int ind, nat1, nbonds, efptFTYPE;
+ int nat1, nbonds, efptFTYPE;
real v = 0;
t_iatom *iatoms;
int nb0, nbn;
efptFTYPE = efptBONDED;
}
- if (interaction_function[ftype].flags & IF_BOND &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES))
- {
- ind = interaction_function[ftype].nrnb_ind;
- nat1 = interaction_function[ftype].nratoms + 1;
- nbonds = idef->il[ftype].nr/nat1;
- iatoms = idef->il[ftype].iatoms;
+ nat1 = interaction_function[ftype].nratoms + 1;
+ nbonds = idef->il[ftype].nr/nat1;
+ iatoms = idef->il[ftype].iatoms;
- nb0 = ((nbonds* thread )/(fr->nthreads))*nat1;
- nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
+ nb0 = idef->il_thread_division[ftype*(idef->nthreads+1)+thread];
+ nbn = idef->il_thread_division[ftype*(idef->nthreads+1)+thread+1] - nb0;
- if (!IS_LISTED_LJ_C(ftype))
+ if (!IS_LISTED_LJ_C(ftype))
+ {
+ if (ftype == F_CMAP)
{
- if (ftype == F_CMAP)
- {
- v = cmap_dihs(nbn, iatoms+nb0,
- idef->iparams, &idef->cmap_grid,
- (const rvec*)x, f, fshift,
- pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
- md, fcd, global_atom_index);
- }
+ v = cmap_dihs(nbn, iatoms+nb0,
+ idef->iparams, &idef->cmap_grid,
+ (const rvec*)x, f, fshift,
+ pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
+ md, fcd, global_atom_index);
+ }
#ifdef SIMD_BONDEDS
- else if (ftype == F_ANGLES &&
- !bCalcEnerVir && fr->efep == efepNO)
- {
- /* No energies, shift forces, dvdl */
- angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
- idef->iparams,
- (const rvec*)x, f,
- pbc, g, lambda[efptFTYPE], md, fcd,
- global_atom_index);
- v = 0;
- }
+ else if (ftype == F_ANGLES &&
+ !bCalcEnerVir && fr->efep == efepNO)
+ {
+ /* No energies, shift forces, dvdl */
+ angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f,
+ pbc, g, lambda[efptFTYPE], md, fcd,
+ global_atom_index);
+ v = 0;
+ }
#endif
- else if (ftype == F_PDIHS &&
- !bCalcEnerVir && fr->efep == efepNO)
- {
- /* No energies, shift forces, dvdl */
+ else if (ftype == F_PDIHS &&
+ !bCalcEnerVir && fr->efep == efepNO)
+ {
+ /* No energies, shift forces, dvdl */
#ifndef SIMD_BONDEDS
- pdihs_noener
+ pdihs_noener
#else
- pdihs_noener_simd
+ pdihs_noener_simd
#endif
- (nbn, idef->il[ftype].iatoms+nb0,
- idef->iparams,
- (const rvec*)x, f,
- pbc, g, lambda[efptFTYPE], md, fcd,
- global_atom_index);
- v = 0;
- }
- else
- {
- v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
- idef->iparams,
- (const rvec*)x, f, fshift,
- pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
- md, fcd, global_atom_index);
- }
- if (bPrintSepPot)
- {
- fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n",
- interaction_function[ftype].longname,
- nbonds/nat1, v, lambda[efptFTYPE]);
- }
+ (nbn, idef->il[ftype].iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f,
+ pbc, g, lambda[efptFTYPE], md, fcd,
+ global_atom_index);
+ v = 0;
}
else
{
- v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
- pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
-
- if (bPrintSepPot)
- {
- fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_LJ14].longname, nbonds/nat1, dvdl[efptVDW]);
- fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_COUL14].longname, nbonds/nat1, dvdl[efptCOUL]);
- }
+ v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f, fshift,
+ pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
+ md, fcd, global_atom_index);
}
- if (ind != -1 && thread == 0)
+ if (bPrintSepPot)
{
- inc_nrnb(nrnb, ind, nbonds);
+ fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n",
+ interaction_function[ftype].longname,
+ nbonds, v, lambda[efptFTYPE]);
}
}
-
- return v;
-}
-
-/* WARNING! THIS FUNCTION MUST EXACTLY TRACK THE calc
- function, or horrible things will happen when doing free energy
- calculations! In a good coding world, this would not be a
- different function, but for speed reasons, it needs to be made a
- separate function. TODO for 5.0 - figure out a way to reorganize
- to reduce duplication.
- */
-
-static real calc_one_bond_foreign(FILE *fplog, int ftype, const t_idef *idef,
- rvec x[], rvec f[], t_forcerec *fr,
- const t_pbc *pbc, const t_graph *g,
- gmx_grppairener_t *grpp, t_nrnb *nrnb,
- real *lambda, real *dvdl,
- const t_mdatoms *md, t_fcdata *fcd,
- int *global_atom_index, gmx_bool bPrintSepPot)
-{
- int ind, nat1, nbonds, efptFTYPE, nbonds_np;
- real v = 0;
- t_iatom *iatoms;
-
- if (IS_RESTRAINT_TYPE(ftype))
- {
- efptFTYPE = efptRESTRAINT;
- }
else
{
- efptFTYPE = efptBONDED;
+ v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
+ pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
+
+ if (bPrintSepPot)
+ {
+ fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
+ interaction_function[ftype].longname,
+ interaction_function[F_LJ14].longname, nbonds, dvdl[efptVDW]);
+ fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
+ interaction_function[ftype].longname,
+ interaction_function[F_COUL14].longname, nbonds, dvdl[efptCOUL]);
+ }
}
- if (ftype < F_GB12 || ftype > F_GB14)
+ if (thread == 0)
{
- if (interaction_function[ftype].flags & IF_BOND &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES))
- {
- ind = interaction_function[ftype].nrnb_ind;
- nat1 = interaction_function[ftype].nratoms+1;
- nbonds_np = idef->il[ftype].nr_nonperturbed;
- nbonds = idef->il[ftype].nr - nbonds_np;
- iatoms = idef->il[ftype].iatoms + nbonds_np;
- if (nbonds > 0)
- {
- if (!IS_LISTED_LJ_C(ftype))
- {
- if (ftype == F_CMAP)
- {
- v = cmap_dihs(nbonds, iatoms,
- idef->iparams, &idef->cmap_grid,
- (const rvec*)x, f, fr->fshift,
- pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
- global_atom_index);
- }
- else
- {
- v = interaction_function[ftype].ifunc(nbonds, iatoms,
- idef->iparams,
- (const rvec*)x, f, fr->fshift,
- pbc, g, lambda[efptFTYPE], &dvdl[efptFTYPE],
- md, fcd, global_atom_index);
- }
- }
- else
- {
- v = do_nonbonded_listed(ftype, nbonds, iatoms,
- idef->iparams,
- (const rvec*)x, f, fr->fshift,
- pbc, g, lambda, dvdl,
- md, fr, grpp, global_atom_index);
- }
- if (ind != -1)
- {
- inc_nrnb(nrnb, ind, nbonds/nat1);
- }
- }
- }
+ inc_nrnb(nrnb, interaction_function[ftype].nrnb_ind, nbonds);
}
+
return v;
}
char buf[22];
int thread;
+#ifndef NDEBUG
+ assert(fr->nthreads == idef->nthreads);
+#endif
+
bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
for (i = 0; i < efptNR; i++)
#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
for (thread = 0; thread < fr->nthreads; thread++)
{
- int ftype, nbonds, ind, nat1;
+ int ftype;
real *epot, v;
/* thread stuff */
rvec *ft, *fshift;
real *dvdlt;
gmx_grppairener_t *grpp;
- int nb0, nbn;
if (thread == 0)
{
/* Loop over all bonded force types to calculate the bonded forces */
for (ftype = 0; (ftype < F_NRE); ftype++)
{
- if (idef->il[ftype].nr > 0 &&
- (interaction_function[ftype].flags & IF_BOND) &&
- (ftype < F_GB12 || ftype > F_GB14) &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES))
+ if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
{
v = calc_one_bond(fplog, thread, ftype, idef, x,
- ft, fshift, fr, pbc_null, g, enerd, grpp,
+ ft, fshift, fr, pbc_null, g, grpp,
nrnb, lambda, dvdlt,
md, fcd, bCalcEnerVir,
global_atom_index, bPrintSepPot);
- epot[ftype] += v;
+ epot[ftype] += v;
}
}
}
t_fcdata *fcd,
int *global_atom_index)
{
- int i, ftype, nbonds_np, nbonds, ind, nat;
- real v, dr, dr2;
+ int i, ftype, nr_nonperturbed, nr;
+ real v;
real dvdl_dum[efptNR];
- rvec *f, *fshift_orig;
+ rvec *f, *fshift;
const t_pbc *pbc_null;
- t_iatom *iatom_fe;
+ t_idef idef_fe;
if (fr->bMolPBC)
{
pbc_null = NULL;
}
+ /* Copy the whole idef, so we can modify the contents locally */
+ idef_fe = *idef;
+ idef_fe.nthreads = 1;
+ snew(idef_fe.il_thread_division, F_NRE*(idef_fe.nthreads+1));
+
+ /* We already have the forces, so we use temp buffers here */
snew(f, fr->natoms_force);
- /* We want to preserve the fshift array in forcerec */
- fshift_orig = fr->fshift;
- snew(fr->fshift, SHIFTS);
+ snew(fshift, SHIFTS);
- /* Loop over all bonded force types to calculate the bonded forces */
+ /* Loop over all bonded force types to calculate the bonded energies */
for (ftype = 0; (ftype < F_NRE); ftype++)
{
- v = calc_one_bond_foreign(fplog, ftype, idef, x,
- f, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum,
- md, fcd, global_atom_index, FALSE);
- epot[ftype] += v;
+ if (ftype_is_bonded_potential(ftype))
+ {
+ /* Set the work range of thread 0 to the perturbed bondeds only */
+ nr_nonperturbed = idef->il[ftype].nr_nonperturbed;
+ nr = idef->il[ftype].nr;
+ idef_fe.il_thread_division[ftype*2+0] = nr_nonperturbed;
+ idef_fe.il_thread_division[ftype*2+1] = nr;
+
+ /* This is only to get the flop count correct */
+ idef_fe.il[ftype].nr = nr - nr_nonperturbed;
+
+ if (nr - nr_nonperturbed > 0)
+ {
+ v = calc_one_bond(fplog, 0, ftype, &idef_fe,
+ x, f, fshift, fr, pbc_null, g,
+ grpp, nrnb, lambda, dvdl_dum,
+ md, fcd, TRUE,
+ global_atom_index, FALSE);
+ epot[ftype] += v;
+ }
+ }
}
- sfree(fr->fshift);
- fr->fshift = fshift_orig;
+ sfree(fshift);
sfree(f);
+
+ sfree(idef_fe.il_thread_division);
}