From: Berk Hess Date: Wed, 7 Aug 2013 15:33:25 +0000 (+0200) Subject: fixed multiple distance restraints with OpenMP X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=1bc531070a94a4d967ad209df71a53069145a85e;p=alexxy%2Fgromacs.git fixed multiple distance restraints with OpenMP Distance restraints with multiple pairs (the same label) are no longer split over multiple OpenMP threads. Some (beneficial) reorganization of the bonded thread division was required to do this, most importantly: removed calc_one_bond_foreign. Fixes #1316 Change-Id: I88d8eafede5cbc26c19026a9272639e652f7abd7 --- diff --git a/include/bondf.h b/include/bondf.h index 8fd573f814..9f20232a54 100644 --- a/include/bondf.h +++ b/include/bondf.h @@ -157,13 +157,13 @@ t_ifunc tab_bonds, tab_angles, tab_dihs; t_ifunc polarize, anharm_polarize, water_pol, thole_pol, angres, angresz, dihres, unimplemented; -/* Initialize the setup for the bonded force buffer reduction - * over threads. This should be called each time the bonded setup - * changes; i.e. at start-up without domain decomposition and at DD. +/* Divided the bonded interactions over the threads, count=fr->nthreads + * and set up the bonded thread-force buffer reduction. + * This should be called each time the bonded setup changes; + * i.e. at start-up without domain decomposition and at DD. */ GMX_LIBGMX_EXPORT -void init_bonded_thread_force_reduction(t_forcerec *fr, - const t_idef *idef); +void setup_bonded_threading(t_forcerec *fr, t_idef *idef); #ifdef __cplusplus } diff --git a/include/types/idef.h b/include/types/idef.h index 5f4ce1e0b7..37fd68f01d 100644 --- a/include/types/idef.h +++ b/include/types/idef.h @@ -350,6 +350,9 @@ typedef struct t_ilist il[F_NRE]; int ilsort; + int nthreads; + int *il_thread_division; + int il_thread_division_nalloc; } t_idef; /* @@ -382,6 +385,17 @@ typedef struct * t_ilist il[F_NRE] * The list of interactions for each type. Note that some, * such as LJ and COUL will have 0 entries. + * int ilsort + * The state of the sorting of il, values are provided above. + * int nthreads + * The number of threads used to set il_thread_division. + * int *il_thread_division + * The division of the normal bonded interactions of threads. + * il_thread_division[ftype*(nthreads+1)+t] contains an index + * into il[ftype].iatoms; thread th operates on t=th to t=th+1. + * int il_thread_division_nalloc + * The allocated size of il_thread_division, + * should be at least F_NRE*(nthreads+1). */ typedef struct { diff --git a/src/gmxlib/bondfree.c b/src/gmxlib/bondfree.c index 11ac8c62af..8e47e527fe 100644 --- a/src/gmxlib/bondfree.c +++ b/src/gmxlib/bondfree.c @@ -40,6 +40,7 @@ #endif #include +#include #include "physics.h" #include "vec.h" #include "maths.h" @@ -3618,6 +3619,73 @@ real tab_dihs(int nbonds, return vtot; } +/* Return if this is a potential calculated in bondfree.c, + * i.e. an interaction that actually calculates a potential and + * works on multiple atoms (not e.g. a connection or a position restraint). + */ +static gmx_inline gmx_bool ftype_is_bonded_potential(int ftype) +{ + return + (interaction_function[ftype].flags & IF_BOND) && + !(ftype == F_CONNBONDS || ftype == F_POSRES) && + (ftype < F_GB12 || ftype > F_GB14); +} + +static void divide_bondeds_over_threads(t_idef *idef, int nthreads) +{ + int ftype; + int nat1; + int t; + int il_nr_thread; + + idef->nthreads = nthreads; + + if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc) + { + idef->il_thread_division_nalloc = F_NRE*(nthreads+1); + snew(idef->il_thread_division, idef->il_thread_division_nalloc); + } + + for (ftype = 0; ftype < F_NRE; ftype++) + { + if (ftype_is_bonded_potential(ftype)) + { + nat1 = interaction_function[ftype].nratoms + 1; + + for (t = 0; t <= nthreads; t++) + { + /* Divide the interactions equally over the threads. + * When the different types of bonded interactions + * are distributed roughly equally over the threads, + * this should lead to well localized output into + * the force buffer on each thread. + * If this is not the case, a more advanced scheme + * (not implemented yet) will do better. + */ + il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1; + + /* Ensure that distance restraint pairs with the same label + * end up on the same thread. + * This is slighlty tricky code, since the next for iteration + * may have an initial il_nr_thread lower than the final value + * in the previous iteration, but this will anyhow be increased + * to the approriate value again by this while loop. + */ + while (ftype == F_DISRES && + il_nr_thread > 0 && + il_nr_thread < idef->il[ftype].nr && + idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label == + idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label) + { + il_nr_thread += nat1; + } + + idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread; + } + } + } +} + static unsigned calc_bonded_reduction_mask(const t_idef *idef, int shift, @@ -3630,9 +3698,7 @@ calc_bonded_reduction_mask(const t_idef *idef, for (ftype = 0; ftype < F_NRE; ftype++) { - if (interaction_function[ftype].flags & IF_BOND && - !(ftype == F_CONNBONDS || ftype == F_POSRES) && - (ftypeF_GB14)) + if (ftype_is_bonded_potential(ftype)) { nb = idef->il[ftype].nr; if (nb > 0) @@ -3642,8 +3708,8 @@ calc_bonded_reduction_mask(const t_idef *idef, /* Divide this interaction equally over the threads. * This is not stored: should match division in calc_bonds. */ - nb0 = (((nb/nat1)* t )/nt)*nat1; - nb1 = (((nb/nat1)*(t+1))/nt)*nat1; + nb0 = idef->il_thread_division[ftype*(nt+1)+t]; + nb1 = idef->il_thread_division[ftype*(nt+1)+t+1]; for (i = nb0; i < nb1; i += nat1) { @@ -3659,14 +3725,20 @@ calc_bonded_reduction_mask(const t_idef *idef, return mask; } -void init_bonded_thread_force_reduction(t_forcerec *fr, - const t_idef *idef) +void setup_bonded_threading(t_forcerec *fr, t_idef *idef) { #define MAX_BLOCK_BITS 32 int t; int ctot, c, b; - if (fr->nthreads <= 1) +#ifndef NDEBUG + assert(fr->nthreads >= 1); +#endif + + /* Divide the bonded interaction over the threads */ + divide_bondeds_over_threads(idef, fr->nthreads); + + if (fr->nthreads == 1) { fr->red_nblock = 0; @@ -3890,14 +3962,14 @@ static real calc_one_bond(FILE *fplog, int thread, rvec x[], rvec f[], rvec fshift[], t_forcerec *fr, const t_pbc *pbc, const t_graph *g, - gmx_enerdata_t *enerd, gmx_grppairener_t *grpp, + gmx_grppairener_t *grpp, t_nrnb *nrnb, real *lambda, real *dvdl, const t_mdatoms *md, t_fcdata *fcd, gmx_bool bCalcEnerVir, int *global_atom_index, gmx_bool bPrintSepPot) { - int ind, nat1, nbonds, efptFTYPE; + int nat1, nbonds, efptFTYPE; real v = 0; t_iatom *iatoms; int nb0, nbn; @@ -3911,170 +3983,88 @@ static real calc_one_bond(FILE *fplog, int thread, efptFTYPE = efptBONDED; } - if (interaction_function[ftype].flags & IF_BOND && - !(ftype == F_CONNBONDS || ftype == F_POSRES)) - { - ind = interaction_function[ftype].nrnb_ind; - nat1 = interaction_function[ftype].nratoms + 1; - nbonds = idef->il[ftype].nr/nat1; - iatoms = idef->il[ftype].iatoms; + nat1 = interaction_function[ftype].nratoms + 1; + nbonds = idef->il[ftype].nr/nat1; + iatoms = idef->il[ftype].iatoms; - nb0 = ((nbonds* thread )/(fr->nthreads))*nat1; - nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0; + nb0 = idef->il_thread_division[ftype*(idef->nthreads+1)+thread]; + nbn = idef->il_thread_division[ftype*(idef->nthreads+1)+thread+1] - nb0; - if (!IS_LISTED_LJ_C(ftype)) + if (!IS_LISTED_LJ_C(ftype)) + { + if (ftype == F_CMAP) { - if (ftype == F_CMAP) - { - v = cmap_dihs(nbn, iatoms+nb0, - idef->iparams, &idef->cmap_grid, - (const rvec*)x, f, fshift, - pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), - md, fcd, global_atom_index); - } + v = cmap_dihs(nbn, iatoms+nb0, + idef->iparams, &idef->cmap_grid, + (const rvec*)x, f, fshift, + pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), + md, fcd, global_atom_index); + } #ifdef SIMD_BONDEDS - else if (ftype == F_ANGLES && - !bCalcEnerVir && fr->efep == efepNO) - { - /* No energies, shift forces, dvdl */ - angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0, - idef->iparams, - (const rvec*)x, f, - pbc, g, lambda[efptFTYPE], md, fcd, - global_atom_index); - v = 0; - } + else if (ftype == F_ANGLES && + !bCalcEnerVir && fr->efep == efepNO) + { + /* No energies, shift forces, dvdl */ + angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0, + idef->iparams, + (const rvec*)x, f, + pbc, g, lambda[efptFTYPE], md, fcd, + global_atom_index); + v = 0; + } #endif - else if (ftype == F_PDIHS && - !bCalcEnerVir && fr->efep == efepNO) - { - /* No energies, shift forces, dvdl */ + else if (ftype == F_PDIHS && + !bCalcEnerVir && fr->efep == efepNO) + { + /* No energies, shift forces, dvdl */ #ifndef SIMD_BONDEDS - pdihs_noener + pdihs_noener #else - pdihs_noener_simd + pdihs_noener_simd #endif - (nbn, idef->il[ftype].iatoms+nb0, - idef->iparams, - (const rvec*)x, f, - pbc, g, lambda[efptFTYPE], md, fcd, - global_atom_index); - v = 0; - } - else - { - v = interaction_function[ftype].ifunc(nbn, iatoms+nb0, - idef->iparams, - (const rvec*)x, f, fshift, - pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), - md, fcd, global_atom_index); - } - if (bPrintSepPot) - { - fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n", - interaction_function[ftype].longname, - nbonds/nat1, v, lambda[efptFTYPE]); - } + (nbn, idef->il[ftype].iatoms+nb0, + idef->iparams, + (const rvec*)x, f, + pbc, g, lambda[efptFTYPE], md, fcd, + global_atom_index); + v = 0; } else { - v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift, - pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index); - - if (bPrintSepPot) - { - fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n", - interaction_function[ftype].longname, - interaction_function[F_LJ14].longname, nbonds/nat1, dvdl[efptVDW]); - fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n", - interaction_function[ftype].longname, - interaction_function[F_COUL14].longname, nbonds/nat1, dvdl[efptCOUL]); - } + v = interaction_function[ftype].ifunc(nbn, iatoms+nb0, + idef->iparams, + (const rvec*)x, f, fshift, + pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), + md, fcd, global_atom_index); } - if (ind != -1 && thread == 0) + if (bPrintSepPot) { - inc_nrnb(nrnb, ind, nbonds); + fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n", + interaction_function[ftype].longname, + nbonds, v, lambda[efptFTYPE]); } } - - return v; -} - -/* WARNING! THIS FUNCTION MUST EXACTLY TRACK THE calc - function, or horrible things will happen when doing free energy - calculations! In a good coding world, this would not be a - different function, but for speed reasons, it needs to be made a - separate function. TODO for 5.0 - figure out a way to reorganize - to reduce duplication. - */ - -static real calc_one_bond_foreign(FILE *fplog, int ftype, const t_idef *idef, - rvec x[], rvec f[], t_forcerec *fr, - const t_pbc *pbc, const t_graph *g, - gmx_grppairener_t *grpp, t_nrnb *nrnb, - real *lambda, real *dvdl, - const t_mdatoms *md, t_fcdata *fcd, - int *global_atom_index, gmx_bool bPrintSepPot) -{ - int ind, nat1, nbonds, efptFTYPE, nbonds_np; - real v = 0; - t_iatom *iatoms; - - if (IS_RESTRAINT_TYPE(ftype)) - { - efptFTYPE = efptRESTRAINT; - } else { - efptFTYPE = efptBONDED; + v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift, + pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index); + + if (bPrintSepPot) + { + fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n", + interaction_function[ftype].longname, + interaction_function[F_LJ14].longname, nbonds, dvdl[efptVDW]); + fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n", + interaction_function[ftype].longname, + interaction_function[F_COUL14].longname, nbonds, dvdl[efptCOUL]); + } } - if (ftype < F_GB12 || ftype > F_GB14) + if (thread == 0) { - if (interaction_function[ftype].flags & IF_BOND && - !(ftype == F_CONNBONDS || ftype == F_POSRES)) - { - ind = interaction_function[ftype].nrnb_ind; - nat1 = interaction_function[ftype].nratoms+1; - nbonds_np = idef->il[ftype].nr_nonperturbed; - nbonds = idef->il[ftype].nr - nbonds_np; - iatoms = idef->il[ftype].iatoms + nbonds_np; - if (nbonds > 0) - { - if (!IS_LISTED_LJ_C(ftype)) - { - if (ftype == F_CMAP) - { - v = cmap_dihs(nbonds, iatoms, - idef->iparams, &idef->cmap_grid, - (const rvec*)x, f, fr->fshift, - pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, - global_atom_index); - } - else - { - v = interaction_function[ftype].ifunc(nbonds, iatoms, - idef->iparams, - (const rvec*)x, f, fr->fshift, - pbc, g, lambda[efptFTYPE], &dvdl[efptFTYPE], - md, fcd, global_atom_index); - } - } - else - { - v = do_nonbonded_listed(ftype, nbonds, iatoms, - idef->iparams, - (const rvec*)x, f, fr->fshift, - pbc, g, lambda, dvdl, - md, fr, grpp, global_atom_index); - } - if (ind != -1) - { - inc_nrnb(nrnb, ind, nbonds/nat1); - } - } - } + inc_nrnb(nrnb, interaction_function[ftype].nrnb_ind, nbonds); } + return v; } @@ -4099,6 +4089,10 @@ void calc_bonds(FILE *fplog, const gmx_multisim_t *ms, char buf[22]; int thread; +#ifndef NDEBUG + assert(fr->nthreads == idef->nthreads); +#endif + bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)); for (i = 0; i < efptNR; i++) @@ -4146,13 +4140,12 @@ void calc_bonds(FILE *fplog, const gmx_multisim_t *ms, #pragma omp parallel for num_threads(fr->nthreads) schedule(static) for (thread = 0; thread < fr->nthreads; thread++) { - int ftype, nbonds, ind, nat1; + int ftype; real *epot, v; /* thread stuff */ rvec *ft, *fshift; real *dvdlt; gmx_grppairener_t *grpp; - int nb0, nbn; if (thread == 0) { @@ -4176,17 +4169,14 @@ void calc_bonds(FILE *fplog, const gmx_multisim_t *ms, /* Loop over all bonded force types to calculate the bonded forces */ for (ftype = 0; (ftype < F_NRE); ftype++) { - if (idef->il[ftype].nr > 0 && - (interaction_function[ftype].flags & IF_BOND) && - (ftype < F_GB12 || ftype > F_GB14) && - !(ftype == F_CONNBONDS || ftype == F_POSRES)) + if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype)) { v = calc_one_bond(fplog, thread, ftype, idef, x, - ft, fshift, fr, pbc_null, g, enerd, grpp, + ft, fshift, fr, pbc_null, g, grpp, nrnb, lambda, dvdlt, md, fcd, bCalcEnerVir, global_atom_index, bPrintSepPot); - epot[ftype] += v; + epot[ftype] += v; } } } @@ -4226,12 +4216,12 @@ void calc_bonds_lambda(FILE *fplog, t_fcdata *fcd, int *global_atom_index) { - int i, ftype, nbonds_np, nbonds, ind, nat; - real v, dr, dr2; + int i, ftype, nr_nonperturbed, nr; + real v; real dvdl_dum[efptNR]; - rvec *f, *fshift_orig; + rvec *f, *fshift; const t_pbc *pbc_null; - t_iatom *iatom_fe; + t_idef idef_fe; if (fr->bMolPBC) { @@ -4242,21 +4232,43 @@ void calc_bonds_lambda(FILE *fplog, pbc_null = NULL; } + /* Copy the whole idef, so we can modify the contents locally */ + idef_fe = *idef; + idef_fe.nthreads = 1; + snew(idef_fe.il_thread_division, F_NRE*(idef_fe.nthreads+1)); + + /* We already have the forces, so we use temp buffers here */ snew(f, fr->natoms_force); - /* We want to preserve the fshift array in forcerec */ - fshift_orig = fr->fshift; - snew(fr->fshift, SHIFTS); + snew(fshift, SHIFTS); - /* Loop over all bonded force types to calculate the bonded forces */ + /* Loop over all bonded force types to calculate the bonded energies */ for (ftype = 0; (ftype < F_NRE); ftype++) { - v = calc_one_bond_foreign(fplog, ftype, idef, x, - f, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum, - md, fcd, global_atom_index, FALSE); - epot[ftype] += v; + if (ftype_is_bonded_potential(ftype)) + { + /* Set the work range of thread 0 to the perturbed bondeds only */ + nr_nonperturbed = idef->il[ftype].nr_nonperturbed; + nr = idef->il[ftype].nr; + idef_fe.il_thread_division[ftype*2+0] = nr_nonperturbed; + idef_fe.il_thread_division[ftype*2+1] = nr; + + /* This is only to get the flop count correct */ + idef_fe.il[ftype].nr = nr - nr_nonperturbed; + + if (nr - nr_nonperturbed > 0) + { + v = calc_one_bond(fplog, 0, ftype, &idef_fe, + x, f, fshift, fr, pbc_null, g, + grpp, nrnb, lambda, dvdl_dum, + md, fcd, TRUE, + global_atom_index, FALSE); + epot[ftype] += v; + } + } } - sfree(fr->fshift); - fr->fshift = fshift_orig; + sfree(fshift); sfree(f); + + sfree(idef_fe.il_thread_division); } diff --git a/src/kernel/md.c b/src/kernel/md.c index f82e6974ca..de36746cbe 100644 --- a/src/kernel/md.c +++ b/src/kernel/md.c @@ -432,7 +432,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], make_local_shells(cr, mdatoms, shellfc); } - init_bonded_thread_force_reduction(fr, &top->idef); + setup_bonded_threading(fr, &top->idef); if (ir->pull && PAR(cr)) { diff --git a/src/mdlib/domdec.c b/src/mdlib/domdec.c index 010ab094dc..046ef51330 100644 --- a/src/mdlib/domdec.c +++ b/src/mdlib/domdec.c @@ -9676,7 +9676,7 @@ void dd_partition_system(FILE *fplog, make_local_gb(cr, fr->born, ir->gb_algorithm); } - init_bonded_thread_force_reduction(fr, &top_local->idef); + setup_bonded_threading(fr, &top_local->idef); if (!(cr->duty & DUTY_PME)) { diff --git a/src/mdlib/minimize.c b/src/mdlib/minimize.c index 2dfe20bf49..5df3936cfb 100644 --- a/src/mdlib/minimize.c +++ b/src/mdlib/minimize.c @@ -387,7 +387,7 @@ void init_em(FILE *fplog, const char *title, forcerec_set_excl_load(fr, *top, cr); - init_bonded_thread_force_reduction(fr, &(*top)->idef); + setup_bonded_threading(fr, &(*top)->idef); if (ir->ePBC != epbcNONE && !fr->bMolPBC) {