From 0a5a594e3eb093bc0e4183066ac5ccee1ba32c57 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Mon, 11 Aug 2014 18:11:17 +0200 Subject: [PATCH] Halved the cost of the pull communication With DD the PBC reference coordinates are now only communicated after DD repartitioning. This reduces the number of MPI_alltoall calls from 2 to 1 per step, which can significantly improve performance at high parallelization. Added a cycle counter for pull potential. Added checks for zero pull vectors to avoid div by 0. Change-Id: Ib89ba9e14eaa887f59a5087135580bc29a20d7d0 --- src/gromacs/legacyheaders/types/inputrec.h | 14 ++++---- src/gromacs/mdlib/sim_util.c | 14 ++++++-- src/gromacs/pulling/pull.c | 26 ++++++++++++++- src/gromacs/pulling/pullutil.c | 38 +++++++++++----------- src/gromacs/timing/wallcycle.c | 3 +- src/gromacs/timing/wallcycle.h | 3 +- 6 files changed, 67 insertions(+), 31 deletions(-) diff --git a/src/gromacs/legacyheaders/types/inputrec.h b/src/gromacs/legacyheaders/types/inputrec.h index bd9ea9818d..10fa0da0d3 100644 --- a/src/gromacs/legacyheaders/types/inputrec.h +++ b/src/gromacs/legacyheaders/types/inputrec.h @@ -225,13 +225,15 @@ typedef struct { t_pull_coord *coord; /* the pull coordinates */ /* Variables not present in mdp, but used at run time */ - t_pull_group *dyna; /* dynamic groups for use with local constraints */ - rvec *rbuf; /* COM calculation buffer */ - dvec *dbuf; /* COM calculation buffer */ - double *dbuf_cyl; /* cylinder ref. groups COM calculation buffer */ + t_pull_group *dyna; /* dynamic groups for use with local constraints */ + gmx_bool bSetPBCatoms; /* Do we need to set x_pbc for the groups? */ - FILE *out_x; /* output file for pull data */ - FILE *out_f; /* output file for pull data */ + rvec *rbuf; /* COM calculation buffer */ + dvec *dbuf; /* COM calculation buffer */ + double *dbuf_cyl; /* cylinder ref. groups COM calculation buffer */ + + FILE *out_x; /* output file for pull data */ + FILE *out_f; /* output file for pull data */ } t_pull; diff --git a/src/gromacs/mdlib/sim_util.c b/src/gromacs/mdlib/sim_util.c index a219905290..a904a2ef52 100644 --- a/src/gromacs/mdlib/sim_util.c +++ b/src/gromacs/mdlib/sim_util.c @@ -396,7 +396,8 @@ static void pull_potential_wrapper(FILE *fplog, t_mdatoms *mdatoms, gmx_enerdata_t *enerd, real *lambda, - double t) + double t, + gmx_wallcycle_t wcycle) { t_pbc pbc; real dvdl; @@ -406,6 +407,7 @@ static void pull_potential_wrapper(FILE *fplog, * The virial contribution is calculated directly, * which is why we call pull_potential after calc_virial. */ + wallcycle_start(wcycle, ewcPULLPOT); set_pbc(&pbc, ir->ePBC, box); dvdl = 0; enerd->term[F_COM_PULL] += @@ -416,6 +418,7 @@ static void pull_potential_wrapper(FILE *fplog, gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl); } enerd->dvdl_lin[efptRESTRAINT] += dvdl; + wallcycle_stop(wcycle, ewcPULLPOT); } static void pme_receive_force_ener(FILE *fplog, @@ -1527,8 +1530,12 @@ void do_force_cutsVERLET(FILE *fplog, t_commrec *cr, if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) { + /* Since the COM pulling is always done mass-weighted, no forces are + * applied to vsites and this call can be done after vsite spreading. + */ pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, - f, vir_force, mdatoms, enerd, lambda, t); + f, vir_force, mdatoms, enerd, lambda, t, + wcycle); } /* Add the forces from enforced rotation potentials (if any) */ @@ -2002,7 +2009,8 @@ void do_force_cutsGROUP(FILE *fplog, t_commrec *cr, if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) { pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, - f, vir_force, mdatoms, enerd, lambda, t); + f, vir_force, mdatoms, enerd, lambda, t, + wcycle); } /* Add the forces from enforced rotation potentials (if any) */ diff --git a/src/gromacs/pulling/pull.c b/src/gromacs/pulling/pull.c index 64bdbfd3a5..458a2fb472 100644 --- a/src/gromacs/pulling/pull.c +++ b/src/gromacs/pulling/pull.c @@ -502,6 +502,11 @@ static void do_constraint(t_pull *pull, t_pbc *pbc, r_ij[c][m] = a*pull->coord[c].vec[m]; } } + + if (dnorm2(r_ij[c]) == 0) + { + gmx_fatal(FARGS, "Distance for pull coordinate %d is zero with constraint pulling, which is not allowed.", c + 1); + } } bConverged_all = FALSE; @@ -741,6 +746,7 @@ static void do_constraint(t_pull *pull, t_pbc *pbc, double f_invr; /* Add the pull contribution to the virial */ + /* We have already checked above that r_ij[c] != 0 */ f_invr = pcrd->f_scal/dnorm(r_ij[c]); for (j = 0; j < DIM; j++) @@ -785,7 +791,19 @@ static void do_pull_pot(int ePull, { case epullgDIST: ndr = dnorm(pcrd->dr); - invdr = 1/ndr; + if (ndr > 0) + { + invdr = 1/ndr; + } + else + { + /* With an harmonic umbrella, the force is 0 at r=0, + * so we can set invdr to any value. + * With a constant force, the force at r=0 is not defined, + * so we zero it (this is anyhow a very rare event). + */ + invdr = 0; + } if (ePull == epullUMBRELLA) { pcrd->f_scal = -k*dev; @@ -932,6 +950,9 @@ void dd_make_local_pull_groups(gmx_domdec_t *dd, t_pull *pull, t_mdatoms *md) make_local_pull_group(ga2la, &pull->group[g], 0, md->homenr); } + + /* Since the PBC of atoms might have changed, we need to update the PBC */ + pull->bSetPBCatoms = TRUE; } static void init_pull_group_index(FILE *fplog, t_commrec *cr, @@ -1243,6 +1264,9 @@ void init_pull(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[], snew(pull->dyna, pull->ncoord); } + /* We still need to initialize the PBC reference coordinates */ + pull->bSetPBCatoms = TRUE; + /* Only do I/O when we are doing dynamics and if we are the MASTER */ pull->out_x = NULL; pull->out_f = NULL; diff --git a/src/gromacs/pulling/pullutil.c b/src/gromacs/pulling/pullutil.c index 6b30d111b1..19e762346c 100644 --- a/src/gromacs/pulling/pullutil.c +++ b/src/gromacs/pulling/pullutil.c @@ -59,26 +59,14 @@ #include "gmx_ga2la.h" static void pull_set_pbcatom(t_commrec *cr, t_pull_group *pgrp, - t_mdatoms *md, rvec *x, + rvec *x, rvec x_pbc) { int a, m; - if (cr && PAR(cr)) + if (cr != NULL && DOMAINDECOMP(cr)) { - if (DOMAINDECOMP(cr)) - { - if (!ga2la_get_home(cr->dd->ga2la, pgrp->pbcatom, &a)) - { - a = -1; - } - } - else - { - a = pgrp->pbcatom; - } - - if (a >= 0 && a < md->homenr) + if (ga2la_get_home(cr->dd->ga2la, pgrp->pbcatom, &a)) { copy_rvec(x[a], x_pbc); } @@ -94,7 +82,7 @@ static void pull_set_pbcatom(t_commrec *cr, t_pull_group *pgrp, } static void pull_set_pbcatoms(t_commrec *cr, t_pull *pull, - t_mdatoms *md, rvec *x, + rvec *x, rvec *x_pbc) { int g, n, m; @@ -108,7 +96,7 @@ static void pull_set_pbcatoms(t_commrec *cr, t_pull *pull, } else { - pull_set_pbcatom(cr, &pull->group[g], md, x, x_pbc[g]); + pull_set_pbcatom(cr, &pull->group[g], x, x_pbc[g]); for (m = 0; m < DIM; m++) { if (pull->dim[m] == 0) @@ -318,9 +306,21 @@ void pull_calc_coms(t_commrec *cr, snew(pull->dbuf, 3*pull->ngroup); } - if (pull->bRefAt) + if (pull->bRefAt && pull->bSetPBCatoms) { - pull_set_pbcatoms(cr, pull, md, x, pull->rbuf); + pull_set_pbcatoms(cr, pull, x, pull->rbuf); + + if (cr != NULL && DOMAINDECOMP(cr)) + { + /* We can keep these PBC reference coordinates fixed for nstlist + * steps, since atoms won't jump over PBC. + * This avoids a global reduction at the next nstlist-1 steps. + * Note that the exact values of the pbc reference coordinates + * are irrelevant, as long all atoms in the group are within + * half a box distance of the reference coordinate. + */ + pull->bSetPBCatoms = FALSE; + } } if (pull->cosdim >= 0) diff --git a/src/gromacs/timing/wallcycle.c b/src/gromacs/timing/wallcycle.c index 2570cfbb21..98c31b55b1 100644 --- a/src/gromacs/timing/wallcycle.c +++ b/src/gromacs/timing/wallcycle.c @@ -99,7 +99,8 @@ static const char *wcn[ewcNR] = "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh", "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec", "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.", - "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies", + "Vsite spread", "COM pull force", + "Write traj.", "Update", "Constraints", "Comm. energies", "Enforced rotation", "Add rot. forces", "Coordinate swapping", "IMD", "Test" }; diff --git a/src/gromacs/timing/wallcycle.h b/src/gromacs/timing/wallcycle.h index 45930f4412..f91a455fb5 100644 --- a/src/gromacs/timing/wallcycle.h +++ b/src/gromacs/timing/wallcycle.h @@ -51,7 +51,8 @@ enum { ewcMOVEX, ewcGB, ewcFORCE, ewcMOVEF, ewcPMEMESH, ewcPME_REDISTXF, ewcPME_SPREADGATHER, ewcPME_FFT, ewcPME_FFTCOMM, ewcLJPME, ewcPME_SOLVE, ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF, ewcWAIT_GPU_NB_NL, ewcWAIT_GPU_NB_L, ewcNB_XF_BUF_OPS, - ewcVSITESPREAD, ewcTRAJ, ewcUPDATE, ewcCONSTR, ewcMoveE, ewcROT, ewcROTadd, ewcSWAP, ewcIMD, + ewcVSITESPREAD, ewcPULLPOT, + ewcTRAJ, ewcUPDATE, ewcCONSTR, ewcMoveE, ewcROT, ewcROTadd, ewcSWAP, ewcIMD, ewcTEST, ewcNR }; -- 2.22.0