From cddf15cbba2a361a85676bfdb1b285f386d57f22 Mon Sep 17 00:00:00 2001
From: Berk Hess
Date: Thu, 21 Mar 2013 10:33:07 +0100
Subject: [PATCH] made PME work with a mix of 1 and more threads

Using a mix of 1 and more OpenMP threads on different MPI ranks
would make mdrun terminate with an MPI error.

Fixes #1171

Change-Id: Iffa16e18baf0f74be826b59503208dca01d1ec14
---
 src/mdlib/pme.c | 65 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 13 deletions(-)

diff --git a/src/mdlib/pme.c b/src/mdlib/pme.c
index 128f5e3602..d5113abdbe 100644
--- a/src/mdlib/pme.c
+++ b/src/mdlib/pme.c
@@ -271,7 +271,8 @@ typedef struct gmx_pme {
     MPI_Datatype rvec_mpi;      /* the pme vector's MPI type */
 #endif
 
-    int        nthread;        /* The number of threads doing PME */
+    gmx_bool   bUseThreads;    /* Does any of the PME ranks have nthread>1 ? */
+    int        nthread;        /* The number of threads doing PME on our rank */
 
     gmx_bool   bPPnode;        /* Node also does particle-particle forces */
     gmx_bool   bFEP;           /* Compute Free energy contribution */
@@ -1711,6 +1712,7 @@ static void make_subgrid_division(const ivec n, int ovl, int nthread,
 static void pmegrids_init(pmegrids_t *grids,
                           int nx, int ny, int nz, int nz_base,
                           int pme_order,
+                          gmx_bool bUseThreads,
                           int nthread,
                           int overlap_x,
                           int overlap_y)
@@ -1733,7 +1735,7 @@ static void pmegrids_init(pmegrids_t *grids,
 
     make_subgrid_division(n_base, pme_order-1, grids->nthread, grids->nc);
 
-    if (grids->nthread > 1)
+    if (bUseThreads)
     {
         ivec nst;
         int  gridsize;
@@ -1783,6 +1785,10 @@ static void pmegrids_init(pmegrids_t *grids,
             }
         }
     }
+    else
+    {
+        grids->grid_th = NULL;
+    }
 
     snew(grids->g2t, DIM);
     tfac = 1;
@@ -3063,7 +3069,7 @@ int gmx_pme_init(gmx_pme_t * pmedata,
 {
     gmx_pme_t pme = NULL;
 
-    pme_atomcomm_t *atc;
+    int use_threads, sum_use_threads;
     ivec ndata;
 
     if (debug)
@@ -3163,6 +3169,21 @@ int gmx_pme_init(gmx_pme_t * pmedata,
     pme->nthread = nthread;
 
+    /* Check if any of the PME MPI ranks uses threads */
+    use_threads = (pme->nthread > 1 ? 1 : 0);
+#ifdef GMX_MPI
+    if (pme->nnodes > 1)
+    {
+        MPI_Allreduce(&use_threads, &sum_use_threads, 1, MPI_INT,
+                      MPI_SUM, pme->mpi_comm);
+    }
+    else
+#endif
+    {
+        sum_use_threads = use_threads;
+    }
+    pme->bUseThreads = (sum_use_threads > 0);
+
     if (ir->ePBC == epbcSCREW)
     {
         gmx_fatal(FARGS, "pme does not (yet) work with pbc = screw");
     }
@@ -3267,7 +3288,7 @@ int gmx_pme_init(gmx_pme_t * pmedata,
     /* Check for a limitation of the (current) sum_fftgrid_dd code.
      * We only allow multiple communication pulses in dim 1, not in dim 0.
      */
-    if (pme->nthread > 1 && (pme->overlap[0].noverlap_nodes > 1 ||
+    if (pme->bUseThreads && (pme->overlap[0].noverlap_nodes > 1 ||
                              pme->nkx < pme->nnodes_major*pme->pme_order))
     {
         gmx_fatal(FARGS, "The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x and should be >= pme_order (%d). To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
@@ -3310,6 +3331,7 @@ int gmx_pme_init(gmx_pme_t * pmedata,
                   pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
                   pme->pmegrid_nz_base,
                   pme->pme_order,
+                  pme->bUseThreads,
                   pme->nthread,
                   pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
                   pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
@@ -3333,6 +3355,7 @@ int gmx_pme_init(gmx_pme_t * pmedata,
                       pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
                       pme->pmegrid_nz_base,
                       pme->pme_order,
+                      pme->bUseThreads,
                       pme->nthread,
                       pme->nkx % pme->nnodes_major != 0,
                       pme->nky % pme->nnodes_minor != 0);
@@ -3406,7 +3429,7 @@ static void reuse_pmegrids(const pmegrids_t *old, pmegrids_t *new)
     sfree_aligned(new->grid.grid);
     new->grid.grid = old->grid.grid;
 
-    if (new->nthread > 1 && new->nthread == old->nthread)
+    if (new->grid_th != NULL && new->nthread == old->nthread)
     {
         sfree_aligned(new->grid_all);
         for (t = 0; t < new->nthread; t++)
@@ -3937,22 +3960,34 @@ static void spread_on_grid(gmx_pme_t pme,
     for (thread = 0; thread < nthread; thread++)
     {
         splinedata_t *spline;
-        pmegrid_t *grid;
+        pmegrid_t *grid = NULL;
 
         /* make local bsplines */
-        if (grids == NULL || grids->nthread == 1)
+        if (grids == NULL || !pme->bUseThreads)
         {
             spline = &atc->spline[0];
             spline->n = atc->n;
-            grid = &grids->grid;
+            if (bSpread)
+            {
+                grid = &grids->grid;
+            }
         }
         else
         {
             spline = &atc->spline[thread];
-            make_thread_local_ind(atc, thread, spline);
+            if (grids->nthread == 1)
+            {
+                /* One thread, we operate on all charges */
+                spline->n = atc->n;
+            }
+            else
+            {
+                /* Get the indices our thread should operate on */
+                make_thread_local_ind(atc, thread, spline);
+            }
             grid = &grids->grid_th[thread];
         }
@@ -3971,7 +4006,7 @@ static void spread_on_grid(gmx_pme_t pme,
 #endif
             spread_q_bsplines_thread(grid, atc, spline, pme->spline_work);
 
-            if (grids->nthread > 1)
+            if (pme->bUseThreads)
             {
                 copy_local_grid(pme, grids, thread, fftgrid);
             }
@@ -3986,7 +4021,7 @@ static void spread_on_grid(gmx_pme_t pme,
         cs2 += (double)c2;
 #endif
 
-    if (bSpread && grids->nthread > 1)
+    if (bSpread && pme->bUseThreads)
     {
 #ifdef PME_TIME_THREADS
         c3 = omp_cyc_start();
@@ -4006,7 +4041,10 @@ static void spread_on_grid(gmx_pme_t pme,
 
         if (pme->nnodes > 1)
         {
-            /* Communicate the overlapping part of the fftgrid */
+            /* Communicate the overlapping part of the fftgrid.
+             * For this communication call we need to check pme->bUseThreads
+             * to have all ranks communicate here, regardless of pme->nthread.
+             */
             sum_fftgrid_dd(pme, fftgrid);
         }
     }
@@ -4101,6 +4139,7 @@ void gmx_pme_calc_energy(gmx_pme_t pme, int n, rvec *x, real *q, real *V)
     /* We only use the A-charges grid */
     grid = &pme->pmegridA;
 
+    /* Only calculate the spline coefficients, don't actually spread */
     spread_on_grid(pme, atc, NULL, TRUE, FALSE, pme->fftgridA);
 
     *V = gather_energy_bsplines(pme, grid->grid.grid, atc);
@@ -4421,7 +4460,7 @@ int gmx_pme_do(gmx_pme_t pme,
             inc_nrnb(nrnb, eNR_SPREADQBSP,
                      pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
 
-            if (pme->nthread == 1)
+            if (!pme->bUseThreads)
             {
                 wrap_periodic_pmegrid(pme, grid);
-- 
2.22.0
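The core of the change is that all PME ranks must agree on whether any of them is threaded, because the threaded and non-threaded code paths issue different grid-overlap communication calls; the patch establishes that agreement with an MPI_Allreduce over a per-rank flag and stores the result in pme->bUseThreads. The following is a minimal standalone sketch of that pattern, not GROMACS code: it assumes plain MPI plus OpenMP, and it uses MPI_COMM_WORLD and omp_get_max_threads() in place of pme->mpi_comm and pme->nthread. The file name in the build comment is hypothetical.

/*
 * Sketch of the "does any rank use threads?" check introduced by the patch.
 * Each rank contributes 1 if it runs more than one OpenMP thread; an
 * MPI_Allreduce with MPI_SUM gives every rank the same answer, so all
 * ranks can then take the same communication path.
 *
 * Build (hypothetical file name): mpicc -fopenmp any_rank_threaded.c
 */
#include <stdio.h>
#include <mpi.h>
#include <omp.h>

int main(int argc, char **argv)
{
    int nthread, use_threads, sum_use_threads, bUseThreads;

    MPI_Init(&argc, &argv);

    nthread     = omp_get_max_threads();   /* threads this rank would use */
    use_threads = (nthread > 1 ? 1 : 0);

    /* Sum the per-rank flags; a result > 0 means at least one rank is threaded */
    MPI_Allreduce(&use_threads, &sum_use_threads, 1, MPI_INT,
                  MPI_SUM, MPI_COMM_WORLD);

    bUseThreads = (sum_use_threads > 0);

    printf("this rank: %d thread(s), any rank threaded: %s\n",
           nthread, bUseThreads ? "yes" : "no");

    MPI_Finalize();
    return 0;
}

Summing plain ints (rather than reducing a gmx_bool with a logical OR) keeps the reduction datatype a standard MPI_INT, which is the same choice the patch makes with use_threads and sum_use_threads.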