From 62518a2c1c18a34bb2d4c67a2bdea58f1669d0f8 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Tue, 12 Aug 2014 14:15:30 +0200 Subject: [PATCH] Avoid using function calls in OpenMP directives The direct calls to gmx_omp_nthreads_get() that were included in some OpenMP pragmas caused memory corruption and later segfaults on PGI compilers. This is likely a compiler bug, but we can work around it by assigning the function return value to a variable that we use in the pragma. Such variables are unused when OpenMP is not in use, which might offend some compiler some time, so adding a gmx_unused attribute is useful. However, uncrustify needs to be taught about our custom attributes, which is also done here. Change-Id: I3b482bdc2401b40a043975ffd4a741f65efd0cfc --- admin/uncrustify.cfg | 9 +++++++++ src/gromacs/mdlib/force.c | 4 +++- src/gromacs/mdlib/mdatom.c | 4 +++- src/gromacs/mdlib/minimize.c | 4 +++- .../nbnxn_kernel_simd_template.c.pre | 4 +++- src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c | 4 +++- .../nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c | 4 +++- .../mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c | 4 +++- src/gromacs/mdlib/nbnxn_search.c | 8 ++++++-- src/gromacs/mdlib/update.c | 4 +++- 10 files changed, 39 insertions(+), 10 deletions(-) diff --git a/admin/uncrustify.cfg b/admin/uncrustify.cfg index d89351a7e6..f7308115b2 100644 --- a/admin/uncrustify.cfg +++ b/admin/uncrustify.cfg @@ -1575,3 +1575,12 @@ pp_define_at_level = false # false/true # all tokens are separated by any mix of ',' commas, '=' equal signs # and whitespace (space, tab) # + +# Teach uncrustify about the GROMACS attribute aliases that we use +# to hide compiler differences. This means that declarations like +# +# int i, j; +# int nthreads gmx_unused; +# +# does not align i with gmx_unused. +set ATTRIBUTE gmx_unused gmx_inline gmx_restrict diff --git a/src/gromacs/mdlib/force.c b/src/gromacs/mdlib/force.c index 632c2f3a46..d4e6445491 100644 --- a/src/gromacs/mdlib/force.c +++ b/src/gromacs/mdlib/force.c @@ -117,9 +117,11 @@ static void reduce_thread_forces(int n, rvec *f, int nthreads, f_thread_t *f_t) { int t, i; + int nthreads_loop gmx_unused; /* This reduction can run over any number of threads */ -#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) + nthreads_loop = gmx_omp_nthreads_get(emntBonded); +#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static) for (i = 0; i < n; i++) { for (t = 1; t < nthreads; t++) diff --git a/src/gromacs/mdlib/mdatom.c b/src/gromacs/mdlib/mdatom.c index 9c0c5943d5..b8d51a76da 100644 --- a/src/gromacs/mdlib/mdatom.c +++ b/src/gromacs/mdlib/mdatom.c @@ -121,6 +121,7 @@ void atoms2md(gmx_mtop_t *mtop, t_inputrec *ir, t_grpopts *opts; gmx_groups_t *groups; gmx_molblock_t *molblock; + int nthreads gmx_unused; bLJPME = EVDW_PME(ir->vdwtype); @@ -233,7 +234,8 @@ void atoms2md(gmx_mtop_t *mtop, t_inputrec *ir, alook = gmx_mtop_atomlookup_init(mtop); -#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntDefault)) schedule(static) + nthreads = gmx_omp_nthreads_get(emntDefault); +#pragma omp parallel for num_threads(nthreads) schedule(static) for (i = 0; i < md->nr; i++) { int g, ag, molb; diff --git a/src/gromacs/mdlib/minimize.c b/src/gromacs/mdlib/minimize.c index 3cc1bbc8a0..69008f53fa 100644 --- a/src/gromacs/mdlib/minimize.c +++ b/src/gromacs/mdlib/minimize.c @@ -550,6 +550,7 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, int start, end; rvec *x1, *x2; real dvdl_constr; + int nthreads gmx_unused; s1 = &ems1->s; s2 = &ems2->s; @@ -587,7 +588,8 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, x1 = s1->x; x2 = s2->x; -#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate)) + nthreads = gmx_omp_nthreads_get(emntUpdate); +#pragma omp parallel num_threads(nthreads) { int gf, i, m; diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre index a3ede7190f..2aa4fa9fb5 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre @@ -138,6 +138,7 @@ void nbnxn_pairlist_t **nbl; int coulkt, vdwkt = 0; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@ -209,7 +210,8 @@ void gmx_incons("Unsupported VdW interaction type"); }} -#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); +#pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) {{ nbnxn_atomdata_output_t *out; diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c index 0be4be644d..a104652965 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c @@ -182,6 +182,7 @@ nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list, int coult; int vdwt; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@ -239,7 +240,8 @@ nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list, gmx_incons("Unsupported vdwtype in nbnxn reference kernel"); } -#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); +#pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { nbnxn_atomdata_output_t *out; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c index 45f09bc462..35181d43eb 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c @@ -273,6 +273,7 @@ nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t gmx_unused *nbl_list, nbnxn_pairlist_t **nbl; int coulkt, vdwkt = 0; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@ -344,7 +345,8 @@ nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t gmx_unused *nbl_list, gmx_incons("Unsupported VdW interaction type"); } -#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); +#pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { nbnxn_atomdata_output_t *out; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c index 0684149bad..5974873a07 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c @@ -272,6 +272,7 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t gmx_unused *nbl_list, nbnxn_pairlist_t **nbl; int coulkt, vdwkt = 0; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@ -343,7 +344,8 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t gmx_unused *nbl_list, gmx_incons("Unsupported VdW interaction type"); } -#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); +#pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { nbnxn_atomdata_output_t *out; diff --git a/src/gromacs/mdlib/nbnxn_search.c b/src/gromacs/mdlib/nbnxn_search.c index dc0b02013f..3114bebc22 100644 --- a/src/gromacs/mdlib/nbnxn_search.c +++ b/src/gromacs/mdlib/nbnxn_search.c @@ -1931,6 +1931,7 @@ void nbnxn_grid_add_simple(nbnxn_search_t nbs, float *bbcz; nbnxn_bb_t *bb; int ncd, sc; + int nthreads gmx_unused; grid = &nbs->grid[0]; @@ -1957,7 +1958,8 @@ void nbnxn_grid_add_simple(nbnxn_search_t nbs, bbcz = grid->bbcz_simple; bb = grid->bb_simple; -#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static) + nthreads = gmx_omp_nthreads_get(emntPairsearch); +#pragma omp parallel for num_threads(nthreads) schedule(static) for (sc = 0; sc < grid->nc; sc++) { int c, tx, na; @@ -4473,6 +4475,7 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, { int nsci, ncj4, nexcl; int n, i; + int nthreads gmx_unused; if (nblc->bSimple) { @@ -4513,7 +4516,8 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, /* Each thread should copy its own data to the combined arrays, * as otherwise data will go back and forth between different caches. */ -#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static) + nthreads = gmx_omp_nthreads_get(emntPairsearch); +#pragma omp parallel for num_threads(nthreads) schedule(static) for (n = 0; n < nnbl; n++) { int sci_offset; diff --git a/src/gromacs/mdlib/update.c b/src/gromacs/mdlib/update.c index 787135f715..93b7f59c1e 100644 --- a/src/gromacs/mdlib/update.c +++ b/src/gromacs/mdlib/update.c @@ -1771,7 +1771,9 @@ void update_constraints(FILE *fplog, } else { -#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntUpdate)) schedule(static) + nth = gmx_omp_nthreads_get(emntUpdate); + +#pragma omp parallel for num_threads(nth) schedule(static) for (i = start; i < nrend; i++) { copy_rvec(upd->xp[i], state->x[i]); -- 2.22.0