From 62518a2c1c18a34bb2d4c67a2bdea58f1669d0f8 Mon Sep 17 00:00:00 2001
From: Erik Lindahl <erik@kth.se>
Date: Tue, 12 Aug 2014 14:15:30 +0200
Subject: [PATCH] Avoid using function calls in OpenMP directives

The direct calls to gmx_omp_nthreads_get() that were
included in some OpenMP pragmas caused memory
corruption and later segfaults on PGI compilers. This
is likely a compiler bug, but we can work around it
by assigning the function return value to a variable
that we use in the pragma.

Such variables are unused when OpenMP is not in use, which might
offend some compiler some time, so adding a gmx_unused attribute is
useful. However, uncrustify needs to be taught about our custom
attributes, which is also done here.

Change-Id: I3b482bdc2401b40a043975ffd4a741f65efd0cfc
---
 admin/uncrustify.cfg                                     | 9 +++++++++
 src/gromacs/mdlib/force.c                                | 4 +++-
 src/gromacs/mdlib/mdatom.c                               | 4 +++-
 src/gromacs/mdlib/minimize.c                             | 4 +++-
 .../nbnxn_kernel_simd_template.c.pre                     | 4 +++-
 src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c       | 4 +++-
 .../nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c     | 4 +++-
 .../mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c | 4 +++-
 src/gromacs/mdlib/nbnxn_search.c                         | 8 ++++++--
 src/gromacs/mdlib/update.c                               | 4 +++-
 10 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/admin/uncrustify.cfg b/admin/uncrustify.cfg
index d89351a7e6..f7308115b2 100644
--- a/admin/uncrustify.cfg
+++ b/admin/uncrustify.cfg
@@ -1575,3 +1575,12 @@ pp_define_at_level                       = false    # false/true
 # all tokens are separated by any mix of ',' commas, '=' equal signs
 # and whitespace (space, tab)
 #
+
+# Teach uncrustify about the GROMACS attribute aliases that we use
+# to hide compiler differences. This means that declarations like
+#
+# int i, j;
+# int nthreads gmx_unused;
+#
+# does not align i with gmx_unused.
+set ATTRIBUTE gmx_unused gmx_inline gmx_restrict
diff --git a/src/gromacs/mdlib/force.c b/src/gromacs/mdlib/force.c
index 632c2f3a46..d4e6445491 100644
--- a/src/gromacs/mdlib/force.c
+++ b/src/gromacs/mdlib/force.c
@@ -117,9 +117,11 @@ static void reduce_thread_forces(int n, rvec *f,
                                  int nthreads, f_thread_t *f_t)
 {
     int t, i;
+    int nthreads_loop gmx_unused;
 
     /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
+    nthreads_loop = gmx_omp_nthreads_get(emntBonded);
+#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
     for (i = 0; i < n; i++)
     {
         for (t = 1; t < nthreads; t++)
diff --git a/src/gromacs/mdlib/mdatom.c b/src/gromacs/mdlib/mdatom.c
index 9c0c5943d5..b8d51a76da 100644
--- a/src/gromacs/mdlib/mdatom.c
+++ b/src/gromacs/mdlib/mdatom.c
@@ -121,6 +121,7 @@ void atoms2md(gmx_mtop_t *mtop, t_inputrec *ir,
     t_grpopts            *opts;
     gmx_groups_t         *groups;
     gmx_molblock_t       *molblock;
+    int                   nthreads gmx_unused;
 
     bLJPME = EVDW_PME(ir->vdwtype);
 
@@ -233,7 +234,8 @@ void atoms2md(gmx_mtop_t *mtop, t_inputrec *ir,
 
     alook = gmx_mtop_atomlookup_init(mtop);
 
-#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntDefault)) schedule(static)
+    nthreads = gmx_omp_nthreads_get(emntDefault);
+#pragma omp parallel for num_threads(nthreads) schedule(static)
     for (i = 0; i < md->nr; i++)
     {
         int      g, ag, molb;
diff --git a/src/gromacs/mdlib/minimize.c b/src/gromacs/mdlib/minimize.c
index 3cc1bbc8a0..69008f53fa 100644
--- a/src/gromacs/mdlib/minimize.c
+++ b/src/gromacs/mdlib/minimize.c
@@ -550,6 +550,7 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
     int      start, end;
     rvec    *x1, *x2;
     real     dvdl_constr;
+    int      nthreads gmx_unused;
 
     s1 = &ems1->s;
     s2 = &ems2->s;
@@ -587,7 +588,8 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
     x1 = s1->x;
     x2 = s2->x;
 
-#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
+    nthreads = gmx_omp_nthreads_get(emntUpdate);
+#pragma omp parallel num_threads(nthreads)
     {
         int gf, i, m;
 
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre
index a3ede7190f..2aa4fa9fb5 100644
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre
@@ -138,6 +138,7 @@ void
     nbnxn_pairlist_t **nbl;
     int                coulkt, vdwkt = 0;
     int                nb;
+    int                nthreads gmx_unused;
 
     nnbl = nbl_list->nnbl;
     nbl  = nbl_list->nbl;
@@ -209,7 +210,8 @@ void
         gmx_incons("Unsupported VdW interaction type");
     }}
 
-#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+    nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
     for (nb = 0; nb < nnbl; nb++)
     {{
         nbnxn_atomdata_output_t *out;
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c
index 0be4be644d..a104652965 100644
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c
@@ -182,6 +182,7 @@ nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list,
     int                coult;
     int                vdwt;
     int                nb;
+    int                nthreads gmx_unused;
 
     nnbl = nbl_list->nnbl;
     nbl  = nbl_list->nbl;
@@ -239,7 +240,8 @@ nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list,
         gmx_incons("Unsupported vdwtype in nbnxn reference kernel");
     }
 
-#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+    nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
     for (nb = 0; nb < nnbl; nb++)
     {
         nbnxn_atomdata_output_t *out;
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c
index 45f09bc462..35181d43eb 100644
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c
@@ -273,6 +273,7 @@ nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
     nbnxn_pairlist_t **nbl;
     int                coulkt, vdwkt = 0;
     int                nb;
+    int                nthreads gmx_unused;
 
     nnbl = nbl_list->nnbl;
     nbl  = nbl_list->nbl;
@@ -344,7 +345,8 @@ nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
         gmx_incons("Unsupported VdW interaction type");
     }
 
-#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+    nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
     for (nb = 0; nb < nnbl; nb++)
     {
         nbnxn_atomdata_output_t *out;
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c
index 0684149bad..5974873a07 100644
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c
@@ -272,6 +272,7 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
     nbnxn_pairlist_t **nbl;
     int                coulkt, vdwkt = 0;
     int                nb;
+    int                nthreads gmx_unused;
 
     nnbl = nbl_list->nnbl;
     nbl  = nbl_list->nbl;
@@ -343,7 +344,8 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
         gmx_incons("Unsupported VdW interaction type");
     }
 
-#pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+    nthreads = gmx_omp_nthreads_get(emntNonbonded);
+#pragma omp parallel for schedule(static) num_threads(nthreads)
     for (nb = 0; nb < nnbl; nb++)
     {
         nbnxn_atomdata_output_t *out;
diff --git a/src/gromacs/mdlib/nbnxn_search.c b/src/gromacs/mdlib/nbnxn_search.c
index dc0b02013f..3114bebc22 100644
--- a/src/gromacs/mdlib/nbnxn_search.c
+++ b/src/gromacs/mdlib/nbnxn_search.c
@@ -1931,6 +1931,7 @@ void nbnxn_grid_add_simple(nbnxn_search_t    nbs,
     float        *bbcz;
     nbnxn_bb_t   *bb;
     int           ncd, sc;
+    int           nthreads gmx_unused;
 
     grid = &nbs->grid[0];
 
@@ -1957,7 +1958,8 @@ void nbnxn_grid_add_simple(nbnxn_search_t    nbs,
     bbcz = grid->bbcz_simple;
     bb   = grid->bb_simple;
 
-#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static)
+    nthreads = gmx_omp_nthreads_get(emntPairsearch);
+#pragma omp parallel for num_threads(nthreads) schedule(static)
     for (sc = 0; sc < grid->nc; sc++)
     {
         int c, tx, na;
@@ -4473,6 +4475,7 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl,
 {
     int nsci, ncj4, nexcl;
     int n, i;
+    int nthreads gmx_unused;
 
     if (nblc->bSimple)
     {
@@ -4513,7 +4516,8 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl,
     /* Each thread should copy its own data to the combined arrays,
      * as otherwise data will go back and forth between different caches.
      */
-#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static)
+    nthreads = gmx_omp_nthreads_get(emntPairsearch);
+#pragma omp parallel for num_threads(nthreads) schedule(static)
     for (n = 0; n < nnbl; n++)
     {
         int                     sci_offset;
diff --git a/src/gromacs/mdlib/update.c b/src/gromacs/mdlib/update.c
index 787135f715..93b7f59c1e 100644
--- a/src/gromacs/mdlib/update.c
+++ b/src/gromacs/mdlib/update.c
@@ -1771,7 +1771,9 @@ void update_constraints(FILE             *fplog,
         }
         else
         {
-#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntUpdate)) schedule(static)
+            nth = gmx_omp_nthreads_get(emntUpdate);
+
+#pragma omp parallel for num_threads(nth) schedule(static)
             for (i = start; i < nrend; i++)
             {
                 copy_rvec(upd->xp[i], state->x[i]);
-- 
2.22.0