fixed SD and BD integrator OpenMP performance
authorBerk Hess <hess@kth.se>
Wed, 5 Jun 2013 22:09:37 +0000 (00:09 +0200)
committerGerrit Code Review <gerrit@gerrit.gromacs.org>
Thu, 20 Jun 2013 15:03:15 +0000 (17:03 +0200)
SD and BD integrator always integrated single threaded.
Really fixes #1121

Change-Id: I2217c40e9c188c7cd57801e413750035c6488f56

src/mdlib/update.c

index 4c32f9661855df6c92560148cb0b1a93216fe588..bc18d8887f836bbe2375c3ebd913ab7dfef2a748 100644 (file)
@@ -496,14 +496,19 @@ static void init_multiple_gaussrand(gmx_stochd_t *sd)
         seed[i] = gmx_rng_uniform_uint32(sd->gaussrand[0]);
     }
 
-#pragma omp parallel num_threads(ngr)
+    if (ngr != gmx_omp_nthreads_get(emntUpdate))
+    {
+        gmx_incons("The number of Gaussian number generators should be equal to gmx_omp_nthreads_get(emntUpdate)");
+    }
+
+#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
     {
         int th;
 
         th = gmx_omp_get_thread_num();
         if (th > 0)
         {
-            /* Initialize on each thread to have thread-local memory alloced */
+            /* Initialize on each thread to get memory allocated thread-local */
             sd->gaussrand[th] = gmx_rng_init(seed[th]);
         }
     }
@@ -757,6 +762,31 @@ static void check_sd2_work_data_allocation(gmx_stochd_t *sd, int nrend)
     }
 }
 
+static void do_update_sd2_Tconsts(gmx_stochd_t *sd,
+                                  int ngtc,
+                                  const real tau_t[],
+                                  const real ref_t[])
+{
+    /* This is separated from the update below, because it is single threaded */
+    gmx_sd_const_t *sdc;
+    gmx_sd_sigma_t *sig;
+    int             gt;
+    real            kT;
+
+    sdc = sd->sdc;
+    sig = sd->sdsig;
+
+    for (gt = 0; gt < ngtc; gt++)
+    {
+        kT = BOLTZ*ref_t[gt];
+        /* The mass is encounted for later, since this differs per atom */
+        sig[gt].V  = sqrt(kT*(1-sdc[gt].em));
+        sig[gt].X  = sqrt(kT*sqr(tau_t[gt])*sdc[gt].c);
+        sig[gt].Yv = sqrt(kT*sdc[gt].b/sdc[gt].c);
+        sig[gt].Yx = sqrt(kT*sqr(tau_t[gt])*sdc[gt].b/(1-sdc[gt].em));
+    }
+}
+
 static void do_update_sd2(gmx_stochd_t *sd,
                           gmx_rng_t gaussrand,
                           gmx_bool bInitStep,
@@ -767,7 +797,7 @@ static void do_update_sd2(gmx_stochd_t *sd,
                           unsigned short cTC[],
                           rvec x[], rvec xprime[], rvec v[], rvec f[],
                           rvec sd_X[],
-                          int ngtc, real tau_t[], real ref_t[],
+                          const real tau_t[],
                           gmx_bool bFirstHalf)
 {
     gmx_sd_const_t *sdc;
@@ -786,19 +816,6 @@ static void do_update_sd2(gmx_stochd_t *sd,
     sig  = sd->sdsig;
     sd_V = sd->sd_V;
 
-    if (bFirstHalf)
-    {
-        for (n = 0; n < ngtc; n++)
-        {
-            kT = BOLTZ*ref_t[n];
-            /* The mass is encounted for later, since this differs per atom */
-            sig[n].V  = sqrt(kT*(1-sdc[n].em));
-            sig[n].X  = sqrt(kT*sqr(tau_t[n])*sdc[n].c);
-            sig[n].Yv = sqrt(kT*sdc[n].b/sdc[n].c);
-            sig[n].Yx = sqrt(kT*sqr(tau_t[n])*sdc[n].b/(1-sdc[n].em));
-        }
-    }
-
     for (n = start; n < nrend; n++)
     {
         ism = sqrt(invmass[n]);
@@ -869,13 +886,35 @@ static void do_update_sd2(gmx_stochd_t *sd,
     }
 }
 
+static void do_update_bd_Tconsts(double dt, real friction_coefficient,
+                                 int ngtc, const real ref_t[],
+                                 real *rf)
+{
+    /* This is separated from the update below, because it is single threaded */
+    int gt;
+
+    if (friction_coefficient != 0)
+    {
+        for (gt = 0; gt < ngtc; gt++)
+        {
+            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]/(friction_coefficient*dt));
+        }
+    }
+    else
+    {
+        for (gt = 0; gt < ngtc; gt++)
+        {
+            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]);
+        }
+    }
+}
+
 static void do_update_bd(int start, int nrend, double dt,
                          ivec nFreeze[],
                          real invmass[], unsigned short ptype[],
                          unsigned short cFREEZE[], unsigned short cTC[],
                          rvec x[], rvec xprime[], rvec v[],
                          rvec f[], real friction_coefficient,
-                         int ngtc, real tau_t[], real ref_t[],
                          real *rf, gmx_rng_t gaussrand)
 {
     /* note -- these appear to be full step velocities . . .  */
@@ -887,18 +926,8 @@ static void do_update_bd(int start, int nrend, double dt,
     if (friction_coefficient != 0)
     {
         invfr = 1.0/friction_coefficient;
-        for (n = 0; n < ngtc; n++)
-        {
-            rf[n] = sqrt(2.0*BOLTZ*ref_t[n]/(friction_coefficient*dt));
-        }
-    }
-    else
-    {
-        for (n = 0; n < ngtc; n++)
-        {
-            rf[n] = sqrt(2.0*BOLTZ*ref_t[n]);
-        }
     }
+
     for (n = start; (n < nrend); n++)
     {
         if (cFREEZE)
@@ -1639,8 +1668,8 @@ void update_constraints(FILE             *fplog,
                           md->invmass, md->ptype,
                           md->cFREEZE, md->cACC, md->cTC,
                           state->x, xprime, state->v, force, state->sd_X,
-                          inputrec->opts.ngtc, inputrec->opts.tau_t,
-                          inputrec->opts.ref_t, FALSE);
+                          inputrec->opts.tau_t,
+                          FALSE);
         }
         inc_nrnb(nrnb, eNR_UPDATE, homenr);
 
@@ -1884,22 +1913,23 @@ void update_coords(FILE             *fplog,
     dump_it_all(fplog, "Before update",
                 state->natoms, state->x, xprime, state->v, force);
 
-    if (EI_RANDOM(inputrec->eI))
+    if (inputrec->eI == eiSD2)
     {
-        /* We still need to take care of generating random seeds properly
-         * when multi-threading.
-         */
-        nth = 1;
+        check_sd2_work_data_allocation(upd->sd, nrend);
+
+        do_update_sd2_Tconsts(upd->sd,
+                              inputrec->opts.ngtc,
+                              inputrec->opts.tau_t,
+                              inputrec->opts.ref_t);
     }
-    else
+    if (inputrec->eI == eiBD)
     {
-        nth = gmx_omp_nthreads_get(emntUpdate);
+        do_update_bd_Tconsts(dt, inputrec->bd_fric,
+                             inputrec->opts.ngtc, inputrec->opts.ref_t,
+                             upd->sd->bd_rf);
     }
 
-    if (inputrec->eI == eiSD2)
-    {
-        check_sd2_work_data_allocation(upd->sd, nrend);
-    }
+    nth = gmx_omp_nthreads_get(emntUpdate);
 
 #pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
     for (th = 0; th < nth; th++)
@@ -1954,7 +1984,7 @@ void update_coords(FILE             *fplog,
                               md->invmass, md->ptype,
                               md->cFREEZE, md->cACC, md->cTC,
                               state->x, xprime, state->v, force, state->sd_X,
-                              inputrec->opts.ngtc, inputrec->opts.tau_t, inputrec->opts.ref_t,
+                              inputrec->opts.tau_t,
                               TRUE);
                 break;
             case (eiBD):
@@ -1963,7 +1993,6 @@ void update_coords(FILE             *fplog,
                              md->cFREEZE, md->cTC,
                              state->x, xprime, state->v, force,
                              inputrec->bd_fric,
-                             inputrec->opts.ngtc, inputrec->opts.tau_t, inputrec->opts.ref_t,
                              upd->sd->bd_rf, upd->sd->gaussrand[th]);
                 break;
             case (eiVV):