The SD and BD integrators always integrated single threaded.
Really fixes #1121
Change-Id: I2217c40e9c188c7cd57801e413750035c6488f56
seed[i] = gmx_rng_uniform_uint32(sd->gaussrand[0]);
}
seed[i] = gmx_rng_uniform_uint32(sd->gaussrand[0]);
}
-#pragma omp parallel num_threads(ngr)
+ if (ngr != gmx_omp_nthreads_get(emntUpdate))
+ {
+ gmx_incons("The number of Gaussian number generators should be equal to gmx_omp_nthreads_get(emntUpdate)");
+ }
+
+#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
{
int th;
th = gmx_omp_get_thread_num();
if (th > 0)
{
{
int th;
th = gmx_omp_get_thread_num();
if (th > 0)
{
- /* Initialize on each thread to have thread-local memory alloced */
+ /* Initialize on each thread to get memory allocated thread-local */
sd->gaussrand[th] = gmx_rng_init(seed[th]);
}
}
sd->gaussrand[th] = gmx_rng_init(seed[th]);
}
}
+/* Precompute the SD2 sigma constants for all temperature-coupling groups.
+ *
+ * For each of the ngtc T-coupling groups, fills sd->sdsig[gt] from the
+ * reference temperature ref_t[gt], the coupling time tau_t[gt] and the
+ * previously initialized per-group SD constants in sd->sdc[gt].
+ * Split out of do_update_sd2() because this loop runs single threaded,
+ * while the per-atom update is OpenMP parallel.
+ */
+static void do_update_sd2_Tconsts(gmx_stochd_t *sd,
+                                  int ngtc,
+                                  const real tau_t[],
+                                  const real ref_t[])
+{
+    /* This is separated from the update below, because it is single threaded */
+    gmx_sd_const_t *sdc;
+    gmx_sd_sigma_t *sig;
+    int gt;
+    real kT;
+
+    sdc = sd->sdc;
+    sig = sd->sdsig;
+
+    for (gt = 0; gt < ngtc; gt++)
+    {
+        kT = BOLTZ*ref_t[gt];
+        /* The mass is accounted for later, since this differs per atom */
+        sig[gt].V = sqrt(kT*(1-sdc[gt].em));
+        sig[gt].X = sqrt(kT*sqr(tau_t[gt])*sdc[gt].c);
+        sig[gt].Yv = sqrt(kT*sdc[gt].b/sdc[gt].c);
+        sig[gt].Yx = sqrt(kT*sqr(tau_t[gt])*sdc[gt].b/(1-sdc[gt].em));
+    }
+}
+
static void do_update_sd2(gmx_stochd_t *sd,
gmx_rng_t gaussrand,
gmx_bool bInitStep,
static void do_update_sd2(gmx_stochd_t *sd,
gmx_rng_t gaussrand,
gmx_bool bInitStep,
unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[], rvec f[],
rvec sd_X[],
unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[], rvec f[],
rvec sd_X[],
- int ngtc, real tau_t[], real ref_t[],
gmx_bool bFirstHalf)
{
gmx_sd_const_t *sdc;
gmx_bool bFirstHalf)
{
gmx_sd_const_t *sdc;
sig = sd->sdsig;
sd_V = sd->sd_V;
sig = sd->sdsig;
sd_V = sd->sd_V;
- if (bFirstHalf)
- {
- for (n = 0; n < ngtc; n++)
- {
- kT = BOLTZ*ref_t[n];
- /* The mass is encounted for later, since this differs per atom */
- sig[n].V = sqrt(kT*(1-sdc[n].em));
- sig[n].X = sqrt(kT*sqr(tau_t[n])*sdc[n].c);
- sig[n].Yv = sqrt(kT*sdc[n].b/sdc[n].c);
- sig[n].Yx = sqrt(kT*sqr(tau_t[n])*sdc[n].b/(1-sdc[n].em));
- }
- }
-
for (n = start; n < nrend; n++)
{
ism = sqrt(invmass[n]);
for (n = start; n < nrend; n++)
{
ism = sqrt(invmass[n]);
+/* Precompute the per-T-coupling-group random-force factor rf[gt] for the
+ * BD update, from the reference temperature ref_t[gt].
+ *
+ * With a non-zero friction coefficient the factor carries the
+ * 1/(friction_coefficient*dt) scaling; NOTE(review): in the zero-friction
+ * case dt does not enter here — presumably the time dependence is applied
+ * inside do_update_bd() itself; confirm against that routine.
+ * Split out of do_update_bd() because this loop runs single threaded,
+ * while the per-atom update is OpenMP parallel.
+ */
+static void do_update_bd_Tconsts(double dt, real friction_coefficient,
+                                 int ngtc, const real ref_t[],
+                                 real *rf)
+{
+    /* This is separated from the update below, because it is single threaded */
+    int gt;
+
+    if (friction_coefficient != 0)
+    {
+        for (gt = 0; gt < ngtc; gt++)
+        {
+            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]/(friction_coefficient*dt));
+        }
+    }
+    else
+    {
+        for (gt = 0; gt < ngtc; gt++)
+        {
+            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]);
+        }
+    }
+}
+
static void do_update_bd(int start, int nrend, double dt,
ivec nFreeze[],
real invmass[], unsigned short ptype[],
unsigned short cFREEZE[], unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[],
rvec f[], real friction_coefficient,
static void do_update_bd(int start, int nrend, double dt,
ivec nFreeze[],
real invmass[], unsigned short ptype[],
unsigned short cFREEZE[], unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[],
rvec f[], real friction_coefficient,
- int ngtc, real tau_t[], real ref_t[],
real *rf, gmx_rng_t gaussrand)
{
/* note -- these appear to be full step velocities . . . */
real *rf, gmx_rng_t gaussrand)
{
/* note -- these appear to be full step velocities . . . */
if (friction_coefficient != 0)
{
invfr = 1.0/friction_coefficient;
if (friction_coefficient != 0)
{
invfr = 1.0/friction_coefficient;
- for (n = 0; n < ngtc; n++)
- {
- rf[n] = sqrt(2.0*BOLTZ*ref_t[n]/(friction_coefficient*dt));
- }
- }
- else
- {
- for (n = 0; n < ngtc; n++)
- {
- rf[n] = sqrt(2.0*BOLTZ*ref_t[n]);
- }
for (n = start; (n < nrend); n++)
{
if (cFREEZE)
for (n = start; (n < nrend); n++)
{
if (cFREEZE)
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
- inputrec->opts.ngtc, inputrec->opts.tau_t,
- inputrec->opts.ref_t, FALSE);
+ inputrec->opts.tau_t,
+ FALSE);
}
inc_nrnb(nrnb, eNR_UPDATE, homenr);
}
inc_nrnb(nrnb, eNR_UPDATE, homenr);
dump_it_all(fplog, "Before update",
state->natoms, state->x, xprime, state->v, force);
dump_it_all(fplog, "Before update",
state->natoms, state->x, xprime, state->v, force);
- if (EI_RANDOM(inputrec->eI))
+ if (inputrec->eI == eiSD2)
- /* We still need to take care of generating random seeds properly
- * when multi-threading.
- */
- nth = 1;
+ check_sd2_work_data_allocation(upd->sd, nrend);
+
+ do_update_sd2_Tconsts(upd->sd,
+ inputrec->opts.ngtc,
+ inputrec->opts.tau_t,
+ inputrec->opts.ref_t);
+ if (inputrec->eI == eiBD)
- nth = gmx_omp_nthreads_get(emntUpdate);
+ do_update_bd_Tconsts(dt, inputrec->bd_fric,
+ inputrec->opts.ngtc, inputrec->opts.ref_t,
+ upd->sd->bd_rf);
- if (inputrec->eI == eiSD2)
- {
- check_sd2_work_data_allocation(upd->sd, nrend);
- }
+ nth = gmx_omp_nthreads_get(emntUpdate);
#pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
for (th = 0; th < nth; th++)
#pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
for (th = 0; th < nth; th++)
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
- inputrec->opts.ngtc, inputrec->opts.tau_t, inputrec->opts.ref_t,
TRUE);
break;
case (eiBD):
TRUE);
break;
case (eiBD):
md->cFREEZE, md->cTC,
state->x, xprime, state->v, force,
inputrec->bd_fric,
md->cFREEZE, md->cTC,
state->x, xprime, state->v, force,
inputrec->bd_fric,
- inputrec->opts.ngtc, inputrec->opts.tau_t, inputrec->opts.ref_t,
upd->sd->bd_rf, upd->sd->gaussrand[th]);
break;
case (eiVV):
upd->sd->bd_rf, upd->sd->gaussrand[th]);
break;
case (eiVV):