The SD and BD integrators always integrated single threaded.
Really fixes #1121
Change-Id: I2217c40e9c188c7cd57801e413750035c6488f56
seed[i] = gmx_rng_uniform_uint32(sd->gaussrand[0]);
}
seed[i] = gmx_rng_uniform_uint32(sd->gaussrand[0]);
}
-#pragma omp parallel num_threads(ngr)
+ if (ngr != gmx_omp_nthreads_get(emntUpdate))
+ {
+ gmx_incons("The number of Gaussian number generators should be equal to gmx_omp_nthreads_get(emntUpdate)");
+ }
+
+#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
{
int th;
th = gmx_omp_get_thread_num();
if (th > 0)
{
{
int th;
th = gmx_omp_get_thread_num();
if (th > 0)
{
- /* Initialize on each thread to have thread-local memory alloced */
+ /* Initialize on each thread to get memory allocated thread-local */
sd->gaussrand[th] = gmx_rng_init(seed[th]);
}
}
sd->gaussrand[th] = gmx_rng_init(seed[th]);
}
}
+/* Precompute the SD2 sigma constants for all temperature-coupling groups.
+ *
+ * For each of the ngtc T-coupling groups, fills sd->sdsig[gt] from the
+ * reference temperature ref_t[gt], the coupling time tau_t[gt] and the
+ * previously initialized per-group SD constants in sd->sdc[gt].
+ * Split out of do_update_sd2() because this loop runs single threaded,
+ * while the per-atom update is OpenMP parallel.
+ */
+static void do_update_sd2_Tconsts(gmx_stochd_t *sd,
+                                  int ngtc,
+                                  const real tau_t[],
+                                  const real ref_t[])
+{
+    /* This is separated from the update below, because it is single threaded */
+    gmx_sd_const_t *sdc;
+    gmx_sd_sigma_t *sig;
+    int gt;
+    real kT;
+
+    sdc = sd->sdc;
+    sig = sd->sdsig;
+
+    for (gt = 0; gt < ngtc; gt++)
+    {
+        kT = BOLTZ*ref_t[gt];
+        /* The mass is accounted for later, since this differs per atom */
+        sig[gt].V = sqrt(kT*(1-sdc[gt].em));
+        sig[gt].X = sqrt(kT*sqr(tau_t[gt])*sdc[gt].c);
+        sig[gt].Yv = sqrt(kT*sdc[gt].b/sdc[gt].c);
+        sig[gt].Yx = sqrt(kT*sqr(tau_t[gt])*sdc[gt].b/(1-sdc[gt].em));
+    }
+}
+
static void do_update_sd2(gmx_stochd_t *sd,
gmx_rng_t gaussrand,
gmx_bool bInitStep,
static void do_update_sd2(gmx_stochd_t *sd,
gmx_rng_t gaussrand,
gmx_bool bInitStep,
unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[], rvec f[],
rvec sd_X[],
unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[], rvec f[],
rvec sd_X[],
- int ngtc, real tau_t[], real ref_t[],
gmx_bool bFirstHalf)
{
gmx_sd_const_t *sdc;
gmx_bool bFirstHalf)
{
gmx_sd_const_t *sdc;
sig = sd->sdsig;
sd_V = sd->sd_V;
sig = sd->sdsig;
sd_V = sd->sd_V;
- if (bFirstHalf)
- {
- for (n = 0; n < ngtc; n++)
- {
- kT = BOLTZ*ref_t[n];
- /* The mass is encounted for later, since this differs per atom */
- sig[n].V = sqrt(kT*(1-sdc[n].em));
- sig[n].X = sqrt(kT*sqr(tau_t[n])*sdc[n].c);
- sig[n].Yv = sqrt(kT*sdc[n].b/sdc[n].c);
- sig[n].Yx = sqrt(kT*sqr(tau_t[n])*sdc[n].b/(1-sdc[n].em));
- }
- }
-
for (n = start; n < nrend; n++)
{
ism = sqrt(invmass[n]);
for (n = start; n < nrend; n++)
{
ism = sqrt(invmass[n]);
+/* Precompute the per-T-coupling-group random-force factor rf[gt] for the
+ * BD update, from the reference temperature ref_t[gt].
+ *
+ * With a non-zero friction coefficient the factor carries the
+ * 1/(friction_coefficient*dt) scaling; NOTE(review): in the zero-friction
+ * case dt does not enter here — presumably the time dependence is applied
+ * inside do_update_bd() itself; confirm against that routine.
+ * Split out of do_update_bd() because this loop runs single threaded,
+ * while the per-atom update is OpenMP parallel.
+ */
+static void do_update_bd_Tconsts(double dt, real friction_coefficient,
+                                 int ngtc, const real ref_t[],
+                                 real *rf)
+{
+    /* This is separated from the update below, because it is single threaded */
+    int gt;
+
+    if (friction_coefficient != 0)
+    {
+        for (gt = 0; gt < ngtc; gt++)
+        {
+            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]/(friction_coefficient*dt));
+        }
+    }
+    else
+    {
+        for (gt = 0; gt < ngtc; gt++)
+        {
+            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]);
+        }
+    }
+}
+
static void do_update_bd(int start, int nrend, double dt,
ivec nFreeze[],
real invmass[], unsigned short ptype[],
unsigned short cFREEZE[], unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[],
rvec f[], real friction_coefficient,
static void do_update_bd(int start, int nrend, double dt,
ivec nFreeze[],
real invmass[], unsigned short ptype[],
unsigned short cFREEZE[], unsigned short cTC[],
rvec x[], rvec xprime[], rvec v[],
rvec f[], real friction_coefficient,
- int ngtc, real tau_t[], real ref_t[],
real *rf, gmx_rng_t gaussrand)
{
/* note -- these appear to be full step velocities . . . */
real *rf, gmx_rng_t gaussrand)
{
/* note -- these appear to be full step velocities . . . */
if (friction_coefficient != 0)
{
invfr = 1.0/friction_coefficient;
if (friction_coefficient != 0)
{
invfr = 1.0/friction_coefficient;
- for (n = 0; n < ngtc; n++)
- {
- rf[n] = sqrt(2.0*BOLTZ*ref_t[n]/(friction_coefficient*dt));
- }
- }
- else
- {
- for (n = 0; n < ngtc; n++)
- {
- rf[n] = sqrt(2.0*BOLTZ*ref_t[n]);
- }
for (n = start; (n < nrend); n++)
{
if (cFREEZE)
for (n = start; (n < nrend); n++)
{
if (cFREEZE)
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
- inputrec->opts.ngtc, inputrec->opts.tau_t,
- inputrec->opts.ref_t, FALSE);
+ inputrec->opts.tau_t,
+ FALSE);
}
inc_nrnb(nrnb, eNR_UPDATE, homenr);
}
inc_nrnb(nrnb, eNR_UPDATE, homenr);
dump_it_all(fplog, "Before update",
state->natoms, state->x, xprime, state->v, force);
dump_it_all(fplog, "Before update",
state->natoms, state->x, xprime, state->v, force);
- if (EI_RANDOM(inputrec->eI))
+ if (inputrec->eI == eiSD2)
- /* We still need to take care of generating random seeds properly
- * when multi-threading.
- */
- nth = 1;
+ check_sd2_work_data_allocation(upd->sd, nrend);
+
+ do_update_sd2_Tconsts(upd->sd,
+ inputrec->opts.ngtc,
+ inputrec->opts.tau_t,
+ inputrec->opts.ref_t);
+ if (inputrec->eI == eiBD)
- nth = gmx_omp_nthreads_get(emntUpdate);
+ do_update_bd_Tconsts(dt, inputrec->bd_fric,
+ inputrec->opts.ngtc, inputrec->opts.ref_t,
+ upd->sd->bd_rf);
- if (inputrec->eI == eiSD2)
- {
- check_sd2_work_data_allocation(upd->sd, nrend);
- }
+ nth = gmx_omp_nthreads_get(emntUpdate);
#pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
for (th = 0; th < nth; th++)
#pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
for (th = 0; th < nth; th++)
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
md->invmass, md->ptype,
md->cFREEZE, md->cACC, md->cTC,
state->x, xprime, state->v, force, state->sd_X,
- inputrec->opts.ngtc, inputrec->opts.tau_t, inputrec->opts.ref_t,
TRUE);
break;
case (eiBD):
TRUE);
break;
case (eiBD):
md->cFREEZE, md->cTC,
state->x, xprime, state->v, force,
inputrec->bd_fric,
md->cFREEZE, md->cTC,
state->x, xprime, state->v, force,
inputrec->bd_fric,
- inputrec->opts.ngtc, inputrec->opts.tau_t, inputrec->opts.ref_t,
upd->sd->bd_rf, upd->sd->gaussrand[th]);
break;
case (eiVV):
upd->sd->bd_rf, upd->sd->gaussrand[th]);
break;
case (eiVV):