#include "orires.h"
#include "gmx_wallcycle.h"
#include "gmx_omp_nthreads.h"
+#include "gmx_omp.h"
/*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */
/*#define STARTFROMDT2*/
} gmx_sd_sigma_t;
typedef struct {
- /* The random state */
- gmx_rng_t gaussrand;
+ /* The random state for ngaussrand threads.
+ * Normal thermostats need just 1 random number generator,
+ * but SD and BD with OpenMP parallelization need 1 for each thread.
+ */
+ int ngaussrand;
+ gmx_rng_t *gaussrand;
/* BD stuff */
real *bd_rf;
/* SD stuff */
}
}
-static gmx_stochd_t *init_stochd(FILE *fplog,t_inputrec *ir)
+/* Allocates and initializes sd->gaussrand[i] for i=1, i<sd->ngaussrand,
+ * Using seeds generated from sd->gaussrand[0].
+ */
+static void init_multiple_gaussrand(gmx_stochd_t *sd)
+{
+ int ngr,i;
+ unsigned int *seed;
+
+ ngr = sd->ngaussrand;
+ snew(seed,ngr);
+
+ for(i=1; i<ngr; i++)
+ {
+ seed[i] = gmx_rng_uniform_uint32(sd->gaussrand[0]);
+ }
+
+#pragma omp parallel num_threads(ngr)
+ {
+ int th;
+
+ th = gmx_omp_get_thread_num();
+ if (th > 0)
+ {
+ /* Initialize on each thread to have thread-local memory alloced */
+ sd->gaussrand[th] = gmx_rng_init(seed[th]);
+ }
+ }
+
+ sfree(seed);
+}
+
+static gmx_stochd_t *init_stochd(FILE *fplog,t_inputrec *ir,int nthreads)
{
gmx_stochd_t *sd;
gmx_sd_const_t *sdc;
- int ngtc,n;
+ int ngtc,n,th;
real y;
snew(sd,1);
/* Initiate random number generator for langevin type dynamics,
* for BD, SD or velocity rescaling temperature coupling.
*/
- sd->gaussrand = gmx_rng_init(ir->ld_seed);
+ if (ir->eI == eiBD || EI_SD(ir->eI))
+ {
+ sd->ngaussrand = nthreads;
+ }
+ else
+ {
+ sd->ngaussrand = 1;
+ }
+ snew(sd->gaussrand,sd->ngaussrand);
+
+ /* Initialize the first random generator */
+ sd->gaussrand[0] = gmx_rng_init(ir->ld_seed);
+
+ if (sd->ngaussrand > 1)
+ {
+ /* Initialize the rest of the random number generators,
+ * using the first one to generate seeds.
+ */
+ init_multiple_gaussrand(sd);
+ }
ngtc = ir->opts.ngtc;
void get_stochd_state(gmx_update_t upd,t_state *state)
{
- gmx_rng_get_state(upd->sd->gaussrand,state->ld_rng,state->ld_rngi);
+ /* Note that we only get the state of the first random generator,
+ * even if there are multiple. This avoids repetition.
+ */
+ gmx_rng_get_state(upd->sd->gaussrand[0],state->ld_rng,state->ld_rngi);
}
void set_stochd_state(gmx_update_t upd,t_state *state)
{
- gmx_rng_set_state(upd->sd->gaussrand,state->ld_rng,state->ld_rngi[0]);
+ gmx_stochd_t *sd;
+ int i;
+
+ sd = upd->sd;
+
+ gmx_rng_set_state(sd->gaussrand[0],state->ld_rng,state->ld_rngi[0]);
+
+ if (sd->ngaussrand > 1)
+ {
+ /* We only end up here with SD or BD with OpenMP.
+ * Destroy and reinitialize the rest of the random number generators,
+ * using seeds generated from the first one.
+ * Although this doesn't recover the previous state,
+ * it at least avoids repetition, which is most important.
+ * Exaclty restoring states with all MPI+OpenMP setups is difficult
+ * and as the integrator is random to start with, doesn't gain us much.
+ */
+ for(i=1; i<sd->ngaussrand; i++)
+ {
+ gmx_rng_destroy(sd->gaussrand[i]);
+ }
+
+ init_multiple_gaussrand(sd);
+ }
}
gmx_update_t init_update(FILE *fplog,t_inputrec *ir)
if (ir->eI == eiBD || EI_SD(ir->eI) || ir->etc == etcVRESCALE || ETC_ANDERSEN(ir->etc))
{
- upd->sd = init_stochd(fplog,ir);
+ upd->sd = init_stochd(fplog,ir,gmx_omp_nthreads_get(emntUpdate));
}
upd->xp = NULL;
}
static void do_update_sd1(gmx_stochd_t *sd,
- int start,int homenr,double dt,
+ gmx_rng_t gaussrand,
+ int start,int nrend,double dt,
rvec accel[],ivec nFreeze[],
real invmass[],unsigned short ptype[],
unsigned short cFREEZE[],unsigned short cACC[],
{
gmx_sd_const_t *sdc;
gmx_sd_sigma_t *sig;
- gmx_rng_t gaussrand;
real kT;
int gf=0,ga=0,gt=0;
real ism,sd_V;
sdc = sd->sdc;
sig = sd->sdsig;
- if (homenr > sd->sd_V_nalloc)
+ if (nrend-start > sd->sd_V_nalloc)
{
- sd->sd_V_nalloc = over_alloc_dd(homenr);
+ sd->sd_V_nalloc = over_alloc_dd(nrend-start);
srenew(sd->sd_V,sd->sd_V_nalloc);
}
- gaussrand = sd->gaussrand;
for(n=0; n<ngtc; n++)
{
sig[n].V = sqrt(kT*(1 - sdc[n].em*sdc[n].em));
}
- for(n=start; n<start+homenr; n++)
+ for(n=start; n<nrend; n++)
{
ism = sqrt(invmass[n]);
if (cFREEZE)
}
}
-static void do_update_sd2(gmx_stochd_t *sd,gmx_bool bInitStep,
- int start,int homenr,
+static void do_update_sd2(gmx_stochd_t *sd,
+ gmx_rng_t gaussrand,
+ gmx_bool bInitStep,
+ int start,int nrend,
rvec accel[],ivec nFreeze[],
real invmass[],unsigned short ptype[],
unsigned short cFREEZE[],unsigned short cACC[],
* half of the update, needs to be remembered for the second half.
*/
rvec *sd_V;
- gmx_rng_t gaussrand;
real kT;
int gf=0,ga=0,gt=0;
real vn=0,Vmh,Xmh;
sdc = sd->sdc;
sig = sd->sdsig;
- if (homenr > sd->sd_V_nalloc)
+ if (nrend-start > sd->sd_V_nalloc)
{
- sd->sd_V_nalloc = over_alloc_dd(homenr);
+ sd->sd_V_nalloc = over_alloc_dd(nrend-start);
srenew(sd->sd_V,sd->sd_V_nalloc);
}
sd_V = sd->sd_V;
- gaussrand = sd->gaussrand;
if (bFirstHalf)
{
}
}
- for (n=start; n<start+homenr; n++)
+ for (n=start; n<nrend; n++)
{
ism = sqrt(invmass[n]);
if (cFREEZE)
break;
case etcVRESCALE:
vrescale_tcoupl(inputrec,ekind,dttc,
- state->therm_integral,upd->sd->gaussrand);
+ state->therm_integral,upd->sd->gaussrand[0]);
break;
}
/* rescale in place here */
int start,homenr,nrend,i,n,m,g,d;
tensor vir_con;
rvec *vbuf,*xprime=NULL;
+ int nth,th;
if (constr) {bDoConstr=TRUE;}
if (bFirstHalf && !EI_VV(inputrec->eI)) {bDoConstr=FALSE;}
{
xprime = get_xprime(state,upd);
- /* The second part of the SD integration */
- do_update_sd2(upd->sd,FALSE,start,homenr,
- inputrec->opts.acc,inputrec->opts.nFreeze,
- md->invmass,md->ptype,
- md->cFREEZE,md->cACC,md->cTC,
- state->x,xprime,state->v,force,state->sd_X,
- inputrec->opts.ngtc,inputrec->opts.tau_t,
- inputrec->opts.ref_t,FALSE);
+ nth = gmx_omp_nthreads_get(emntUpdate);
+
+#pragma omp parallel for num_threads(nth) schedule(static)
+ for(th=0; th<nth; th++)
+ {
+ int start_th,end_th;
+
+ start_th = start + ((nrend-start)* th )/nth;
+ end_th = start + ((nrend-start)*(th+1))/nth;
+
+ /* The second part of the SD integration */
+ do_update_sd2(upd->sd,upd->sd->gaussrand[th],
+ FALSE,start_th,end_th,
+ inputrec->opts.acc,inputrec->opts.nFreeze,
+ md->invmass,md->ptype,
+ md->cFREEZE,md->cACC,md->cTC,
+ state->x,xprime,state->v,force,state->sd_X,
+ inputrec->opts.ngtc,inputrec->opts.tau_t,
+ inputrec->opts.ref_t,FALSE);
+ }
inc_nrnb(nrnb, eNR_UPDATE, homenr);
if (bDoConstr)
nth = gmx_omp_nthreads_get(emntUpdate);
}
-# pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
+#pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
for(th=0; th<nth; th++)
{
int start_th,end_th;
}
break;
case (eiSD1):
- do_update_sd1(upd->sd,start,homenr,dt,
+ do_update_sd1(upd->sd,upd->sd->gaussrand[th],
+ start_th,end_th,dt,
inputrec->opts.acc,inputrec->opts.nFreeze,
md->invmass,md->ptype,
md->cFREEZE,md->cACC,md->cTC,
/* The SD update is done in 2 parts, because an extra constraint step
* is needed
*/
- do_update_sd2(upd->sd,bInitStep,start,homenr,
+ do_update_sd2(upd->sd,upd->sd->gaussrand[th],
+ bInitStep,start_th,end_th,
inputrec->opts.acc,inputrec->opts.nFreeze,
md->invmass,md->ptype,
md->cFREEZE,md->cACC,md->cTC,
TRUE);
break;
case (eiBD):
- do_update_bd(start,nrend,dt,
+ do_update_bd(start_th,end_th,dt,
inputrec->opts.nFreeze,md->invmass,md->ptype,
md->cFREEZE,md->cTC,
state->x,xprime,state->v,force,
inputrec->bd_fric,
inputrec->opts.ngtc,inputrec->opts.tau_t,inputrec->opts.ref_t,
- upd->sd->bd_rf,upd->sd->gaussrand);
+ upd->sd->bd_rf,upd->sd->gaussrand[th]);
break;
case (eiVV):
case (eiVVAK):
}
upd->randatom_list_init = TRUE;
}
- andersen_tcoupl(ir,md,state,upd->sd->gaussrand,rate,
+ andersen_tcoupl(ir,md,state,upd->sd->gaussrand[0],rate,
(ir->etc==etcANDERSEN)?idef:NULL,
constr?get_nblocks(constr):0,
constr?get_sblock(constr):NULL,