Merge release-4-5-patches into release-4-6
[alexxy/gromacs.git] / src / kernel / repl_ex.c
index 1ee8998d2129d4e618bea47a3b5b1ff2705baff4..8b6797051b52aada198dfda14d39f9b36a35bc4f 100644 (file)
 #include "domdec.h"
 #include "partdec.h"
 
+#define PROBABILITYCUTOFF 100
+/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
+
+enum { ereTEMP, ereLAMBDA, ereENDSINGLE ,ereTL, ereNR };
+const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"};
+
 typedef struct gmx_repl_ex
 {
     int  repl;
     int  nrepl;
     real temp;
     int  type;
-    real *q;
+    real **q;
     gmx_bool bNPT;
     real *pres;
     int  *ind;
+    int *allswaps;
     int  nst;
+    int nex;
     int  seed;
     int  nattempt[2];
     real *prob_sum;
+    int  **nmoves;
     int  *nexchange;
 } t_gmx_repl_ex;
 
-enum { ereTEMP, ereLAMBDA, ereNR };
-const char *erename[ereNR] = { "temperature", "lambda" };
-
-static void repl_quantity(FILE *fplog,const gmx_multisim_t *ms,
-                          struct gmx_repl_ex *re,int ere,real q)
+static gmx_bool repl_quantity(FILE *fplog,const gmx_multisim_t *ms,
+                              struct gmx_repl_ex *re,int ere,real q)
 {
     real *qall;
     gmx_bool bDiff;
-    int  s;
+    int  i,s;
 
     snew(qall,ms->nsim);
     qall[re->repl] = q;
     gmx_sum_sim(ms->nsim,qall,ms);
 
     bDiff = FALSE;
-    for(s=1; s<ms->nsim; s++)
+    for (s=1; s<ms->nsim; s++)
     {
         if (qall[s] != qall[0])
         {
-            bDiff = TRUE;
+            bDiff = TRUE;   
         }
     }
+
     if (bDiff)
     {
-        if (re->type >= 0 && re->type < ereNR)
-        {
-            gmx_fatal(FARGS,"For replica exchange both %s and %s differ",
-                      erename[re->type],erename[ere]);
-        }
         /* Set the replica exchange type and quantities */
         re->type = ere;
-        snew(re->q,re->nrepl);
+
+        snew(re->q[ere],re->nrepl);
         for(s=0; s<ms->nsim; s++)
         {
-            re->q[s] = qall[s];
+            re->q[ere][s] = qall[s];
         }
     }
-
     sfree(qall);
+    return bDiff;
 }
 
 gmx_repl_ex_t init_replica_exchange(FILE *fplog,
                                     const gmx_multisim_t *ms,
                                     const t_state *state,
                                     const t_inputrec *ir,
-                                    int nst,int init_seed)
+                                    int nst, int nex, int init_seed)
 {
     real temp,pres;
     int  i,j,k;
     struct gmx_repl_ex *re;
+    gmx_bool bTemp;
+    gmx_bool bLambda=FALSE;
 
     fprintf(fplog,"\nInitializing Replica Exchange\n");
 
@@ -130,6 +135,7 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog,
 
     re->repl     = ms->sim;
     re->nrepl    = ms->nsim;
+    snew(re->q,ereENDSINGLE);
 
     fprintf(fplog,"Repl  There are %d replicas:\n",re->nrepl);
 
@@ -143,6 +149,7 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog,
                     "the number of temperature coupling groups");
     check_multi_int(fplog,ms,ir->epc,"the pressure coupling");
     check_multi_int(fplog,ms,ir->efep,"free energy");
+    check_multi_int(fplog,ms,ir->fepvals->n_lambda,"number of lambda states");
 
     re->temp = ir->opts.ref_t[0];
     for(i=1; (i<ir->opts.ngtc); i++)
@@ -155,31 +162,21 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog,
     }
 
     re->type = -1;
-    for(i=0; i<ereNR; i++)
+    bTemp = repl_quantity(fplog,ms,re,ereTEMP,re->temp);
+    if (ir->efep != efepNO)
     {
-        switch (i)
-        {
-        case ereTEMP:
-            repl_quantity(fplog,ms,re,i,re->temp);
-            break;
-        case ereLAMBDA:
-            if (ir->efep != efepNO)
-            {
-                repl_quantity(fplog,ms,re,i,ir->init_lambda);
-            }
-            break;
-        default:
-            gmx_incons("Unknown replica exchange quantity");
-        }
+        bLambda = repl_quantity(fplog,ms,re,ereLAMBDA,(real)ir->fepvals->init_fep_state);
     }
-    if (re->type == -1)
+    if (re->type == -1)  /* nothing was assigned */
     {
         gmx_fatal(FARGS,"The properties of the %d systems are all the same, there is nothing to exchange",re->nrepl);
     }
+    if (bLambda && bTemp) {
+        re->type = ereTL;
+    }
 
-    switch (re->type)
+    if (bTemp)
     {
-    case ereTEMP:
         please_cite(fplog,"Sugita1999a");
         if (ir->epc != epcNO)
         {
@@ -192,15 +189,13 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog,
             gmx_fatal(FARGS,"REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead",
                       ETCOUPLTYPE(ir->etc),ETCOUPLTYPE(etcVRESCALE));
         }
-        break;
-    case ereLAMBDA:
-        if (ir->delta_lambda != 0)
+    }
+    if (bLambda) {
+        if (ir->fepvals->delta_lambda != 0)   /* check this? */
         {
             gmx_fatal(FARGS,"delta_lambda is not zero");
         }
-        break;
     }
-
     if (re->bNPT)
     {
         snew(re->pres,re->nrepl);
@@ -226,48 +221,72 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog,
         gmx_sum_sim(re->nrepl,re->pres,ms);
     }
 
+    /* Make an index for increasing replica order */
+    /* only makes sense if one or the other is varying, not both!
+       if both are varying, we trust the order the person gave. */
     snew(re->ind,re->nrepl);
-    /* Make an index for increasing temperature order */
     for(i=0; i<re->nrepl; i++)
     {
         re->ind[i] = i;
     }
-    for(i=0; i<re->nrepl; i++)
-    {
-        for(j=i+1; j<re->nrepl; j++)
+
+    if (re->type<ereENDSINGLE) {
+
+        for(i=0; i<re->nrepl; i++)
         {
-            if (re->q[re->ind[j]] < re->q[re->ind[i]])
+            for(j=i+1; j<re->nrepl; j++)
             {
-                k = re->ind[i];
-                re->ind[i] = re->ind[j];
-                re->ind[j] = k;
-            }
-            else if (re->q[re->ind[j]] == re->q[re->ind[i]])
-            {
-                gmx_fatal(FARGS,"Two replicas have identical %ss",erename[re->type]);
+                if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]])
+                {
+                    k = re->ind[i];
+                    re->ind[i] = re->ind[j];
+                    re->ind[j] = k;
+                }
+                else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]])
+                {
+                    gmx_fatal(FARGS,"Two replicas have identical %ss",erename[re->type]);
+                }
             }
         }
     }
-    fprintf(fplog,"Repl   ");
+
+    /* keep track of all the swaps, starting with the initial placement. */
+    snew(re->allswaps,re->nrepl);
     for(i=0; i<re->nrepl; i++)
     {
-        fprintf(fplog," %3d  ",re->ind[i]);
+        re->allswaps[i] = re->ind[i];
     }
+
     switch (re->type)
     {
     case ereTEMP:
-        fprintf(fplog,"\nRepl  T");
+        fprintf(fplog,"\nReplica exchange in temperature\n");
         for(i=0; i<re->nrepl; i++)
         {
-            fprintf(fplog," %5.1f",re->q[re->ind[i]]);
+            fprintf(fplog," %5.1f",re->q[re->type][re->ind[i]]);
         }
+        fprintf(fplog,"\n");
         break;
     case ereLAMBDA:
-        fprintf(fplog,"\nRepl  l");
+        fprintf(fplog,"\nReplica exchange in lambda\n");
+        for(i=0; i<re->nrepl; i++)
+        {
+            fprintf(fplog," %3d",(int)re->q[re->type][re->ind[i]]);
+        }
+        fprintf(fplog,"\n");
+        break;
+    case ereTL:
+        fprintf(fplog,"\nReplica exchange in temperature and lambda state\n");
+        for(i=0; i<re->nrepl; i++)
+        {
+            fprintf(fplog," %5.1f",re->q[ereTEMP][re->ind[i]]);
+        }
+        fprintf(fplog,"\n");
         for(i=0; i<re->nrepl; i++)
         {
-            fprintf(fplog," %5.3f",re->q[re->ind[i]]);
+            fprintf(fplog," %5d",(int)re->q[ereLAMBDA][re->ind[i]]);
         }
+        fprintf(fplog,"\n");
         break;
     default:
         gmx_incons("Unknown replica exchange quantity");
@@ -284,12 +303,11 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog,
         {
             if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]]))
             {
-                gmx_fatal(FARGS,"The reference pressure decreases with increasing temperature");
+                fprintf(fplog,"\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
+                fprintf(stderr,"\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
             }
         }
     }
-    fprintf(fplog,"\nRepl  ");
-  
     re->nst = nst;
     if (init_seed == -1)
     {
@@ -307,16 +325,22 @@ gmx_repl_ex_t init_replica_exchange(FILE *fplog,
     {
         re->seed = init_seed;
     }
-    fprintf(fplog,"\nRepl  exchange interval: %d\n",re->nst);
-    fprintf(fplog,"\nRepl  random seed: %d\n",re->seed);
+    fprintf(fplog,"\nReplica exchange interval: %d\n",re->nst);
+    fprintf(fplog,"\nReplica random seed: %d\n",re->seed);
 
     re->nattempt[0] = 0;
     re->nattempt[1] = 0;
+
     snew(re->prob_sum,re->nrepl);
     snew(re->nexchange,re->nrepl);
+    snew(re->nmoves,re->nrepl);
+    for (i=0;i<re->nrepl;i++) 
+    {
+        snew(re->nmoves[i],re->nrepl);
+    }
+    fprintf(fplog,"Replica exchange information below: x=exchange, pr=probability\n");
 
-    fprintf(fplog,"Repl  below: x=exchange, pr=probability\n");
-
+    re->nex = nex;
     return re;
 }
 
@@ -352,6 +376,38 @@ static void exchange_reals(const gmx_multisim_t *ms,int b,real *v,int n)
     }
 }
 
+
+static void exchange_ints(const gmx_multisim_t *ms,int b,int *v,int n)
+{
+  int *buf;
+  int  i;
+
+  if (v) {
+    snew(buf,n);
+#ifdef GMX_MPI
+    /*
+    MPI_Sendrecv(v,  n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
+                buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
+                ms->mpi_comm_masters,MPI_STATUS_IGNORE);
+    */
+    {
+      MPI_Request mpi_req;
+
+      MPI_Isend(v,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
+               ms->mpi_comm_masters,&mpi_req);
+      MPI_Recv(buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
+              ms->mpi_comm_masters,MPI_STATUS_IGNORE);
+      MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
+    }
+#endif
+    for(i=0; i<n; i++) 
+    {
+        v[i] = buf[i];
+    }
+    sfree(buf);
+  }
+}
+
 static void exchange_doubles(const gmx_multisim_t *ms,int b,double *v,int n)
 {
     double *buf;
@@ -466,8 +522,36 @@ static void copy_doubles(const double *s,double *d,int n)
     }
 }
 
+static void copy_reals(const real *s,real *d,int n)
+{
+    int i;
+
+    if (d != NULL)
+    {
+        for(i=0; i<n; i++)
+        {
+            d[i] = s[i];
+        }
+    }
+}
+
+static void copy_ints(const int *s,int *d,int n)
+{
+    int i;
+
+    if (d != NULL)
+    {
+        for(i=0; i<n; i++)
+        {
+            d[i] = s[i];
+        }
+    }
+}
+
 #define scopy_rvecs(v,n)   copy_rvecs(state->v,state_local->v,n);
 #define scopy_doubles(v,n) copy_doubles(state->v,state_local->v,n);
+#define scopy_reals(v,n) copy_reals(state->v,state_local->v,n);
+#define scopy_ints(v,n)   copy_ints(state->v,state_local->v,n);
 
 static void copy_state_nonatomdata(t_state *state,t_state *state_local)
 {
@@ -491,6 +575,8 @@ static void copy_state_nonatomdata(t_state *state,t_state *state_local)
     scopy_rvecs(x,state->natoms);
     scopy_rvecs(v,state->natoms);
     scopy_rvecs(sd_X,state->natoms);
+    copy_ints(&(state->fep_state),&(state_local->fep_state),1);
+    scopy_reals(lambda,efptNR);
 }
 
 static void scale_velocities(t_state *state,real fac)
@@ -526,6 +612,34 @@ static void pd_collect_state(const t_commrec *cr,t_state *state)
     }
 }
 
+static void print_matrix(FILE *fplog,const char *leg,int n,int **nmoves, int *nattempt)
+{
+    int i,j,ntot;
+    float Tprint;
+
+    ntot = nattempt[0] + nattempt[1];
+
+    fprintf(fplog,"                  Empirical Transition Matrix\n");
+    for (i=0;i<n;i++)
+    {
+        fprintf(fplog,"%8d",(i+1));
+    }
+    fprintf(fplog,"\n");
+    for (i=0;i<n;i++)
+    {
+        for (j=0;j<n;j++)
+        {
+            Tprint = 0.0;
+            if (nmoves[i][j] > 0)
+            {
+                Tprint = nmoves[i][j]/(2.0*ntot);
+            }
+            fprintf(fplog,"%8.4f",Tprint);
+        }
+        fprintf(fplog,"%3d\n",i);
+    }
+}
+
 static void print_ind(FILE *fplog,const char *leg,int n,int *ind,gmx_bool *bEx)
 {
     int i;
@@ -538,6 +652,38 @@ static void print_ind(FILE *fplog,const char *leg,int n,int *ind,gmx_bool *bEx)
     fprintf(fplog,"\n");
 }
 
+static void print_allswitchind(FILE *fplog,int n,int *ind,int *pind, int *allswaps)
+{
+    int i;
+    int *tmpswap;
+
+    snew(tmpswap,n); /* need to save the data */
+    for (i=0;i<n;i++)
+    {
+        tmpswap[i] = allswaps[i];
+    }
+    for (i=0;i<n;i++)
+    {
+        allswaps[i] = tmpswap[pind[i]];
+    }
+
+    fprintf(fplog,"\nAccepted Exchanges:   ");
+    for (i=0;i<n;i++)
+    {
+        fprintf(fplog,"%d ",pind[i]);
+    }
+    fprintf(fplog,"\n");
+
+    fprintf(fplog,"Order After Exchange: ");
+    for (i=0;i<n;i++)
+    {
+        fprintf(fplog,"%d ",allswaps[i]);
+    }
+    fprintf(fplog,"\n\n");
+
+    sfree(tmpswap);
+}
+
 static void print_prob(FILE *fplog,const char *leg,int n,real *prob)
 {
     int  i;
@@ -571,136 +717,304 @@ static void print_count(FILE *fplog,const char *leg,int n,int *count)
     fprintf(fplog,"\n");
 }
 
-static int get_replica_exchange(FILE *fplog,const gmx_multisim_t *ms,
-                                struct gmx_repl_ex *re,real *ener,real vol,
-                                gmx_large_int_t step,real time)
-{
-    int  m,i,a,b;
-    real *Epot=NULL,*Vol=NULL,*dvdl=NULL,*prob;
-    real ediff=0,delta=0,dpV=0,betaA=0,betaB=0;
-    gmx_bool *bEx,bPrint;
-    int  exchange;
+static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, real *Epot, real **df, real* Vol, real *beta, int a, int b, int ap, int bp) {
+
+    real ediff,dpV,delta=0;
+
+    /* Two cases; we are permuted and not.  In all cases, setting ap = a and bp = b will reduce
+       to the non permuted case */
 
-    fprintf(fplog,"Replica exchange at step " gmx_large_int_pfmt " time %g\n",step,time);
-  
     switch (re->type)
     {
     case ereTEMP:
-        snew(Epot,re->nrepl);
-        snew(Vol,re->nrepl);
-        Epot[re->repl] = ener[F_EPOT];
-        Vol[re->repl]  = vol;
-        gmx_sum_sim(re->nrepl,Epot,ms);
-        gmx_sum_sim(re->nrepl,Vol,ms);
+        /*
+         * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
+         */
+        ediff = Epot[b] - Epot[a];
+        delta = -(beta[bp] - beta[ap])*ediff;
         break;
     case ereLAMBDA:
-        snew(dvdl,re->nrepl);
-        dvdl[re->repl] = ener[F_DVDL];
-        gmx_sum_sim(re->nrepl,dvdl,ms);
+        /* two cases:  when we are permuted, and not.  */
+        /* non-permuted:
+           ediff =  E_new - E_old
+                 =  [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)]
+                 =  [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)]
+                 =  df[b][a] + df[a][b] */
+        /* permuted:
+           ediff =  E_new - E_old
+                 =  [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)]
+                 =  [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)]
+                 =  [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)]
+                 =  [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)]
+                 =  (df[bp][a] - df[ap][a]) + (df[ap][b] - df[bp][b])    */
+        ediff = (df[bp][a] - df[ap][a]) + (df[ap][b] - df[bp][b]);
+        delta = ediff*beta[a]; /* assume all same temperature in this case */
         break;
+    case ereTL:
+        /* not permuted:  */
+        /* delta =  reduced E_new - reduced E_old
+                 =  [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)]
+                 =  [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)]
+                 =  [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] +
+                    [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)]
+                 =  [beta_b dH_b(x_a) + [beta_a dH_a(x_b) +
+                    beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b))
+                 =  beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */
+        /* delta = beta[b]*df[b][a] + beta[a]*df[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */
+        /* permuted (big breath!) */
+        /*   delta =  reduced E_new - reduced E_old
+                 =  [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)]
+                 =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
+                 =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
+                    - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a)
+                    - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
+                 =  [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] +
+                    [(beta_ap H_ap(x_b)  - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))]
+                    + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
+                 =  [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] +
+                    [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))]
+                    + beta_pb (H_a(x_a) - H_b(x_b))  - beta_ap (H_a(x_a) - H_b(x_b))
+                 =  ([beta_bp df[bp][a] - beta_ap df[ap][a]) + beta_ap df[ap][b]  - beta_bp df[bp][b])
+                 + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b))  */
+        delta = beta[bp]*(df[bp][a] - df[bp][b]) + beta[ap]*(df[ap][b] - df[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]);
+        break;
+    default:
+        gmx_incons("Unknown replica exchange quantity");
+    }
+    if (bPrint)
+    {
+        fprintf(fplog,"Repl %d <-> %d  dE_term = %10.3e (kT)\n",a,b,delta);
     }
+    if (re->bNPT)
+    {
+        /* revist the calculation for 5.0.  Might be some improvements. */
+        dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC;
+        if (bPrint) 
+        {
+            fprintf(fplog,"  dpV = %10.3e  d = %10.3e\nb",dpV,delta + dpV);
+        }
+        delta += dpV;
+    }
+    return delta;
+}
+
+static void get_replica_exchange(FILE *fplog,const gmx_multisim_t *ms,
+                                 struct gmx_repl_ex *re,gmx_enerdata_t *enerd,real vol,
+                                 gmx_large_int_t step,real time,int *pind)
+{
+    int  m,i,a,b,ap,bp,i0,i1,tmp;
+    real *Epot=NULL,*Vol=NULL,**flambda=NULL,*beta=NULL,*prob;
+    real ediff=0,delta=0,dpV=0;
+    gmx_bool *bEx,bPrint,bMultiEx;
+    gmx_bool bEpot=FALSE;
+    gmx_bool bFLambda=FALSE;
+    gmx_bool bVol=FALSE;
+
+    bMultiEx = (re->nex > 1);  /* multiple exchanges at each state */
+    fprintf(fplog,"Replica exchange at step " gmx_large_int_pfmt " time %g\n",step,time);
 
+    snew(beta,re->nrepl);
+    if (re->bNPT)
+    {
+        bVol = TRUE;
+        snew(Vol,re->nrepl);
+        Vol[re->repl]  = vol;
+    }
+
+    if ((re->type == ereTEMP || re->type == ereTL))
+    {
+        bEpot = TRUE;
+        snew(Epot,re->nrepl);
+        Epot[re->repl] = enerd->term[F_EPOT];
+        /* temperatures of different states*/
+        for (i=0;i<re->nrepl;i++)
+        {
+            beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ);
+        }
+    }
+    else
+    {
+        for (i=0;i<re->nrepl;i++)
+        {
+            beta[i] = 1.0/(re->temp*BOLTZ);  /* we have a single temperature */
+        }
+    }
+    if (re->type == ereLAMBDA || re->type == ereTL)
+    {
+        bFLambda = TRUE;
+        /* lambda differences. */
+        /* flambda[i][j] is the energy of the jth simulation in the ith Hamiltonian
+           minus the energy of the jth simulation in the jth Hamiltonian */
+        snew(flambda,re->nrepl);
+        for (i=0;i<re->nrepl;i++)
+        {
+            snew(flambda[i],re->nrepl);
+            flambda[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]);
+        }
+    }
+
+    /* now actually do the communication */
+    if (bVol)
+    {
+        gmx_sum_sim(re->nrepl,Vol,ms);
+    }
+    if (bEpot)
+    {
+        gmx_sum_sim(re->nrepl,Epot,ms);
+    }
+    if (bFLambda)
+    {
+        for (i=0;i<re->nrepl;i++)
+        {
+            gmx_sum_sim(re->nrepl,flambda[i],ms);
+        }
+    }
     snew(bEx,re->nrepl);
     snew(prob,re->nrepl);
 
-    exchange = -1;
-    m = (step / re->nst) % 2;
-    for(i=1; i<re->nrepl; i++)
+    /* make a duplicate set of indices for shuffling */
+    for(i=0;i<re->nrepl;i++)
+    {
+        pind[i] = re->ind[i];
+    }
+
+    if (bMultiEx)
     {
-        a = re->ind[i-1];
-        b = re->ind[i];
-        bPrint = (re->repl==a || re->repl==b);
-        if (i % 2 == m)
+        /* multiple random switch exchange */
+        for (i=0;i<re->nex;i++)
         {
-            switch (re->type)
+            /* randomly select a pair  */
+            /* find out which state it is from, and what label that state currently has */
+            i0 = (int)(re->nrepl*rando(&(re->seed)));
+            i1 = (int)(re->nrepl*rando(&(re->seed)));
+            if (i0==i1)
             {
-            case ereTEMP:
-                /* Use equations from:
-                 * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
-                 */
-                ediff = Epot[b] - Epot[a];
-                betaA = 1.0/(re->q[a]*BOLTZ);
-                betaB = 1.0/(re->q[b]*BOLTZ);
-                delta = (betaA - betaB)*ediff;
-                break;
-            case ereLAMBDA:
-                /* Here we exchange based on a linear extrapolation of dV/dlambda.
-                 * We would like to have the real energies
-                 * from foreign lambda calculations.
-                 */
-                ediff = (dvdl[a] - dvdl[b])*(re->q[b] - re->q[a]);
-                delta = ediff/(BOLTZ*re->temp);
-                break;
-            default:
-                gmx_incons("Unknown replica exchange quantity");
-            }
-            if (bPrint)
-            {
-                fprintf(fplog,"Repl %d <-> %d  dE = %10.3e",a,b,delta);
-            }
-            if (re->bNPT)
-            {
-                dpV = (betaA*re->pres[a]-betaB*re->pres[b])*(Vol[b]-Vol[a])/PRESFAC;
-                if (bPrint)
-                {
-                    fprintf(fplog,"  dpV = %10.3e  d = %10.3e",dpV,delta + dpV);
-                }
-                delta += dpV;
-            }
-            if (bPrint)
-            {
-                fprintf(fplog,"\n");
+                i--;
+                continue;  /* got the same pair, back up and do it again */
             }
+
+            a = re->ind[i0];
+            b = re->ind[i1];
+            ap = pind[i0];
+            bp = pind[i1];
+
+            bPrint = FALSE; /* too noisy */
+            delta = calc_delta(fplog,bPrint,re,Epot,flambda,Vol,beta,a,b,ap,bp); /* calculate the energy difference */
+
+            /* we actually only use the first space, since there are actually many switches between pairs. */
+
             if (delta <= 0)
             {
-                prob[i] = 1;
-                bEx[i] = TRUE;
+                /* accepted */
+                prob[0] = 1;
+                bEx[0] = TRUE;
             }
             else
             {
-                if (delta > 100)
+                if (delta > PROBABILITYCUTOFF)
                 {
-                    prob[i] = 0;
+                    prob[0] = 0;
                 }
                 else
                 {
-                    prob[i] = exp(-delta);
+                    prob[0] = exp(-delta);
                 }
-                bEx[i] = (rando(&(re->seed)) < prob[i]);
+                /* roll a number to determine if accepted */
+                bEx[0] = (rando(&(re->seed)) < prob[0]);
             }
-            re->prob_sum[i] += prob[i];    
-            if (bEx[i])
+            re->prob_sum[0] += prob[0];
+
+            if (bEx[0])
             {
-                if (a == re->repl)
+                /* swap the states */
+                tmp = pind[i0];
+                pind[i0] = pind[i1];
+                pind[i1] = tmp;
+            }
+        }
+        re->nattempt[0]++;  /* keep track of total permutation trials here */
+        print_allswitchind(fplog,re->nrepl,re->ind,pind,re->allswaps);
+    }
+    else
+    {
+        /* standard nearest neighbor replica exchange */
+        m = (step / re->nst) % 2;
+        for(i=1; i<re->nrepl; i++)
+        {
+            a = re->ind[i-1];
+            b = re->ind[i];
+            
+            bPrint = (re->repl==a || re->repl==b);
+            if (i % 2 == m)
+            {
+                delta = calc_delta(fplog,bPrint,re,Epot,flambda,Vol,beta,a,b,a,b);
+                if (delta <= 0) {
+                    /* accepted */
+                    prob[i] = 1;
+                    bEx[i] = TRUE;
+                }
+                else
                 {
-                    exchange = b;
+                    if (delta > PROBABILITYCUTOFF)
+                    {
+                        prob[i] = 0;
+                    }
+                    else
+                    {
+                        prob[i] = exp(-delta);
+                    }
+                    /* roll a number to determine if accepted */
+                    bEx[i] = (rando(&(re->seed)) < prob[i]);
                 }
-                else if (b == re->repl)
+                re->prob_sum[i] += prob[i];
+
+                if (bEx[i])
                 {
-                    exchange = a;
+                    /* swap these two */
+                    tmp = pind[i-1];
+                    pind[i-1] = pind[i];
+                    pind[i] = tmp;
                 }
-                re->nexchange[i]++;
+            }
+            else
+            {
+                prob[i] = -1;
+                bEx[i] = FALSE;
             }
         }
-        else
-        {
-            prob[i] = -1;
-            bEx[i] = FALSE;
-        }
+        /* print some statistics */
+        print_ind(fplog,"ex",re->nrepl,re->ind,bEx);
+        print_prob(fplog,"pr",re->nrepl,prob);
+        fprintf(fplog,"\n");
+        re->nattempt[m]++;
     }
-    print_ind(fplog,"ex",re->nrepl,re->ind,bEx);
-    print_prob(fplog,"pr",re->nrepl,prob);
-    fprintf(fplog,"\n");
 
+    /* record which moves were made and accepted */
+    for (i=0;i<re->nrepl;i++)
+    {
+        re->nmoves[re->ind[i]][pind[i]] +=1;
+        re->nmoves[pind[i]][re->ind[i]] +=1;
+    }
+    /* free up data */
     sfree(bEx);
     sfree(prob);
-    sfree(Epot);
-    sfree(Vol);
-    sfree(dvdl);
-  
-    re->nattempt[m]++;
-
-    return exchange;
+    sfree(beta);
+    if (re->bNPT)
+    {
+        sfree(Vol);
+    }
+    if ((re->type == ereTEMP || re->type == ereTL))
+    {
+        sfree(Epot);
+    }
+    if ((re->type == ereLAMBDA || re->type == ereTL))
+    {
+        for (i=0;i<re->nrepl;i++)
+        {
+            sfree(flambda[i]);
+        }
+        sfree(flambda);
+    }
 }
 
 static void write_debug_x(t_state *state)
@@ -716,24 +1030,152 @@ static void write_debug_x(t_state *state)
     }
 }
 
+static void cyclic_decomposition(FILE *fplog, int *pind, int **cyclic, int n, int *nswap)
+{
+
+    int i,j,c,p;
+    int *incycle;
+    int maxlen = 1;
+    snew(incycle,n);
+
+    /* compute cyclic decompositions */
+    for (i=0;i<n;i++)
+    {
+        snew(cyclic[i],n);
+        for (j=0;j<n;j++)
+        {
+            cyclic[i][j] = -1;
+        }
+    }
+
+    for (i=0;i<n;i++)  /* one cycle for each replica */
+    {
+        if (incycle[i])
+        {
+            cyclic[i][0] = -1;
+            continue;
+        }
+        cyclic[i][0] = i;
+        incycle[i] = TRUE;
+        c = 1;
+        p = i;
+        for (j=0;j<n;j++)  /* potentially all cycles are part, but we will break first */
+        {
+            p = pind[p]; /* start permuting */
+            if (p==i)
+            {
+                cyclic[i][c] = -1;
+                if (c > maxlen)
+                {
+                    maxlen = c;
+                }
+                break; /* we've reached the original element, the cycle is complete, and we marked the end. */
+            }
+            else
+            {
+                cyclic[i][c] = p;  /* each permutation gives a new member of the cycle */
+                incycle[p] = TRUE;
+                c++;
+            }
+        }
+    }
+    *nswap = maxlen - 1;
+
+    if (debug)
+    {
+        for (i=0;i<n;i++)
+        {
+            fprintf(fplog,"Cycle %d:",i);
+            for (j=0;j<n;j++)
+            {
+                if (cyclic[i][j] < 0)
+                {
+                    break;
+                }
+                fprintf(fplog,"%2d",cyclic[i][j]);
+            }
+            fprintf(fplog,"\n");
+        }
+        fflush(fplog);
+    }
+}
+
+static void compute_exchange_order(FILE *fplog, int **cyclic,int **order, int n, int maxswap)
+{
+    int i,j;
+
+    for (i=0;i<n;i++)
+    {
+        snew(order[i],maxswap);
+        for (j=0;j<maxswap;j++)
+        {
+            order[i][j] = -1;
+        }
+    }
+    for (j=0;j<maxswap;j++)
+    {
+        for (i=0;i<n;i++)
+        {
+            if (cyclic[i][j+1] >= 0)
+            {
+                order[cyclic[i][j+1]][j] = cyclic[i][j];
+                order[cyclic[i][j]][j] = cyclic[i][j+1];
+            }
+        }
+        for (i=0;i<n;i++)
+        {
+            if (order[i][j] < 0)
+            {
+                order[i][j] = i; /* if it's not exchanging, it should stay this round*/
+            }
+        }
+    }
+    if (debug)
+    {
+        fprintf(fplog,"Replica Exchange Order\n");
+        for (i=0;i<n;i++)
+        {
+            fprintf(fplog,"Replica %d:",i);
+            for (j=0;j<maxswap;j++)
+            {
+                if (order[i][j] < 0) break;
+                fprintf(fplog,"%2d",order[i][j]);
+            }
+            fprintf(fplog,"\n");
+        }
+        fflush(fplog);
+    }
+}
+
 gmx_bool replica_exchange(FILE *fplog,const t_commrec *cr,struct gmx_repl_ex *re,
-                          t_state *state,real *ener,
-                          t_state *state_local,
-                          gmx_large_int_t step,real time)
+                          t_state *state,gmx_enerdata_t *enerd,
+                          t_state *state_local,gmx_large_int_t step,real time)
 {
     gmx_multisim_t *ms;
-    int  exchange=-1,shift;
+    int exchange=-1,shift;
+    int i,j,maxswap=0;
+    int *exchanges=NULL;
+    int **cyclic=NULL;
+    int **order=NULL;
     gmx_bool bExchanged=FALSE;
-    
+
     ms = cr->ms;
-  
     if (MASTER(cr))
     {
-        exchange = get_replica_exchange(fplog,ms,re,ener,det(state->box),
-                                        step,time);
-        bExchanged = (exchange >= 0);
+        snew(exchanges,re->nrepl);
+        get_replica_exchange(fplog,ms,re,enerd,det(state->box),step,time,exchanges);
+        bExchanged = (exchanges[re->repl] != re->nrepl);  /* only mark as exchanged if it has a shuffled index */
+        snew(cyclic,re->nrepl);
+        snew(order,re->nrepl);
+
+        /* identify the cyclic decomposition of the permutation (very easy if neighbor replica exchange) */
+        cyclic_decomposition(fplog,exchanges,cyclic,re->nrepl,&maxswap); 
+
+        /* now translate the decompsition into a replica exchange order at each step */
+        compute_exchange_order(fplog,cyclic,order,re->nrepl,maxswap);
+
+        sfree(cyclic); /* don't need this anymore */
     }
-    
     if (PAR(cr))
     {
 #ifdef GMX_MPI
@@ -741,7 +1183,6 @@ gmx_bool replica_exchange(FILE *fplog,const t_commrec *cr,struct gmx_repl_ex *re
                   cr->mpi_comm_mygroup);
 #endif
     }
-    
     if (bExchanged)
     {
         /* Exchange the states */
@@ -761,17 +1202,25 @@ gmx_bool replica_exchange(FILE *fplog,const t_commrec *cr,struct gmx_repl_ex *re
         
         if (MASTER(cr))
         {
-            /* Exchange the global states between the master nodes */
-            if (debug)
+            for (i=0;i<maxswap;i++) /* there will be only one swap cycle with standard replica exchange */
             {
-                fprintf(debug,"Exchanging %d with %d\n",ms->sim,exchange);
+                exchange = order[ms->sim][i];
+
+                if (exchange != ms->sim)
+                {
+                    /* Exchange the global states between the master nodes */
+                    if (debug)
+                    {
+                        fprintf(debug,"Exchanging %d with %d\n",ms->sim,exchange);
+                    }
+                    exchange_state(ms,exchange,state);
+                }
             }
-            exchange_state(ms,exchange,state);
-            
-            if (re->type == ereTEMP)
+            if (re->type == ereTEMP || re->type == ereTL)
             {
-                scale_velocities(state,sqrt(re->q[ms->sim]/re->q[exchange]));
+                scale_velocities(state,sqrt(re->q[ereTEMP][ms->sim]/re->q[ereTEMP][exchanges[ms->sim]]));
             }
+            sfree(order);
         }
 
         /* With domain decomposition the global state is distributed later */
@@ -786,7 +1235,6 @@ gmx_bool replica_exchange(FILE *fplog,const t_commrec *cr,struct gmx_repl_ex *re
             }
         }
     }
-        
     return bExchanged;
 }
 
@@ -794,48 +1242,50 @@ void print_replica_exchange_statistics(FILE *fplog,struct gmx_repl_ex *re)
 {
     real *prob;
     int  i;
-  
+
     fprintf(fplog,"\nReplica exchange statistics\n");
-    fprintf(fplog,"Repl  %d attempts, %d odd, %d even\n",
-            re->nattempt[0]+re->nattempt[1],re->nattempt[1],re->nattempt[0]);
 
-    snew(prob,re->nrepl);
-  
-    fprintf(fplog,"Repl  average probabilities:\n");
-    for(i=1; i<re->nrepl; i++)
+    if (re->nex == 0)
     {
-        if (re->nattempt[i%2] == 0)
-        {
-            prob[i] = 0;
-        }
-        else
-        {
-            prob[i] =  re->prob_sum[i]/re->nattempt[i%2];
-        }
-    }
-    print_ind(fplog,"",re->nrepl,re->ind,NULL);
-    print_prob(fplog,"",re->nrepl,prob);
+        fprintf(fplog,"Repl  %d attempts, %d odd, %d even\n",
+                re->nattempt[0]+re->nattempt[1],re->nattempt[1],re->nattempt[0]);
 
-    fprintf(fplog,"Repl  number of exchanges:\n");
-    print_ind(fplog,"",re->nrepl,re->ind,NULL);
-    print_count(fplog,"",re->nrepl,re->nexchange);
-  
-    fprintf(fplog,"Repl  average number of exchanges:\n");
-    for(i=1; i<re->nrepl; i++)
-    {
-        if (re->nattempt[i%2] == 0)
+        snew(prob,re->nrepl);
+        for(i=1; i<re->nrepl; i++)
         {
-            prob[i] = 0;
+            if (re->nattempt[i%2] == 0)
+            {
+                prob[i] = 0;
+            }
+            else
+            {
+                prob[i] =  re->prob_sum[i]/re->nattempt[i%2];
+            }
         }
-        else
+        print_ind(fplog,"",re->nrepl,re->ind,NULL);
+        print_prob(fplog,"",re->nrepl,prob);
+
+        fprintf(fplog,"Repl  number of exchanges:\n");
+        print_ind(fplog,"",re->nrepl,re->ind,NULL);
+        print_count(fplog,"",re->nrepl,re->nexchange);
+
+        fprintf(fplog,"Repl  average number of exchanges:\n");
+        for(i=1; i<re->nrepl; i++) 
         {
-            prob[i] =  ((real)re->nexchange[i])/re->nattempt[i%2];
+            if (re->nattempt[i%2] == 0)
+            {
+                prob[i] = 0;
+            }
+            else
+            {
+                prob[i] =  ((real)re->nexchange[i])/re->nattempt[i%2];
+            }
         }
+        print_ind(fplog,"",re->nrepl,re->ind,NULL);
+        print_prob(fplog,"",re->nrepl,prob);
+        sfree(prob);
+        fprintf(fplog,"\n");
     }
-    print_ind(fplog,"",re->nrepl,re->ind,NULL);
-    print_prob(fplog,"",re->nrepl,prob);
-
-    sfree(prob);
-  
-    fprintf(fplog,"\n");
+    /* print the transition matrix */
+    print_matrix(fplog,"",re->nrepl,re->nmoves,re->nattempt);
 }